From 687dd72dbaa581943dd5ede316d18edd6275e6b3 Mon Sep 17 00:00:00 2001
From: Nathan Lebrun
Date: Wed, 15 Jan 2025 16:29:53 +0100
Subject: [PATCH] tokenisation

---
 tests/parse.c                     |  89 ++++++++++++---------
 tests/tokenizer/tokenizer.c       | 157 ++++++++++++++++++++++++++++++++++++++
 tests/tokenizer/tokenizer.h       |  43 ++++++++++
 tests/tokenizer/tokenizer_utils.c |  97 +++++++++++++++++++++++
 4 files changed, 351 insertions(+), 35 deletions(-)
 create mode 100644 tests/tokenizer/tokenizer.c
 create mode 100644 tests/tokenizer/tokenizer.h
 create mode 100644 tests/tokenizer/tokenizer_utils.c

diff --git a/tests/parse.c b/tests/parse.c
index ded8999..b501f1b 100644
--- a/tests/parse.c
+++ b/tests/parse.c
@@ -1,33 +1,45 @@
-#include "../includes/minishell.h"
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   parse.c                                            :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: nalebrun                                   +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2025/01/15 08:23:41 by nalebrun          #+#    #+#             */
+/*   Updated: 2025/01/15 08:23:41 by nalebrun         ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
 
-void	truncate_after_exit_word(char **lst)
-{
-	int		i;
-	int		depth;
-	int		truncate_mode;
+#include "tokenizer/tokenizer.h"
 
-	i = 0;
-	depth = 0;
-	truncate_mode = FALSE;
-	while (lst[i])
-	{
-		if (truncate_mode)
-		{
-			free(lst[i]);
-			lst[i] = NULL;
-		}
-		else
-		{
-			if (lst[i][0] == '(')
-				depth += 1;
-			if (lst[i][ft_strlen(lst[i]) - 1] == ')')
-				depth -= 1;
-			if (!ft_strncmp(lst[i], "exit", 4) && depth == 0)
-				truncate_mode = TRUE;
-		}
-		i++;
-	}
-}
+/*void	truncate_after_exit_word(char **lst)*/
+/*{*/
+/*	int		i;*/
+/*	int		depth;*/
+/*	int		truncate_mode;*/
+/**/
+/*	i = 0;*/
+/*	depth = 0;*/
+/*	truncate_mode = FALSE;*/
+/*	while (lst[i])*/
+/*	{*/
+/*		if (truncate_mode)*/
+/*		{*/
+/*			free(lst[i]);*/
+/*			lst[i] = NULL;*/
+/*		}*/
+/*		else*/
+/*		{*/
+/*			if (lst[i][0] == '(')*/
+/*				depth += 1;*/
+/*			if (lst[i][ft_strlen(lst[i]) - 1] == ')')*/
+/*				depth -= 1;*/
+/*			if (!ft_strncmp(lst[i], "exit", 4) && depth == 0)*/
+/*				truncate_mode = TRUE;*/
+/*		}*/
+/*		i++;*/
+/*	}*/
+/*}*/
 
 /*void print_tab(char **lst)*/
 /*{*/
@@ -39,19 +51,26 @@ void truncate_after_exit_word(char **lst)
 /*	}*/
 /*}*/
 
+void print_linked_list(t_node *head) {
+    t_node *current = head;
+    while (current != NULL) {
+        printf("Node - Priority: %d, Depth: %d, TOKEN: |%s|\n", current->priority, current->depth, current->token);
+        current = current->next; // Move to the next node
+    }
+}
+
 int main (int ac, char **av)
 {
 	(void)ac;
 
-	char *str = av[1];
-	char **lst;
+	char *str = ft_strtrim(av[1], " ");
+	t_node *lst;
 
 	if (str)
 	{
-		// replace by a custom split that also the token alone and under the form of a linked list
-		lst = ft_split(str, ' ');
-		truncate_after_exit_word(lst);
-		print_tab(lst);
-		free_tab(lst);
+		/*truncate_after_exit_word(lst);*/
+		/*free_tab(lst);*/
+		lst = tokenize(str);
+		print_linked_list(lst);
 	}
 }
diff --git a/tests/tokenizer/tokenizer.c b/tests/tokenizer/tokenizer.c
new file mode 100644
index 0000000..1016f33
--- /dev/null
+++ b/tests/tokenizer/tokenizer.c
@@ -0,0 +1,157 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   tokenizer.c                                        :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: nalebrun                                   +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2025/01/15 13:27:57 by nalebrun          #+#    #+#             */
+/*   Updated: 2025/01/15 13:27:57 by nalebrun         ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "tokenizer.h"
+
+/*
+** Tells whether `c` is one of the characters that delimit tokens:
+** a parenthesis, a pipe or an ampersand. Returns 1 if so, 0 otherwise.
+*/
+int	important_token(char c)
+{
+	return (c == '(' || c == ')' || c == '|' || c == '&');
+}
+
+/*
+** Number of consecutive spaces at the beginning of `str`.
+*/
+static int	count_spaces(const char *str)
+{
+	int	count;
+
+	count = 0;
+	while (str[count] == ' ')
+		count++;
+	return (count);
+}
+
+/*
+** Length of the token that begins exactly at `str`:
+** 2 for "&&" and "||", 1 for any other delimiter character, otherwise the
+** length of the text up to the next delimiter or the end of the string
+** (spaces inside or after that text are counted). 0 for an empty string.
+*/
+static int	token_length(const char *str)
+{
+	int	len;
+
+	if ((str[0] == '&' || str[0] == '|') && str[1] == str[0])
+		return (2);
+	if (important_token(str[0]))
+		return (1);
+	len = 0;
+	while (str[len] && !important_token(str[len]))
+		len++;
+	return (len);
+}
+
+/*
+** Returns a freshly allocated copy of the first token found in `start`,
+** leading spaces skipped. A text token may still end with spaces; the caller
+** trims them. Returns NULL if the allocation fails. The caller frees it.
+*/
+char	*copy_token_string(char *start)
+{
+	char	*out;
+	int		len;
+	int		i;
+
+	start += count_spaces(start);
+	len = token_length(start);
+	out = malloc(len + 1);
+	if (!out)
+		return (NULL);
+	i = -1;
+	while (++i < len)
+		out[i] = start[i];
+	out[len] = '\0';
+	return (out);
+}
+
+/*
+** Number of characters to skip from `str` to reach the start of the token
+** that follows the first one, or the end of the string. Spaces around the
+** first token are skipped too. The result is 0 only for an empty string, so
+** a caller looping on it always makes progress.
+*/
+int	goto_next_token(char *str)
+{
+	int	offset;
+
+	offset = count_spaces(str);
+	offset += token_length(&str[offset]);
+	offset += count_spaces(&str[offset]);
+	return (offset);
+}
+
+/*
+** Builds the list node for the token that begins at `str`, at nesting level
+** `depth`. Returns NULL if any allocation fails.
+*/
+static t_node	*new_token_node(char *str, int depth)
+{
+	char	*raw;
+	char	*trimmed;
+	t_node	*node;
+
+	raw = copy_token_string(str);
+	if (!raw)
+		return (NULL);
+	trimmed = ft_strtrim(raw, " ");
+	free(raw);
+	if (!trimmed)
+		return (NULL);
+	node = create_node(trimmed, get_priority(trimmed), depth);
+	free(trimmed);
+	return (node);
+}
+
+/*
+** Splits `str` into a linked list of tokens: "&&", "||", "|", "&", "(", ")"
+** and the text found between them, trimmed of surrounding spaces.
+** Each node records the parenthesis depth in effect at its token: "(" already
+** counts itself, ")" has already left its level.
+** Returns NULL if `str` is NULL or holds no token, or if an allocation fails
+** (the partial list is freed). The caller owns the list.
+*/
+t_node	*tokenize(char *str)
+{
+	t_node	*head;
+	t_node	*tail;
+	t_node	*node;
+	int		depth;
+	int		i;
+
+	if (!str)
+		return (NULL);
+	head = NULL;
+	tail = NULL;
+	depth = 0;
+	i = count_spaces(str);
+	while (str[i])
+	{
+		ajust_depth(&depth, str[i]);
+		node = new_token_node(&str[i], depth);
+		if (!node)
+		{
+			free_token_list(head);
+			return (NULL);
+		}
+		if (tail)
+			tail->next = node;
+		else
+			head = node;
+		tail = node;
+		i += goto_next_token(&str[i]);
+	}
+	return (head);
+}
diff --git a/tests/tokenizer/tokenizer.h b/tests/tokenizer/tokenizer.h
new file mode 100644
index 0000000..b72d2fe
--- /dev/null
+++ b/tests/tokenizer/tokenizer.h
@@ -0,0 +1,43 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   tokenizer.h                                        :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: nalebrun                                   +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2025/01/15 13:30:12 by nalebrun          #+#    #+#             */
+/*   Updated: 2025/01/15 13:30:12 by nalebrun         ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#ifndef TOKENIZER_H
+# define TOKENIZER_H
+
+# include "../includes/minishell.h"
+
+/*
+** One token of the command line.
+** next:     following token, NULL for the last one
+** token:    text of the token, owned by the node
+** priority: rank given by get_priority() (2: "&&"/"||", 1: "|", 0: other)
+** depth:    parenthesis nesting level recorded when the token was read
+*/
+typedef struct s_node
+{
+	struct s_node	*next;
+	char			*token;
+	int				priority;
+	int				depth;
+}	t_node;
+
+t_node	*tokenize(char *str);
+t_node	*create_node(char *token, int priority, int depth);
+void	add_node_back(t_node *head, int depth, char *token);
+void	ajust_depth(int *depth, char c);
+int		get_priority(char *token);
+void	free_token_list(t_node *head);
+int		important_token(char c);
+char	*copy_token_string(char *start);
+int		goto_next_token(char *str);
+
+#endif
diff --git a/tests/tokenizer/tokenizer_utils.c b/tests/tokenizer/tokenizer_utils.c
new file mode 100644
index 0000000..59a0533
--- /dev/null
+++ b/tests/tokenizer/tokenizer_utils.c
@@ -0,0 +1,97 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   tokenizer_utils.c                                  :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: nalebrun                                   +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2025/01/15 13:38:49 by nalebrun          #+#    #+#             */
+/*   Updated: 2025/01/15 13:38:49 by nalebrun         ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "tokenizer.h"
+
+/*
+** Allocates a node holding its own copy of `token`.
+** Returns NULL if `token` is NULL or if an allocation fails, in which case
+** nothing is leaked.
+*/
+t_node	*create_node(char *token, int priority, int depth)
+{
+	t_node	*node;
+
+	if (!token)
+		return (NULL);
+	node = malloc(sizeof(*node));
+	if (!node)
+		return (NULL);
+	node->token = ft_strdup(token);
+	if (!node->token)
+	{
+		free(node);
+		return (NULL);
+	}
+	node->priority = priority;
+	node->depth = depth;
+	node->next = NULL;
+	return (node);
+}
+
+/*
+** Ranks a token: 2 when it starts with "&&" or "||", 1 when it starts with
+** a single '|', 0 otherwise (NULL included).
+*/
+int	get_priority(char *token)
+{
+	if (!token)
+		return (0);
+	if ((token[0] == '&' || token[0] == '|') && token[1] == token[0])
+		return (2);
+	if (token[0] == '|')
+		return (1);
+	return (0);
+}
+
+/*
+** Appends a node for `token` at the end of the list that starts at `head`.
+** Does nothing if `head` is NULL, since a new head could not be handed back
+** to the caller. If the node cannot be created the list is left unchanged.
+*/
+void	add_node_back(t_node *head, int depth, char *token)
+{
+	if (!head)
+		return ;
+	while (head->next != NULL)
+		head = head->next;
+	head->next = create_node(token, get_priority(token), depth);
+}
+
+/*
+** Updates the parenthesis nesting level for character `c`:
+** '(' adds one level, ')' removes one, anything else leaves it untouched.
+*/
+void	ajust_depth(int *depth, char c)
+{
+	if (c == '(')
+		(*depth) += 1;
+	else if (c == ')')
+		(*depth) -= 1;
+}
+
+/*
+** Frees every node of the list and the token string each one owns.
+** A NULL list is accepted.
+*/
+void	free_token_list(t_node *head)
+{
+	t_node	*next;
+
+	while (head)
+	{
+		next = head->next;
+		free(head->token);
+		free(head);
+		head = next;
+	}
+}