tokenizer_v3

This commit is contained in:
gazhonsepaskwa
2025-01-17 17:04:38 +01:00
parent 8c64e2c6f6
commit c36c1550d7
7 changed files with 190 additions and 244 deletions

View File

@@ -12,45 +12,34 @@
#include "tokenizer/tokenizer.h" #include "tokenizer/tokenizer.h"
/*str_equal(char *original)*/ void truncate_comment(char *str)
/*{*/ {
/**/ int i;
/*}*/
/*void truncate_after_exit_word(char *str)*/ i = 0;
/*{*/ while (str[i])
/* int i;*/ {
/* int depth;*/ if (str[i] == '#')
/* int truncate_mode;*/ {
/**/ str[i] = 0;
/* i = 0;*/ return ;
/* depth = 0;*/ }
/* truncate_mode = FALSE;*/ i++;
/* while (lst[i])*/
/* {*/
/**/
/* }*/
/*}*/
void print_linked_list(t_node *head) {
t_node *current = head;
while (current != NULL) {
printf("Node - Priority: %d, Depth: %d, TOKEN: |%s|\n", current->priority, current->depth, current->token);
current = current->next; // Move to the next node
} }
} }
int main (int ac, char **av) int main (int ac, char **av)
{ {
(void)ac; (void)ac;
char *str = ft_strtrim(av[1], " ");
t_node *lst; t_node *lst;
if (str) truncate_comment(av[1]);
{ lst = tokenize(av[1]);
/*truncate_after_exit_word(str);*/ if (!lst)
lst = tokenize(str); return (1);
print_linked_list(lst); // debug_linked_list(lst);
} free_linked_list(lst);
// free la list wesh
} }

View File

@@ -1,93 +0,0 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* copy_token_string.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: nalebrun <nalebrun@student.s19.be> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/01/16 12:48:50 by nalebrun #+# #+# */
/* Updated: 2025/01/16 12:48:50 by nalebrun ### ########.fr */
/* */
/* ************************************************************************** */
#include "tokenizer.h"
/*
** Tells whether c is one of the shell operator characters the
** tokenizer treats as a meta token: & | ( ) < >.
** Returns 1 for a meta character, 0 otherwise.
*/
int	is_meta_token(char c)
{
	return (c == '&' || c == '|' || c == '(' || c == ')'
		|| c == '<' || c == '>');
}
/*
** Returns the length of the meta token that starts str.
** The two-character operators "&&", "||", "<<" and ">>" consume 2
** characters; every other meta character ('&', '|', '<', '>', '(',
** ')') consumes 1.
** Fix: the original returned 2 for a lone '&', silently swallowing
** the character that followed it; '&' now requires a second '&'
** like the other doubled operators. (No str[i + 1] NUL guard is
** needed: if str[0] is the last character, str[1] is '\0' and the
** comparison simply fails.)
*/
int	skip_meta_token(char *str)
{
	if ((str[0] == '&' && str[1] == '&')
		|| (str[0] == '|' && str[1] == '|')
		|| (str[0] == '<' && str[1] == '<')
		|| (str[0] == '>' && str[1] == '>'))
		return (2);
	return (1);
}
/*
** Returns the index of the first meta character (& | ( ) < >)
** in str, or the length of str if it contains none.
*/
int	go_to_next_meta_token(char *str)
{
	int	n;

	n = 0;
	while (str[n])
	{
		if (str[n] == '&' || str[n] == '|' || str[n] == '('
			|| str[n] == ')' || str[n] == '<' || str[n] == '>')
			break ;
		n++;
	}
	return (n);
}
/*
** Length of the leading run of spaces plus the run of non-space
** characters that follows it (i.e. the index just past the first
** word, skipping leading blanks).
*/
static int	go_to_second_next_space_block(char *str)
{
	int	len;

	len = 0;
	while (str[len] == ' ')
		len++;
	while (str[len] != '\0' && str[len] != ' ')
		len++;
	return (len);
}
/*
** Extracts the next token of str as a freshly allocated,
** space-trimmed string. last_token (first character of the
** previous token) decides how far the token extends: right after
** a redirection ('<' or '>') the token is a single word, otherwise
** it runs up to the next meta character; a meta token is copied on
** its own. Returns NULL on allocation failure; caller frees.
*/
char	*copy_token_string(char *str, char last_token)
{
	char	*raw;
	char	*trimmed;
	int		len;
	int		k;

	len = 0;
	while (str[len] == ' ')
		len++;
	if (is_meta_token(str[len]))
		len += skip_meta_token(&str[len]);
	else if (last_token == '<' || last_token == '>')
		len += go_to_second_next_space_block(&str[len]);
	else
		len += go_to_next_meta_token(&str[len]);
	raw = malloc(len + 1);
	if (!raw)
		return (NULL);
	k = 0;
	while (k < len)
	{
		raw[k] = str[k];
		k++;
	}
	raw[k] = '\0';
	trimmed = ft_strtrim(raw, " ");
	free(raw);
	return (trimmed);
}

View File

@@ -1,25 +0,0 @@
#include "tokenizer.h"
/*
** Advances past the token at the start of str and returns the
** number of characters consumed. The first character of each token
** is remembered across calls (static last_token, initially '#') so
** that the operand following a redirection ('<' or '>') is consumed
** as one space-delimited word instead of running to the next meta
** character.
*/
int	goto_next_token(char *str)
{
	static char	last_token = '#';
	int			len;

	len = 0;
	if (is_meta_token(str[0]))
		len = skip_meta_token(str);
	else if (last_token == '<' || last_token == '>')
	{
		while (str[len] == ' ')
			len++;
		while (str[len] && !is_meta_token(str[len]) && str[len] != ' ')
			len++;
	}
	else
		len = go_to_next_meta_token(str);
	last_token = str[0];
	return (len);
}

View File

@@ -0,0 +1,88 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* tokenizer_utils.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: nalebrun <nalebrun@student.s19.be> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/01/15 13:38:49 by nalebrun #+# #+# */
/* Updated: 2025/01/15 13:38:49 by nalebrun ### ########.fr */
/* */
/* ************************************************************************** */
#include "tokenizer.h"
/*
** Allocates a new list node holding its own copy of val with the
** given token type. Returns NULL if val is NULL or on allocation
** failure. Fix: the ft_strdup result is now checked — the original
** left node->val NULL on duplication failure (later printed with
** "%s", which is undefined behavior) and leaked nothing but
** propagated a half-built node.
*/
t_node	*create_node(char *val, t_token token)
{
	t_node	*node;

	if (!val)
		return (NULL);
	node = malloc(sizeof(t_node));
	if (!node)
		return (NULL);
	node->val = ft_strdup(val);
	if (!node->val)
		return (free(node), NULL);
	node->token = token;
	node->next = NULL;
	return (node);
}
/*
** Appends a new node holding val at the end of the list starting at
** head. Returns 1 on success, 0 if val is NULL or the node could
** not be created.
*/
int	add_node_back(t_node *head, char *val, t_token token)
{
	t_node	*last;

	if (!val)
		return (0);
	last = head;
	while (last->next)
		last = last->next;
	last->next = create_node(val, token);
	return (last->next != NULL);
}
/*
** Frees every node of the list along with the string each node owns.
*/
void	free_linked_list(t_node *head)
{
	t_node	*next;

	while (head != NULL)
	{
		next = head->next;
		free(head->val);
		free(head);
		head = next;
	}
}
/*
** Inserts a new node holding val (token left at 0 / UNSET) right
** after elem. Returns 1 on success, 0 on allocation failure.
** Fix: the original overwrote elem->next before checking the
** allocation, so a failed create_node detached — and leaked — the
** entire tail of the list; the list is now left untouched on
** failure.
*/
int	create_node_after(t_node *elem, char *val)
{
	t_node	*node;

	node = create_node(val, 0);
	if (!node)
		return (0);
	node->next = elem->next;
	elem->next = node;
	return (1);
}
/*
** Debug helper (temporary — to be removed): prints each node's
** token type and value, framed by msg. Fix: the labels are now
** string literals held in a const char * instead of unchecked
** ft_strdup copies — the original could pass NULL to printf("%s")
** on allocation failure (undefined behavior) and heap-allocated a
** constant on every node for no benefit.
*/
void	debug_linked_list(t_node *head, char *msg)
{
	t_node		*current;
	const char	*label;

	printf("----------------------------------------{%s} \n", msg);
	current = head;
	while (current != NULL)
	{
		if (current->token == OPERATOR)
			label = "OPERATOR";
		else if (current->token == WORD)
			label = "WORD";
		else if (current->token == UNSET)
			label = "UNSET";
		else
			label = "???";
		printf("| Node - TOKEN: %s -> val: |%s|\n", label, current->val);
		current = current->next;
	}
	printf("----------------------------------------\n\n");
}

View File

@@ -12,48 +12,75 @@
#include "tokenizer.h" #include "tokenizer.h"
static t_node *create_head(int *depth, char *str, char *last_token) static t_node *tokenize_base(char *str)
{ {
int i;
t_node *head; t_node *head;
char *token; char **tab;
ajust_depth(depth, str[0]); tab = ft_split(str, " \t\n");
token = copy_token_string(&str[0], *last_token); if (!tab)
if (!(*token))
return (NULL); return (NULL);
head = create_node(token, get_priority(token), *depth); head = create_node(tab[0], 0);
if (!head) if (!head)
return (NULL); return (free(tab), NULL);
*last_token = str[0]; i = 1;
free(token); while (tab[i])
{
if (!add_node_back(head, tab[i], 0))
return (free(tab), NULL);
i++;
}
free_tab(tab);
return (head); return (head);
} }
static void set_token(t_node *head)
{
t_node *it;
it = head;
while (it != NULL)
{
it->token = get_token(it->val);
it = it->next;
}
}
static int unstick_nodes(t_node *head)
{
t_node *it;
it = head;
while (it != NULL)
{
if (is_sticked(it->val)) // undefined fct for the moment
{
// if meta -> first part = jusqua plus meta
// else -> first part = jusqua meta
// secnd part = rest
// it->val = first part
// create a new node after [create_node_after()] with the second part of the string
}
it = it->next;
}
return (1);
}
t_node *tokenize(char *str) t_node *tokenize(char *str)
{ {
int i;
int depth;
char *token;
t_node *head; t_node *head;
char last_token;
depth = 0; head = tokenize_base(str);
last_token = ' '; if (!head)
head = create_head(&depth, str, &last_token);
i = goto_next_token(&str[0]);
while (str[i])
{
ajust_depth(&depth, str[i]);
token = copy_token_string(&str[i], last_token);
if (!token)
return (NULL); return (NULL);
if (token[0] != 0) debug_linked_list(head, "base tokenized");
add_node_back(head, depth, token); if (!unstick_nodes(head))
while (str[i] == ' ') return (NULL);
i++; debug_linked_list(head, "nodes unsticked");
last_token = str[i]; // stick_quote_node(head);
i += goto_next_token(&str[i]); // debug_linked_list(head);
free(token); set_token(head);
} debug_linked_list(head, "token set");
return (head); return (head);
} }

View File

@@ -15,23 +15,26 @@
# include "../includes/minishell.h" # include "../includes/minishell.h"
typedef enum e_token
{
UNSET,
OPERATOR,
WORD
} t_token;
typedef struct s_node typedef struct s_node
{ {
struct s_node *next; struct s_node *next;
char *token; char *val;
int priority; enum e_token token;
int depth;
} t_node; } t_node;
t_node *tokenize(char *str); t_node *tokenize(char *str);
t_node *create_node(char *token, int priority, int depth); t_node *create_node(char *val, t_token token);
void add_node_back(t_node *head, int depth, char *token); int add_node_back(t_node *head, char *val, t_token token);
void ajust_depth(int *depth, char c); void free_linked_list(t_node *stack);
int get_priority(char *token); t_token get_token(char *str);
char *copy_token_string(char *start, char last_token); int create_node_after(t_node *elem, char *val);
int goto_next_token(char *str); void debug_linked_list(t_node *head, char *msg);
int skip_meta_token(char *str);
int is_meta_token(char c);
int go_to_next_meta_token(char *str);
#endif #endif

View File

@@ -1,57 +1,14 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* tokenizer_utils.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: nalebrun <nalebrun@student.s19.be> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/01/15 13:38:49 by nalebrun #+# #+# */
/* Updated: 2025/01/15 13:38:49 by nalebrun ### ########.fr */
/* */
/* ************************************************************************** */
#include "tokenizer.h" #include "tokenizer.h"
t_node *create_node(char *token, int priority, int depth) t_token get_token(char *str)
{ {
t_node *node; t_token token;
node = malloc(sizeof(t_node)); if (!strncmp(str, "&", 1) || !strncmp(str, "|", 1)
if (!node) || !strncmp(str, "(", 1) || !strncmp(str, ")", 1)
return (NULL); || !strncmp(str, "<", 1) || !strncmp(str, ">", 1))
node->token = ft_strdup(token); token = OPERATOR;
node->priority = priority;
node->depth = depth;
node->next = NULL;
return (node);
}
int get_priority(char *token)
{
int priority;
if (token[0] == '&' && token[1] && token[1] == '&')
priority = 2;
else if (token[0] == '|' && token[1] && token[1] == '|')
priority = 2;
else if (token[0] == '|')
priority = 1;
else else
priority = 0; token = WORD;
return (priority); return (token);
}
void add_node_back(t_node *head, int depth, char *token)
{
while (head->next != NULL)
head = head->next;
head->next = create_node(token, get_priority(token), depth);
}
void ajust_depth(int *depth, char c)
{
if (c == '(')
(*depth) += 1;
if (c == ')')
(*depth) -= 1;
} }