tokenisation

This commit is contained in:
Nathan Lebrun
2025-01-15 16:29:53 +01:00
parent ee0e122e70
commit 687dd72dba
4 changed files with 268 additions and 35 deletions

View File

@@ -1,33 +1,45 @@
#include "../includes/minishell.h" /* ************************************************************************** */
/* */
/* ::: :::::::: */
/* parse.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: nalebrun <nalebrun@student.s19.be> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/01/15 08:23:41 by nalebrun #+# #+# */
/* Updated: 2025/01/15 08:23:41 by nalebrun ### ########.fr */
/* */
/* ************************************************************************** */
void truncate_after_exit_word(char **lst) #include "tokenizer/tokenizer.h"
{
int i;
int depth;
int truncate_mode;
i = 0; /*void truncate_after_exit_word(char **lst)*/
depth = 0; /*{*/
truncate_mode = FALSE; /* int i;*/
while (lst[i]) /* int depth;*/
{ /* int truncate_mode;*/
if (truncate_mode) /**/
{ /* i = 0;*/
free(lst[i]); /* depth = 0;*/
lst[i] = NULL; /* truncate_mode = FALSE;*/
} /* while (lst[i])*/
else /* {*/
{ /* if (truncate_mode)*/
if (lst[i][0] == '(') /* {*/
depth += 1; /* free(lst[i]);*/
if (lst[i][ft_strlen(lst[i]) - 1] == ')') /* lst[i] = NULL;*/
depth -= 1; /* }*/
if (!ft_strncmp(lst[i], "exit", 4) && depth == 0) /* else*/
truncate_mode = TRUE; /* {*/
} /* if (lst[i][0] == '(')*/
i++; /* depth += 1;*/
} /* if (lst[i][ft_strlen(lst[i]) - 1] == ')')*/
} /* depth -= 1;*/
/* if (!ft_strncmp(lst[i], "exit", 4) && depth == 0)*/
/* truncate_mode = TRUE;*/
/* }*/
/* i++;*/
/* }*/
/*}*/
/*void print_tab(char **lst)*/ /*void print_tab(char **lst)*/
/*{*/ /*{*/
@@ -39,19 +51,26 @@ void truncate_after_exit_word(char **lst)
/* }*/ /* }*/
/*}*/ /*}*/
/*
** Debug helper: walks the token list from `head` and prints one line
** per node showing its priority, depth and the token text between
** pipe characters.
*/
void	print_linked_list(t_node *head)
{
	while (head != NULL)
	{
		printf("Node - Priority: %d, Depth: %d, TOKEN: |%s|\n",
			head->priority, head->depth, head->token);
		head = head->next;
	}
}
int main (int ac, char **av) int main (int ac, char **av)
{ {
(void)ac; (void)ac;
char *str = av[1]; char *str = ft_strtrim(av[1], " ");
char **lst; t_node *lst;
if (str) if (str)
{ {
// replace by a custom split that also the token alone and under the form of a linked list /*truncate_after_exit_word(lst);*/
lst = ft_split(str, ' '); /*free_tab(lst);*/
truncate_after_exit_word(lst); lst = tokenize(str);
print_tab(lst); print_linked_list(lst);
free_tab(lst);
} }
} }

125
tests/tokenizer/tokenizer.c Normal file
View File

@@ -0,0 +1,125 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* tokenizer.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: nalebrun <nalebrun@student.s19.be> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/01/15 13:27:57 by nalebrun #+# #+# */
/* Updated: 2025/01/15 13:27:57 by nalebrun ### ########.fr */
/* */
/* ************************************************************************** */
#include "tokenizer.h"
/*
** Returns 1 when `c` is a character that can start an operator or
** grouping token ('(', ')', '|', '&'); returns 0 for any other char.
*/
int	important_token(char c)
{
	return (c == '(' || c == ')' || c == '|' || c == '&');
}
/*
** Copies the next token of `start` into a freshly allocated string.
** Leading spaces are included in the copy and stripped later by the
** caller (tokenize() trims with ft_strtrim).  Operator tokens are cut
** at their exact length ("&&"/"||" take two chars, "&"/"|"/"("/")"
** take one); any other run of characters is copied up to the next
** operator character.
** Fix: the operator branches previously assigned an absolute length
** (i = 2) after the space-skip advanced `i`, so a token with leading
** spaces copied the wrong bytes (" &&" yielded " &"); they now extend
** `i` relative to the current position.  A lone '&' no longer grabs
** two characters.
** Returns NULL on allocation failure; the caller owns the buffer.
*/
char	*copy_token_string(char *start)
{
	char	*out;
	int		i;
	int		j;

	i = 0;
	while (start[i] && start[i] == ' ')
		i++;
	if (start[i] == '&' && start[i + 1] == '&')
		i += 2;
	else if (start[i] == '|' && start[i + 1] == '|')
		i += 2;
	else if (start[i] == '&' || start[i] == '|'
		|| start[i] == '(' || start[i] == ')')
		i += 1;
	else
	{
		while (start[i]
			&& start[i] != '&'
			&& start[i] != '|'
			&& start[i] != '('
			&& start[i] != ')')
			i++;
	}
	out = malloc(i + 1);
	if (!out)
		return (NULL);
	j = -1;
	while (++j < i)
		out[j] = start[j];
	out[j] = 0;
	return (out);
}
/*
** Returns how many characters of `str` to skip to reach the start of
** the next token.  Operators consume their own length plus one
** trailing space when present; a word consumes everything up to the
** next operator character plus one trailing space.
** Fix: a lone '&' (and a '|' or '&' at the very end of the string)
** previously matched no branch and fell into the word loop, which
** returned 0 because str[0] is an operator character — tokenize()
** then looped forever on the same position.  Single operators now
** always advance by at least one character.
*/
int	goto_next_token(char *str)
{
	int	i;

	i = 0;
	if (str[0] == '(' || str[0] == ')')
	{
		if (str[1] == ' ')
			return (2);
		return (1);
	}
	if (str[0] == '&' || str[0] == '|')
	{
		if (str[1] == str[0])
			return (2);
		if (str[1] == ' ')
			return (2);
		return (1);
	}
	while (str[i] && str[i] != '&' && str[i] != '|'
		&& str[i] != '(' && str[i] != ')')
		i++;
	if (str[i] == ' ')
		i++;
	return (i);
}
/*
** Splits `str` into a linked list of t_node tokens, recording the
** parenthesis depth at each token and its operator priority.
** Returns the list head, or NULL if no token could be produced.
** Fixes over the previous version:
** - the first token is handled by the same loop instead of duplicated
**   code, and an empty trimmed first token is now skipped like every
**   other empty token instead of becoming a "" head node;
** - an allocation failure mid-input returns the list built so far
**   instead of leaking every node already created;
** - the ft_strtrim result is NULL-checked before use.
*/
t_node	*tokenize(char *str)
{
	int		i;
	int		depth;
	char	*token;
	char	*trimed;
	t_node	*head;

	i = 0;
	depth = 0;
	head = NULL;
	while (str[i])
	{
		ajust_depth(&depth, str[i]);
		token = copy_token_string(&str[i]);
		if (!token)
			return (head);
		trimed = ft_strtrim(token, " ");
		free(token);
		if (trimed && trimed[0] != 0 && head == NULL)
			head = create_node(trimed, get_priority(trimed), depth);
		else if (trimed && trimed[0] != 0)
			add_node_back(head, depth, trimed);
		free(trimed);
		i += goto_next_token(&str[i]);
	}
	return (head);
}

View File

@@ -0,0 +1,32 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* parser.h :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: nalebrun <nalebrun@student.s19.be> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/01/15 13:30:12 by nalebrun #+# #+# */
/* Updated: 2025/01/15 13:30:12 by nalebrun ### ########.fr */
/* */
/* ************************************************************************** */
#ifndef TOKENIZER_H
# define TOKENIZER_H
# include "../includes/minishell.h"
/*
** One node of the token list built by tokenize(), kept in input order.
** next     - following node, NULL at the tail
** token    - heap-allocated copy of the token text (owned by the node)
** priority - 2 for "&&"/"||", 1 for "|", 0 otherwise (see get_priority)
** depth    - parenthesis nesting level where the token appears
*/
typedef struct s_node
{
struct s_node *next;
char *token;
int priority;
int depth;
} t_node;
/* Splits `str` into a linked token list and returns its head. */
t_node *tokenize(char *str);
/* Allocates a node copying `token`; returns NULL on allocation failure. */
t_node *create_node(char *token, int priority, int depth);
/* Appends a node for `token` at the tail of the non-NULL list `head`. */
void add_node_back(t_node* head, int depth, char *token);
/* Increments *depth on '(' and decrements it on ')'. */
void ajust_depth(int *depth, char c);
/* Operator priority of `token`: 2 for "&&"/"||", 1 for "|", else 0. */
int get_priority(char *token);
#endif

View File

@@ -0,0 +1,57 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* tokenizer_utils.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: nalebrun <nalebrun@student.s19.be> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/01/15 13:38:49 by nalebrun #+# #+# */
/* Updated: 2025/01/15 13:38:49 by nalebrun ### ########.fr */
/* */
/* ************************************************************************** */
#include "tokenizer.h"
/*
** Allocates a new list node holding a heap copy of `token` with the
** given `priority` and `depth`; the node's next pointer starts NULL.
** Fix: the ft_strdup result was unchecked — on failure the node kept
** a NULL token that callers later dereference (e.g. printf "%s");
** now both allocations are checked and nothing half-built escapes.
** Returns NULL on any allocation failure.
*/
t_node	*create_node(char *token, int priority, int depth)
{
	t_node	*node;

	node = malloc(sizeof(t_node));
	if (!node)
		return (NULL);
	node->token = ft_strdup(token);
	if (!node->token)
	{
		free(node);
		return (NULL);
	}
	node->priority = priority;
	node->depth = depth;
	node->next = NULL;
	return (node);
}
/*
** Returns the parsing priority of `token`: 2 for the logical
** operators "&&" and "||", 1 for a single pipe "|", and 0 for
** everything else (words, parentheses, lone '&').
*/
int	get_priority(char *token)
{
	if (token[0] == '&' && token[1] == '&')
		return (2);
	if (token[0] == '|' && token[1] == '|')
		return (2);
	if (token[0] == '|')
		return (1);
	return (0);
}
/*
** Appends a freshly created node for `token` (with its computed
** priority and the given `depth`) at the tail of the list starting at
** `head`.  `head` must be non-NULL.
*/
void	add_node_back(t_node *head, int depth, char *token)
{
	t_node	*last;

	last = head;
	while (last->next != NULL)
		last = last->next;
	last->next = create_node(token, get_priority(token), depth);
}
/*
** Updates the parenthesis nesting counter: '(' opens a level (+1),
** ')' closes one (-1), any other character leaves *depth untouched.
*/
void	ajust_depth(int *depth, char c)
{
	if (c == '(')
		++(*depth);
	else if (c == ')')
		--(*depth);
}