From 687dd72dbaa581943dd5ede316d18edd6275e6b3 Mon Sep 17 00:00:00 2001
From: Nathan Lebrun <gaz.honsepaskwa@gmail.com>
Date: Wed, 15 Jan 2025 16:29:53 +0100
Subject: [PATCH] tokenisation

---
 tests/parse.c                     |  89 ++++++++++++---------
 tests/tokenizer/tokenizer.c       | 125 ++++++++++++++++++++++++++++++
 tests/tokenizer/tokenizer.h       |  32 ++++++++
 tests/tokenizer/tokenizer_utils.c |  57 ++++++++++++++
 4 files changed, 268 insertions(+), 35 deletions(-)
 create mode 100644 tests/tokenizer/tokenizer.c
 create mode 100644 tests/tokenizer/tokenizer.h
 create mode 100644 tests/tokenizer/tokenizer_utils.c

diff --git a/tests/parse.c b/tests/parse.c
index ded8999..b501f1b 100644
--- a/tests/parse.c
+++ b/tests/parse.c
@@ -1,33 +1,45 @@
-#include "../includes/minishell.h"
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   parse.c                                            :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: nalebrun <nalebrun@student.s19.be>        +#+  +:+       +#+         */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2025/01/15 08:23:41  by nalebrun         #+#    #+#             */
+/*   Updated: 2025/01/15 08:23:41  by nalebrun        ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
 
-void truncate_after_exit_word(char **lst)
-{
-	int	i;
-	int	depth;
-	int truncate_mode;
+#include "tokenizer/tokenizer.h"
 
-	i = 0;
-	depth = 0;
-	truncate_mode = FALSE;
-	while (lst[i])
-	{
-		if (truncate_mode)
-		{
-			free(lst[i]);
-			lst[i] = NULL;
-		}
-		else
-		{
-			if (lst[i][0] == '(')
-				depth += 1;
-			if (lst[i][ft_strlen(lst[i]) - 1] == ')')
-				depth -= 1;
-			if (!ft_strncmp(lst[i], "exit", 4) && depth == 0)
-				truncate_mode = TRUE;
-		}
-		i++;
-	}
-}
+/*void truncate_after_exit_word(char **lst)*/
+/*{*/
+/*	int	i;*/
+/*	int	depth;*/
+/*	int truncate_mode;*/
+/**/
+/*	i = 0;*/
+/*	depth = 0;*/
+/*	truncate_mode = FALSE;*/
+/*	while (lst[i])*/
+/*	{*/
+/*		if (truncate_mode)*/
+/*		{*/
+/*			free(lst[i]);*/
+/*			lst[i] = NULL;*/
+/*		}*/
+/*		else*/
+/*		{*/
+/*			if (lst[i][0] == '(')*/
+/*				depth += 1;*/
+/*			if (lst[i][ft_strlen(lst[i]) - 1] == ')')*/
+/*				depth -= 1;*/
+/*			if (!ft_strncmp(lst[i], "exit", 4) && depth == 0)*/
+/*				truncate_mode = TRUE;*/
+/*		}*/
+/*		i++;*/
+/*	}*/
+/*}*/
 
 /*void print_tab(char **lst)*/
 /*{*/
@@ -39,19 +51,26 @@ void truncate_after_exit_word(char **lst)
 /*	}*/
 /*}*/
 
+/* Debug helper: print priority, depth and token text of every node in order. */
+void	print_linked_list(t_node *head)
+{
+	for (; head != NULL; head = head->next)
+		printf("Node - Priority: %d, Depth: %d, TOKEN: |%s|\n",
+			head->priority, head->depth, head->token);
+}
+
 int main (int ac, char **av)
 {
 	(void)ac;
 
-	char *str = av[1];
-	char **lst;
+	char *str = ft_strtrim(av[1], " ");	/* NOTE(review): av[1] is NULL when run without an argument - confirm ft_strtrim tolerates NULL */
+	t_node *lst;
 
 	if (str)
 	{
-		// replace by a custom split that also the token alone and under the form of a linked list
-		lst = ft_split(str, ' ');
-		truncate_after_exit_word(lst);
-		print_tab(lst);
-		free_tab(lst);
+		/* Tokenize the ft_strtrim result and print one line per node. */
+		/* TODO: str and the nodes of lst are never freed before main returns. */
+		lst = tokenize(str);
+		print_linked_list(lst);
 	}
 }
diff --git a/tests/tokenizer/tokenizer.c b/tests/tokenizer/tokenizer.c
new file mode 100644
index 0000000..1016f33
--- /dev/null
+++ b/tests/tokenizer/tokenizer.c
@@ -0,0 +1,125 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   tokenizer.c                                        :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: nalebrun <nalebrun@student.s19.be>        +#+  +:+       +#+         */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2025/01/15 13:27:57  by nalebrun         #+#    #+#             */
+/*   Updated: 2025/01/15 13:27:57  by nalebrun        ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "tokenizer.h"
+
+/* Tell whether c is one of the operator characters '(', ')', '|' or '&'. */
+int	important_token(char c)
+{
+	if (c != '(' && c != ')' && c != '|' && c != '&')
+		return (0);
+	return (1);
+}
+
+/*
+** Copy the text found at the front of start into a new malloc'ed string.
+** - start begins with an operator: "&&" or "||" (2 chars), else a single
+**   '&', '|', '(' or ')' (1 char).
+** - otherwise: the leading spaces plus every char up to the next operator or
+**   the end of the string. Spaces directly followed by an operator are
+**   returned alone, so the caller discards them and rescans at the operator.
+** Returns NULL if malloc fails; the caller frees the result.
+*/
+char	*copy_token_string(char *start)
+{
+	char	*out;
+	int		len;
+	int		j;
+
+	len = 0;
+	while (start[len] == ' ')
+		len++;
+	if (len == 0 && (start[0] == '&' || start[0] == '|'))
+		len = 1 + (start[1] == start[0]);
+	else if (len == 0 && (start[0] == '(' || start[0] == ')'))
+		len = 1;
+	else
+	{
+		while (start[len] && start[len] != '&' && start[len] != '|'
+			&& start[len] != '(' && start[len] != ')')
+			len++;
+	}
+	out = malloc(len + 1);
+	if (!out)
+		return (NULL);
+	j = -1;
+	while (++j < len)
+		out[j] = start[j];
+	out[j] = 0;
+	return (out);
+}
+
+/*
+** Return how many chars separate str from the start of the next token.
+** - "&&" or "||": 2.
+** - a single '&', '|', '(' or ')': 1, plus 1 when a space follows it.
+** - anything else: the number of chars before the next operator or the end
+**   of the string, which is 0 only when str is empty.
+**
+** An operator always advances by at least 1: a lone '&' or a '|' closing the
+** string must not yield 0, otherwise a caller that loops on
+** "i += goto_next_token(&str[i])" while str[i] is non-zero never terminates.
+** str must not be NULL.
+*/
+int	goto_next_token(char *str)
+{
+	int	i;
+
+	if (str[0] == '&' || str[0] == '|')
+	{
+		if (str[1] == str[0])
+			return (2);
+		return (1 + (str[1] == ' '));
+	}
+	if (str[0] == '(' || str[0] == ')')
+		return (1 + (str[1] == ' '));
+	i = 0;
+	while (str[i] && str[i] != '&' && str[i] != '|'
+		&& str[i] != '(' && str[i] != ')')
+		i++;
+	return (i);
+}
+
+/* Build the token list for str: operators and the text between them, each */
+/* trimmed and stored with its priority and parenthesis depth. The first    */
+/* token always gets a node; later blank ones are skipped. Returns NULL if  */
+/* a token copy (malloc / ft_strtrim) or the first node cannot be created.  */
+t_node	*tokenize(char *str)
+{
+	int		depth;
+	char	*raw;
+	char	*trimmed;
+	t_node	*head;
+
+	depth = 0;
+	head = NULL;
+	while (head == NULL || *str)
+	{
+		ajust_depth(&depth, *str);
+		raw = copy_token_string(str);
+		if (!raw)
+			return (NULL);
+		trimmed = ft_strtrim(raw, " ");
+		free(raw);
+		if (!trimmed)
+			return (NULL);
+		if (head == NULL)
+			head = create_node(trimmed, get_priority(trimmed), depth);
+		else if (trimmed[0] != 0)
+			add_node_back(head, depth, trimmed);
+		free(trimmed);
+		if (head == NULL)
+			return (NULL);
+		str += goto_next_token(str);
+	}
+	return (head);
+}
diff --git a/tests/tokenizer/tokenizer.h b/tests/tokenizer/tokenizer.h
new file mode 100644
index 0000000..b72d2fe
--- /dev/null
+++ b/tests/tokenizer/tokenizer.h
@@ -0,0 +1,32 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   tokenizer.h                                        :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: nalebrun <nalebrun@student.s19.be>        +#+  +:+       +#+         */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2025/01/15 13:30:12  by nalebrun         #+#    #+#             */
+/*   Updated: 2025/01/15 13:30:12  by nalebrun        ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#ifndef TOKENIZER_H 
+# define TOKENIZER_H
+
+# include "../includes/minishell.h"
+
+typedef struct s_node	/* one token of the list built by tokenize */
+{
+	struct s_node	*next;		/* following node; create_node sets it to NULL */
+	char			*token;		/* token text, an ft_strdup copy made by create_node */
+	int				priority;	/* from get_priority: 2 for && and ||, 1 for |, else 0 */
+	int				depth;		/* parenthesis depth tracked by ajust_depth in tokenize */
+}					t_node;
+
+t_node	*tokenize(char *str);
+t_node	*create_node(char *token, int priority, int depth);
+void	add_node_back(t_node* head, int depth, char *token);
+void	ajust_depth(int *depth, char c);
+int		get_priority(char *token);
+
+#endif
diff --git a/tests/tokenizer/tokenizer_utils.c b/tests/tokenizer/tokenizer_utils.c
new file mode 100644
index 0000000..59a0533
--- /dev/null
+++ b/tests/tokenizer/tokenizer_utils.c
@@ -0,0 +1,57 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   tokenizer_utils.c                                  :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: nalebrun <nalebrun@student.s19.be>        +#+  +:+       +#+         */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2025/01/15 13:38:49  by nalebrun         #+#    #+#             */
+/*   Updated: 2025/01/15 13:38:49  by nalebrun        ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "tokenizer.h"
+
+/* Allocate a node owning a copy of token; NULL if malloc or ft_strdup fails. */
+t_node	*create_node(char *token, int priority, int depth)
+{
+	t_node	*node;
+
+	node = malloc(sizeof(*node));
+	if (node)
+		*node = (t_node){.token = ft_strdup(token),
+			.priority = priority, .depth = depth};
+	if (node && !node->token)
+		return (free(node), NULL);
+	return (node);
+}
+
+/*
+** Rank a token from its first characters:
+** - 2 when it starts with "&&" or "||";
+** - 1 when it starts with a '|' that is not doubled;
+** - 0 for anything else, including an empty string.
+*/
+int	get_priority(char *token)
+{
+	if (token[0] == '|' && token[1] != '|')
+		return (1);
+	if ((token[0] == '&' || token[0] == '|') && token[1] == token[0])
+		return (2);
+	return (0);
+}
+
+void	add_node_back(t_node* head, int depth, char *token)	/* append a node for token at the end of the list */
+{
+	while (head->next != NULL)	/* walk to the last node; head itself must not be NULL */
+		head = head->next;
+	head->next = create_node(token, get_priority(token), depth);	/* link the new tail (NULL if create_node fails) */
+}
+
+/*
+** Adjust *depth for one character: '(' adds 1, ')' subtracts 1, others none.
+*/
+void	ajust_depth(int *depth, char c)
+{
+	*depth += (c == '(') - (c == ')');
+}