diff --git a/.gitignore b/.gitignore index 5551c3f..ba5fe3a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ minishell a.out *.d *.o -obj/ \ No newline at end of file +obj/ +.vscode diff --git a/Makefile b/Makefile index 4050d6f..b33949b 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,12 @@ # **************************************************************************** # # # -# :::::::: # -# Makefile :+: :+: # -# +:+ # -# By: whaffman +#+ # -# +#+ # -# Created: 2024/10/15 11:48:46 by whaffman #+# #+# # -# Updated: 2025/02/04 16:47:57 by whaffman ######## odam.nl # +# ::: :::::::: # +# Makefile :+: :+: :+: # +# +:+ +:+ +:+ # +# By: qmennen +#+ +:+ +#+ # +# +#+#+#+#+#+ +#+ # +# Created: 2024/10/15 11:48:46 by whaffman #+# #+# # +# Updated: 2025/02/04 16:57:05 by qmennen ### ########.fr # # # # **************************************************************************** # @@ -22,7 +22,7 @@ LIBFT = $(LIBFT_PATH)/libft.a OBJ_PATH = obj -VPATH = src:src/enviroment:src/prompt +VPATH = src:src/enviroment:src/prompt:src/tokenizer SOURCES = $(shell basename -a $(shell find $(SRC_PATH) -type f -name "*.c")) OBJECTS = $(addprefix $(OBJ_PATH)/, $(SOURCES:.c=.o)) @@ -31,11 +31,13 @@ DEPENDS = ${OBJECTS:.o=.d} CC = cc RM = rm -rf -INCLUDES = -I./$(INC_PATH) -I./$(LIBFT_INC_PATH) -CFLAGS = -Wall -Wextra -Werror -MMD -fsanitize=address,undefined -g - -LDLIBS := -L$(LIBFT_PATH) -lft -lreadline +INCLUDES = -I./$(INC_PATH) -I./$(LIBFT_INC_PATH) +CFLAGS = -Wall -Wextra -Werror -fsanitize=address,undefined -MMD +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),Linux) + LDLIBS := -L$(LIBFT_PATH) -lft -lreadline +endif all: $(NAME) diff --git a/README.md b/README.md index b7de134..e91f981 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # Minishell +======= ## Allowed Functions ### `` diff --git a/inc/minishell.h b/inc/minishell.h index 983de75..cb1371a 100644 --- a/inc/minishell.h +++ b/inc/minishell.h @@ -13,18 +13,68 @@ #ifndef MINISHELL_H # define MINISHELL_H -# define SUCCESS 1 -# define FAILURE 0 - # include "allowed.h" # include "libft.h" # include "enviroment.h" # include "prompt.h" +# define TRUE 1 +# define FALSE 0 + +# define SUCCESS 1 +# define FAILURE 0 + + + typedef struct s_minishell { t_enviroment *enviroment; char *pwd; } t_minishell; -#endif // MINISHELL_H + +typedef enum +{ + T_WORD, + T_PIPE, + T_REDIRECT_IN, + T_REDIRECT_OUT, + T_APPEND_OUT, + T_EOF, + T_ERROR +} TokenType; + +typedef struct s_token +{ + TokenType type; + char *value; + int position; +} t_token; + +typedef struct s_lexer +{ + char *input; + int pos; + int n_pos; + char current_char; +} t_lexer; + +int ft_isspace(const char c); + +/** + * Lexer + */ +t_lexer *ft_lexer_new(const char *input); +void ft_lexer_free(t_lexer *lexer); +void ft_lexer_readchar(t_lexer *lexer); +char *ft_lexer_readword(t_lexer *lexer); +t_list *ft_parse_input(t_lexer *lexer); +/** + * Token + */ +t_token *ft_token_next(t_lexer *lexer); +t_token *ft_token_new(TokenType type, char *c, int pos); +void ft_token_free(t_token *token); +void ft_clear_tokenlist(void *content); +t_token *ft_parse_token(t_lexer *lexer); +#endif diff --git a/src/main.c b/src/main.c index dcf12cb..4265206 100644 --- a/src/main.c +++ b/src/main.c @@ -12,6 +12,39 @@ #include "minishell.h" +// void print_list(void *content) +// { +// t_token *token; +// token = (t_token *)content; +// ft_printf("%s\n", token->value); +// } + +// int main(int argc, char **argv, char **envp) +// { +// (void)argc; +// (void)envp; +// // char **env; +// // t_enviroment *enviroment = NULL; +// t_lexer *lexer; +// t_list *list; + +// // while (*envp != NULL) +// // { +// // env = ft_split(*envp, '='); +// // add_enviroment(&enviroment, env[0], env[1]); +// // envp++; +// // } + +// lexer = ft_lexer_new(argv[1]); +// list = ft_parse_input(lexer); +// ft_lstiter(list, print_list); + +// ft_lstclear(&list, ft_clear_tokenlist); +// ft_lexer_free(lexer); +// // print_enviroment(enviroment); +// return 0; +// } + int main(int argc, char **argv, char **envp) { t_enviroment *enviroment; diff --git a/src/tokenizer/lexer.c b/src/tokenizer/lexer.c new file mode 100644 index 0000000..ed9a65a --- /dev/null +++ b/src/tokenizer/lexer.c @@ -0,0 +1,88 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* lexer.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: qmennen +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2025/02/04 18:04:07 by qmennen #+# #+# */ +/* Updated: 2025/02/04 20:53:26 by qmennen ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "minishell.h" + +t_lexer *ft_lexer_new(const char *input) +{ + t_lexer *lexer; + + lexer = malloc(sizeof(t_lexer)); + if (!lexer) + { + perror("failed assigning lexer memory"); + exit(EXIT_FAILURE); + } + lexer->input = ft_strdup(input); + lexer->pos = 0; + lexer->n_pos = 1; + lexer->current_char = '\0'; + if (ft_strlen(input) > 0) + lexer->current_char = *input; + return (lexer); +} + +void ft_lexer_readchar(t_lexer *lexer) +{ + if ((size_t)lexer->n_pos > ft_strlen(lexer->input)) + { + lexer->current_char = '\0'; + return ; + } + lexer->current_char = lexer->input[lexer->n_pos]; + lexer->pos = lexer->n_pos; + lexer->n_pos++; +} + +static char *ft_parse_quotes(t_lexer *lexer) +{ + int start; + int len; + char qc; + char *word; + + qc = lexer->current_char; + word = NULL; + ft_lexer_readchar(lexer); + start = lexer->pos; + while (lexer->current_char != '\0' && lexer->current_char != qc) + ft_lexer_readchar(lexer); + len = lexer->pos - start; + word = malloc(sizeof(char) * len); + ft_strlcpy(word, lexer->input + start, len + 1); + if (lexer->current_char == qc) + ft_lexer_readchar(lexer); + return (word); +} + +char *ft_lexer_readword(t_lexer *lexer) +{ + int start; + int len; + char *word; + + start = lexer->pos; + if (lexer->current_char == '"' || lexer->current_char == '\'') + { + return (ft_parse_quotes(lexer)); + } + while (ft_isprint(lexer->current_char) && lexer->current_char != '<' + && lexer->current_char != '>' && lexer->current_char != '|' + && lexer->current_char != '\0') + { + ft_lexer_readchar(lexer); + } + len = lexer->pos - start; + word = malloc(sizeof(char) * len); + ft_strlcpy(word, lexer->input + start, len + 1); + return (word); +} diff --git a/src/tokenizer/lexer_utils.c b/src/tokenizer/lexer_utils.c new file mode 100644 index 0000000..aad99ce --- /dev/null +++ b/src/tokenizer/lexer_utils.c @@ -0,0 +1,35 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* lexer_utils.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: qmennen +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2025/02/04 20:54:09 by qmennen #+# #+# */ +/* Updated: 2025/02/04 20:54:36 by qmennen ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "minishell.h" + +void ft_clear_tokenlist(void *content) +{ + t_token *token; + + token = (t_token *)content; + ft_token_free(token); +} + +void ft_token_free(t_token *token) +{ + if (token->value) + free(token->value); + free(token); +} + +void ft_lexer_free(t_lexer *lexer) +{ + if (lexer->input) + free(lexer->input); + free(lexer); +} diff --git a/src/tokenizer/tokenizer.c b/src/tokenizer/tokenizer.c new file mode 100644 index 0000000..263ab05 --- /dev/null +++ b/src/tokenizer/tokenizer.c @@ -0,0 +1,86 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* tokenizer.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: qmennen +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2025/02/04 16:07:58 by qmennen #+# #+# */ +/* Updated: 2025/02/04 20:57:25 by qmennen ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "minishell.h" + +/** + * @brief Parses the input from the lexer and returns a list of tokens. + * + * This function continuously retrieves the next token from the lexer and adds + * it to a linked list until an end-of-file (EOF) or error token is encountered. + * The list of tokens is then returned. + * + * @param lexer A pointer to the lexer structure containing + * the input to be parsed. + * @return A linked list of tokens parsed from the input. + */ +t_list *ft_parse_input(t_lexer *lexer) +{ + t_list *list; + t_token *token; + + list = NULL; + while (TRUE) + { + token = ft_token_next(lexer); + if (token->type == T_EOF || token->type == T_ERROR) + break ; + ft_lstadd_back(&list, ft_lstnew(token)); + } + ft_token_free(token); + return (list); +} + +/** + * @brief Retrieves the next token from the lexer. + * + * This function reads the next token from the lexer, skipping any whitespace + * characters. It handles different types of tokens such as end-of-file (EOF), + * special characters ('<', '>', '|'), printable characters, and errors. + * + * @param lexer A pointer to the lexer structure. + * @return A pointer to the newly created token. + * + * The function performs the following steps: + * 1. Skips any whitespace characters. + * 2. Checks the current character in the lexer: + * - If it is the end-of-file character ('\0'), creates an EOF token. + * - If it is a special character ('<', '>', '|'), parses + * the token accordingly. + * - If it is a printable character, reads the word and creates a word token. + * - Otherwise, creates an error token. + */ +t_token *ft_token_next(t_lexer *lexer) +{ + t_token *token; + char *word; + int current_pos; + + token = NULL; + while (ft_isspace(lexer->current_char)) + ft_lexer_readchar(lexer); + current_pos = lexer->pos; + if (lexer->current_char == '\0') + token = ft_token_new(T_EOF, NULL, current_pos); + else if (lexer->current_char == '<' || lexer->current_char == '>' + || lexer->current_char == '|') + token = ft_parse_token(lexer); + else if (ft_isprint(lexer->current_char)) + { + word = ft_lexer_readword(lexer); + token = ft_token_new(T_WORD, word, current_pos); + free(word); + } + else + token = ft_token_new(T_ERROR, NULL, current_pos); + return (token); +} diff --git a/src/tokenizer/tokens.c b/src/tokenizer/tokens.c new file mode 100644 index 0000000..00ad78f --- /dev/null +++ b/src/tokenizer/tokens.c @@ -0,0 +1,58 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* tokens.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: qmennen +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2025/02/04 18:02:56 by qmennen #+# #+# */ +/* Updated: 2025/02/04 20:53:10 by qmennen ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "minishell.h" + +t_token *ft_parse_token(t_lexer *lexer) +{ + t_token *token; + + token = NULL; + if (lexer->current_char == '|') + { + token = ft_token_new(T_PIPE, "|", lexer->pos); + } + else if (lexer->current_char == '<') + { + token = ft_token_new(T_REDIRECT_IN, "<", lexer->pos); + } + else if (lexer->current_char == '>' && lexer->input[lexer->pos + 1] == '>') + { + token = ft_token_new(T_APPEND_OUT, ">>", lexer->pos); + ft_lexer_readchar(lexer); + } + else if (lexer->current_char == '>') + { + token = ft_token_new(T_REDIRECT_OUT, ">", lexer->pos); + } + ft_lexer_readchar(lexer); + return (token); +} + +t_token *ft_token_new(TokenType type, char *c, int pos) +{ + t_token *token; + + token = malloc(sizeof(t_token)); + if (!token) + { + perror("failed assigning token memory"); + exit(EXIT_FAILURE); + } + token->type = type; + token->position = pos; + if (c) + token->value = ft_strdup(c); + else + token->value = NULL; + return (token); +} diff --git a/src/util.c b/src/util.c new file mode 100644 index 0000000..c9d402a --- /dev/null +++ b/src/util.c @@ -0,0 +1,18 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* util.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: qmennen +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2025/02/04 16:31:29 by qmennen #+# #+# */ +/* Updated: 2025/02/04 20:54:21 by qmennen ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "minishell.h" + +int ft_isspace(const char c) +{ + return (c == ' ' || c == '\t' || c == '\v' || c == '\n' || c == '\v'); +}