proof of concept on lexing

This commit is contained in:
Quinten Mennen 2025-02-04 20:15:06 +01:00
parent 1545e4af99
commit fc71f3c9ca
7 changed files with 295 additions and 20 deletions

View File

@ -1,12 +1,12 @@
# **************************************************************************** # # **************************************************************************** #
# # # #
# ::: o_ :::::: ::: # # ::: :::::::: #
# Makefile :+: / :+::+: :+: # # Makefile :+: :+: :+: #
# +:+ > +:++:+ +:+ # # +:+ +:+ +:+ #
# By: whaffman <whaffman@student.codam.nl> +#+ +:+ +#++#++:++#++ # # By: qmennen <qmennen@student.codam.nl> +#+ +:+ +#+ #
# +#+ +#+#+ +#++#+ +#+ \o/ # # +#+#+#+#+#+ +#+ #
# Created: 2024/10/15 11:48:46 by whaffman #+#+# #+#+# #+# #+# | # # Created: 2024/10/15 11:48:46 by whaffman #+# #+# #
# Updated: 2024/11/07 15:28:08 by whaffman ### ### ### ### / \ # # Updated: 2025/02/04 16:57:05 by qmennen ### ########.fr #
# # # #
# **************************************************************************** # # **************************************************************************** #
@ -22,7 +22,7 @@ LIBFT = $(LIBFT_PATH)/libft.a
OBJ_PATH = obj OBJ_PATH = obj
VPATH = src VPATH = src:src/tokenizer
SOURCES = $(shell basename -a $(shell find $(SRC_PATH) -type f -name "*.c")) SOURCES = $(shell basename -a $(shell find $(SRC_PATH) -type f -name "*.c"))
OBJECTS = $(addprefix $(OBJ_PATH)/, $(SOURCES:.c=.o)) OBJECTS = $(addprefix $(OBJ_PATH)/, $(SOURCES:.c=.o))
@ -32,7 +32,7 @@ CC = cc
RM = rm -rf RM = rm -rf
INCLUDES = -I./$(INC_PATH) -I./$(LIBFT_INC_PATH) INCLUDES = -I./$(INC_PATH) -I./$(LIBFT_INC_PATH)
CFLAGS = -Wall -Wextra -Werror -MMD CFLAGS = -Wall -Wextra -Werror -fsanitize=address,undefined -MMD
UNAME_S := $(shell uname -s) UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux) ifeq ($(UNAME_S),Linux)

View File

@ -147,6 +147,8 @@
#ifndef MINISHELL_H #ifndef MINISHELL_H
# define MINISHELL_H # define MINISHELL_H
# define TRUE 1
# include <stdio.h> # include <stdio.h>
# include <stdlib.h> # include <stdlib.h>
# include <unistd.h> # include <unistd.h>
@ -161,5 +163,48 @@
# include <sys/ioctl.h> # include <sys/ioctl.h>
# include <termios.h> # include <termios.h>
# include <term.h> # include <term.h>
# include "libft.h"
#endif // MINISHELL_H typedef enum
{
T_WORD,
T_PIPE,
T_REDIRECT_IN,
T_REDIRECT_OUT,
T_APPEND_OUT,
T_EOF,
T_ERROR
} TokenType;
/* One lexical token, as built by ft_token_new() and released by ft_token_free(). */
typedef struct s_token
{
/* Category of the token (word, pipe, redirection, end of input, ...). */
TokenType type;
/* Heap-allocated copy of the token text owned by the token; NULL when the
 * token was created without text (ft_token_next does this for T_EOF). */
char *value;
/* Lexer position (index into the input) recorded when the token started. */
int position;
} t_token;
/* Cursor over a command line, created by ft_lexer_new() and advanced one
 * character at a time by ft_lexer_readchar(). */
typedef struct s_lexer
{
/* Private duplicate of the text being tokenized; freed by ft_lexer_free(). */
char *input;
/* Index in `input` of the character held in `current_char`. */
int pos;
/* Index in `input` of the character the next ft_lexer_readchar() will load. */
int n_pos;
/* Character under the cursor; '\0' once the end of `input` is reached. */
char current_char;
} t_lexer;
/* Whitespace test; ft_token_next() uses it to skip blanks between tokens. */
int ft_isspace(const char c);
/**
* Lexer
*/
/* Allocate a lexer over a private copy of `input`; exits on malloc failure. */
t_lexer *ft_lexer_new(const char *input);
/* Release the lexer together with its copy of the input. */
void ft_lexer_free(t_lexer *lexer);
/* Move the cursor one character forward; it stays put once the end is reached. */
void ft_lexer_readchar(t_lexer *lexer);
/* Read a quoted or bare word at the cursor; returns a malloc'd string that the
 * caller must free. */
char *ft_lexer_readword(t_lexer *lexer);
/**
* Token
*/
/* Skip whitespace and build the next token; a T_EOF token marks end of input. */
t_token *ft_token_next(t_lexer *lexer);
/* Allocate a token holding a duplicate of `c` (or NULL when `c` is NULL);
 * exits on malloc failure. */
t_token *ft_token_new(TokenType type, char *c, int pos);
/* Free a token and the value string it owns. */
void ft_token_free(t_token *token);
/* void*-taking wrapper around ft_token_free(), passed to ft_lstclear() in main. */
void ft_clear_tokenlist(void *content);
#endif

View File

@ -4,6 +4,7 @@
#include <unistd.h> #include <unistd.h>
#include <string.h> #include <string.h>
#include "libft.h" #include "libft.h"
#include "minishell.h"
// /** // /**
@ -69,20 +70,43 @@ void print_prompt(void)
free(cwd); free(cwd);
} }
/*
 * ft_lstiter() callback: prints the text of the token stored in a list node,
 * followed by a newline.
 *
 * content: list node payload; it is read as a t_token.
 */
void	print_list(void *content)
{
	const t_token	*entry = content;

	ft_printf("%s\n", entry->value);
}
int main(int argc, char **argv, char **envp) int main(int argc, char **argv, char **envp)
{ {
(void)argc; (void)argc;
(void)argv; (void)envp;
char **env; // char **env;
t_enviroment *enviroment = NULL; // t_enviroment *enviroment = NULL;
t_lexer *lexer;
t_token *token;
t_list *list;
while (*envp != NULL) // while (*envp != NULL)
{ // {
env = ft_split(*envp, '='); // env = ft_split(*envp, '=');
add_enviroment(&enviroment, env[0], env[1]); // add_enviroment(&enviroment, env[0], env[1]);
envp++; // envp++;
} // }
print_enviroment(enviroment); lexer = ft_lexer_new(argv[1]);
list = NULL;
while (TRUE)
{
token = ft_token_next(lexer);
if (token->type == T_EOF)
break;
ft_lstadd_back(&list, ft_lstnew(token));
}
ft_lstiter(list, print_list);
ft_lstclear(&list, ft_clear_tokenlist);
ft_token_free(token);
ft_lexer_free(lexer);
// print_enviroment(enviroment);
return 0; return 0;
} }

84
src/tokenizer/lexer.c Normal file
View File

@ -0,0 +1,84 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* lexer.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: qmennen <qmennen@student.codam.nl> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/04 18:04:07 by qmennen #+# #+# */
/* Updated: 2025/02/04 20:11:48 by qmennen ### ########.fr */
/* */
/* ************************************************************************** */
#include "minishell.h"
/**
 * Allocate a lexer positioned on the first character of a private copy of
 * `input`.
 *
 * A NULL `input` is treated as the empty string, so the lexer starts on the
 * terminator (which ft_token_next reports as T_EOF) instead of crashing
 * inside ft_strdup/ft_strlen.
 *
 * @param input  Text to tokenize. It is duplicated; the caller keeps
 *               ownership of the original.
 * @return       A heap-allocated lexer to release with ft_lexer_free().
 *               Never NULL: an allocation failure prints an error and
 *               terminates the process.
 */
t_lexer	*ft_lexer_new(const char *input)
{
	t_lexer	*lexer;

	if (!input)
		input = "";
	lexer = malloc(sizeof(*lexer));
	if (!lexer)
	{
		perror("failed assigning lexer memory");
		exit(EXIT_FAILURE);
	}
	lexer->input = ft_strdup(input);
	if (!lexer->input)
	{
		perror("failed assigning lexer memory");
		exit(EXIT_FAILURE);
	}
	lexer->pos = 0;
	lexer->n_pos = 1;
	/* For an empty input this is the terminator itself, i.e. '\0'. */
	lexer->current_char = lexer->input[0];
	return (lexer);
}
/**
 * Advance the lexer by one character.
 *
 * Loads input[n_pos] into current_char, moves pos onto it and bumps n_pos.
 * Once the cursor sits on the terminating '\0' the call is a no-op, so pos
 * never moves past the end of the input.
 *
 * The end-of-input test looks at current_char instead of recomputing the
 * input length on every call (which made lexing quadratic). This relies on
 * current_char always mirroring input[pos], which ft_lexer_new() establishes
 * and this function maintains.
 *
 * @param lexer  Lexer to advance; must not be NULL.
 */
void	ft_lexer_readchar(t_lexer *lexer)
{
	if (lexer->current_char == '\0')
		return ;
	lexer->current_char = lexer->input[lexer->n_pos];
	lexer->pos = lexer->n_pos;
	lexer->n_pos++;
}
char *ft_lexer_readword(t_lexer *lexer)
{
int start;
int len;
char quote;
char *word;
start = lexer->pos;
if (lexer->current_char == '"' || lexer->current_char == '\'')
{
quote = lexer->current_char;
ft_lexer_readchar(lexer);
start = lexer->pos;
while (lexer->current_char != '\0' && lexer->current_char != quote)
ft_lexer_readchar(lexer);
if (lexer->current_char == quote)
{
len = lexer->pos - start;
word = malloc(sizeof(char) * len);
ft_strlcpy(word, lexer->input + start, len + 1);
ft_lexer_readchar(lexer);
return (word);
}
}
start = lexer->pos;
while (ft_isprint(lexer->current_char) && lexer->current_char != '<' && lexer->current_char != '>' && lexer->current_char != '|' && lexer->current_char != '\0')
ft_lexer_readchar(lexer);
len = lexer->pos - start;
word = malloc(sizeof(char) * len);
ft_strlcpy(word, lexer->input + start, len + 1);
return (word);
}
/**
 * Release a lexer and the copy of the input it owns.
 *
 * @param lexer  Lexer returned by ft_lexer_new(); NULL is accepted and
 *               ignored, mirroring free().
 */
void	ft_lexer_free(t_lexer *lexer)
{
	if (!lexer)
		return ;
	/* free(NULL) is a no-op, so the input pointer needs no guard. */
	free(lexer->input);
	free(lexer);
}

65
src/tokenizer/tokenizer.c Normal file
View File

@ -0,0 +1,65 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* tokenizer.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: qmennen <qmennen@student.codam.nl> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/04 16:07:58 by qmennen #+# #+# */
/* Updated: 2025/02/04 20:14:50 by qmennen ### ########.fr */
/* */
/* ************************************************************************** */
# include "minishell.h"
/**
 * Produce the next token from the lexer.
 *
 * Leading whitespace is skipped, then the current character selects the
 * token:
 *   '\0'  -> T_EOF (value NULL), the lexer is not advanced
 *   '|'   -> T_PIPE
 *   '<'   -> T_REDIRECT_IN
 *   '>'   -> T_REDIRECT_OUT, or T_APPEND_OUT when followed by another '>'
 *   other printable character     -> T_WORD read by ft_lexer_readword()
 *   other non-printable character -> T_ERROR holding that single character
 *
 * The T_ERROR case consumes the offending character, so the function always
 * returns a token and always makes progress; previously it returned NULL
 * without advancing, which callers dereferenced.
 *
 * @param lexer  Lexer to read from; must not be NULL.
 * @return       A newly allocated token the caller must release with
 *               ft_token_free(). Never NULL.
 */
t_token	*ft_token_next(t_lexer *lexer)
{
	t_token	*token;
	char	*word;
	char	unknown[2];
	int		current_pos;

	while (ft_isspace(lexer->current_char))
		ft_lexer_readchar(lexer);
	current_pos = lexer->pos;
	switch (lexer->current_char)
	{
		case '\0':
			token = ft_token_new(T_EOF, NULL, current_pos);
			break ;
		case '|':
			token = ft_token_new(T_PIPE, "|", current_pos);
			ft_lexer_readchar(lexer);
			break ;
		case '<':
			token = ft_token_new(T_REDIRECT_IN, "<", current_pos);
			ft_lexer_readchar(lexer);
			break ;
		case '>':
			/* Look one character ahead to tell ">" from ">>". */
			ft_lexer_readchar(lexer);
			if (lexer->current_char == '>')
			{
				token = ft_token_new(T_APPEND_OUT, ">>", current_pos);
				ft_lexer_readchar(lexer);
			}
			else
				token = ft_token_new(T_REDIRECT_OUT, ">", current_pos);
			break ;
		default:
			if (ft_isprint(lexer->current_char))
			{
				word = ft_lexer_readword(lexer);
				/* ft_token_new duplicates the text, so the word is ours to free. */
				token = ft_token_new(T_WORD, word, current_pos);
				free(word);
			}
			else
			{
				unknown[0] = lexer->current_char;
				unknown[1] = '\0';
				token = ft_token_new(T_ERROR, unknown, current_pos);
				ft_lexer_readchar(lexer);
			}
			break ;
	}
	return (token);
}
void ft_clear_tokenlist(void *content)
{
t_token *token;
token = (t_token *)content;
ft_token_free(token);
}

39
src/tokenizer/tokens.c Normal file
View File

@ -0,0 +1,39 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* tokens.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: qmennen <qmennen@student.codam.nl> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/04 18:02:56 by qmennen #+# #+# */
/* Updated: 2025/02/04 18:22:11 by qmennen ### ########.fr */
/* */
/* ************************************************************************** */
# include "minishell.h"
/**
 * Allocate and initialise a token.
 *
 * @param type  Category of the token.
 * @param c     Token text, or NULL for a token without text. The string is
 *              duplicated; the caller keeps ownership of `c`.
 * @param pos   Position in the lexer input where the token starts.
 * @return      A heap-allocated token to release with ft_token_free().
 *              Never NULL: any allocation failure (the token itself or the
 *              copy of `c`) prints an error and terminates the process, so a
 *              NULL value always means "no text", never "out of memory".
 */
t_token	*ft_token_new(TokenType type, char *c, int pos)
{
	t_token	*token;

	token = malloc(sizeof(*token));
	if (!token)
	{
		perror("failed assigning token memory");
		exit(EXIT_FAILURE);
	}
	token->type = type;
	token->position = pos;
	token->value = NULL;
	if (c)
	{
		token->value = ft_strdup(c);
		if (!token->value)
		{
			perror("failed assigning token memory");
			exit(EXIT_FAILURE);
		}
	}
	return (token);
}
/**
 * Release a token and the value string it owns.
 *
 * @param token  Token returned by ft_token_new(); NULL is accepted and
 *               ignored, mirroring free().
 */
void	ft_token_free(t_token *token)
{
	if (!token)
		return ;
	/* free(NULL) is a no-op, so a NULL value needs no guard. */
	free(token->value);
	free(token);
}

18
src/util.c Normal file
View File

@ -0,0 +1,18 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* util.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: qmennen <qmennen@student.codam.nl> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/04 16:31:29 by qmennen #+# #+# */
/* Updated: 2025/02/04 16:32:08 by qmennen ### ########.fr */
/* */
/* ************************************************************************** */
# include "minishell.h"
/**
 * Tell whether `c` is a whitespace character.
 *
 * Recognises the same six characters as the C-locale isspace(): space,
 * horizontal tab, newline, vertical tab, form feed and carriage return.
 * (The previous version tested '\v' twice and missed '\f' and '\r'.)
 *
 * @param c  Character to classify.
 * @return   1 if `c` is whitespace, 0 otherwise.
 */
int	ft_isspace(const char c)
{
	return (c == ' ' || c == '\t' || c == '\n'
		|| c == '\v' || c == '\f' || c == '\r');
}