minishell/src/tokenizer/tokenizer.c
2025-02-05 15:52:07 +01:00

89 lines
3.1 KiB
C

/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* tokenizer.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: qmennen <qmennen@student.codam.nl> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/04 16:07:58 by qmennen #+# #+# */
/* Updated: 2025/02/04 20:57:25 by qmennen ### ########.fr */
/* */
/* ************************************************************************** */
#include "minishell.h"
/**
 * @brief Tokenizes the lexer's input into a linked list of tokens.
 *
 * Tokens are pulled from the lexer with ft_token_next() and appended to the
 * list, one list node per token, until one of the following happens:
 *  - a T_EOF token is produced: it is freed and not stored in the list;
 *  - a T_ERROR token is produced: it is stored as the LAST element of the
 *    list and tokenizing stops. Stopping here matters because
 *    ft_token_next() can return T_ERROR without advancing the lexer, so
 *    asking for more tokens would yield the same error forever;
 *  - ft_token_next() returns NULL or a list node cannot be allocated: the
 *    token that could not be stored is freed and the tokens collected so
 *    far are returned.
 *
 * @param lexer A pointer to the lexer structure containing
 *              the input to be parsed.
 * @return The list of tokens collected (NULL when nothing was collected).
 *         The caller owns the list and the tokens it holds.
 */
t_list	*ft_parse_input(t_lexer *lexer)
{
	t_list	*list;
	t_list	*node;
	t_token	*token;

	list = NULL;
	while (TRUE)
	{
		token = ft_token_next(lexer);
		if (!token || token->type == T_EOF)
			break ;
		node = ft_lstnew(token);
		if (!node)
			break ;
		ft_lstadd_back(&list, node);
		if (token->type == T_ERROR)
			return (list);
	}
	if (token)
		ft_token_free(token);
	return (list);
}
/**
 * @brief Retrieves the next token from the lexer.
 *
 * Leading whitespace is skipped first; the lexer position reached after
 * that skip is the position recorded in the returned token.
 *
 * The current character then selects the kind of token:
 *  - '\0'               : a T_EOF token with no value.
 *  - '<', '>' or '|'    : whatever ft_parse_token() returns.
 *  - other printable    : the word is read with ft_lexer_readword() and
 *                         returned as a T_WORD token. The word buffer is
 *                         freed right after the token is created.
 *  - anything else      : a T_ERROR token with no value. The lexer is NOT
 *                         advanced on this path, so calling again returns
 *                         the same error.
 *
 * If ft_lexer_readword() fails, a T_ERROR token is returned whose value is
 * the lexer's current character. That character is first copied into a
 * local NUL-terminated buffer: passing the address of the single char
 * field directly would hand ft_token_new() an unterminated string. This
 * relies on ft_token_new() copying its value argument, which the T_WORD
 * path already depends on.
 *
 * @param lexer A pointer to the lexer structure.
 * @return A pointer to the newly created token, as returned by
 *         ft_token_new() or ft_parse_token().
 */
t_token	*ft_token_next(t_lexer *lexer)
{
	t_token	*token;
	char	*word;
	char	bad[2];
	int		current_pos;

	while (ft_isspace(lexer->current_char))
		ft_lexer_readchar(lexer);
	current_pos = lexer->pos;
	if (lexer->current_char == '\0')
		return (ft_token_new(T_EOF, NULL, current_pos));
	if (lexer->current_char == '<' || lexer->current_char == '>'
		|| lexer->current_char == '|')
		return (ft_parse_token(lexer));
	if (!ft_isprint(lexer->current_char))
		return (ft_token_new(T_ERROR, NULL, current_pos));
	word = ft_lexer_readword(lexer);
	if (!word)
	{
		bad[0] = lexer->current_char;
		bad[1] = '\0';
		return (ft_token_new(T_ERROR, bad, current_pos));
	}
	token = ft_token_new(T_WORD, word, current_pos);
	free(word);
	return (token);
}