minishell/src/tokenizer/tokenizer.c

/* ************************************************************************** */
/*                                                                            */
/*                                                        :::      ::::::::   */
/*   tokenizer.c                                        :+:      :+:    :+:   */
/*                                                    +:+ +:+         +:+     */
/*   By: qmennen <qmennen@student.codam.nl>         +#+  +:+       +#+        */
/*                                                +#+#+#+#+#+   +#+           */
/*   Created: 2025/02/04 16:07:58 by qmennen           #+#    #+#             */
/*   Updated: 2025/02/04 20:57:25 by qmennen          ###   ########.fr       */
/*                                                                            */
/* ************************************************************************** */

#include "minishell.h"

/**
 * @brief Collects the tokens produced by the lexer into a linked list.
 *
 * Tokens are pulled with ft_token_next() and appended to the list in order
 * until one of the following happens:
 * - a T_EOF token is read: it is freed and NOT stored in the list;
 * - a T_ERROR token is read: it is appended as the LAST element so the caller
 *   can detect the failure, and tokenizing stops. Stopping here is required
 *   because an error token does not guarantee the lexer made progress, so
 *   continuing could loop forever;
 * - ft_token_next() returns NULL or a list node cannot be allocated: the
 *   pending token (if any) is freed and the tokens gathered so far are
 *   returned.
 *
 * @param lexer A pointer to the lexer structure containing
 * 				the input to be parsed.
 * @return The list of tokens read so far (NULL when no token was stored).
 * 			Each list node's content is a t_token pointer; the caller is
 * 			responsible for releasing the list and its tokens.
 */
t_list	*ft_parse_input(t_lexer *lexer)
{
	t_list			*list;
	t_list			*node;
	t_token			*token;

	list = NULL;
	while (TRUE)
	{
		token = ft_token_next(lexer);
		if (!token || token->type == T_EOF)
			break ;
		node = ft_lstnew(token);
		if (!node)
			break ;
		ft_lstadd_back(&list, node);
		if (token->type == T_ERROR)
			return (list);
	}
	if (token)
		ft_token_free(token);
	return (list);
}

/**
 * @brief Reads a word at the lexer's current position and wraps it in a token.
 *
 * On success a T_WORD token holding the word is returned. When
 * ft_lexer_readword() fails, a T_ERROR token is returned whose value is the
 * lexer's current character as a NUL-terminated one-character string (a bare
 * pointer to the single char must not be used as a string).
 * ft_token_new() is expected to copy the value it receives, as the word
 * buffer is freed right after the token is created.
 *
 * @param lexer A pointer to the lexer structure.
 * @param pos The input position to record in the token.
 * @return The token returned by ft_token_new() (T_WORD or T_ERROR).
 */
static t_token	*ft_read_word_token(t_lexer *lexer, int pos)
{
	t_token	*token;
	char	*word;
	char	bad_char[2];

	word = ft_lexer_readword(lexer);
	if (!word)
	{
		bad_char[0] = lexer->current_char;
		bad_char[1] = '\0';
		return (ft_token_new(T_ERROR, bad_char, pos));
	}
	token = ft_token_new(T_WORD, word, pos);
	free(word);
	return (token);
}

/**
 * @brief Retrieves the next token from the lexer.
 *
 * @param lexer A pointer to the lexer structure.
 * @return The token produced by ft_token_new() or ft_parse_token() for the
 * 			current input; its position is the lexer position after leading
 * 			whitespace was skipped.
 *
 * The function performs the following steps:
 * 1. Skips any whitespace characters.
 * 2. Checks the current character in the lexer:
 *    - If it is the end-of-input character ('\0'), creates a T_EOF token.
 *    - If it is a special character ('<', '>', '|'), delegates
 * 		to ft_parse_token().
 *    - If it is a printable character, reads the word and creates a T_WORD
 * 		token, or a T_ERROR token if the word could not be read.
 *    - Otherwise, creates a T_ERROR token without a value and advances the
 * 		lexer past the invalid character, so that repeated calls always make
 * 		progress instead of returning the same error forever.
 */
t_token	*ft_token_next(t_lexer *lexer)
{
	t_token	*token;
	int		current_pos;

	while (ft_isspace(lexer->current_char))
		ft_lexer_readchar(lexer);
	current_pos = lexer->pos;
	if (lexer->current_char == '\0')
		return (ft_token_new(T_EOF, NULL, current_pos));
	if (lexer->current_char == '<' || lexer->current_char == '>'
		|| lexer->current_char == '|')
		return (ft_parse_token(lexer));
	if (ft_isprint(lexer->current_char))
		return (ft_read_word_token(lexer, current_pos));
	token = ft_token_new(T_ERROR, NULL, current_pos);
	ft_lexer_readchar(lexer);
	return (token);
}