From c3e5117fcbfdade6b841ec1e8a5c64b5dd752cf7 Mon Sep 17 00:00:00 2001 From: Quinten Date: Thu, 9 Oct 2025 16:26:31 +0200 Subject: [PATCH] refactor: uri parser now does parsing --- webserv/config/directive/DirectiveFactory.cpp | 2 +- webserv/handler/FileHandler.cpp | 39 ++--- webserv/handler/FileHandler.hpp | 8 +- webserv/handler/URIParser.cpp | 155 ++++++++++++------ webserv/handler/URIParser.hpp | 34 ++-- webserv/router/Router.cpp | 28 ++-- webserv/router/Router.hpp | 6 +- webserv/utils/FileUtils.cpp | 9 +- webserv/utils/FileUtils.hpp | 1 + webserv/utils/utils.cpp | 8 +- webserv/utils/utils.hpp | 4 +- 11 files changed, 182 insertions(+), 112 deletions(-) diff --git a/webserv/config/directive/DirectiveFactory.cpp b/webserv/config/directive/DirectiveFactory.cpp index c891f24..18fc06c 100644 --- a/webserv/config/directive/DirectiveFactory.cpp +++ b/webserv/config/directive/DirectiveFactory.cpp @@ -52,7 +52,7 @@ std::unique_ptr DirectiveFactory::create(std::string_view type, cons { throw std::invalid_argument("No factory found for directive type: " + std::string(type)); } - return it->second(name, utils::trimSemi(utils::trim(arg))); + return it->second(name, utils::trim(arg, " \t\n\r;")); } const std::unordered_map &DirectiveFactory::getFactories() diff --git a/webserv/handler/FileHandler.cpp b/webserv/handler/FileHandler.cpp index a8f4313..de9c958 100644 --- a/webserv/handler/FileHandler.cpp +++ b/webserv/handler/FileHandler.cpp @@ -1,5 +1,6 @@ -#include // for LocationConfig -#include // for ErrorHandler +#include "webserv/config/AConfig.hpp" + +#include // for ErrorHandler #include #include // for MIMETypes #include // for URIParser @@ -8,16 +9,12 @@ #include // for Log, LOCATION #include // for joinPath, getExtension, isFile, readBinaryFile -#include // for identity -#include // for unique_ptr, allocator, make_unique -#include // for optional -#include // for __find_if_fn, find_if -#include // for basic_string, string, operator+, char_traits -#include // for move -#include // for vector +#include // for unique_ptr, allocator, make_unique +#include +#include // for basic_string, string, operator+, char_traits +#include // for vector -FileHandler::FileHandler(const LocationConfig *location, const URIParser &uriParser) - : location_(location), uriParser_(uriParser) +FileHandler::FileHandler(const AConfig *config, const URIParser &uriParser) : config_(config), uriParser_(uriParser) { Log::trace(LOCATION); } @@ -35,7 +32,7 @@ std::unique_ptr FileHandler::handleFile(const std::string &filepat Log::debug("Serving file: " + filepath + " with MIME type: " + mimeType); if (fileData.empty()) { - return ErrorHandler::getErrorResponse(Http::StatusCode::NOT_FOUND, location_); + return ErrorHandler::getErrorResponse(Http::StatusCode::NOT_FOUND, config_); } // TODO: annoying: For reading files, vector is preferred, but for http data vector is preferred response->setBody(std::vector{fileData.begin(), fileData.end()}); @@ -49,28 +46,28 @@ std::unique_ptr FileHandler::handleDirectory(const std::string &di if (type == DIRECTORY_INDEX) { - auto possible_indexes = location_->get>("index").value(); + auto possible_indexes = config_->get>("index").value(); auto first_matching = std::ranges::find_if(possible_indexes, [&](const std::string &index) { return FileUtils::isFile(FileUtils::joinPath(dirpath, index)); }); if (first_matching == possible_indexes.end()) { - return ErrorHandler::getErrorResponse(Http::StatusCode::FORBIDDEN, location_); + return ErrorHandler::getErrorResponse(Http::StatusCode::FORBIDDEN, config_); } return handleFile(FileUtils::joinPath(dirpath, *first_matching)); } if (type == DIRECTORY_AUTOINDEX) { Log::debug("Requested path is a directory: " + dirpath); - return ErrorHandler::getErrorResponse(Http::StatusCode::FORBIDDEN, location_); + return ErrorHandler::getErrorResponse(Http::StatusCode::FORBIDDEN, config_); } - return ErrorHandler::getErrorResponse(Http::StatusCode::NOT_FOUND, location_); + return ErrorHandler::getErrorResponse(Http::StatusCode::NOT_FOUND, config_); } std::unique_ptr FileHandler::getResponse() const { Log::trace(LOCATION); - std::string filepath = uriParser_.getFilePath(); + std::string filepath = uriParser_.getFullPath(); ResourceType resourceType = getResourceType(filepath); switch (resourceType) @@ -78,9 +75,9 @@ std::unique_ptr FileHandler::getResponse() const case FILE: return handleFile(filepath); case DIRECTORY_AUTOINDEX: case DIRECTORY_INDEX: return handleDirectory(filepath, resourceType); - case NOT_FOUND: return ErrorHandler::getErrorResponse(Http::StatusCode::NOT_FOUND, location_); + case NOT_FOUND: return ErrorHandler::getErrorResponse(Http::StatusCode::NOT_FOUND, config_); } - return ErrorHandler::getErrorResponse(Http::StatusCode::NOT_FOUND, location_); + return ErrorHandler::getErrorResponse(Http::StatusCode::NOT_FOUND, config_); } FileHandler::ResourceType FileHandler::getResourceType(const std::string &path) const @@ -94,11 +91,11 @@ FileHandler::ResourceType FileHandler::getResourceType(const std::string &path) } if (uriParser_.isDirectory()) { - if (location_->get>("index").has_value()) + if (config_->get>("index").has_value()) { return DIRECTORY_INDEX; } - if (location_->get("autoindex").value_or(false)) + if (config_->get("autoindex").value_or(false)) { return DIRECTORY_AUTOINDEX; } diff --git a/webserv/handler/FileHandler.hpp b/webserv/handler/FileHandler.hpp index 51e714c..1b03b61 100644 --- a/webserv/handler/FileHandler.hpp +++ b/webserv/handler/FileHandler.hpp @@ -1,5 +1,7 @@ #pragma once +#include "webserv/config/AConfig.hpp" + #include #include #include // for HttpResponse @@ -8,18 +10,18 @@ #include // for unique_ptr #include // for string -class LocationConfig; +class AConfig; class URIParser; class FileHandler { public: - FileHandler(const LocationConfig *location, const URIParser &uriParser); + FileHandler(const AConfig *config, const URIParser &uriParser); [[nodiscard]] std::unique_ptr getResponse() const; private: - const LocationConfig *location_; + const AConfig *config_; const URIParser &uriParser_; enum ResourceType : uint8_t diff --git a/webserv/handler/URIParser.cpp b/webserv/handler/URIParser.cpp index 1bff992..7e207a3 100644 --- a/webserv/handler/URIParser.cpp +++ b/webserv/handler/URIParser.cpp @@ -1,96 +1,155 @@ -#include +#include "webserv/config/AConfig.hpp" +#include "webserv/utils/FileUtils.hpp" +#include "webserv/utils/utils.hpp" #include // for LocationConfig #include // for ServerConfig +#include #include // for optional #include // for size_t #include // for stat, S_ISDIR, S_ISREG -URIParser::URIParser(const std::string &uri, const ServerConfig &serverConfig) : _locationConfig(nullptr) +URIParser::URIParser(const std::string &uri, const ServerConfig &serverConfig) + : uriTrimmed_(utils::trim(uri, "/")), config_(matchConfig(uriTrimmed_, serverConfig)) +{ + parseUri(uri); + parseFullpath(); +} + +const AConfig *URIParser::matchConfig(const std::string &uri, const ServerConfig &serverConfig) { const auto &locations = serverConfig.getLocationPaths(); - size_t maxMatchLength = 0; + const AConfig *bestMatch = &serverConfig; + size_t maxMatchLength = 0; for (const auto &locationPath : locations) { - if (uri.starts_with((locationPath == "/") ? locationPath : locationPath + "/")) - { // TODO HMHMMz why does it need to end on a /? + if (uri.empty() && locationPath == "/") + { + return serverConfig.getLocation(locationPath); + } + if (uri.starts_with(utils::trim(locationPath, "/"))) + { if (locationPath.length() > maxMatchLength) { maxMatchLength = locationPath.length(); - _locationConfig = serverConfig.getLocation(locationPath); + bestMatch = serverConfig.getLocation(locationPath); } } } + return bestMatch; +} - root_ = _locationConfig != nullptr ? _locationConfig->get("root").value_or("") : ""; - if (!root_.empty() && root_.back() == '/') +void URIParser::parseUri(const std::string &uri) +{ + if (config_->getType() == "server") { - root_.pop_back(); // Remove trailing slash to avoid double slashes in path + fullPath_ = FileUtils::joinPath(config_->get("root").value_or(""), uriTrimmed_); + } + else + { + auto const *locConfig = dynamic_cast(config_); + std::string locTrimmed = utils::trim(locConfig->getPath(), "/"); + std::string uriSub = uri.substr(locTrimmed.length()); + fullPath_ = FileUtils::joinPath(locConfig->get("root").value_or(""), uriSub); } - relativePath_ = uri.substr(maxMatchLength); - if (relativePath_.empty() || relativePath_[0] != '/') + size_t fragmentPos = fullPath_.find_first_of('#'); + if (fragmentPos != std::string::npos) { - relativePath_ = "/" + relativePath_; + fragment_ = fullPath_.substr(fragmentPos + 1); + fullPath_ = fullPath_.substr(0, fragmentPos); + } + + size_t queryPos = fullPath_.find_first_of('?'); + if (queryPos != std::string::npos) + { + query_ = fullPath_.substr(queryPos + 1); + fullPath_ = fullPath_.substr(0, queryPos); } } -std::string URIParser::getFilePath() const +void URIParser::parseFullpath() { - return root_ + relativePath_; -} + auto uriSegments = utils::split(fullPath_, '/'); -std::string URIParser::getFilename() const -{ - size_t lastSlash = relativePath_.find_last_of('/'); - if (lastSlash == std::string::npos) + for (const auto &segment : uriSegments) { - return relativePath_; // No slashes, return the whole path + std::string curDir = FileUtils::joinPath(dir_, segment); + if (segment.empty()) + { + continue; + } + + if (FileUtils::isFile(curDir) && baseName_.empty()) + { + baseName_ = segment; + } + else if (FileUtils::isDirectory(curDir)) + { + dir_ = FileUtils::joinPath(dir_, segment); + } + else if (!baseName_.empty()) // not file or dir, but we have a baseName already + { + pathInfo_ = FileUtils::joinPath(pathInfo_, baseName_); + } } - return relativePath_.substr(lastSlash + 1); + fullPath_ = FileUtils::joinPath(dir_, baseName_); } -std::string URIParser::getExtension() const +const AConfig *URIParser::getConfig() const { - std::string filename = getFilename(); - size_t lastDot = filename.find_last_of('.'); - if (lastDot == std::string::npos || lastDot == 0 || lastDot == filename.length() - 1) - { - return ""; // No extension found or dot is at start/end - } - return filename.substr(lastDot + 1); -} - -LocationConfig const *URIParser::getLocation() const -{ - return _locationConfig; + return config_; } bool URIParser::isFile() const { - struct stat pathStat{}; - if (stat(getFilePath().c_str(), &pathStat) != 0) - { - return false; - } - return S_ISREG(pathStat.st_mode); + return !baseName_.empty(); } bool URIParser::isDirectory() const { - struct stat pathStat{}; - if (stat(getFilePath().c_str(), &pathStat) != 0) - { - return false; - } - return S_ISDIR(pathStat.st_mode); + return baseName_.empty(); } bool URIParser::isValid() const { - struct stat pathStat{}; - return stat(getFilePath().c_str(), &pathStat) == 0; + return FileUtils::isValidPath(fullPath_); } + +const std::string &URIParser::getBaseName() const +{ + return baseName_; +} + +std::string URIParser::getExtension() const +{ + return FileUtils::getExtension(baseName_); +} + +const std::string &URIParser::getFullPath() const +{ + return fullPath_; +} + +const std::string &URIParser::getDir() const +{ + return dir_; +} + +const std::string &URIParser::getPathInfo() const +{ + return pathInfo_; +} + +const std::string &URIParser::getQuery() const +{ + return query_; +} + +const std::string &URIParser::getFragment() const +{ + return fragment_; +} \ No newline at end of file diff --git a/webserv/handler/URIParser.hpp b/webserv/handler/URIParser.hpp index 9da9a1a..16a43e8 100644 --- a/webserv/handler/URIParser.hpp +++ b/webserv/handler/URIParser.hpp @@ -1,5 +1,7 @@ #pragma once +#include "webserv/config/AConfig.hpp" + #include #include #include @@ -14,19 +16,31 @@ class URIParser public: URIParser(const std::string &uri, const ServerConfig &serverConfig); - [[nodiscard]] std::string getFilePath() const; - [[nodiscard]] std::string getFilename() const; - [[nodiscard]] std::string getExtension() const; - - [[nodiscard]] const LocationConfig *getLocation() const; - [[nodiscard]] bool isFile() const; - [[nodiscard]] bool isDirectory() const; [[nodiscard]] bool isValid() const; + [[nodiscard]] std::string getExtension() const; + [[nodiscard]] const AConfig *getConfig() const; + [[nodiscard]] const std::string &getBaseName() const; + [[nodiscard]] const std::string &getFullPath() const; + [[nodiscard]] const std::string &getDir() const; + [[nodiscard]] const std::string &getPathInfo() const; + [[nodiscard]] const std::string &getQuery() const; + [[nodiscard]] const std::string &getFragment() const; + private: - const LocationConfig *_locationConfig; - std::string relativePath_; - std::string root_; + void parseUri(const std::string &uri); + void parseFullpath(); + + std::string uriTrimmed_; + const AConfig *config_; + std::string fullPath_; // dir_ + baseName_ + pathInfo_ + std::string baseName_; + std::string dir_; + std::string pathInfo_; + std::string query_; + std::string fragment_; + + static const AConfig *matchConfig(const std::string &uri, const ServerConfig &serverConfig); }; \ No newline at end of file diff --git a/webserv/router/Router.cpp b/webserv/router/Router.cpp index 7d3b9de..0cd715d 100644 --- a/webserv/router/Router.cpp +++ b/webserv/router/Router.cpp @@ -1,14 +1,14 @@ -#include - -#include +#include "webserv/config/AConfig.hpp" #include // for ConfigManager #include // for ServerConfig +#include #include // for ErrorHandler #include // for FileHandler #include // for URIParser #include // for HttpHeaders #include // for LOCATION, Log +#include #include #include // for unique_ptr @@ -18,11 +18,9 @@ class LocationConfig; -Router::Router() {} - -bool Router::isMethodSupported(const std::string &method, const LocationConfig &location) +bool Router::isMethodSupported(const std::string &method, const AConfig &config) { - const ADirective *allowedMethods = location.getDirective("allowed_methods"); + const ADirective *allowedMethods = config.getDirective("allowed_methods"); if (allowedMethods == nullptr || !allowedMethods->getValue().try_get>().has_value()) { return true; @@ -35,28 +33,24 @@ std::unique_ptr Router::handleRequest(const HttpRequest &request) { Log::trace(LOCATION); - ServerConfig *config = + ServerConfig *serverConfig = ConfigManager::getInstance().getMatchingServerConfig(request.getHeaders().getHost().value_or("")); - if (config == nullptr) + if (serverConfig == nullptr) { return ErrorHandler::getErrorResponse(400); } - URIParser uriParser{request.getTarget(), *config}; + URIParser uriParser{request.getTarget(), *serverConfig}; const std::string &target = request.getTarget(); static_cast(target); // Suppress unused variable warning const std::string &method = request.getMethod(); - const LocationConfig *location = uriParser.getLocation(); - if (location == nullptr) - { - return ErrorHandler::getErrorResponse(404, config); - } - if (!isMethodSupported(method, *location)) + const AConfig *config = uriParser.getConfig(); + if (!isMethodSupported(method, *config)) { return ErrorHandler::getErrorResponse(405, config); } - FileHandler fileHandler(location, uriParser); + FileHandler fileHandler(config, uriParser); return fileHandler.getResponse(); } \ No newline at end of file diff --git a/webserv/router/Router.hpp b/webserv/router/Router.hpp index 96cf8d8..6ae88ee 100644 --- a/webserv/router/Router.hpp +++ b/webserv/router/Router.hpp @@ -1,5 +1,6 @@ #pragma once +#include "webserv/config/AConfig.hpp" #include #include // for HttpRequest #include // for HttpResponse @@ -13,11 +14,8 @@ class ServerConfig; class Router { public: - Router(); - [[nodiscard]] static std::unique_ptr handleRequest(const HttpRequest &request); private: - [[nodiscard]] const LocationConfig *getLocation(const std::string &path, const ServerConfig &serverConfig) const; - [[nodiscard]] static bool isMethodSupported(const std::string &method, const LocationConfig &location); + [[nodiscard]] static bool isMethodSupported(const std::string &method, const AConfig &config); }; \ No newline at end of file diff --git a/webserv/utils/FileUtils.cpp b/webserv/utils/FileUtils.cpp index 2be0677..8ffb7fa 100644 --- a/webserv/utils/FileUtils.cpp +++ b/webserv/utils/FileUtils.cpp @@ -1,6 +1,5 @@ -#include - #include // for Log, LOCATION +#include #include // for size_t #include // for basic_ifstream, basic_ios, basic_istream, ios, ifstream, operator|, basic_istream::read, basic_istream::seekg, basic_istream::tellg, streamsize @@ -31,6 +30,12 @@ bool isFile(const std::string &path) return S_ISREG(pathStat.st_mode); } +bool isValidPath(const std::string &path) +{ + struct stat pathStat{}; + return stat(path.c_str(), &pathStat) == 0; +} + std::string getExtension(const std::string &filename) { size_t dotPos = filename.find_last_of('.'); diff --git a/webserv/utils/FileUtils.hpp b/webserv/utils/FileUtils.hpp index 0f7be9d..338cb06 100644 --- a/webserv/utils/FileUtils.hpp +++ b/webserv/utils/FileUtils.hpp @@ -7,6 +7,7 @@ namespace FileUtils { bool isDirectory(const std::string &path); bool isFile(const std::string &path); +bool isValidPath(const std::string &path); std::string getExtension(const std::string &filename); std::string joinPath(const std::string &base, const std::string &addition); diff --git a/webserv/utils/utils.cpp b/webserv/utils/utils.cpp index 4993275..0d11137 100644 --- a/webserv/utils/utils.cpp +++ b/webserv/utils/utils.cpp @@ -23,10 +23,10 @@ size_t stoul(const std::string &str, int base) return value; } -std::string trim(const std::string &str) +std::string trim(const std::string &str, const std::string &charset) { - size_t first = str.find_first_not_of(" \t\n\r"); - size_t last = str.find_last_not_of(" \t\n\r"); + size_t first = str.find_first_not_of(charset); + size_t last = str.find_last_not_of(charset); if (first == std::string::npos || last == std::string::npos) { return ""; @@ -82,7 +82,7 @@ void removeEmptyLines(std::string &str) { if (!utils::trim(line).empty()) { - result += utils::trimSemi(utils::trim(line)) + '\n'; + result += utils::trim(line, " \t\n\r;") + '\n'; } } str = result; diff --git a/webserv/utils/utils.hpp b/webserv/utils/utils.hpp index e666043..8a71cfd 100644 --- a/webserv/utils/utils.hpp +++ b/webserv/utils/utils.hpp @@ -7,8 +7,8 @@ namespace utils { size_t stoul(const std::string &str, int base = 10); -std::string trimSemi(const std::string &str); -std::string trim(const std::string &str); +// std::string trimSemi(const std::string &str); +std::string trim(const std::string &str, const std::string &charset = " \t\n\r"); size_t findCorrespondingClosingBrace(const std::string &str, size_t openPos); void removeEmptyLines(std::string &str); void removeComments(std::string &str);