From ff32ce9b39366e6e38e941037d4704850829d050 Mon Sep 17 00:00:00 2001 From: Arthur Bols Date: Sat, 27 Mar 2021 16:58:48 +0100 Subject: [PATCH] Cleanup parser and add documentation --- httplib/parser.py | 172 ++++++++------------------------------- server/requesthandler.py | 2 +- 2 files changed, 33 insertions(+), 141 deletions(-) diff --git a/httplib/parser.py b/httplib/parser.py index 02a290d..b928038 100644 --- a/httplib/parser.py +++ b/httplib/parser.py @@ -1,23 +1,18 @@ import logging -import os.path import re import urllib from urllib.parse import urlparse, urlsplit from httplib.exceptions import InvalidStatusLine, InvalidResponse, BadRequest, InvalidRequestLine -from httplib.httpsocket import HTTPSocket - - -def _get_start_line(client: HTTPSocket): - line = client.read_line().strip() - split = list(filter(None, line.split(" ", 2))) - if len(split) < 3: - raise InvalidStatusLine(line) # TODO fix exception - - return line, split def _is_valid_http_version(http_version: str): + """ + Returns True if the specified HTTP-version is valid. + + @param http_version: the string to be checked + @return: True if the specified HTTP-version is valid. + """ if len(http_version) < 8 or http_version[4] != "/": return False @@ -28,26 +23,19 @@ def _is_valid_http_version(http_version: str): return True -def get_status_line(client: HTTPSocket): - line, (http_version, status, reason) = _get_start_line(client) - - if not _is_valid_http_version(http_version): - raise InvalidStatusLine(line) - version = http_version[:4] - - if not re.match(r"\d{3}", status): - raise InvalidStatusLine(line) - status = int(status) - if status < 100 or status > 999: - raise InvalidStatusLine(line) - - return version, status, reason - - def parse_status_line(line: str): + """ + Parses the specified line as an HTTP status-line. 
+ + @param line: the status-line to be parsed + @raise InvalidStatusLine: if the line couldn't be parsed, if the HTTP-version is invalid or if the status code + is invalid + @return: tuple of the HTTP-version, status and reason + """ + split = list(filter(None, line.strip().split(" ", 2))) if len(split) < 3: - raise InvalidStatusLine(line) # TODO fix exception + raise InvalidStatusLine(line) http_version, status, reason = split @@ -65,122 +53,35 @@ def parse_status_line(line: str): def parse_request_line(line: str): + """ + Parses the specified line as an HTTP request-line. + Returns the method, target as SplitResult and HTTP version from the request-line. + + @param line: the request-line to be parsed + @raise InvalidRequestLine: if the line couldn't be parsed. + @raise BadRequest: Invalid HTTP method, Invalid HTTP-version or Invalid target + @return: tuple of the method, target and HTTP-version + """ + split = list(filter(None, line.rstrip().split(" ", 2))) if len(split) < 3: raise InvalidRequestLine(line) method, target, version = split if method not in ("CONNECT", "DELETE", "GET", "HEAD", "OPTIONS", "POST", "PUT", "TRACE"): - raise BadRequest() + raise BadRequest(f"Invalid method: {method}") if not _is_valid_http_version(version): logging.debug("[ABRT] request: invalid http-version=%r", version) - raise BadRequest() + raise BadRequest(f"Invalid HTTP-version: {version}") if len(target) == "": raise BadRequest() - parsed_target = urlparse(target) - if len(parsed_target.path) > 0 and parsed_target.path[0] != "/" and parsed_target.netloc != "": - parsed_target = urlparse(f"//{target}") + parsed_target = urlsplit(target) return method, parsed_target, version.split("/")[1] -def retrieve_headers(client: HTTPSocket): - raw_headers = [] - # first header after the status-line may not contain a space - while True: - line = client.read_line() - if line[0].isspace(): - continue - else: - break - - while True: - if line in ("\r\n", "\n", " "): - break - - if 
line[0].isspace(): - raw_headers[-1] = raw_headers[-1].rstrip("\r\n") - - raw_headers.append(line.lstrip()) - line = client.read_line() - - result = [] - header_str = "".join(raw_headers) - for line in header_str.splitlines(): - pos = line.find(":") - - if pos <= 0 or pos >= len(line) - 1: - continue - - (header, value) = line.split(":", 1) - result.append((header.lower(), value.strip().lower())) - - return result - - -def parse_request_headers(client: HTTPSocket): - raw_headers = retrieve_headers(client) - logging.debug("Received headers: %r", raw_headers) - headers = {} - - key: str - for (key, value) in raw_headers: - if any((c.isspace()) for c in key): - raise BadRequest() - - if key == "content-length": - if key in headers: - logging.error("Multiple content-length headers specified") - raise BadRequest() - if not value.isnumeric() or int(value) <= 0: - logging.error("Invalid content-length value: %r", value) - raise BadRequest() - elif key == "host": - if value != client.host and value != client.host.split(":")[0] or key in headers: - raise BadRequest() - - headers[key] = value - - return headers - - -def get_headers(client: HTTPSocket): - headers = [] - # first header after the status-line may not start with a space - while True: - line = client.read_line() - if line[0].isspace(): - continue - else: - break - - while True: - if line in ("\r\n", "\n", " "): - break - - if line[0].isspace(): - headers[-1] = headers[-1].rstrip("\r\n") - - headers.append(line.lstrip()) - line = client.read_line() - - result = {} - header_str = "".join(headers) - for line in header_str.splitlines(): - pos = line.find(":") - - if pos <= 0 or pos >= len(line) - 1: - continue - - (header, value) = map(str.strip, line.split(":", 1)) - check_next_header(result, header, value) - result[header.lower()] = value.lower() - - return result - - def parse_headers(lines): headers = [] @@ -269,17 +170,8 @@ def get_uri(url: str): return result -def base_url(uri: str): - parsed = urlsplit(uri) - 
path = parsed.path.rsplit("/", 1)[0] - return f"{parsed.scheme}://{parsed.hostname}{path}/" - - -def absolute_url(uri: str, rel_path: str): - parsed = urlsplit(uri) - path = os.path.normpath(os.path.join(parsed.path, rel_path)) - return f"{parsed.scheme}://{parsed.hostname}{path}" - - def urljoin(base, url): + """ + Join a base URL and a URL to form an absolute URL. + """ return urllib.parse.urljoin(base, url) diff --git a/server/requesthandler.py b/server/requesthandler.py index 8e619e4..ef02965 100644 --- a/server/requesthandler.py +++ b/server/requesthandler.py @@ -120,4 +120,4 @@ class RequestHandler: message += "\r\n" logging.debug("Sending: %r", message) - client.sendall(message.encode(FORMAT)) \ No newline at end of file + client.sendall(message.encode(FORMAT))