Clean up parser and add documentation

This commit is contained in:
2021-03-27 16:58:48 +01:00
parent 3615c56152
commit ff32ce9b39
2 changed files with 33 additions and 141 deletions

View File

@@ -1,23 +1,18 @@
import logging import logging
import os.path
import re import re
import urllib import urllib
from urllib.parse import urlparse, urlsplit from urllib.parse import urlparse, urlsplit
from httplib.exceptions import InvalidStatusLine, InvalidResponse, BadRequest, InvalidRequestLine from httplib.exceptions import InvalidStatusLine, InvalidResponse, BadRequest, InvalidRequestLine
from httplib.httpsocket import HTTPSocket
def _get_start_line(client: HTTPSocket):
    """
    Reads one line from the client and splits it into the three parts of a start-line.
    @param client: the socket to read the line from
    @raise InvalidStatusLine: if the line has fewer than three non-empty, space-separated parts
    @return: tuple of the stripped line and the list of its three parts
    """
    line = client.read_line().strip()
    # At most two splits, so the third part keeps any spaces it contains.
    parts = [part for part in line.split(" ", 2) if part]
    if len(parts) < 3:
        raise InvalidStatusLine(line)  # TODO fix exception
    return line, parts
def _is_valid_http_version(http_version: str): def _is_valid_http_version(http_version: str):
"""
Returns True if the specified HTTP-version is valid.
@param http_version: the string to be checked
@return: True if the specified HTTP-version is valid.
"""
if len(http_version) < 8 or http_version[4] != "/": if len(http_version) < 8 or http_version[4] != "/":
return False return False
@@ -28,26 +23,19 @@ def _is_valid_http_version(http_version: str):
return True return True
def get_status_line(client: HTTPSocket):
    """
    Reads a status-line from the client and parses it.
    @param client: the socket to read the status-line from
    @raise InvalidStatusLine: if the HTTP-version is invalid or if the status code is not a three-digit number
        of at least 100
    @return: tuple of the version, status (as int) and reason
    """
    line, (http_version, status, reason) = _get_start_line(client)

    if not _is_valid_http_version(http_version):
        raise InvalidStatusLine(line)
    # NOTE(review): this keeps the first four characters of the HTTP-version token (the part before
    # the "/"), not the version number after it -- confirm that callers expect this.
    version = http_version[:4]

    # fullmatch instead of match: a status such as "200abc" has to be rejected here rather than
    # failing in int() with a ValueError.
    if not re.fullmatch(r"\d{3}", status):
        raise InvalidStatusLine(line)
    status = int(status)
    if status < 100:
        raise InvalidStatusLine(line)

    return version, status, reason
def parse_status_line(line: str): def parse_status_line(line: str):
"""
Parses the specified line as an HTTP status-line.
@param line: the status-line to be parsed
@raise InvalidStatusLine: if the line couldn't be parsed, if the HTTP-version is invalid or if the status code
is invalid
@return: tuple of the HTTP-version, status and reason
"""
split = list(filter(None, line.strip().split(" ", 2))) split = list(filter(None, line.strip().split(" ", 2)))
if len(split) < 3: if len(split) < 3:
raise InvalidStatusLine(line) # TODO fix exception raise InvalidStatusLine(line)
http_version, status, reason = split http_version, status, reason = split
@@ -65,122 +53,35 @@ def parse_status_line(line: str):
def parse_request_line(line: str):
    """
    Parses the specified line as an HTTP request-line.
    Returns the method, the target as parsed by urlsplit and the HTTP version from the request-line.
    @param line: the request-line to be parsed
    @raise InvalidRequestLine: if the line doesn't consist of three non-empty, space-separated parts
    @raise BadRequest: Invalid HTTP method, Invalid HTTP-version or Invalid target
    @return: tuple of the method, the parsed target and the part of the HTTP-version following the "/"
    """
    split = list(filter(None, line.rstrip().split(" ", 2)))
    if len(split) < 3:
        raise InvalidRequestLine(line)

    method, target, version = split
    if method not in ("CONNECT", "DELETE", "GET", "HEAD", "OPTIONS", "POST", "PUT", "TRACE"):
        raise BadRequest(f"Invalid method: {method}")

    if not _is_valid_http_version(version):
        logging.debug("[ABRT] request: invalid http-version=%r", version)
        raise BadRequest(f"Invalid HTTP-version: {version}")

    # Defensive guard. The previous check compared len(target) with "" and could never be true.
    if not target:
        raise BadRequest()

    parsed_target = urlsplit(target)

    return method, parsed_target, version.split("/")[1]
def retrieve_headers(client: HTTPSocket):
    """
    Reads the header section from the client and returns it as a list of (name, value) tuples.
    A line starting with whitespace is appended to the previous header line. Lines without a colon, with an
    empty name or with nothing after the colon are ignored. Names and values are lower-cased.
    @param client: the socket to read the header lines from
    @return: list of (header, value) tuples in the order in which they were read
    """
    terminators = ("\r\n", "\n", " ")

    # The first header may not start with whitespace, so such lines are discarded. The blank line that
    # ends the header section also starts with a whitespace character ("\r" or "\n"); it has to be
    # recognised here, otherwise a message without headers makes this loop read past the header section.
    line = client.read_line()
    while line not in terminators and line[0].isspace():
        line = client.read_line()

    raw_headers = []
    while line not in terminators:
        if line[0].isspace():
            # Continuation line: drop the line ending of the previous line so both end up on one line.
            raw_headers[-1] = raw_headers[-1].rstrip("\r\n")
        raw_headers.append(line.lstrip())
        line = client.read_line()

    result = []
    for header_line in "".join(raw_headers).splitlines():
        pos = header_line.find(":")
        # Skip lines without a colon, with an empty name or with nothing after the colon.
        if pos <= 0 or pos >= len(header_line) - 1:
            continue
        header, value = header_line.split(":", 1)
        result.append((header.lower(), value.strip().lower()))

    return result
def parse_request_headers(client: HTTPSocket):
    """
    Reads the request headers from the client and validates them.
    @param client: the socket to read the headers from; its host attribute is compared with the Host header
    @raise BadRequest: if a header name contains whitespace, if Content-Length or Host is specified more than
        once, if the Content-Length value is not a positive decimal integer or if the Host value matches neither
        the client host nor the client host without its port
    @return: dict mapping each header name to its value
    """
    raw_headers = retrieve_headers(client)
    logging.debug("Received headers: %r", raw_headers)

    headers = {}
    for key, value in raw_headers:
        if any(c.isspace() for c in key):
            raise BadRequest()

        if key == "content-length":
            if key in headers:
                logging.error("Multiple content-length headers specified")
                raise BadRequest()
            # isdecimal() rather than isnumeric(): isnumeric() also accepts characters such as "²",
            # which int() rejects with a ValueError instead of the intended BadRequest.
            if not value.isdecimal() or int(value) <= 0:
                logging.error("Invalid content-length value: %r", value)
                raise BadRequest()
        elif key == "host":
            host_matches = value in (client.host, client.host.split(":")[0])
            if not host_matches or key in headers:
                raise BadRequest()

        headers[key] = value

    return headers
def get_headers(client: HTTPSocket):
    """
    Reads the header section from the client and returns it as a dict.
    A line starting with whitespace is appended to the previous header line. Lines without a colon, with an
    empty name or with nothing after the colon are ignored. Every remaining header is passed to
    check_next_header before it is stored with a lower-cased name and value.
    @param client: the socket to read the header lines from
    @return: dict mapping each lower-cased header name to its lower-cased value
    """
    terminators = ("\r\n", "\n", " ")

    # The first header may not start with whitespace, so such lines are discarded. The blank line that
    # ends the header section also starts with a whitespace character ("\r" or "\n"); it has to be
    # recognised here, otherwise a message without headers makes this loop read past the header section.
    line = client.read_line()
    while line not in terminators and line[0].isspace():
        line = client.read_line()

    headers = []
    while line not in terminators:
        if line[0].isspace():
            # Continuation line: drop the line ending of the previous line so both end up on one line.
            headers[-1] = headers[-1].rstrip("\r\n")
        headers.append(line.lstrip())
        line = client.read_line()

    result = {}
    for header_line in "".join(headers).splitlines():
        pos = header_line.find(":")
        # Skip lines without a colon, with an empty name or with nothing after the colon.
        if pos <= 0 or pos >= len(header_line) - 1:
            continue
        header, value = map(str.strip, header_line.split(":", 1))
        check_next_header(result, header, value)
        result[header.lower()] = value.lower()

    return result
def parse_headers(lines): def parse_headers(lines):
headers = [] headers = []
@@ -269,17 +170,8 @@ def get_uri(url: str):
return result return result
def base_url(uri: str):
    """
    Returns the URL of the directory that contains the resource the specified URI points to.
    The last path segment, the query and the fragment are dropped; the result always ends with a slash.
    @param uri: the absolute URI to take the base of
    @return: the scheme and authority of the URI followed by the directory part of its path and a "/"
    """
    parsed = urlsplit(uri)
    directory = parsed.path.rsplit("/", 1)[0]
    # netloc instead of hostname, so that a port (e.g. localhost:8080) is not lost.
    return f"{parsed.scheme}://{parsed.netloc}{directory}/"
def absolute_url(uri: str, rel_path: str):
    """
    Resolves a relative path against the path of the specified URI, which is treated as a directory.
    The query and fragment of the URI are dropped and "." and ".." segments are collapsed.
    @param uri: the absolute URI whose path serves as the base directory
    @param rel_path: the path to resolve; a path starting with "/" replaces the path of the URI
    @return: the scheme and authority of the URI followed by the normalised, joined path
    """
    # URL paths always use forward slashes; os.path would produce backslashes on Windows.
    import posixpath

    parsed = urlsplit(uri)
    # An empty path is treated as the root, so the result never glues the relative path to the host.
    base_path = parsed.path or "/"
    path = posixpath.normpath(posixpath.join(base_path, rel_path))
    # netloc instead of hostname, so that a port (e.g. localhost:8080) is not lost.
    return f"{parsed.scheme}://{parsed.netloc}{path}"
def urljoin(base, url):
    """
    Join a base URL and a URL to form an absolute URL.
    Delegates to urllib.parse.urljoin.
    @param base: the base URL
    @param url: the URL to join with the base
    @return: the value returned by urllib.parse.urljoin
    """
    absolute = urllib.parse.urljoin(base, url)
    return absolute