Cleanup parser and add documentation
This commit is contained in:
@@ -1,23 +1,18 @@
|
|||||||
import logging
|
import logging
|
||||||
import os.path
|
|
||||||
import re
|
import re
|
||||||
import urllib
|
import urllib
|
||||||
from urllib.parse import urlparse, urlsplit
|
from urllib.parse import urlparse, urlsplit
|
||||||
|
|
||||||
from httplib.exceptions import InvalidStatusLine, InvalidResponse, BadRequest, InvalidRequestLine
|
from httplib.exceptions import InvalidStatusLine, InvalidResponse, BadRequest, InvalidRequestLine
|
||||||
from httplib.httpsocket import HTTPSocket
|
|
||||||
|
|
||||||
|
|
||||||
def _get_start_line(client: HTTPSocket):
    """
    Reads the start-line from the client and splits it into its fields.

    The stripped line is split on single spaces (at most twice) and empty fields are dropped.

    @param client: the socket to read the line from
    @raise InvalidStatusLine: if the line has fewer than three non-empty fields
    @return: tuple of the stripped line and the list of its three fields
    """
    start_line = client.read_line().strip()
    fields = [field for field in start_line.split(" ", 2) if field]

    if len(fields) >= 3:
        return start_line, fields

    # TODO fix exception
    raise InvalidStatusLine(start_line)
|
|
||||||
|
|
||||||
|
|
||||||
def _is_valid_http_version(http_version: str):
|
def _is_valid_http_version(http_version: str):
|
||||||
|
"""
|
||||||
|
Returns True if the specified HTTP-version is valid.
|
||||||
|
|
||||||
|
@param http_version: the string to be checked
|
||||||
|
@return: True if the specified HTTP-version is valid.
|
||||||
|
"""
|
||||||
if len(http_version) < 8 or http_version[4] != "/":
|
if len(http_version) < 8 or http_version[4] != "/":
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -28,26 +23,19 @@ def _is_valid_http_version(http_version: str):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def get_status_line(client: HTTPSocket):
    """
    Reads the status-line from the client and validates it.

    @param client: the socket to read the status-line from
    @raise InvalidStatusLine: if the line couldn't be split, if the HTTP-version is invalid or if the status code
        is not a three-digit number between 100 and 999
    @return: tuple of the version, the status code (as int) and the reason
    """
    line, (http_version, status, reason) = _get_start_line(client)

    if not _is_valid_http_version(http_version):
        raise InvalidStatusLine(line)
    # NOTE(review): this keeps the four characters in front of the "/", not the
    # version number behind it - confirm that this is what callers expect.
    version = http_version[:4]

    # fullmatch instead of match: a prefix-only match lets values such as "200abc"
    # through, after which int() raises ValueError instead of InvalidStatusLine.
    if not re.fullmatch(r"\d{3}", status):
        raise InvalidStatusLine(line)
    status = int(status)
    if status < 100 or status > 999:
        raise InvalidStatusLine(line)

    return version, status, reason
|
|
||||||
|
|
||||||
|
|
||||||
def parse_status_line(line: str):
|
def parse_status_line(line: str):
|
||||||
|
"""
|
||||||
|
Parses the specified line as an HTTP status-line.
|
||||||
|
|
||||||
|
@param line: the status-line to be parsed
|
||||||
|
@raise InvalidStatusLine: if the line couldn't be parsed, if the HTTP-version is invalid or if the status code
|
||||||
|
is invalid
|
||||||
|
@return: tuple of the HTTP-version, status and reason
|
||||||
|
"""
|
||||||
|
|
||||||
split = list(filter(None, line.strip().split(" ", 2)))
|
split = list(filter(None, line.strip().split(" ", 2)))
|
||||||
if len(split) < 3:
|
if len(split) < 3:
|
||||||
raise InvalidStatusLine(line) # TODO fix exception
|
raise InvalidStatusLine(line)
|
||||||
|
|
||||||
http_version, status, reason = split
|
http_version, status, reason = split
|
||||||
|
|
||||||
@@ -65,122 +53,35 @@ def parse_status_line(line: str):
|
|||||||
|
|
||||||
|
|
||||||
def parse_request_line(line: str):
    """
    Parses the specified line as an HTTP request-line.

    @param line: the request-line to be parsed
    @raise InvalidRequestLine: if the line couldn't be split into a method, target and HTTP-version
    @raise BadRequest: if the method, the HTTP-version or the target is invalid
    @return: tuple of the method, the target as returned by urlsplit and the part of the HTTP-version
        following the "/"
    """
    split = list(filter(None, line.rstrip().split(" ", 2)))
    if len(split) < 3:
        raise InvalidRequestLine(line)

    method, target, version = split
    if method not in ("CONNECT", "DELETE", "GET", "HEAD", "OPTIONS", "POST", "PUT", "TRACE"):
        raise BadRequest(f"Invalid method: {method}")

    if not _is_valid_http_version(version):
        logging.debug("[ABRT] request: invalid http-version=%r", version)
        raise BadRequest(f"Invalid HTTP-version: {version}")

    # Truthiness check: comparing len(target) with "" compares an int with a str and is never true.
    if not target:
        raise BadRequest("Invalid target")
    parsed_target = urlsplit(target)

    return method, parsed_target, version.split("/")[1]
|
||||||
|
|
||||||
|
|
||||||
def retrieve_headers(client: HTTPSocket):
    """
    Reads the header section from the client and returns it as a list of (name, value) tuples.

    Lines starting with whitespace directly after the start-line are ignored. Later lines starting with
    whitespace are treated as a continuation of the previous header and are joined to it with a single space.
    Lines without a colon, with an empty name or with nothing after the colon are skipped.
    Names and values are lowercased.

    @param client: the socket to read the header lines from
    @return: list of (name, value) tuples in the order they were received
    """
    end_of_headers = ("\r\n", "\n", " ")

    # The first header after the start-line may not start with whitespace.
    # The terminating blank line also starts with whitespace ("\r"), so it has to be
    # checked first; otherwise a message without headers is read past its header section.
    # An empty string (presumably end of stream) ends the section instead of raising IndexError.
    line = client.read_line()
    while line and line not in end_of_headers and line[0].isspace():
        line = client.read_line()

    raw_headers = []
    while line and line not in end_of_headers:
        if line[0].isspace():
            continuation = line.lstrip()
            # A whitespace-only line carries no content; folding it would strip the line ending
            # of the previous header and merge the next header into it.
            if continuation:
                raw_headers[-1] = raw_headers[-1].rstrip("\r\n") + " " + continuation
        else:
            raw_headers.append(line)
        line = client.read_line()

    result = []
    for header_line in "".join(raw_headers).splitlines():
        name, colon, value = header_line.partition(":")
        # skip lines without a colon, with an empty name or with nothing after the colon
        if not colon or not name or not value:
            continue
        result.append((name.lower(), value.strip().lower()))

    return result
|
|
||||||
|
|
||||||
|
|
||||||
def parse_request_headers(client: HTTPSocket):
    """
    Reads the request headers from the client and validates them.

    @param client: the socket to read the headers from; its host attribute is compared with the Host header
    @raise BadRequest: if a header name contains whitespace, if Content-Length is repeated or is not a
        positive decimal number, or if Host is repeated or doesn't match the host of the client
    @return: dict mapping the header names to their values
    """
    raw_headers = retrieve_headers(client)
    logging.debug("Received headers: %r", raw_headers)
    headers = {}

    for key, value in raw_headers:
        if any(c.isspace() for c in key):
            raise BadRequest()

        if key == "content-length":
            if key in headers:
                logging.error("Multiple content-length headers specified")
                raise BadRequest()
            # Only ASCII digits are accepted: str.isnumeric() also accepts characters such as
            # "²" or "½", for which int() raises ValueError.
            # NOTE(review): a value of 0 is rejected here - confirm that this is intended.
            if not re.fullmatch(r"[0-9]+", value) or int(value) <= 0:
                logging.error("Invalid content-length value: %r", value)
                raise BadRequest()
        elif key == "host":
            if key in headers:
                raise BadRequest()
            # The header values are lowercased by retrieve_headers, so the host of the
            # client has to be lowercased as well before comparing.
            expected = client.host.lower()
            if value not in (expected, expected.split(":")[0]):
                raise BadRequest()

        headers[key] = value

    return headers
|
|
||||||
|
|
||||||
|
|
||||||
def get_headers(client: HTTPSocket):
    """
    Reads the header section from the client and returns it as a dict.

    Lines starting with whitespace directly after the start-line are ignored. Later lines starting with
    whitespace are treated as a continuation of the previous header and are joined to it with a single space.
    Lines without a colon, with an empty name or with nothing after the colon are skipped.
    Every header is passed to check_next_header (defined elsewhere) before it is stored.

    @param client: the socket to read the header lines from
    @return: dict mapping the lowercased header names to their lowercased values
    """
    end_of_headers = ("\r\n", "\n", " ")

    # The first header after the status-line may not start with whitespace.
    # The terminating blank line also starts with whitespace ("\r"), so it has to be
    # checked first; otherwise a message without headers is read past its header section.
    # An empty string (presumably end of stream) ends the section instead of raising IndexError.
    line = client.read_line()
    while line and line not in end_of_headers and line[0].isspace():
        line = client.read_line()

    headers = []
    while line and line not in end_of_headers:
        if line[0].isspace():
            continuation = line.lstrip()
            # A whitespace-only line carries no content; folding it would strip the line ending
            # of the previous header and merge the next header into it.
            if continuation:
                headers[-1] = headers[-1].rstrip("\r\n") + " " + continuation
        else:
            headers.append(line)
        line = client.read_line()

    result = {}
    for header_line in "".join(headers).splitlines():
        name, colon, value = header_line.partition(":")
        # skip lines without a colon, with an empty name or with nothing after the colon
        if not colon or not name or not value:
            continue

        header, value = name.strip(), value.strip()
        check_next_header(result, header, value)
        result[header.lower()] = value.lower()

    return result
|
|
||||||
|
|
||||||
|
|
||||||
def parse_headers(lines):
|
def parse_headers(lines):
|
||||||
headers = []
|
headers = []
|
||||||
|
|
||||||
@@ -269,17 +170,8 @@ def get_uri(url: str):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def base_url(uri: str):
    """
    Returns the URL of the directory that contains the resource identified by the specified URI.

    Only the scheme, the hostname and the path are used; everything behind the last "/" of the path is dropped.

    @param uri: the URI to take the base of
    @return: the scheme, hostname and directory path, ending with a "/"
    """
    parts = urlsplit(uri)
    directory, *_ = parts.path.rsplit("/", maxsplit=1)
    return "{}://{}{}/".format(parts.scheme, parts.hostname, directory)
|
|
||||||
|
|
||||||
|
|
||||||
def absolute_url(uri: str, rel_path: str):
    """
    Resolves the specified path against the path of the specified URI.

    The path is joined to the path of the URI and the result is normalised. Only the scheme and the
    hostname of the URI are kept in front of it.

    @param uri: the URI to resolve against
    @param rel_path: the path to be resolved
    @return: the scheme and hostname of the URI followed by the normalised path
    """
    # URL paths always use "/" as separator. os.path follows the conventions of the
    # platform and would produce backslashes on Windows, so posixpath is used explicitly.
    import posixpath

    parsed = urlsplit(uri)
    path = posixpath.normpath(posixpath.join(parsed.path, rel_path))
    return f"{parsed.scheme}://{parsed.hostname}{path}"
|
|
||||||
|
|
||||||
|
|
||||||
def urljoin(base, url):
    """
    Joins a base URL and another URL to form an absolute URL.

    @param base: the base URL
    @param url: the URL to be resolved against the base
    @return: the result of urllib.parse.urljoin for the two arguments
    """
    joined = urllib.parse.urljoin(base, url)
    return joined
|
||||||
|
Reference in New Issue
Block a user