This commit is contained in:
2021-03-21 23:01:09 +01:00
parent 638576f471
commit d25d2ef993
14 changed files with 681 additions and 226 deletions

41
httplib/exceptions.py Normal file
View File

@@ -0,0 +1,41 @@
class HTTPException(Exception):
    """Base class for HTTP exceptions."""


class InvalidResponse(HTTPException):
    """Response message cannot be parsed.

    Attributes:
        message: Description of the problem; empty when none was given.
    """

    def __init__(self, message=""):
        # Optional so that a bare ``InvalidResponse()`` can still be raised.
        super().__init__(message)
        self.message = message


class InvalidStatusLine(HTTPException):
    """Response status line is invalid.

    Attributes:
        line: The offending start line as it was read.
    """

    def __init__(self, line=""):
        # Optional so that construction without arguments keeps working.
        super().__init__(line)
        self.line = line


class UnsupportedEncoding(HTTPException):
    """Response encoding is not supported.

    Attributes:
        enc_type: Name of the header that carried the encoding.
        encoding: The encoding value that is not supported.
    """

    def __init__(self, enc_type="", encoding=""):
        # Optional so that construction without arguments keeps working.
        super().__init__(enc_type, encoding)
        self.enc_type = enc_type
        self.encoding = encoding


class IncompleteResponse(HTTPException):
    """Response could not be received completely.

    Attributes:
        cause: Description of why the response is incomplete.
    """

    def __init__(self, cause=""):
        # Optional so that construction without arguments keeps working.
        super().__init__(cause)
        self.cause = cause
class HTTPServerException(Exception):
    """Base class for HTTP Server exceptions."""


class BadRequest(HTTPServerException):
    """Malformed HTTP request."""


class MethodNotAllowed(HTTPServerException):
    """Method is not allowed.

    Attributes:
        allowed_methods: The methods that are allowed instead.
    """

    def __init__(self, allowed_methods=()):
        # Optional so that construction without arguments keeps working.
        super().__init__(allowed_methods)
        self.allowed_methods = allowed_methods

82
httplib/httpsocket.py Normal file
View File

@@ -0,0 +1,82 @@
import logging
import socket
from io import BufferedReader
BUFSIZE = 4096  # default number of bytes requested per read/recv
TIMEOUT = 3  # socket timeout in seconds
FORMAT = "UTF-8"  # encoding used to decode text lines
MAXLINE = 4096  # longest line, in bytes, accepted by read_bytes_line


class HTTPSocket:
    """Connected socket paired with a buffered binary reader.

    ``receive`` reads straight from the socket, while ``read``,
    ``read_line`` and ``read_bytes_line`` go through the buffered file
    object created with ``socket.makefile``.
    """

    host: str  # host name associated with the connection
    conn: socket.socket  # the underlying connected socket
    file: BufferedReader  # buffered reader layered on top of ``conn``

    def __init__(self, conn: socket.socket, host: str):
        """Wrap *conn*, apply the module timeout and open the reader."""
        self.host = host
        self.conn = conn
        # One call is enough: a timeout keeps the socket in blocking mode
        # while bounding every operation to TIMEOUT seconds.
        self.conn.settimeout(TIMEOUT)
        self.file = self.conn.makefile("rb")

    def close(self):
        """Close the buffered reader and the underlying socket."""
        try:
            self.file.close()
        finally:
            # Release the socket even when closing the reader fails.
            self.conn.close()

    def reset_request(self):
        """Replace the buffered reader with a fresh one on the same socket."""
        self.file.close()
        self.file = self.conn.makefile("rb")

    def __do_receive(self):
        """Receive up to BUFSIZE bytes from the socket.

        Raises:
            ConnectionError: If the socket has already been closed.
        """
        if self.conn.fileno() == -1:
            raise ConnectionError("Connection closed")
        return self.conn.recv(BUFSIZE)

    def receive(self):
        """Receive data from the client up to BUFSIZE.

        The read bypasses the buffered reader and is attempted up to three
        times when the socket times out.

        Raises:
            TimeoutError: If every attempt timed out.
        """
        max_attempts = 3
        for attempt in range(1, max_attempts + 1):
            try:
                return self.__do_receive()
            except socket.timeout:
                logging.debug("Socket receive timed out after %s seconds", TIMEOUT)
                if attempt < max_attempts:
                    logging.debug("Retrying %s", attempt)

        logging.debug("Timed out after waiting %s seconds for response", TIMEOUT * max_attempts)
        raise TimeoutError("Request timed out")

    def read(self, size=BUFSIZE, blocking=True) -> bytes:
        """Read up to *size* bytes through the buffered reader.

        With *blocking* the reader's ``read`` is used; otherwise ``read1``
        is used.
        """
        if blocking:
            return self.file.read(size)
        return self.file.read1(size)

    def read_line(self):
        """Read one line and decode it using FORMAT."""
        return str(self.read_bytes_line(), FORMAT)

    def read_bytes_line(self) -> bytes:
        """Read one line, including its terminator, as bytes.

        Raises:
            InvalidResponse: If the line is longer than MAXLINE bytes.
        """
        # Ask for one byte more than allowed so an overlong line is detectable.
        line = self.file.readline(MAXLINE + 1)
        if len(line) > MAXLINE:
            raise InvalidResponse("Line too long")
        return line
# NOTE(review): classes with these names are also defined in
# httplib/exceptions.py. The definitions here are separate types, so an
# ``except`` clause written against the other module's classes will not match
# exceptions raised from this module - consider importing them instead.
class HTTPException(Exception):
    """Base class for HTTP exceptions."""


class InvalidResponse(HTTPException):
    """Response message cannot be parsed.

    Attributes:
        message: Description of the problem; empty when none was given.
    """

    def __init__(self, message=""):
        # Optional so that a bare ``InvalidResponse()`` can still be raised.
        super().__init__(message)
        self.message = message

160
httplib/parser.py Normal file
View File

@@ -0,0 +1,160 @@
import logging
import re
from urllib.parse import urlparse
from httplib.exceptions import InvalidStatusLine, InvalidResponse, BadRequest
from httplib.httpsocket import HTTPSocket
def _get_start_line(client: "HTTPSocket"):
    """Read one line from *client* and split it into three start-line fields.

    Fields are separated by one or more spaces. Everything after the second
    field is returned as a single third field, so a reason phrase containing
    spaces stays intact. The third field keeps the line terminator of the
    line it was read from.

    Returns:
        A ``(line, fields)`` tuple: the line exactly as read and a list of
        three strings.

    Raises:
        InvalidStatusLine: If the line has fewer than three fields.
    """
    line = client.read_line()
    tokens = [token for token in line.split(" ") if token]
    if len(tokens) < 3:
        raise InvalidStatusLine(line)  # TODO fix exception

    # Callers unpack exactly three values; fold any extra tokens (e.g. the
    # words of "Not Found") back into the last field.
    first, second, *rest = tokens
    return line, [first, second, " ".join(rest)]
def _is_valid_http_version(http_version: str):
    """Return True if *http_version* names HTTP/1.0 or HTTP/1.1.

    Trailing whitespace is ignored, so a value that still carries the line
    terminator of the line it was taken from is accepted.
    """
    name, slash, version = http_version.partition("/")
    if name != "HTTP" or not slash:
        return False
    # fullmatch rejects trailing junk such as "1.15"; the class [01] does not
    # accept a literal "|".
    return re.fullmatch(r"1\.[01]", version.rstrip()) is not None
def get_status_line(client: HTTPSocket):
    """Read and validate the status line of a response.

    Returns:
        A ``(version, status, reason)`` tuple: the text after ``HTTP/``, the
        status code as an int, and the reason phrase without its line
        terminator.

    Raises:
        InvalidStatusLine: If the line has too few fields, the HTTP version
            is not accepted, or the status code is not a three-digit number
            of at least 100.
    """
    line, (http_version, status, reason) = _get_start_line(client)

    if not _is_valid_http_version(http_version):
        raise InvalidStatusLine(line)
    # Everything after the "HTTP/" prefix, e.g. "1.1".
    version = http_version[5:]

    # fullmatch so that values such as "200abc" are rejected here instead of
    # failing inside int().
    if not re.fullmatch(r"\d{3}", status):
        raise InvalidStatusLine(line)
    status = int(status)
    # Three digits cap the value at 999, so only the lower bound needs a check.
    if status < 100:
        raise InvalidStatusLine(line)

    # The last field of the line still ends with the line terminator.
    return version, status, reason.rstrip("\r\n")
def parse_request_line(client: HTTPSocket):
    """Read and validate the request line of a request.

    Returns:
        A ``(method, target, version)`` tuple: the method token, the request
        target parsed with ``urlparse``, and the HTTP version string without
        its line terminator.

    Raises:
        BadRequest: If the method is unknown, the HTTP version is not
            accepted, or the target is empty.
        InvalidStatusLine: If the line has fewer than three fields (raised
            by the start-line helper).
    """
    _, (method, target, version) = _get_start_line(client)
    # The last field of the line still ends with the line terminator.
    version = version.rstrip("\r\n")

    if method not in ("CONNECT", "DELETE", "GET", "HEAD", "OPTIONS", "POST", "PUT", "TRACE"):
        raise BadRequest()

    if not _is_valid_http_version(version):
        raise BadRequest()

    if not target:
        raise BadRequest()

    parsed_target = urlparse(target)
    return method, parsed_target, version
def retrieve_headers(client: "HTTPSocket"):
    """Read the header section from *client*.

    Reading stops at the blank line that ends the header section, or when
    *client* returns an empty string (end of stream). Lines that start with
    whitespace are joined onto the preceding header with a single space.
    Lines without a colon, with an empty name, or with nothing after the
    colon are ignored.

    Returns:
        A list of ``(name, value)`` tuples in the order received. Both parts
        are lower-cased; surrounding whitespace is removed from the value
        only, so whitespace between a name and its colon stays visible to
        the caller.
    """
    end_of_headers = ("", "\r\n", "\n", " ")

    # The first header after the start line may not begin with whitespace:
    # skip such lines, but never read past the end of the header section.
    line = client.read_line()
    while line not in end_of_headers and line[0].isspace():
        line = client.read_line()

    raw_headers = []
    while line not in end_of_headers:
        if line[0].isspace():
            # Continuation line: drop the previous line break and separate
            # the two parts with a single space.
            raw_headers[-1] = raw_headers[-1].rstrip("\r\n") + " "
        raw_headers.append(line.lstrip())
        line = client.read_line()

    result = []
    for header_line in "".join(raw_headers).splitlines():
        pos = header_line.find(":")
        if pos <= 0 or pos >= len(header_line) - 1:
            continue
        header, value = header_line.split(":", 1)
        result.append((header.lower(), value.strip().lower()))

    return result
def parse_request_headers(client: HTTPSocket):
    """Read the request headers from *client* and validate them.

    Returns:
        A dict mapping each header name to its value; when a header other
        than ``content-length`` or ``host`` repeats, the last value wins.

    Raises:
        BadRequest: If a header name contains whitespace, ``content-length``
            is repeated or is not a non-negative decimal number, or ``host``
            is repeated or differs from ``client.host``.
    """
    headers = {}

    for key, value in retrieve_headers(client):
        # Whitespace around the value is not part of it; without this the
        # numeric and host comparisons below fail on "Name: value".
        value = value.strip()

        if any(c.isspace() for c in key):
            raise BadRequest()

        if key == "content-length":
            if key in headers:
                logging.error("Multiple content-length headers specified")
                raise BadRequest()
            # ASCII digits only: str.isnumeric() also accepts characters
            # that int() cannot convert. Zero is a valid length.
            if not re.fullmatch(r"[0-9]+", value):
                logging.error("Invalid content-length value: %r", value)
                raise BadRequest()
        elif key == "host":
            if value != client.host or key in headers:
                raise BadRequest()

        headers[key] = value

    return headers
def get_headers(client: HTTPSocket):
    """Read the header section from *client* into a dict.

    Reading stops at the blank line that ends the header section, or when
    *client* returns an empty string (end of stream). Lines that start with
    whitespace are joined onto the preceding header with a single space.
    Lines without a colon, with an empty name, or with nothing after the
    colon are ignored.

    Returns:
        A dict mapping lower-cased header names to lower-cased, stripped
        values; when a header repeats, the last value wins.

    Raises:
        InvalidResponse: If ``check_next_header`` rejects a header.
    """
    end_of_headers = ("", "\r\n", "\n", " ")

    # The first header after the status line may not begin with whitespace:
    # skip such lines, but never read past the end of the header section.
    line = client.read_line()
    while line not in end_of_headers and line[0].isspace():
        line = client.read_line()

    headers = []
    while line not in end_of_headers:
        if line[0].isspace():
            # Continuation line: drop the previous line break and separate
            # the two parts with a single space.
            headers[-1] = headers[-1].rstrip("\r\n") + " "
        headers.append(line.lstrip())
        line = client.read_line()

    result = {}
    for header_line in "".join(headers).splitlines():
        pos = header_line.find(":")
        if pos <= 0 or pos >= len(header_line) - 1:
            continue
        header, value = map(str.strip, header_line.split(":", 1))
        # Normalise the name before validating so that "Content-Length" is
        # recognised in whatever case it was sent.
        header = header.lower()
        check_next_header(result, header, value)
        result[header] = value.lower()

    return result
def check_next_header(headers, next_header: str, next_value: str):
    """Validate a header that is about to be added to *headers*.

    Only ``content-length`` (compared case-insensitively) is checked; every
    other header is accepted.

    Args:
        headers: Headers collected so far, keyed by lower-cased name.
        next_header: Name of the header about to be added.
        next_value: Value of the header about to be added.

    Raises:
        InvalidResponse: If ``content-length`` is already present in
            *headers* or its value is not a non-negative decimal number.
    """
    if next_header.lower() != "content-length":
        return

    if "content-length" in headers:
        logging.error("Multiple content-length headers specified")
        raise InvalidResponse()

    # ASCII digits only: str.isnumeric() also accepts characters that int()
    # cannot convert. Zero is a valid length.
    if not re.fullmatch(r"[0-9]+", next_value):
        logging.error("Invalid content-length value: %r", next_value)
        raise InvalidResponse()

122
httplib/retriever.py Normal file
View File

@@ -0,0 +1,122 @@
import logging
from abc import ABC, abstractmethod
from typing import Dict
from httplib.exceptions import IncompleteResponse, InvalidResponse, UnsupportedEncoding
from httplib.httpsocket import HTTPSocket, BUFSIZE
class Retriever(ABC):
    """Abstract source of a message body read from an ``HTTPSocket``.

    Use ``create`` to obtain the concrete retriever matching a set of
    message headers.
    """

    client: HTTPSocket

    def __init__(self, client: HTTPSocket):
        self.client = client

    @abstractmethod
    def retrieve(self):
        """Produce the message body; implemented by subclasses."""
        pass

    @staticmethod
    def create(client: HTTPSocket, headers: Dict[str, str]):
        """Pick the retriever that matches *headers*.

        Raises:
            UnsupportedEncoding: If a transfer-encoding other than
                ``chunked`` or any content-encoding is specified.
        """
        # Of the transfer-encodings, only "chunked" is handled.
        transfer_encoding = headers.get("transfer-encoding")
        if transfer_encoding and transfer_encoding != "chunked":
            raise UnsupportedEncoding("transfer-encoding", transfer_encoding)

        # No content-encoding is handled at all.
        content_encoding = headers.get("content-encoding")
        if content_encoding:
            raise UnsupportedEncoding("content-encoding", content_encoding)

        if transfer_encoding:
            return ChunkedRetriever(client)

        content_length = headers.get("content-length")
        if content_length:
            return ContentLengthRetriever(client, int(content_length))

        logging.warning("Transfer-encoding and content-length not specified, trying without")
        return RawRetriever(client)
class ContentLengthRetriever(Retriever):
    """Retriever for a body whose size is known in advance."""

    length: int  # number of payload bytes to read

    def __init__(self, client: HTTPSocket, length: int):
        super().__init__(client)
        self.length = length

    def retrieve(self):
        """Yield the payload in pieces of at most BUFSIZE bytes.

        Stops early, after logging a warning, when the client returns no
        more data before ``length`` bytes were read.

        Raises:
            IncompleteResponse: If a read times out or the connection fails
                before the payload is complete; the client is closed first.
        """
        # Before Python 3.10 ``socket.timeout`` is not a ``TimeoutError``,
        # so it has to be named explicitly.
        import socket

        cur_payload_size = 0

        while cur_payload_size < self.length:
            # Never request more than one buffer, nor more than is left.
            read_size = min(BUFSIZE, self.length - cur_payload_size)

            try:
                buffer = self.client.read(read_size)
            except (TimeoutError, socket.timeout) as err:
                logging.error("Timed out before receiving complete payload")
                self.client.close()
                raise IncompleteResponse("Timed out before receiving complete payload") from err
            except ConnectionError as err:
                logging.error("Connection closed before receiving complete payload")
                self.client.close()
                raise IncompleteResponse("Connection closed before receiving complete payload") from err

            logging.debug("Received payload length: %s", len(buffer))

            if len(buffer) == 0:
                logging.warning("Received payload length %s less than expected %s", cur_payload_size, self.length)
                break

            cur_payload_size += len(buffer)
            logging.debug("Processed payload: %r", cur_payload_size)
            yield buffer

        return b""
class RawRetriever(Retriever):
    """Retriever for a body of unknown size."""

    def retrieve(self):
        """Yield data from the client until it runs dry.

        Iteration ends when a read returns no data, times out, or fails
        with a connection error.
        """
        # Before Python 3.10 ``socket.timeout`` is not a ``TimeoutError``,
        # so it has to be named explicitly.
        import socket

        while True:
            try:
                buffer = self.client.read()
            except (TimeoutError, socket.timeout, ConnectionError):
                # ``except A or B`` would only ever catch A; a tuple is needed.
                break
            if not buffer:
                # An empty read means there is nothing left; without this
                # check the generator would yield b"" forever.
                break
            yield buffer

        return b""
class ChunkedRetriever(Retriever):
    """Retriever for a body sent as a sequence of size-prefixed chunks."""

    def retrieve(self):
        """Yield the data of each chunk until the zero-size chunk.

        When the zero-size chunk is reached the client's reader is reset.

        Raises:
            InvalidResponse: If a chunk-size line is not a valid
                non-negative hexadecimal number.
        """
        while True:
            chunk_size = self.__get_chunk_size()
            logging.debug("chunk-size: %s", chunk_size)
            if chunk_size == 0:
                self.client.reset_request()
                break

            buffer = self.client.read(chunk_size)
            logging.debug("chunk: %r", buffer)
            yield buffer

            self.client.read_line()  # remove CRLF

        return b""

    def __get_chunk_size(self):
        """Read a chunk-size line and return the size as an int.

        Anything from the first ``;`` onwards is ignored.

        Raises:
            InvalidResponse: If the size is not hexadecimal or is negative.
        """
        line = self.client.read_line()
        size_field = line.partition(";")[0]

        try:
            chunk_size = int(size_field, 16)
        except ValueError as err:
            raise InvalidResponse() from err

        # int() accepts a leading minus sign; a negative size would make the
        # following read() consume everything up to end of stream.
        if chunk_size < 0:
            raise InvalidResponse()
        return chunk_size