Improve documentation

This commit is contained in:
2021-03-28 18:54:52 +02:00
parent c748387b48
commit b7315c2348
11 changed files with 79 additions and 38 deletions

View File

@@ -1,6 +1,6 @@
import logging import logging
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Dict, Tuple from typing import Dict
from urllib.parse import urlparse from urllib.parse import urlparse
from client.httpclient import HTTPClient from client.httpclient import HTTPClient

View File

@@ -4,12 +4,23 @@ from httplib.httpsocket import HTTPSocket, InvalidResponse
class HTTPClient(HTTPSocket): class HTTPClient(HTTPSocket):
"""
Wrapper class for a socket. Represents a client which connects to a server.
"""
host: str host: str
def __init__(self, host: str): def __init__(self, host: str):
super().__init__(socket.socket(socket.AF_INET, socket.SOCK_STREAM), host) super().__init__(socket.socket(socket.AF_INET, socket.SOCK_STREAM))
self.host = host
def read_line(self): def read_line(self):
"""
Reads the next line decoded as `httpsocket.FORMAT`
@return: the decoded next line retrieved from the socket
@raise InvalidResponse: If the next line couldn't be decoded, but was expected to
"""
try: try:
return super().read_line() return super().read_line()
except UnicodeDecodeError: except UnicodeDecodeError:

View File

@@ -22,7 +22,7 @@ def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory
@param client: the client which sent the request. @param client: the client which sent the request.
@param msg: the response message @param msg: the response message
@param command: the command of the sent request message @param command: the command of the sent request-message
@param directory: the directory to download the response to (if available) @param directory: the directory to download the response to (if available)
""" """
handler = BasicResponseHandler(client, msg, command) handler = BasicResponseHandler(client, msg, command)
@@ -81,7 +81,7 @@ class BasicResponseHandler(ResponseHandler):
for line in self.retriever.retrieve(): for line in self.retriever.retrieve():
try: try:
logging.debug("%s", line.decode(FORMAT)) logging.debug("%s", line.decode(FORMAT))
except Exception: except UnicodeDecodeError:
logging.debug("%r", line) logging.debug("%r", line)
logging.debug("] done.") logging.debug("] done.")
@@ -223,7 +223,7 @@ class HTMLDownloadHandler(DownloadHandler):
def _download_images(self, tmp_path, target_path, charset=FORMAT): def _download_images(self, tmp_path, target_path, charset=FORMAT):
""" """
Downloads images referenced in the html of `tmp_filename` and replaces the references in the html Download images referenced in the html of `tmp_filename` and replaces the references in the html
and writes it to `target_filename`. and writes it to `target_filename`.
@param tmp_path: the path to the temporary html file @param tmp_path: the path to the temporary html file
@param target_path: the path for the final html file @param target_path: the path for the final html file
@@ -247,7 +247,7 @@ class HTMLDownloadHandler(DownloadHandler):
processed = {} processed = {}
to_replace = [] to_replace = []
# Find all <img> tags and the urls from the corresponding `src` fields # Find all <img> tags, and the urls from the corresponding `src` fields
for m in IMG_REGEX.finditer(html): for m in IMG_REGEX.finditer(html):
url_start = m.start(1) url_start = m.start(1)
url_end = m.end(1) url_end = m.end(1)
@@ -272,7 +272,7 @@ class HTMLDownloadHandler(DownloadHandler):
logging.error("Failed to download image: %s, skipping...", target, exc_info=e) logging.error("Failed to download image: %s, skipping...", target, exc_info=e)
# reverse the list so urls at the bottom of the html file are processed first. # reverse the list so urls at the bottom of the html file are processed first.
# Otherwise our start and end positions won't be correct. # Otherwise, our start and end positions won't be correct.
to_replace.reverse() to_replace.reverse()
for (start, end, path) in to_replace: for (start, end, path) in to_replace:
html = html[:start] + path + html[end:] html = html[:start] + path + html[end:]

View File

@@ -1,10 +1,7 @@
import logging
import socket import socket
from io import BufferedReader from io import BufferedReader
from typing import Tuple from typing import Tuple
from httplib.exceptions import BadRequest
BUFSIZE = 4096 BUFSIZE = 4096
TIMEOUT = 3 TIMEOUT = 3
FORMAT = "UTF-8" FORMAT = "UTF-8"
@@ -12,13 +9,20 @@ MAXLINE = 4096
class HTTPSocket: class HTTPSocket:
host: str """
Wrapper class for a socket. Represents an HTTP connection.
This class adds helper methods to read the underlying socket as a file.
"""
conn: socket.socket conn: socket.socket
file: Tuple[BufferedReader, None] file: BufferedReader
def __init__(self, conn: socket.socket, host: str): def __init__(self, conn: socket.socket):
"""
Initialize an HTTPSocket with the given socket.
@param conn: the socket object
"""
self.host = host
self.conn = conn self.conn = conn
self.conn.settimeout(TIMEOUT) self.conn.settimeout(TIMEOUT)
self.conn.setblocking(True) self.conn.setblocking(True)

View File

@@ -108,7 +108,7 @@ def parse_headers(lines):
break break
while True: while True:
if line in ("\r\n", "\n", ""): if line in ("\r\n", "\r", "\n", ""):
break break
if line[0].isspace(): if line[0].isspace():
@@ -189,14 +189,14 @@ def get_uri(url: str):
def urljoin(base, url): def urljoin(base, url):
""" """
Join a base url and a URL to form an absolute url. Join a base url, and a URL to form an absolute url.
""" """
return urllib.parse.urljoin(base, url) return urllib.parse.urljoin(base, url)
def get_charset(headers: Dict[str, str]): def get_charset(headers: Dict[str, str]):
""" """
Returns the charset of the content from the headers if found. Otherwise returns `FORMAT` Returns the charset of the content from the headers if found. Otherwise, returns `FORMAT`
@param headers: the headers to retrieve the charset from @param headers: the headers to retrieve the charset from
@return: A charset @return: A charset

View File

@@ -62,13 +62,20 @@ class PreambleRetriever(Retriever):
""" """
Retriever instance for retrieving the start-line and headers of an HTTP message. Retriever instance for retrieving the start-line and headers of an HTTP message.
""" """
client: HTTPSocket client: HTTPSocket
_buffer: [] _buffer: []
@property @property
def buffer(self): def buffer(self):
"""
Returns a copy of the internal buffer.
Clears the internal buffer afterwards.
@return: A list of the buffered lines.
"""
tmp_buffer = self._buffer tmp_buffer = self._buffer
self._buffer = [] self._buffer.clear()
return tmp_buffer return tmp_buffer
@@ -87,7 +94,7 @@ class PreambleRetriever(Retriever):
while True: while True:
self._buffer.append(line) self._buffer.append(line)
if line in ("\r\n", "\n", ""): if line in ("\r\n", "\r", "\n", ""):
return line return line
yield line yield line
@@ -140,8 +147,8 @@ class ContentLengthRetriever(Retriever):
class RawRetriever(Retriever): class RawRetriever(Retriever):
""" """
Retriever instance for retrieve a message body without any length specifier or encoding. Retriever instance for retrieving a message body without any length specifier or encoding.
This retriever will keep waiting until a timeout occurs or the connection is disconnected. This retriever will keep waiting until a timeout occurs, or the connection is disconnected.
""" """
def retrieve(self): def retrieve(self):
@@ -161,6 +168,7 @@ class ChunkedRetriever(Retriever):
""" """
Returns an iterator of the received message bytes. Returns an iterator of the received message bytes.
The size of each iteration is not necessarily constant. The size of each iteration is not necessarily constant.
@raise IncompleteResponse: if the connection is closed or timed out before receiving the complete payload. @raise IncompleteResponse: if the connection is closed or timed out before receiving the complete payload.
@raise InvalidResponse: if the length of a chunk could not be determined. @raise InvalidResponse: if the length of a chunk could not be determined.
""" """
@@ -184,6 +192,12 @@ class ChunkedRetriever(Retriever):
raise IncompleteResponse("Connection closed before receiving the complete payload!") raise IncompleteResponse("Connection closed before receiving the complete payload!")
def __get_chunk_size(self): def __get_chunk_size(self):
"""
Returns the next chunk size.
@return: The chunk size in bytes
@raise InvalidResponse: If an error occurred when parsing the chunk size.
"""
line = self.client.read_line() line = self.client.read_line()
sep_pos = line.find(";") sep_pos = line.find(";")
if sep_pos >= 0: if sep_pos >= 0:
@@ -192,4 +206,4 @@ class ChunkedRetriever(Retriever):
try: try:
return int(line, 16) return int(line, 16)
except ValueError: except ValueError:
raise InvalidResponse() raise InvalidResponse("Failed to parse chunk size")

View File

@@ -148,7 +148,7 @@ class AbstractCommand(ABC):
@return: True if the header is invalid, and thus shouldn't be taken into account, throws NotModified @return: True if the header is invalid, and thus shouldn't be taken into account, throws NotModified
if the content isn't modified since the given date. if the content isn't modified since the given date.
@raise NotModified: If the date of if-modified-since is greater than the modify date of the resource. @raise NotModified: If the date of if-modified-since is greater than the modify-date of the resource.
""" """
date_val = self.msg.headers.get("if-modified-since") date_val = self.msg.headers.get("if-modified-since")
if not date_val: if not date_val:
@@ -164,7 +164,8 @@ class AbstractCommand(ABC):
return True return True
def get_mimetype(self, path): @staticmethod
def get_mimetype(path):
""" """
Guess the type of file. Guess the type of file.
@param path: the path to the file to guess the type of @param path: the path to the file to guess the type of
@@ -243,8 +244,8 @@ class HeadCommand(AbstractCommand):
def execute(self): def execute(self):
path = self._get_path() path = self._get_path()
mime = self.get_mimetype(path) mime = self.get_mimetype(path)
return self._build_message(200, mime, b"") return self._build_message(200, mime, b"")
@@ -301,6 +302,6 @@ class PutCommand(AbstractModifyCommand):
def execute(self): def execute(self):
if "content-range" in self.msg.headers: if "content-range" in self.msg.headers:
raise BadRequest("PUT request contains Content-Range header") raise BadRequest("PUT request contains a Content-Range header")
super().execute() super().execute()

View File

@@ -111,7 +111,7 @@ class HTTPServer:
""" """
Create worker processes up to `self.worker_count`. Create worker processes up to `self.worker_count`.
A worker process is created with start method "spawn", target `worker.worker` and the `self.logging_level` A worker process is created with start method "spawn", target `worker.worker`, and the `self.logging_level`
is passed along with the `self.dispatch_queue` and `self._stop_event` is passed along with the `self.dispatch_queue` and `self._stop_event`
""" """
for i in range(self.worker_count): for i in range(self.worker_count):

View File

@@ -20,13 +20,15 @@ class RequestHandler:
A RequestHandler instance processes incoming HTTP request messages from a single client. A RequestHandler instance processes incoming HTTP request messages from a single client.
RequestHandler instances are created every time a client connects. They will read the incoming RequestHandler instances are created every time a client connects. They will read the incoming
messages, parse, verify them and send a respond. messages, parse, verify them and send a response.
""" """
conn: ServerSocket conn: ServerSocket
host: str
def __init__(self, conn: socket, host): def __init__(self, conn: socket, host):
self.conn = ServerSocket(conn, host) self.conn = ServerSocket(conn)
self.host = host
def listen(self): def listen(self):
""" """
@@ -111,7 +113,7 @@ class RequestHandler:
# Only http is supported... # Only http is supported...
raise BadRequest(f"scheme={target.scheme}") raise BadRequest(f"scheme={target.scheme}")
if target.netloc != "" and target.netloc != self.conn.host and target.netloc != self.conn.host.split(":")[0]: if target.netloc != "" and target.netloc != self.host and target.netloc != self.host.split(":")[0]:
raise NotFound(str(target)) raise NotFound(str(target))
if target.path == "" or target.path[0] != "/": if target.path == "" or target.path[0] != "/":
@@ -123,7 +125,7 @@ class RequestHandler:
@see: _check_request_line for exceptions raised when validating the request-line. @see: _check_request_line for exceptions raised when validating the request-line.
@param msg: the message to validate @param msg: the message to validate
@raise BadRequest: if HTTP 1.1 and the Host header is missing @raise BadRequest: if HTTP 1.1, and the Host header is missing
""" """
if msg.version == "1.1" and "host" not in msg.headers: if msg.version == "1.1" and "host" not in msg.headers:

View File

@@ -1,11 +1,18 @@
import socket
from httplib.exceptions import BadRequest from httplib.exceptions import BadRequest
from httplib.httpsocket import HTTPSocket from httplib.httpsocket import HTTPSocket
class ServerSocket(HTTPSocket): class ServerSocket(HTTPSocket):
"""
Wrapper class for a socket. Represents a client connected to this server.
"""
"""
Reads the next line decoded as `httpsocket.FORMAT`
@return: the decoded next line retrieved from the socket
@raise InvalidResponse: If the next line couldn't be decoded, but was expected to
"""
def read_line(self): def read_line(self):
try: try:
return super().read_line() return super().read_line()

View File

@@ -70,9 +70,9 @@ class Worker:
""" """
while not self.stop_event.is_set(): while not self.stop_event.is_set():
# Blocks until thread is free # Blocks until the thread is free
self.finished_queue.get() self.finished_queue.get()
# Blocks until new client connects # Blocks until a new client connects
conn, addr = self.queue.get() conn, addr = self.queue.get()
if conn is None or addr is None: if conn is None or addr is None:
@@ -80,7 +80,7 @@ class Worker:
logging.debug("Processing new client: %s", addr) logging.debug("Processing new client: %s", addr)
# submit client to thread # submit the client to the executor
self.executor.submit(self._handle_client, conn, addr) self.executor.submit(self._handle_client, conn, addr)
self.shutdown() self.shutdown()
@@ -145,8 +145,10 @@ class Worker:
self.executor.shutdown(False) self.executor.shutdown(False)
logging.info("Closing sockets") logging.info("Closing sockets")
# Copy dictionary to prevent issues with concurrency # Copy dictionary to prevent issues with concurrency
clients = self.dispatched_sockets.copy().values() clients = self.dispatched_sockets.copy().values()
for client in clients: for client in clients:
client: socket.socket client: socket.socket
try: try: