Improve documentation

This commit is contained in:
2021-03-28 18:54:52 +02:00
parent c748387b48
commit b7315c2348
11 changed files with 79 additions and 38 deletions

View File

@@ -1,6 +1,6 @@
import logging
from abc import ABC, abstractmethod
from typing import Dict, Tuple
from typing import Dict
from urllib.parse import urlparse
from client.httpclient import HTTPClient

View File

@@ -4,12 +4,23 @@ from httplib.httpsocket import HTTPSocket, InvalidResponse
class HTTPClient(HTTPSocket):
"""
Wrapper class for a socket. Represents a client which connects to a server.
"""
host: str
def __init__(self, host: str):
super().__init__(socket.socket(socket.AF_INET, socket.SOCK_STREAM), host)
super().__init__(socket.socket(socket.AF_INET, socket.SOCK_STREAM))
self.host = host
def read_line(self):
"""
Reads the next line decoded as `httpsocket.FORMAT`
@return: the decoded next line retrieved from the socket
@raise InvalidResponse: If the next line couldn't be decoded, but was expected to
"""
try:
return super().read_line()
except UnicodeDecodeError:

View File

@@ -22,7 +22,7 @@ def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory
@param client: the client which sent the request.
@param msg: the response message
@param command: the command of the sent request message
@param command: the command of the sent request-message
@param directory: the directory to download the response to (if available)
"""
handler = BasicResponseHandler(client, msg, command)
@@ -81,7 +81,7 @@ class BasicResponseHandler(ResponseHandler):
for line in self.retriever.retrieve():
try:
logging.debug("%s", line.decode(FORMAT))
except Exception:
except UnicodeDecodeError:
logging.debug("%r", line)
logging.debug("] done.")
@@ -223,7 +223,7 @@ class HTMLDownloadHandler(DownloadHandler):
def _download_images(self, tmp_path, target_path, charset=FORMAT):
"""
Downloads images referenced in the html of `tmp_filename` and replaces the references in the html
Download images referenced in the html of `tmp_filename` and replaces the references in the html
and writes it to `target_filename`.
@param tmp_path: the path to the temporary html file
@param target_path: the path for the final html file
@@ -247,7 +247,7 @@ class HTMLDownloadHandler(DownloadHandler):
processed = {}
to_replace = []
# Find all <img> tags and the urls from the corresponding `src` fields
# Find all <img> tags, and the urls from the corresponding `src` fields
for m in IMG_REGEX.finditer(html):
url_start = m.start(1)
url_end = m.end(1)
@@ -272,7 +272,7 @@ class HTMLDownloadHandler(DownloadHandler):
logging.error("Failed to download image: %s, skipping...", target, exc_info=e)
# reverse the list so urls at the bottom of the html file are processed first.
# Otherwise our start and end positions won't be correct.
# Otherwise, our start and end positions won't be correct.
to_replace.reverse()
for (start, end, path) in to_replace:
html = html[:start] + path + html[end:]

View File

@@ -1,10 +1,7 @@
import logging
import socket
from io import BufferedReader
from typing import Tuple
from httplib.exceptions import BadRequest
BUFSIZE = 4096
TIMEOUT = 3
FORMAT = "UTF-8"
@@ -12,13 +9,20 @@ MAXLINE = 4096
class HTTPSocket:
host: str
"""
Wrapper class for a socket. Represents an HTTP connection.
This class adds helper methods to read the underlying socket as a file.
"""
conn: socket.socket
file: Tuple[BufferedReader, None]
file: BufferedReader
def __init__(self, conn: socket.socket, host: str):
def __init__(self, conn: socket.socket):
"""
Initialize an HTTPSocket with the given socket and host.
@param conn: the socket object
"""
self.host = host
self.conn = conn
self.conn.settimeout(TIMEOUT)
self.conn.setblocking(True)

View File

@@ -108,7 +108,7 @@ def parse_headers(lines):
break
while True:
if line in ("\r\n", "\n", ""):
if line in ("\r\n", "\r", "\n", ""):
break
if line[0].isspace():
@@ -189,14 +189,14 @@ def get_uri(url: str):
def urljoin(base, url):
"""
Join a base url and a URL to form an absolute url.
Join a base url, and a URL to form an absolute url.
"""
return urllib.parse.urljoin(base, url)
def get_charset(headers: Dict[str, str]):
"""
Returns the charset of the content from the headers if found. Otherwise returns `FORMAT`
Returns the charset of the content from the headers if found. Otherwise, returns `FORMAT`
@param headers: the headers to retrieve the charset from
@return: A charset

View File

@@ -62,13 +62,20 @@ class PreambleRetriever(Retriever):
"""
Retriever instance for retrieving the start-line and headers of an HTTP message.
"""
client: HTTPSocket
_buffer: []
@property
def buffer(self):
"""
Returns a copy of the internal buffer.
Clears the internal buffer afterwards.
@return: A list of the buffered lines.
"""
tmp_buffer = self._buffer
self._buffer = []
self._buffer.clear()
return tmp_buffer
@@ -87,7 +94,7 @@ class PreambleRetriever(Retriever):
while True:
self._buffer.append(line)
if line in ("\r\n", "\n", ""):
if line in ("\r\n", "\r", "\n", ""):
return line
yield line
@@ -140,8 +147,8 @@ class ContentLengthRetriever(Retriever):
class RawRetriever(Retriever):
"""
Retriever instance for retrieve a message body without any length specifier or encoding.
This retriever will keep waiting until a timeout occurs or the connection is disconnected.
Retriever instance for retrieving a message body without any length specifier or encoding.
This retriever will keep waiting until a timeout occurs, or the connection is disconnected.
"""
def retrieve(self):
@@ -161,6 +168,7 @@ class ChunkedRetriever(Retriever):
"""
Returns an iterator of the received message bytes.
The size of each iteration is not necessarily constant.
@raise IncompleteResponse: if the connection is closed or timed out before receiving the complete payload.
@raise InvalidResponse: if the length of a chunk could not be determined.
"""
@@ -184,6 +192,12 @@ class ChunkedRetriever(Retriever):
raise IncompleteResponse("Connection closed before receiving the complete payload!")
def __get_chunk_size(self):
"""
Returns the next chunk size.
@return: The chunk size in bytes
@raise InvalidResponse: If an error occured when parsing the chunk size.
"""
line = self.client.read_line()
sep_pos = line.find(";")
if sep_pos >= 0:
@@ -192,4 +206,4 @@ class ChunkedRetriever(Retriever):
try:
return int(line, 16)
except ValueError:
raise InvalidResponse()
raise InvalidResponse("Failed to parse chunk size")

View File

@@ -148,7 +148,7 @@ class AbstractCommand(ABC):
@return: True if the header is invalid, and thus shouldn't be taken into account, throws NotModified
if the content isn't modified since the given date.
@raise NotModified: If the date of if-modified-since greater than the modify date of the resource.
@raise NotModified: If the date of if-modified-since greater than the modify-date of the resource.
"""
date_val = self.msg.headers.get("if-modified-since")
if not date_val:
@@ -164,7 +164,8 @@ class AbstractCommand(ABC):
return True
def get_mimetype(self, path):
@staticmethod
def get_mimetype(path):
"""
Guess the type of file.
@param path: the path to the file to guess the type of
@@ -243,8 +244,8 @@ class HeadCommand(AbstractCommand):
def execute(self):
path = self._get_path()
mime = self.get_mimetype(path)
return self._build_message(200, mime, b"")
@@ -301,6 +302,6 @@ class PutCommand(AbstractModifyCommand):
def execute(self):
if "content-range" in self.msg.headers:
raise BadRequest("PUT request contains Content-Range header")
raise BadRequest("PUT request contains a Content-Range header")
super().execute()

View File

@@ -111,7 +111,7 @@ class HTTPServer:
"""
Create worker processes up to `self.worker_count`.
A worker process is created with start method "spawn", target `worker.worker` and the `self.logging_level`
A worker process is created with start method "spawn", target `worker.worker`, and the `self.logging_level`
is passed along with the `self.dispatch_queue` and `self._stop_event`
"""
for i in range(self.worker_count):

View File

@@ -20,13 +20,15 @@ class RequestHandler:
A RequestHandler instance processes incoming HTTP requests messages from a single client.
RequestHandler instances are created everytime a client connects. They will read the incoming
messages, parse, verify them and send a respond.
messages, parse, verify them and send a response.
"""
conn: ServerSocket
host: str
def __init__(self, conn: socket, host):
self.conn = ServerSocket(conn, host)
self.conn = ServerSocket(conn)
self.host = host
def listen(self):
"""
@@ -111,7 +113,7 @@ class RequestHandler:
# Only http is supported...
raise BadRequest(f"scheme={target.scheme}")
if target.netloc != "" and target.netloc != self.conn.host and target.netloc != self.conn.host.split(":")[0]:
if target.netloc != "" and target.netloc != self.host and target.netloc != self.host.split(":")[0]:
raise NotFound(str(target))
if target.path == "" or target.path[0] != "/":
@@ -123,7 +125,7 @@ class RequestHandler:
@see: _check_request_line for exceptions raised when validating the request-line.
@param msg: the message to validate
@raise BadRequest: if HTTP 1.1 and the Host header is missing
@raise BadRequest: if HTTP 1.1, and the Host header is missing
"""
if msg.version == "1.1" and "host" not in msg.headers:

View File

@@ -1,11 +1,18 @@
import socket
from httplib.exceptions import BadRequest
from httplib.httpsocket import HTTPSocket
class ServerSocket(HTTPSocket):
"""
Wrapper class for a socket. Represents a client connected to this server.
"""
"""
Reads the next line decoded as `httpsocket.FORMAT`
@return: the decoded next line retrieved from the socket
@raise InvalidResponse: If the next line couldn't be decoded, but was expected to
"""
def read_line(self):
try:
return super().read_line()

View File

@@ -70,9 +70,9 @@ class Worker:
"""
while not self.stop_event.is_set():
# Blocks until thread is free
# Blocks until the thread is free
self.finished_queue.get()
# Blocks until new client connects
# Blocks until a new client connects
conn, addr = self.queue.get()
if conn is None or addr is None:
@@ -80,7 +80,7 @@ class Worker:
logging.debug("Processing new client: %s", addr)
# submit client to thread
# submit the client to the executor
self.executor.submit(self._handle_client, conn, addr)
self.shutdown()
@@ -145,8 +145,10 @@ class Worker:
self.executor.shutdown(False)
logging.info("Closing sockets")
# Copy dictionary to prevent issues with concurrency
clients = self.dispatched_sockets.copy().values()
for client in clients:
client: socket.socket
try: