From b7315c2348e4086da96f4d1a275eda7e619d092f Mon Sep 17 00:00:00 2001 From: Arthur Bols Date: Sun, 28 Mar 2021 18:54:52 +0200 Subject: [PATCH] Improve documentation --- client/command.py | 2 +- client/httpclient.py | 13 ++++++++++++- client/responsehandler.py | 10 +++++----- httplib/httpsocket.py | 18 +++++++++++------- httplib/parser.py | 6 +++--- httplib/retriever.py | 24 +++++++++++++++++++----- server/command.py | 9 +++++---- server/httpserver.py | 4 ++-- server/requesthandler.py | 10 ++++++---- server/serversocket.py | 11 +++++++++-- server/worker.py | 10 ++++++---- 11 files changed, 79 insertions(+), 38 deletions(-) diff --git a/client/command.py b/client/command.py index c2f2e46..3774f52 100644 --- a/client/command.py +++ b/client/command.py @@ -1,6 +1,6 @@ import logging from abc import ABC, abstractmethod -from typing import Dict, Tuple +from typing import Dict from urllib.parse import urlparse from client.httpclient import HTTPClient diff --git a/client/httpclient.py b/client/httpclient.py index 52c21b4..b7e2156 100644 --- a/client/httpclient.py +++ b/client/httpclient.py @@ -4,12 +4,23 @@ from httplib.httpsocket import HTTPSocket, InvalidResponse class HTTPClient(HTTPSocket): + """ + Wrapper class for a socket. Represents a client which connects to a server. + """ + host: str def __init__(self, host: str): - super().__init__(socket.socket(socket.AF_INET, socket.SOCK_STREAM), host) + super().__init__(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) + self.host = host def read_line(self): + """ + Reads the next line decoded as `httpsocket.FORMAT` + + @return: the decoded next line retrieved from the socket + @raise InvalidResponse: If the next line couldn't be decoded, but was expected to + """ try: return super().read_line() except UnicodeDecodeError: diff --git a/client/responsehandler.py b/client/responsehandler.py index 37863e6..841a1de 100644 --- a/client/responsehandler.py +++ b/client/responsehandler.py @@ -22,7 +22,7 @@ def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory @param client: the client which sent the request. @param msg: the response message - @param command: the command of the sent request message + @param command: the command of the sent request-message @param directory: the directory to download the response to (if available) """ handler = BasicResponseHandler(client, msg, command) @@ -81,7 +81,7 @@ class BasicResponseHandler(ResponseHandler): for line in self.retriever.retrieve(): try: logging.debug("%s", line.decode(FORMAT)) - except Exception: + except UnicodeDecodeError: logging.debug("%r", line) logging.debug("] done.") @@ -223,7 +223,7 @@ class HTMLDownloadHandler(DownloadHandler): def _download_images(self, tmp_path, target_path, charset=FORMAT): """ - Downloads images referenced in the html of `tmp_filename` and replaces the references in the html + Download images referenced in the html of `tmp_filename` and replaces the references in the html and writes it to `target_filename`. @param tmp_path: the path to the temporary html file @param target_path: the path for the final html file @@ -247,7 +247,7 @@ class HTMLDownloadHandler(DownloadHandler): processed = {} to_replace = [] - # Find all tags and the urls from the corresponding `src` fields + # Find all tags, and the urls from the corresponding `src` fields for m in IMG_REGEX.finditer(html): url_start = m.start(1) url_end = m.end(1) @@ -272,7 +272,7 @@ class HTMLDownloadHandler(DownloadHandler): logging.error("Failed to download image: %s, skipping...", target, exc_info=e) # reverse the list so urls at the bottom of the html file are processed first. - # Otherwise our start and end positions won't be correct. + # Otherwise, our start and end positions won't be correct. to_replace.reverse() for (start, end, path) in to_replace: html = html[:start] + path + html[end:] diff --git a/httplib/httpsocket.py b/httplib/httpsocket.py index f070f71..25cd80f 100644 --- a/httplib/httpsocket.py +++ b/httplib/httpsocket.py @@ -1,10 +1,7 @@ -import logging import socket from io import BufferedReader from typing import Tuple -from httplib.exceptions import BadRequest - BUFSIZE = 4096 TIMEOUT = 3 FORMAT = "UTF-8" @@ -12,13 +9,20 @@ MAXLINE = 4096 class HTTPSocket: - host: str + """ + Wrapper class for a socket. Represents an HTTP connection. + + This class adds helper methods to read the underlying socket as a file. + """ conn: socket.socket - file: Tuple[BufferedReader, None] + file: BufferedReader - def __init__(self, conn: socket.socket, host: str): + def __init__(self, conn: socket.socket): + """ + Initialize an HTTPSocket with the given socket and host. + @param conn: the socket object + """ - self.host = host self.conn = conn self.conn.settimeout(TIMEOUT) self.conn.setblocking(True) diff --git a/httplib/parser.py b/httplib/parser.py index 88f8025..781b7fd 100644 --- a/httplib/parser.py +++ b/httplib/parser.py @@ -108,7 +108,7 @@ def parse_headers(lines): break while True: - if line in ("\r\n", "\n", ""): + if line in ("\r\n", "\r", "\n", ""): break if line[0].isspace(): @@ -189,14 +189,14 @@ def get_uri(url: str): def urljoin(base, url): """ - Join a base url and a URL to form an absolute url. + Join a base url, and a URL to form an absolute url. """ return urllib.parse.urljoin(base, url) def get_charset(headers: Dict[str, str]): """ - Returns the charset of the content from the headers if found. Otherwise returns `FORMAT` + Returns the charset of the content from the headers if found. Otherwise, returns `FORMAT` @param headers: the headers to retrieve the charset from @return: A charset diff --git a/httplib/retriever.py b/httplib/retriever.py index 3c7e35d..f6fd8a7 100644 --- a/httplib/retriever.py +++ b/httplib/retriever.py @@ -62,13 +62,20 @@ class PreambleRetriever(Retriever): """ Retriever instance for retrieving the start-line and headers of an HTTP message. """ + client: HTTPSocket _buffer: [] @property def buffer(self): + """ + Returns a copy of the internal buffer. + Clears the internal buffer afterwards. + + @return: A list of the buffered lines. + """ tmp_buffer = self._buffer - self._buffer = [] + self._buffer.clear() return tmp_buffer @@ -87,7 +94,7 @@ class PreambleRetriever(Retriever): while True: self._buffer.append(line) - if line in ("\r\n", "\n", ""): + if line in ("\r\n", "\r", "\n", ""): return line yield line @@ -140,8 +147,8 @@ class ContentLengthRetriever(Retriever): class RawRetriever(Retriever): """ - Retriever instance for retrieve a message body without any length specifier or encoding. - This retriever will keep waiting until a timeout occurs or the connection is disconnected. + Retriever instance for retrieving a message body without any length specifier or encoding. + This retriever will keep waiting until a timeout occurs, or the connection is disconnected. """ def retrieve(self): @@ -161,6 +168,7 @@ class ChunkedRetriever(Retriever): """ Returns an iterator of the received message bytes. The size of each iteration is not necessarily constant. + @raise IncompleteResponse: if the connection is closed or timed out before receiving the complete payload. @raise InvalidResponse: if the length of a chunk could not be determined. """ @@ -184,6 +192,12 @@ class ChunkedRetriever(Retriever): raise IncompleteResponse("Connection closed before receiving the complete payload!") def __get_chunk_size(self): + """ + Returns the next chunk size. + + @return: The chunk size in bytes + @raise InvalidResponse: If an error occured when parsing the chunk size. + """ line = self.client.read_line() sep_pos = line.find(";") if sep_pos >= 0: @@ -192,4 +206,4 @@ class ChunkedRetriever(Retriever): try: return int(line, 16) except ValueError: - raise InvalidResponse() + raise InvalidResponse("Failed to parse chunk size") diff --git a/server/command.py b/server/command.py index 926e96f..5c831ed 100644 --- a/server/command.py +++ b/server/command.py @@ -148,7 +148,7 @@ class AbstractCommand(ABC): @return: True if the header is invalid, and thus shouldn't be taken into account, throws NotModified if the content isn't modified since the given date. - @raise NotModified: If the date of if-modified-since greater than the modify date of the resource. + @raise NotModified: If the date of if-modified-since greater than the modify-date of the resource. """ date_val = self.msg.headers.get("if-modified-since") if not date_val: @@ -164,7 +164,8 @@ class AbstractCommand(ABC): return True - def get_mimetype(self, path): + @staticmethod + def get_mimetype(path): """ Guess the type of file. @param path: the path to the file to guess the type of @@ -243,8 +244,8 @@ class HeadCommand(AbstractCommand): def execute(self): path = self._get_path() - mime = self.get_mimetype(path) + return self._build_message(200, mime, b"") @@ -301,6 +302,6 @@ class PutCommand(AbstractModifyCommand): def execute(self): if "content-range" in self.msg.headers: - raise BadRequest("PUT request contains Content-Range header") + raise BadRequest("PUT request contains a Content-Range header") super().execute() diff --git a/server/httpserver.py b/server/httpserver.py index 16780e2..8367edf 100644 --- a/server/httpserver.py +++ b/server/httpserver.py @@ -83,7 +83,7 @@ class HTTPServer: """ Cleanly shutdown the server - Notifies the worker processes to shutdown and eventually closes the server socket + Notifies the worker processes to shut down and eventually closes the server socket """ # Set stop event @@ -111,7 +111,7 @@ class HTTPServer: """ Create worker processes up to `self.worker_count`. - A worker process is created with start method "spawn", target `worker.worker` and the `self.logging_level` + A worker process is created with start method "spawn", target `worker.worker`, and the `self.logging_level` is passed along with the `self.dispatch_queue` and `self._stop_event` """ for i in range(self.worker_count): diff --git a/server/requesthandler.py b/server/requesthandler.py index 3a92e54..8102214 100644 --- a/server/requesthandler.py +++ b/server/requesthandler.py @@ -20,13 +20,15 @@ class RequestHandler: A RequestHandler instance processes incoming HTTP requests messages from a single client. RequestHandler instances are created everytime a client connects. They will read the incoming - messages, parse, verify them and send a respond. + messages, parse, verify them and send a response. """ conn: ServerSocket + host: str def __init__(self, conn: socket, host): - self.conn = ServerSocket(conn, host) + self.conn = ServerSocket(conn) + self.host = host def listen(self): """ @@ -111,7 +113,7 @@ class RequestHandler: # Only http is supported... raise BadRequest(f"scheme={target.scheme}") - if target.netloc != "" and target.netloc != self.conn.host and target.netloc != self.conn.host.split(":")[0]: + if target.netloc != "" and target.netloc != self.host and target.netloc != self.host.split(":")[0]: raise NotFound(str(target)) if target.path == "" or target.path[0] != "/": @@ -123,7 +125,7 @@ class RequestHandler: @see: _check_request_line for exceptions raised when validating the request-line. @param msg: the message to validate - @raise BadRequest: if HTTP 1.1 and the Host header is missing + @raise BadRequest: if HTTP 1.1, and the Host header is missing """ if msg.version == "1.1" and "host" not in msg.headers: diff --git a/server/serversocket.py b/server/serversocket.py index 08edd49..893f409 100644 --- a/server/serversocket.py +++ b/server/serversocket.py @@ -1,11 +1,18 @@ -import socket - from httplib.exceptions import BadRequest from httplib.httpsocket import HTTPSocket class ServerSocket(HTTPSocket): + """ + Wrapper class for a socket. Represents a client connected to this server. + """ + """ + Reads the next line decoded as `httpsocket.FORMAT` + + @return: the decoded next line retrieved from the socket + @raise InvalidResponse: If the next line couldn't be decoded, but was expected to + """ def read_line(self): try: return super().read_line() diff --git a/server/worker.py b/server/worker.py index e4440f9..491dd7d 100644 --- a/server/worker.py +++ b/server/worker.py @@ -48,7 +48,7 @@ class Worker: @param host: The hostname of the HTTP server @param name: The name of this Worker instance @param queue: The dispatch queue for incoming socket connections - @param stop_event: The Event that signals when to shutdown this worker. + @param stop_event: The Event that signals when to shut down this worker. """ self.host = host self.name = name @@ -70,9 +70,9 @@ class Worker: """ while not self.stop_event.is_set(): - # Blocks until thread is free + # Blocks until the thread is free self.finished_queue.get() - # Blocks until new client connects + # Blocks until a new client connects conn, addr = self.queue.get() if conn is None or addr is None: @@ -80,7 +80,7 @@ class Worker: logging.debug("Processing new client: %s", addr) - # submit client to thread + # submit the client to the executor self.executor.submit(self._handle_client, conn, addr) self.shutdown() @@ -145,8 +145,10 @@ class Worker: self.executor.shutdown(False) logging.info("Closing sockets") + # Copy dictionary to prevent issues with concurrency clients = self.dispatched_sockets.copy().values() + for client in clients: client: socket.socket try: