diff --git a/client/command.py b/client/command.py
index c2f2e46..3774f52 100644
--- a/client/command.py
+++ b/client/command.py
@@ -1,6 +1,6 @@
import logging
from abc import ABC, abstractmethod
-from typing import Dict, Tuple
+from typing import Dict
from urllib.parse import urlparse
from client.httpclient import HTTPClient
diff --git a/client/httpclient.py b/client/httpclient.py
index 52c21b4..b7e2156 100644
--- a/client/httpclient.py
+++ b/client/httpclient.py
@@ -4,12 +4,23 @@ from httplib.httpsocket import HTTPSocket, InvalidResponse
class HTTPClient(HTTPSocket):
+ """
+ Wrapper class for a socket. Represents a client which connects to a server.
+ """
+
host: str
def __init__(self, host: str):
- super().__init__(socket.socket(socket.AF_INET, socket.SOCK_STREAM), host)
+ super().__init__(socket.socket(socket.AF_INET, socket.SOCK_STREAM))
+ self.host = host
def read_line(self):
+ """
+ Reads the next line decoded as `httpsocket.FORMAT`
+
+ @return: the decoded next line retrieved from the socket
+ @raise InvalidResponse: If the next line couldn't be decoded, but was expected to
+ """
try:
return super().read_line()
except UnicodeDecodeError:
diff --git a/client/responsehandler.py b/client/responsehandler.py
index 37863e6..841a1de 100644
--- a/client/responsehandler.py
+++ b/client/responsehandler.py
@@ -22,7 +22,7 @@ def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory
@param client: the client which sent the request.
@param msg: the response message
- @param command: the command of the sent request message
+ @param command: the command of the sent request-message
@param directory: the directory to download the response to (if available)
"""
handler = BasicResponseHandler(client, msg, command)
@@ -81,7 +81,7 @@ class BasicResponseHandler(ResponseHandler):
for line in self.retriever.retrieve():
try:
logging.debug("%s", line.decode(FORMAT))
- except Exception:
+ except UnicodeDecodeError:
logging.debug("%r", line)
logging.debug("] done.")
@@ -223,7 +223,7 @@ class HTMLDownloadHandler(DownloadHandler):
def _download_images(self, tmp_path, target_path, charset=FORMAT):
"""
- Downloads images referenced in the html of `tmp_filename` and replaces the references in the html
+ Download images referenced in the html of `tmp_filename` and replaces the references in the html
and writes it to `target_filename`.
@param tmp_path: the path to the temporary html file
@param target_path: the path for the final html file
@@ -247,7 +247,7 @@ class HTMLDownloadHandler(DownloadHandler):
processed = {}
to_replace = []
- # Find all
tags and the urls from the corresponding `src` fields
+ # Find all
tags, and the urls from the corresponding `src` fields
for m in IMG_REGEX.finditer(html):
url_start = m.start(1)
url_end = m.end(1)
@@ -272,7 +272,7 @@ class HTMLDownloadHandler(DownloadHandler):
logging.error("Failed to download image: %s, skipping...", target, exc_info=e)
# reverse the list so urls at the bottom of the html file are processed first.
- # Otherwise our start and end positions won't be correct.
+ # Otherwise, our start and end positions won't be correct.
to_replace.reverse()
for (start, end, path) in to_replace:
html = html[:start] + path + html[end:]
diff --git a/httplib/httpsocket.py b/httplib/httpsocket.py
index f070f71..25cd80f 100644
--- a/httplib/httpsocket.py
+++ b/httplib/httpsocket.py
@@ -1,10 +1,7 @@
-import logging
import socket
from io import BufferedReader
from typing import Tuple
-from httplib.exceptions import BadRequest
-
BUFSIZE = 4096
TIMEOUT = 3
FORMAT = "UTF-8"
@@ -12,13 +9,20 @@ MAXLINE = 4096
class HTTPSocket:
- host: str
+ """
+ Wrapper class for a socket. Represents an HTTP connection.
+
+ This class adds helper methods to read the underlying socket as a file.
+ """
conn: socket.socket
- file: Tuple[BufferedReader, None]
+ file: BufferedReader
- def __init__(self, conn: socket.socket, host: str):
+ def __init__(self, conn: socket.socket):
+ """
+ Initialize an HTTPSocket with the given socket and host.
+ @param conn: the socket object
+ """
- self.host = host
self.conn = conn
self.conn.settimeout(TIMEOUT)
self.conn.setblocking(True)
diff --git a/httplib/parser.py b/httplib/parser.py
index 88f8025..781b7fd 100644
--- a/httplib/parser.py
+++ b/httplib/parser.py
@@ -108,7 +108,7 @@ def parse_headers(lines):
break
while True:
- if line in ("\r\n", "\n", ""):
+ if line in ("\r\n", "\r", "\n", ""):
break
if line[0].isspace():
@@ -189,14 +189,14 @@ def get_uri(url: str):
def urljoin(base, url):
"""
- Join a base url and a URL to form an absolute url.
+ Join a base url, and a URL to form an absolute url.
"""
return urllib.parse.urljoin(base, url)
def get_charset(headers: Dict[str, str]):
"""
- Returns the charset of the content from the headers if found. Otherwise returns `FORMAT`
+ Returns the charset of the content from the headers if found. Otherwise, returns `FORMAT`
@param headers: the headers to retrieve the charset from
@return: A charset
diff --git a/httplib/retriever.py b/httplib/retriever.py
index 3c7e35d..f6fd8a7 100644
--- a/httplib/retriever.py
+++ b/httplib/retriever.py
@@ -62,13 +62,20 @@ class PreambleRetriever(Retriever):
"""
Retriever instance for retrieving the start-line and headers of an HTTP message.
"""
+
client: HTTPSocket
_buffer: []
@property
def buffer(self):
+ """
+ Returns a copy of the internal buffer.
+ Clears the internal buffer afterwards.
+
+ @return: A list of the buffered lines.
+ """
tmp_buffer = self._buffer
- self._buffer = []
+ self._buffer.clear()
return tmp_buffer
@@ -87,7 +94,7 @@ class PreambleRetriever(Retriever):
while True:
self._buffer.append(line)
- if line in ("\r\n", "\n", ""):
+ if line in ("\r\n", "\r", "\n", ""):
return line
yield line
@@ -140,8 +147,8 @@ class ContentLengthRetriever(Retriever):
class RawRetriever(Retriever):
"""
- Retriever instance for retrieve a message body without any length specifier or encoding.
- This retriever will keep waiting until a timeout occurs or the connection is disconnected.
+ Retriever instance for retrieving a message body without any length specifier or encoding.
+ This retriever will keep waiting until a timeout occurs, or the connection is disconnected.
"""
def retrieve(self):
@@ -161,6 +168,7 @@ class ChunkedRetriever(Retriever):
"""
Returns an iterator of the received message bytes.
The size of each iteration is not necessarily constant.
+
@raise IncompleteResponse: if the connection is closed or timed out before receiving the complete payload.
@raise InvalidResponse: if the length of a chunk could not be determined.
"""
@@ -184,6 +192,12 @@ class ChunkedRetriever(Retriever):
raise IncompleteResponse("Connection closed before receiving the complete payload!")
def __get_chunk_size(self):
+ """
+ Returns the next chunk size.
+
+ @return: The chunk size in bytes
+ @raise InvalidResponse: If an error occured when parsing the chunk size.
+ """
line = self.client.read_line()
sep_pos = line.find(";")
if sep_pos >= 0:
@@ -192,4 +206,4 @@ class ChunkedRetriever(Retriever):
try:
return int(line, 16)
except ValueError:
- raise InvalidResponse()
+ raise InvalidResponse("Failed to parse chunk size")
diff --git a/server/command.py b/server/command.py
index 926e96f..5c831ed 100644
--- a/server/command.py
+++ b/server/command.py
@@ -148,7 +148,7 @@ class AbstractCommand(ABC):
@return: True if the header is invalid, and thus shouldn't be taken into account, throws NotModified
if the content isn't modified since the given date.
- @raise NotModified: If the date of if-modified-since greater than the modify date of the resource.
+ @raise NotModified: If the date of if-modified-since greater than the modify-date of the resource.
"""
date_val = self.msg.headers.get("if-modified-since")
if not date_val:
@@ -164,7 +164,8 @@ class AbstractCommand(ABC):
return True
- def get_mimetype(self, path):
+ @staticmethod
+ def get_mimetype(path):
"""
Guess the type of file.
@param path: the path to the file to guess the type of
@@ -243,8 +244,8 @@ class HeadCommand(AbstractCommand):
def execute(self):
path = self._get_path()
-
mime = self.get_mimetype(path)
+
return self._build_message(200, mime, b"")
@@ -301,6 +302,6 @@ class PutCommand(AbstractModifyCommand):
def execute(self):
if "content-range" in self.msg.headers:
- raise BadRequest("PUT request contains Content-Range header")
+ raise BadRequest("PUT request contains a Content-Range header")
super().execute()
diff --git a/server/httpserver.py b/server/httpserver.py
index 16780e2..8367edf 100644
--- a/server/httpserver.py
+++ b/server/httpserver.py
@@ -83,7 +83,7 @@ class HTTPServer:
"""
Cleanly shutdown the server
- Notifies the worker processes to shutdown and eventually closes the server socket
+ Notifies the worker processes to shut down and eventually closes the server socket
"""
# Set stop event
@@ -111,7 +111,7 @@ class HTTPServer:
"""
Create worker processes up to `self.worker_count`.
- A worker process is created with start method "spawn", target `worker.worker` and the `self.logging_level`
+ A worker process is created with start method "spawn", target `worker.worker`, and the `self.logging_level`
is passed along with the `self.dispatch_queue` and `self._stop_event`
"""
for i in range(self.worker_count):
diff --git a/server/requesthandler.py b/server/requesthandler.py
index 3a92e54..8102214 100644
--- a/server/requesthandler.py
+++ b/server/requesthandler.py
@@ -20,13 +20,15 @@ class RequestHandler:
A RequestHandler instance processes incoming HTTP requests messages from a single client.
RequestHandler instances are created everytime a client connects. They will read the incoming
- messages, parse, verify them and send a respond.
+ messages, parse, verify them and send a response.
"""
conn: ServerSocket
+ host: str
def __init__(self, conn: socket, host):
- self.conn = ServerSocket(conn, host)
+ self.conn = ServerSocket(conn)
+ self.host = host
def listen(self):
"""
@@ -111,7 +113,7 @@ class RequestHandler:
# Only http is supported...
raise BadRequest(f"scheme={target.scheme}")
- if target.netloc != "" and target.netloc != self.conn.host and target.netloc != self.conn.host.split(":")[0]:
+ if target.netloc != "" and target.netloc != self.host and target.netloc != self.host.split(":")[0]:
raise NotFound(str(target))
if target.path == "" or target.path[0] != "/":
@@ -123,7 +125,7 @@ class RequestHandler:
@see: _check_request_line for exceptions raised when validating the request-line.
@param msg: the message to validate
- @raise BadRequest: if HTTP 1.1 and the Host header is missing
+ @raise BadRequest: if HTTP 1.1, and the Host header is missing
"""
if msg.version == "1.1" and "host" not in msg.headers:
diff --git a/server/serversocket.py b/server/serversocket.py
index 08edd49..893f409 100644
--- a/server/serversocket.py
+++ b/server/serversocket.py
@@ -1,11 +1,18 @@
-import socket
-
from httplib.exceptions import BadRequest
from httplib.httpsocket import HTTPSocket
class ServerSocket(HTTPSocket):
+ """
+ Wrapper class for a socket. Represents a client connected to this server.
+ """
+ """
+ Reads the next line decoded as `httpsocket.FORMAT`
+
+ @return: the decoded next line retrieved from the socket
+ @raise InvalidResponse: If the next line couldn't be decoded, but was expected to
+ """
def read_line(self):
try:
return super().read_line()
diff --git a/server/worker.py b/server/worker.py
index e4440f9..491dd7d 100644
--- a/server/worker.py
+++ b/server/worker.py
@@ -48,7 +48,7 @@ class Worker:
@param host: The hostname of the HTTP server
@param name: The name of this Worker instance
@param queue: The dispatch queue for incoming socket connections
- @param stop_event: The Event that signals when to shutdown this worker.
+ @param stop_event: The Event that signals when to shut down this worker.
"""
self.host = host
self.name = name
@@ -70,9 +70,9 @@ class Worker:
"""
while not self.stop_event.is_set():
- # Blocks until thread is free
+ # Blocks until the thread is free
self.finished_queue.get()
- # Blocks until new client connects
+ # Blocks until a new client connects
conn, addr = self.queue.get()
if conn is None or addr is None:
@@ -80,7 +80,7 @@ class Worker:
logging.debug("Processing new client: %s", addr)
- # submit client to thread
+ # submit the client to the executor
self.executor.submit(self._handle_client, conn, addr)
self.shutdown()
@@ -145,8 +145,10 @@ class Worker:
self.executor.shutdown(False)
logging.info("Closing sockets")
+
# Copy dictionary to prevent issues with concurrency
clients = self.dispatched_sockets.copy().values()
+
for client in clients:
client: socket.socket
try: