Improve documentation
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Dict, Tuple
|
from typing import Dict
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from client.httpclient import HTTPClient
|
from client.httpclient import HTTPClient
|
||||||
|
@@ -4,12 +4,23 @@ from httplib.httpsocket import HTTPSocket, InvalidResponse
|
|||||||
|
|
||||||
|
|
||||||
class HTTPClient(HTTPSocket):
|
class HTTPClient(HTTPSocket):
|
||||||
|
"""
|
||||||
|
Wrapper class for a socket. Represents a client which connects to a server.
|
||||||
|
"""
|
||||||
|
|
||||||
host: str
|
host: str
|
||||||
|
|
||||||
def __init__(self, host: str):
|
def __init__(self, host: str):
|
||||||
super().__init__(socket.socket(socket.AF_INET, socket.SOCK_STREAM), host)
|
super().__init__(socket.socket(socket.AF_INET, socket.SOCK_STREAM))
|
||||||
|
self.host = host
|
||||||
|
|
||||||
def read_line(self):
|
def read_line(self):
|
||||||
|
"""
|
||||||
|
Reads the next line decoded as `httpsocket.FORMAT`
|
||||||
|
|
||||||
|
@return: the decoded next line retrieved from the socket
|
||||||
|
@raise InvalidResponse: If the next line couldn't be decoded, but was expected to
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
return super().read_line()
|
return super().read_line()
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
|
@@ -22,7 +22,7 @@ def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory
|
|||||||
|
|
||||||
@param client: the client which sent the request.
|
@param client: the client which sent the request.
|
||||||
@param msg: the response message
|
@param msg: the response message
|
||||||
@param command: the command of the sent request message
|
@param command: the command of the sent request-message
|
||||||
@param directory: the directory to download the response to (if available)
|
@param directory: the directory to download the response to (if available)
|
||||||
"""
|
"""
|
||||||
handler = BasicResponseHandler(client, msg, command)
|
handler = BasicResponseHandler(client, msg, command)
|
||||||
@@ -81,7 +81,7 @@ class BasicResponseHandler(ResponseHandler):
|
|||||||
for line in self.retriever.retrieve():
|
for line in self.retriever.retrieve():
|
||||||
try:
|
try:
|
||||||
logging.debug("%s", line.decode(FORMAT))
|
logging.debug("%s", line.decode(FORMAT))
|
||||||
except Exception:
|
except UnicodeDecodeError:
|
||||||
logging.debug("%r", line)
|
logging.debug("%r", line)
|
||||||
|
|
||||||
logging.debug("] done.")
|
logging.debug("] done.")
|
||||||
@@ -223,7 +223,7 @@ class HTMLDownloadHandler(DownloadHandler):
|
|||||||
|
|
||||||
def _download_images(self, tmp_path, target_path, charset=FORMAT):
|
def _download_images(self, tmp_path, target_path, charset=FORMAT):
|
||||||
"""
|
"""
|
||||||
Downloads images referenced in the html of `tmp_filename` and replaces the references in the html
|
Download images referenced in the html of `tmp_filename` and replaces the references in the html
|
||||||
and writes it to `target_filename`.
|
and writes it to `target_filename`.
|
||||||
@param tmp_path: the path to the temporary html file
|
@param tmp_path: the path to the temporary html file
|
||||||
@param target_path: the path for the final html file
|
@param target_path: the path for the final html file
|
||||||
@@ -247,7 +247,7 @@ class HTMLDownloadHandler(DownloadHandler):
|
|||||||
processed = {}
|
processed = {}
|
||||||
to_replace = []
|
to_replace = []
|
||||||
|
|
||||||
# Find all <img> tags and the urls from the corresponding `src` fields
|
# Find all <img> tags, and the urls from the corresponding `src` fields
|
||||||
for m in IMG_REGEX.finditer(html):
|
for m in IMG_REGEX.finditer(html):
|
||||||
url_start = m.start(1)
|
url_start = m.start(1)
|
||||||
url_end = m.end(1)
|
url_end = m.end(1)
|
||||||
@@ -272,7 +272,7 @@ class HTMLDownloadHandler(DownloadHandler):
|
|||||||
logging.error("Failed to download image: %s, skipping...", target, exc_info=e)
|
logging.error("Failed to download image: %s, skipping...", target, exc_info=e)
|
||||||
|
|
||||||
# reverse the list so urls at the bottom of the html file are processed first.
|
# reverse the list so urls at the bottom of the html file are processed first.
|
||||||
# Otherwise our start and end positions won't be correct.
|
# Otherwise, our start and end positions won't be correct.
|
||||||
to_replace.reverse()
|
to_replace.reverse()
|
||||||
for (start, end, path) in to_replace:
|
for (start, end, path) in to_replace:
|
||||||
html = html[:start] + path + html[end:]
|
html = html[:start] + path + html[end:]
|
||||||
|
@@ -1,10 +1,7 @@
|
|||||||
import logging
|
|
||||||
import socket
|
import socket
|
||||||
from io import BufferedReader
|
from io import BufferedReader
|
||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
|
|
||||||
from httplib.exceptions import BadRequest
|
|
||||||
|
|
||||||
BUFSIZE = 4096
|
BUFSIZE = 4096
|
||||||
TIMEOUT = 3
|
TIMEOUT = 3
|
||||||
FORMAT = "UTF-8"
|
FORMAT = "UTF-8"
|
||||||
@@ -12,13 +9,20 @@ MAXLINE = 4096
|
|||||||
|
|
||||||
|
|
||||||
class HTTPSocket:
|
class HTTPSocket:
|
||||||
host: str
|
"""
|
||||||
|
Wrapper class for a socket. Represents an HTTP connection.
|
||||||
|
|
||||||
|
This class adds helper methods to read the underlying socket as a file.
|
||||||
|
"""
|
||||||
conn: socket.socket
|
conn: socket.socket
|
||||||
file: Tuple[BufferedReader, None]
|
file: BufferedReader
|
||||||
|
|
||||||
def __init__(self, conn: socket.socket, host: str):
|
def __init__(self, conn: socket.socket):
|
||||||
|
"""
|
||||||
|
Initialize an HTTPSocket with the given socket.
|
||||||
|
@param conn: the socket object
|
||||||
|
"""
|
||||||
|
|
||||||
self.host = host
|
|
||||||
self.conn = conn
|
self.conn = conn
|
||||||
self.conn.settimeout(TIMEOUT)
|
self.conn.settimeout(TIMEOUT)
|
||||||
self.conn.setblocking(True)
|
self.conn.setblocking(True)
|
||||||
|
@@ -108,7 +108,7 @@ def parse_headers(lines):
|
|||||||
break
|
break
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
if line in ("\r\n", "\n", ""):
|
if line in ("\r\n", "\r", "\n", ""):
|
||||||
break
|
break
|
||||||
|
|
||||||
if line[0].isspace():
|
if line[0].isspace():
|
||||||
@@ -189,14 +189,14 @@ def get_uri(url: str):
|
|||||||
|
|
||||||
def urljoin(base, url):
|
def urljoin(base, url):
|
||||||
"""
|
"""
|
||||||
Join a base url and a URL to form an absolute url.
|
Join a base url, and a URL to form an absolute url.
|
||||||
"""
|
"""
|
||||||
return urllib.parse.urljoin(base, url)
|
return urllib.parse.urljoin(base, url)
|
||||||
|
|
||||||
|
|
||||||
def get_charset(headers: Dict[str, str]):
|
def get_charset(headers: Dict[str, str]):
|
||||||
"""
|
"""
|
||||||
Returns the charset of the content from the headers if found. Otherwise returns `FORMAT`
|
Returns the charset of the content from the headers if found. Otherwise, returns `FORMAT`
|
||||||
|
|
||||||
@param headers: the headers to retrieve the charset from
|
@param headers: the headers to retrieve the charset from
|
||||||
@return: A charset
|
@return: A charset
|
||||||
|
@@ -62,13 +62,20 @@ class PreambleRetriever(Retriever):
|
|||||||
"""
|
"""
|
||||||
Retriever instance for retrieving the start-line and headers of an HTTP message.
|
Retriever instance for retrieving the start-line and headers of an HTTP message.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
client: HTTPSocket
|
client: HTTPSocket
|
||||||
_buffer: []
|
_buffer: []
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def buffer(self):
|
def buffer(self):
|
||||||
|
"""
|
||||||
|
Returns a copy of the internal buffer.
|
||||||
|
Clears the internal buffer afterwards.
|
||||||
|
|
||||||
|
@return: A list of the buffered lines.
|
||||||
|
"""
|
||||||
tmp_buffer = self._buffer
|
tmp_buffer = self._buffer
|
||||||
self._buffer = []
|
self._buffer.clear()
|
||||||
|
|
||||||
return tmp_buffer
|
return tmp_buffer
|
||||||
|
|
||||||
@@ -87,7 +94,7 @@ class PreambleRetriever(Retriever):
|
|||||||
while True:
|
while True:
|
||||||
self._buffer.append(line)
|
self._buffer.append(line)
|
||||||
|
|
||||||
if line in ("\r\n", "\n", ""):
|
if line in ("\r\n", "\r", "\n", ""):
|
||||||
return line
|
return line
|
||||||
|
|
||||||
yield line
|
yield line
|
||||||
@@ -140,8 +147,8 @@ class ContentLengthRetriever(Retriever):
|
|||||||
|
|
||||||
class RawRetriever(Retriever):
|
class RawRetriever(Retriever):
|
||||||
"""
|
"""
|
||||||
Retriever instance for retrieve a message body without any length specifier or encoding.
|
Retriever instance for retrieving a message body without any length specifier or encoding.
|
||||||
This retriever will keep waiting until a timeout occurs or the connection is disconnected.
|
This retriever will keep waiting until a timeout occurs, or the connection is disconnected.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def retrieve(self):
|
def retrieve(self):
|
||||||
@@ -161,6 +168,7 @@ class ChunkedRetriever(Retriever):
|
|||||||
"""
|
"""
|
||||||
Returns an iterator of the received message bytes.
|
Returns an iterator of the received message bytes.
|
||||||
The size of each iteration is not necessarily constant.
|
The size of each iteration is not necessarily constant.
|
||||||
|
|
||||||
@raise IncompleteResponse: if the connection is closed or timed out before receiving the complete payload.
|
@raise IncompleteResponse: if the connection is closed or timed out before receiving the complete payload.
|
||||||
@raise InvalidResponse: if the length of a chunk could not be determined.
|
@raise InvalidResponse: if the length of a chunk could not be determined.
|
||||||
"""
|
"""
|
||||||
@@ -184,6 +192,12 @@ class ChunkedRetriever(Retriever):
|
|||||||
raise IncompleteResponse("Connection closed before receiving the complete payload!")
|
raise IncompleteResponse("Connection closed before receiving the complete payload!")
|
||||||
|
|
||||||
def __get_chunk_size(self):
|
def __get_chunk_size(self):
|
||||||
|
"""
|
||||||
|
Returns the next chunk size.
|
||||||
|
|
||||||
|
@return: The chunk size in bytes
|
||||||
|
@raise InvalidResponse: If an error occurred when parsing the chunk size.
|
||||||
|
"""
|
||||||
line = self.client.read_line()
|
line = self.client.read_line()
|
||||||
sep_pos = line.find(";")
|
sep_pos = line.find(";")
|
||||||
if sep_pos >= 0:
|
if sep_pos >= 0:
|
||||||
@@ -192,4 +206,4 @@ class ChunkedRetriever(Retriever):
|
|||||||
try:
|
try:
|
||||||
return int(line, 16)
|
return int(line, 16)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
raise InvalidResponse()
|
raise InvalidResponse("Failed to parse chunk size")
|
||||||
|
@@ -148,7 +148,7 @@ class AbstractCommand(ABC):
|
|||||||
@return: True if the header is invalid, and thus shouldn't be taken into account, throws NotModified
|
@return: True if the header is invalid, and thus shouldn't be taken into account, throws NotModified
|
||||||
if the content isn't modified since the given date.
|
if the content isn't modified since the given date.
|
||||||
|
|
||||||
@raise NotModified: If the date of if-modified-since greater than the modify date of the resource.
|
@raise NotModified: If the date of if-modified-since is greater than the modify-date of the resource.
|
||||||
"""
|
"""
|
||||||
date_val = self.msg.headers.get("if-modified-since")
|
date_val = self.msg.headers.get("if-modified-since")
|
||||||
if not date_val:
|
if not date_val:
|
||||||
@@ -164,7 +164,8 @@ class AbstractCommand(ABC):
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def get_mimetype(self, path):
|
@staticmethod
|
||||||
|
def get_mimetype(path):
|
||||||
"""
|
"""
|
||||||
Guess the type of file.
|
Guess the type of file.
|
||||||
@param path: the path to the file to guess the type of
|
@param path: the path to the file to guess the type of
|
||||||
@@ -243,8 +244,8 @@ class HeadCommand(AbstractCommand):
|
|||||||
|
|
||||||
def execute(self):
|
def execute(self):
|
||||||
path = self._get_path()
|
path = self._get_path()
|
||||||
|
|
||||||
mime = self.get_mimetype(path)
|
mime = self.get_mimetype(path)
|
||||||
|
|
||||||
return self._build_message(200, mime, b"")
|
return self._build_message(200, mime, b"")
|
||||||
|
|
||||||
|
|
||||||
@@ -301,6 +302,6 @@ class PutCommand(AbstractModifyCommand):
|
|||||||
|
|
||||||
def execute(self):
|
def execute(self):
|
||||||
if "content-range" in self.msg.headers:
|
if "content-range" in self.msg.headers:
|
||||||
raise BadRequest("PUT request contains Content-Range header")
|
raise BadRequest("PUT request contains a Content-Range header")
|
||||||
|
|
||||||
super().execute()
|
super().execute()
|
||||||
|
@@ -111,7 +111,7 @@ class HTTPServer:
|
|||||||
"""
|
"""
|
||||||
Create worker processes up to `self.worker_count`.
|
Create worker processes up to `self.worker_count`.
|
||||||
|
|
||||||
A worker process is created with start method "spawn", target `worker.worker` and the `self.logging_level`
|
A worker process is created with start method "spawn", target `worker.worker`, and the `self.logging_level`
|
||||||
is passed along with the `self.dispatch_queue` and `self._stop_event`
|
is passed along with the `self.dispatch_queue` and `self._stop_event`
|
||||||
"""
|
"""
|
||||||
for i in range(self.worker_count):
|
for i in range(self.worker_count):
|
||||||
|
@@ -20,13 +20,15 @@ class RequestHandler:
|
|||||||
A RequestHandler instance processes incoming HTTP requests messages from a single client.
|
A RequestHandler instance processes incoming HTTP requests messages from a single client.
|
||||||
|
|
||||||
RequestHandler instances are created everytime a client connects. They will read the incoming
|
RequestHandler instances are created everytime a client connects. They will read the incoming
|
||||||
messages, parse, verify them and send a respond.
|
messages, parse, verify them and send a response.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
conn: ServerSocket
|
conn: ServerSocket
|
||||||
|
host: str
|
||||||
|
|
||||||
def __init__(self, conn: socket, host):
|
def __init__(self, conn: socket, host):
|
||||||
self.conn = ServerSocket(conn, host)
|
self.conn = ServerSocket(conn)
|
||||||
|
self.host = host
|
||||||
|
|
||||||
def listen(self):
|
def listen(self):
|
||||||
"""
|
"""
|
||||||
@@ -111,7 +113,7 @@ class RequestHandler:
|
|||||||
# Only http is supported...
|
# Only http is supported...
|
||||||
raise BadRequest(f"scheme={target.scheme}")
|
raise BadRequest(f"scheme={target.scheme}")
|
||||||
|
|
||||||
if target.netloc != "" and target.netloc != self.conn.host and target.netloc != self.conn.host.split(":")[0]:
|
if target.netloc != "" and target.netloc != self.host and target.netloc != self.host.split(":")[0]:
|
||||||
raise NotFound(str(target))
|
raise NotFound(str(target))
|
||||||
|
|
||||||
if target.path == "" or target.path[0] != "/":
|
if target.path == "" or target.path[0] != "/":
|
||||||
@@ -123,7 +125,7 @@ class RequestHandler:
|
|||||||
|
|
||||||
@see: _check_request_line for exceptions raised when validating the request-line.
|
@see: _check_request_line for exceptions raised when validating the request-line.
|
||||||
@param msg: the message to validate
|
@param msg: the message to validate
|
||||||
@raise BadRequest: if HTTP 1.1 and the Host header is missing
|
@raise BadRequest: if HTTP 1.1, and the Host header is missing
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if msg.version == "1.1" and "host" not in msg.headers:
|
if msg.version == "1.1" and "host" not in msg.headers:
|
||||||
|
@@ -1,11 +1,18 @@
|
|||||||
import socket
|
|
||||||
|
|
||||||
from httplib.exceptions import BadRequest
|
from httplib.exceptions import BadRequest
|
||||||
from httplib.httpsocket import HTTPSocket
|
from httplib.httpsocket import HTTPSocket
|
||||||
|
|
||||||
|
|
||||||
class ServerSocket(HTTPSocket):
|
class ServerSocket(HTTPSocket):
|
||||||
|
"""
|
||||||
|
Wrapper class for a socket. Represents a client connected to this server.
|
||||||
|
"""
|
||||||
|
|
||||||
|
"""
|
||||||
|
Reads the next line decoded as `httpsocket.FORMAT`
|
||||||
|
|
||||||
|
@return: the decoded next line retrieved from the socket
|
||||||
|
@raise InvalidResponse: If the next line couldn't be decoded, but was expected to
|
||||||
|
"""
|
||||||
def read_line(self):
|
def read_line(self):
|
||||||
try:
|
try:
|
||||||
return super().read_line()
|
return super().read_line()
|
||||||
|
@@ -70,9 +70,9 @@ class Worker:
|
|||||||
"""
|
"""
|
||||||
while not self.stop_event.is_set():
|
while not self.stop_event.is_set():
|
||||||
|
|
||||||
# Blocks until thread is free
|
# Blocks until the thread is free
|
||||||
self.finished_queue.get()
|
self.finished_queue.get()
|
||||||
# Blocks until new client connects
|
# Blocks until a new client connects
|
||||||
conn, addr = self.queue.get()
|
conn, addr = self.queue.get()
|
||||||
|
|
||||||
if conn is None or addr is None:
|
if conn is None or addr is None:
|
||||||
@@ -80,7 +80,7 @@ class Worker:
|
|||||||
|
|
||||||
logging.debug("Processing new client: %s", addr)
|
logging.debug("Processing new client: %s", addr)
|
||||||
|
|
||||||
# submit client to thread
|
# submit the client to the executor
|
||||||
self.executor.submit(self._handle_client, conn, addr)
|
self.executor.submit(self._handle_client, conn, addr)
|
||||||
|
|
||||||
self.shutdown()
|
self.shutdown()
|
||||||
@@ -145,8 +145,10 @@ class Worker:
|
|||||||
self.executor.shutdown(False)
|
self.executor.shutdown(False)
|
||||||
|
|
||||||
logging.info("Closing sockets")
|
logging.info("Closing sockets")
|
||||||
|
|
||||||
# Copy dictionary to prevent issues with concurrency
|
# Copy dictionary to prevent issues with concurrency
|
||||||
clients = self.dispatched_sockets.copy().values()
|
clients = self.dispatched_sockets.copy().values()
|
||||||
|
|
||||||
for client in clients:
|
for client in clients:
|
||||||
client: socket.socket
|
client: socket.socket
|
||||||
try:
|
try:
|
||||||
|
Reference in New Issue
Block a user