From 3615c56152e24bdec1bb8cfd157cdd53b0d10d59 Mon Sep 17 00:00:00 2001 From: Arthur Bols Date: Sat, 27 Mar 2021 16:30:53 +0100 Subject: [PATCH] update --- client/command.py | 37 ++++++++++++++++-- client/httpclient.py | 8 +++- client/response_handler.py | 50 +++++++++++------------- httplib/exceptions.py | 44 +++++++++++++-------- httplib/httpsocket.py | 50 +++++++++--------------- httplib/message.py | 1 + httplib/parser.py | 18 +++++++++ public/index.html | 1 + server.py | 2 +- server/command.py | 80 ++++++++++++++++++++++++++++++++------ server/httpserver.py | 42 +++++++++++++++++++- server/requesthandler.py | 7 ++-- server/serversocket.py | 18 +++++++++ server/worker.py | 32 +++++++++------ 14 files changed, 280 insertions(+), 110 deletions(-) create mode 100644 server/serversocket.py diff --git a/client/command.py b/client/command.py index f525c2d..9fec1b9 100644 --- a/client/command.py +++ b/client/command.py @@ -13,19 +13,30 @@ sockets: Dict[str, HTTPClient] = {} def create(command: str, url: str, port): + """ + Create a corresponding Command instance of the specified HTTP `command` with the specified `url` and `port`. + @param command: The command type to create + @param url: The url for the command + @param port: The port for the command + """ + + uri = parser.get_uri(url) if command == "GET": - return GetCommand(url, port) + return GetCommand(uri, port) elif command == "HEAD": - return HeadCommand(url, port) + return HeadCommand(uri, port) elif command == "POST": - return PostCommand(url, port) + return PostCommand(uri, port) elif command == "PUT": - return PutCommand(url, port) + return PutCommand(uri, port) else: raise ValueError() class AbstractCommand(ABC): + """ + A class representing the command for sending an HTTP command. + """ uri: str host: str path: str @@ -111,6 +122,9 @@ class AbstractCommand(ABC): class AbstractWithBodyCommand(AbstractCommand, ABC): + """ + The building block for creating an HTTP message for an HTTP command with a body. + """ def _build_message(self, message: str) -> bytes: body = input(f"Enter {self.command} data: ").encode(FORMAT) @@ -127,12 +141,19 @@ class AbstractWithBodyCommand(AbstractCommand, ABC): class HeadCommand(AbstractCommand): + """ + A Command for sending a `HEAD` message. + """ + @property def command(self): return "HEAD" class GetCommand(AbstractCommand): + """ + A Command for sending a `GET` message. + """ def __init__(self, uri: str, port, dir=None): super().__init__(uri, port) @@ -160,12 +181,20 @@ class GetCommand(AbstractCommand): class PostCommand(AbstractWithBodyCommand): + """ + A command for sending a `POST` command. + """ + @property def command(self): return "POST" class PutCommand(AbstractWithBodyCommand): + """ + A command for sending a `PUT` command. + """ + @property def command(self): return "PUT" diff --git a/client/httpclient.py b/client/httpclient.py index e0f23bc..68d8b71 100644 --- a/client/httpclient.py +++ b/client/httpclient.py @@ -1,6 +1,6 @@ import socket -from httplib.httpsocket import HTTPSocket +from httplib.httpsocket import HTTPSocket, InvalidResponse BUFSIZE = 4096 TIMEOUT = 3 @@ -13,3 +13,9 @@ class HTTPClient(HTTPSocket): def __init__(self, host: str): super().__init__(socket.socket(socket.AF_INET, socket.SOCK_STREAM), host) + + def read_line(self): + try: + return super().read_line() + except UnicodeDecodeError: + raise InvalidResponse("Unexpected decoding error") diff --git a/client/response_handler.py b/client/response_handler.py index c163a56..d725bef 100644 --- a/client/response_handler.py +++ b/client/response_handler.py @@ -14,7 +14,7 @@ from httplib.message import ClientMessage as Message from httplib.retriever import Retriever -def handle(client: HTTPClient, msg: Message, command: AbstractCommand, dir=None): +def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory=None): handler = BasicResponseHandler(client, msg, command) retriever = handler.handle() @@ -23,9 +23,9 @@ def handle(client: HTTPClient, msg: Message, command: AbstractCommand, dir=None) content_type = msg.headers.get("content-type") if content_type and "text/html" in content_type: - handler = HTMLDownloadHandler(retriever, client, msg, command, dir) + handler = HTMLDownloadHandler(retriever, client, msg, command, directory) else: - handler = RawDownloadHandler(retriever, client, msg, command, dir) + handler = RawDownloadHandler(retriever, client, msg, command, directory) return handler.handle() @@ -130,20 +130,20 @@ class BasicResponseHandler(ResponseHandler): class DownloadHandler(ResponseHandler, ABC): - def __init__(self, retriever: Retriever, client: HTTPClient, msg, cmd, dir=None): + def __init__(self, retriever: Retriever, client: HTTPClient, msg, cmd, directory=None): super().__init__(retriever, client, msg, cmd) - if not dir: - dir = self._create_directory() + if not directory: + directory = self._create_directory() - self.path = self._get_duplicate_name(os.path.join(dir, self.get_filename())) + self.path = self._get_duplicate_name(os.path.join(directory, self.get_filename())) @staticmethod - def create(retriever: Retriever, client: HTTPClient, msg, cmd, dir=None): + def create(retriever: Retriever, client: HTTPClient, msg, cmd, directory=None): content_type = msg.headers.get("content-type") if content_type and "text/html" in content_type: - return HTMLDownloadHandler(retriever, client, msg, cmd, dir) - return RawDownloadHandler(retriever, client, msg, cmd, dir) + return HTMLDownloadHandler(retriever, client, msg, cmd, directory) + return RawDownloadHandler(retriever, client, msg, cmd, directory) def _create_directory(self): path = self._get_duplicate_name(os.path.abspath(self.client.host)) @@ -194,14 +194,14 @@ class RawDownloadHandler(DownloadHandler): class HTMLDownloadHandler(DownloadHandler): - def __init__(self, retriever: Retriever, client: HTTPClient, msg: Message, cmd: AbstractCommand, dir=None): - super().__init__(retriever, client, msg, cmd, dir) + def __init__(self, retriever: Retriever, client: HTTPClient, msg: Message, cmd: AbstractCommand, directory=None): + super().__init__(retriever, client, msg, cmd, directory) def handle(self) -> str: - (dir, file) = os.path.split(self.path) + (directory, file) = os.path.split(self.path) tmp_filename = f".{file}.tmp" - tmp_path = os.path.join(dir, tmp_filename) + tmp_path = os.path.join(directory, tmp_filename) file = open(tmp_path, "wb") for buffer in self.retriever.retrieve(): @@ -217,11 +217,11 @@ class HTMLDownloadHandler(DownloadHandler): with open(tmp_filename, "rb") as fp: soup = BeautifulSoup(fp, 'lxml') - base_url = parser.base_url(self.cmd.uri) base_element = soup.find("base") + base_url = self.cmd.uri if base_element: - base_url = f"http://{self.cmd.host}" + base_element["href"] + base_url = parser.urljoin(self.cmd.uri, base_element["href"]) processed = {} tag: Tag @@ -241,22 +241,18 @@ class HTMLDownloadHandler(DownloadHandler): logging.error("Failed to download image: %s, skipping...", tag["src"], exc_info=e) with open(target_filename, 'w') as file: - file.write(str(soup)) + file.write(soup.prettify(formatter="minimal")) def __download_image(self, img_src, base_url): + """ + Download image from the specified `img_src` and `base_url`. + If the image is available, it will be downloaded to the directory of `self.path` + """ + logging.info("Downloading image: %s", img_src) parsed = urlsplit(img_src) - - if parsed.scheme not in ("", "http", "https"): - # Not a valid url - return None - - if parsed.hostname is None: - if img_src[0] == "/": - img_src = f"http://{self.cmd.host}{img_src}" - else: - img_src = parser.absolute_url(base_url, img_src) + img_src = parser.urljoin(base_url, img_src) if parsed.hostname is None or parsed.hostname == self.cmd.host: port = self.cmd.port diff --git a/httplib/exceptions.py b/httplib/exceptions.py index 55bd1fc..1257718 100644 --- a/httplib/exceptions.py +++ b/httplib/exceptions.py @@ -17,7 +17,7 @@ class InvalidStatusLine(HTTPException): class UnsupportedEncoding(HTTPException): - """ Reponse Encoding not support """ + """ Encoding not supported """ def __init(self, enc_type, encoding): self.enc_type = enc_type @@ -39,12 +39,28 @@ class HTTPServerException(Exception): self.body = body -class BadRequest(HTTPServerException): +class HTTPServerCloseException(HTTPServerException): + """ When thrown, the connection should be closed """ + + +class BadRequest(HTTPServerCloseException): """ Malformed HTTP request""" status_code = 400 message = "Bad Request" +class Forbidden(HTTPServerException): + """ Request not allowed """ + status_code = 403 + message = "Forbidden" + + +class NotFound(HTTPServerException): + """ Resource not found """ + status_code = 404 + message = "Not Found" + + class MethodNotAllowed(HTTPServerException): """ Method is not allowed """ status_code = 405 @@ -54,7 +70,7 @@ class MethodNotAllowed(HTTPServerException): self.allowed_methods = allowed_methods -class InternalServerError(HTTPServerException): +class InternalServerError(HTTPServerCloseException): """ Internal Server Error """ status_code = 500 message = "Internal Server Error" @@ -66,16 +82,10 @@ class NotImplemented(HTTPServerException): message = "Not Implemented" -class NotFound(HTTPServerException): - """ Resource not found """ - status_code = 404 - message = "Not Found" - - -class Forbidden(HTTPServerException): - """ Request not allowed """ - status_code = 403 - message = "Forbidden" +class HTTPVersionNotSupported(HTTPServerCloseException): + """ The server does not support the major version HTTP used in the request message """ + status_code = 505 + message = "HTTP Version Not Supported" class Conflict(HTTPServerException): @@ -84,10 +94,10 @@ class Conflict(HTTPServerException): message = "Conflict" -class HTTPVersionNotSupported(HTTPServerException): - """ The server does not support the major version HTTP used in the request message """ - status_code = 505 - message = "HTTP Version Not Supported" +class NotModified(HTTPServerException): + """ Requested resource was not modified """ + status_code = 304 + message = "Not Modified" class InvalidRequestLine(BadRequest): diff --git a/httplib/httpsocket.py b/httplib/httpsocket.py index 2dc1372..f070f71 100644 --- a/httplib/httpsocket.py +++ b/httplib/httpsocket.py @@ -26,42 +26,26 @@ class HTTPSocket: self.file = self.conn.makefile("rb") def close(self): + """ + Close this socket + """ self.file.close() - # self.conn.shutdown(socket.SHUT_RDWR) self.conn.close() def is_closed(self): return self.file is None def reset_request(self): + """ + Close the file handle of this socket and create a new one. + """ self.file.close() self.file = self.conn.makefile("rb") - def __do_receive(self): - if self.conn.fileno() == -1: - raise Exception("Connection closed") - - result = self.conn.recv(BUFSIZE) - return result - - def receive(self): - """Receive data from the client up to BUFSIZE - """ - count = 0 - while True: - count += 1 - try: - return self.__do_receive() - except socket.timeout: - logging.debug("Socket receive timed out after %s seconds", TIMEOUT) - if count == 3: - break - logging.debug("Retrying %s", count) - - logging.debug("Timed out after waiting %s seconds for response", TIMEOUT * count) - raise TimeoutError("Request timed out") - def read(self, size=BUFSIZE, blocking=True) -> bytes: + """ + Read bytes up to the specified buffer size. This method will block when `blocking` is set to True (Default). + """ if blocking: buffer = self.file.read(size) else: @@ -72,14 +56,18 @@ class HTTPSocket: return buffer def read_line(self): - try: - line = str(self.read_bytes_line(), FORMAT) - except UnicodeDecodeError: - # Expected UTF-8 - raise BadRequest() - return line + """ + Read a line decoded as `httpsocket.FORMAT`. + @return: the decoded line + @raise: UnicodeDecodeError + """ + return str(self.read_bytes_line(), FORMAT) def read_bytes_line(self) -> bytes: + """ + Read a line as bytes. + """ + line = self.file.readline(MAXLINE + 1) if len(line) > MAXLINE: raise InvalidResponse("Line too long") diff --git a/httplib/message.py b/httplib/message.py index 16a9545..d21c5d6 100644 --- a/httplib/message.py +++ b/httplib/message.py @@ -23,6 +23,7 @@ class ClientMessage(Message): def __init__(self, version: str, status: int, msg: str, headers: Dict[str, str], raw=None, body: bytes = None): super().__init__(version, headers, raw, body) self.status = status + self.msg = msg class ServerMessage(Message): diff --git a/httplib/parser.py b/httplib/parser.py index 32b04c1..02a290d 100644 --- a/httplib/parser.py +++ b/httplib/parser.py @@ -1,6 +1,7 @@ import logging import os.path import re +import urllib from urllib.parse import urlparse, urlsplit from httplib.exceptions import InvalidStatusLine, InvalidResponse, BadRequest, InvalidRequestLine @@ -255,6 +256,19 @@ def parse_uri(uri: str): return host, port, path +def get_uri(url: str): + """ + Returns a valid URI of the specified URL. + """ + parsed = urlsplit(url) + + result = f"http://{parsed.netloc}{parsed.path}" + if parsed.query != '': + result = f"{result}?{parsed.query}" + + return result + + def base_url(uri: str): parsed = urlsplit(uri) path = parsed.path.rsplit("/", 1)[0] @@ -265,3 +279,7 @@ def absolute_url(uri: str, rel_path: str): parsed = urlsplit(uri) path = os.path.normpath(os.path.join(parsed.path, rel_path)) return f"{parsed.scheme}://{parsed.hostname}{path}" + + +def urljoin(base, url): + return urllib.parse.urljoin(base, url) diff --git a/public/index.html b/public/index.html index 225dbac..64a5752 100644 --- a/public/index.html +++ b/public/index.html @@ -48,6 +48,7 @@

Local image

+
diff --git a/server.py b/server.py index 2bbddc9..c1a2a19 100644 --- a/server.py +++ b/server.py @@ -46,4 +46,4 @@ try: except Exception as e: print("[ABRT] Internal error: " + str(e), file=sys.stderr) logging.debug("Internal error", exc_info=e) - sys.exit(70) \ No newline at end of file + sys.exit(70) diff --git a/server/command.py b/server/command.py index 31ab58c..185d068 100644 --- a/server/command.py +++ b/server/command.py @@ -4,11 +4,10 @@ import sys from abc import ABC, abstractmethod from datetime import datetime from time import mktime -from typing import Dict from wsgiref.handlers import format_date_time from client.httpclient import FORMAT -from httplib.exceptions import NotFound, Conflict, Forbidden +from httplib.exceptions import NotFound, Forbidden, NotModified from httplib.message import ServerMessage as Message root = os.path.join(os.path.dirname(sys.argv[0]), "public") @@ -21,7 +20,6 @@ status_message = { 400: "Bad Request", 404: "Not Found", 500: "Internal Server Error", - } @@ -40,7 +38,6 @@ def create(message: Message): class AbstractCommand(ABC): path: str - headers: Dict[str, str] msg: Message def __init__(self, message: Message): @@ -52,7 +49,15 @@ class AbstractCommand(ABC): def command(self): pass + @property + @abstractmethod + def _conditional_headers(self): + pass + def _get_date(self): + """ + Returns a string representation of the current date according to RFC 1123 + """ now = datetime.now() stamp = mktime(now.timetuple()) return format_date_time(stamp) @@ -61,7 +66,12 @@ class AbstractCommand(ABC): def execute(self): pass - def _build_message(self, status: int, content_type: str, body: bytes): + def _build_message(self, status: int, content_type: str, body: bytes, extra_headers=None): + + if extra_headers is None: + extra_headers = {} + self._process_conditional_headers() + message = f"HTTP/1.1 {status} {status_message[status]}\r\n" message += self._get_date() + "\r\n" @@ -72,15 +82,17 @@ class AbstractCommand(ABC): message += f"Content-Type: {content_type}" if content_type.startswith("text"): message += "; charset=UTF-8" - message += "\r\n" + message += "\r\n" elif content_length > 0: - message += f"Content-Type: application/octet-stream" + message += f"Content-Type: application/octet-stream\r\n" + + for header in extra_headers: + message += f"{header}: {extra_headers[header]}\r\n" message += "\r\n" message = message.encode(FORMAT) if content_length > 0: message += body - message += b"\r\n" return message @@ -97,6 +109,30 @@ class AbstractCommand(ABC): return path + def _process_conditional_headers(self): + + for header in self._conditional_headers: + tmp = self.msg.headers.get(header) + + if not tmp: + continue + self._conditional_headers[header]() + + def _if_modified_since(self): + date_val = self.msg.headers.get("if-modified-since") + if not date_val: + return True + modified = datetime.utcfromtimestamp(os.path.getmtime(self._get_path(False))) + try: + min_date = datetime.strptime(date_val, '%a, %d %b %Y %H:%M:%S GMT') + except ValueError: + return True + + if modified <= min_date: + raise NotModified() + + return True + class AbstractModifyCommand(AbstractCommand, ABC): @@ -105,6 +141,10 @@ class AbstractModifyCommand(AbstractCommand, ABC): def _file_mode(self): pass + @property + def _conditional_headers(self): + return {} + def execute(self): path = self._get_path(False) dir = os.path.dirname(path) @@ -114,31 +154,47 @@ class AbstractModifyCommand(AbstractCommand, ABC): if os.path.exists(dir) and not os.path.isdir(dir): raise Forbidden("Target directory is an existing file!") + exists = os.path.exists(path) + try: with open(path, mode=f"{self._file_mode}b") as file: file.write(self.msg.body) except IsADirectoryError: raise Forbidden("The target resource is a directory!") + if exists: + status = 204 + else: + status = 201 + + return self._build_message(status, None, ) class HeadCommand(AbstractCommand): + @property + def command(self): + return "HEAD" + + @property + def _conditional_headers(self): + return {'if-modified-since': self._if_modified_since} + def execute(self): path = self._get_path() mime = mimetypes.guess_type(path)[0] return self._build_message(200, mime, b"") - @property - def command(self): - return "HEAD" - class GetCommand(AbstractCommand): @property def command(self): return "GET" + @property + def _conditional_headers(self): + return {'if-modified-since': self._if_modified_since} + def get_mimetype(self, path): mime = mimetypes.guess_type(path)[0] diff --git a/server/httpserver.py b/server/httpserver.py index b9f141e..161cceb 100644 --- a/server/httpserver.py +++ b/server/httpserver.py @@ -10,6 +10,9 @@ from server import worker class HTTPServer: + """ + + """ address: str port: int workers = [] @@ -20,6 +23,13 @@ class HTTPServer: _stop_event: Event def __init__(self, address: str, port: int, worker_count, logging_level): + """ + Initialize a HTTP server with the specified address, port, worker_count and logging_level + @param address: the address to listen on for connections + @param port: the port to listen on for connections + @param worker_count: + @param logging_level: + """ self.address = address self.port = port self.worker_count = worker_count @@ -30,24 +40,39 @@ class HTTPServer: self._stop_event = mp.Event() def start(self): + """ + Start the HTTP server. + """ try: self.__do_start() except KeyboardInterrupt: self.__shutdown() def __do_start(self): + """ + Internal method to start the server. + + @raise: + """ # Create socket self.server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + # self.server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) self.server.bind((self.address, self.port)) + # Create workers processes to handle requests self.__create_workers() self.__listen() def __listen(self): + """ + Start listening for new connections + + If a connection is received, it will be dispatched to the worker queue, and picked up by a worker process. + """ self.server.listen() - logging.debug("Listening for connections") + logging.debug("Listening on %s:%d", self.address, self.port) while True: if self._dispatch_queue.qsize() > self.worker_count: @@ -62,6 +87,11 @@ class HTTPServer: logging.debug("Dispatched connection %s", addr) def __shutdown(self): + """ + Cleanly shutdown the server + + Notifies the worker processes to shutdown and eventually closes the server socket + """ # Set stop event self._stop_event.set() @@ -85,10 +115,18 @@ class HTTPServer: self.server.close() def __create_workers(self): + """ + Create worker processes up to `self.worker_count`. + + A worker process is created with start method "spawn", target `worker.worker` and the `self.logging_level` + is passed along with the `self.dispatch_queue` and `self._stop_event` + + """ for i in range(self.worker_count): logging.debug("Creating worker: %d", i + 1) p = mp.Process(target=worker.worker, - args=(f"{self.address}:{self.port}", i + 1, self.logging_level, self._dispatch_queue, self._stop_event)) + args=(f"{self.address}:{self.port}", i + 1, self.logging_level, self._dispatch_queue, + self._stop_event)) p.start() self.workers.append(p) diff --git a/server/requesthandler.py b/server/requesthandler.py index 311ca1c..8e619e4 100644 --- a/server/requesthandler.py +++ b/server/requesthandler.py @@ -1,7 +1,6 @@ import logging import os import sys -import time from datetime import datetime from socket import socket from time import mktime @@ -16,6 +15,7 @@ from httplib.httpsocket import HTTPSocket, FORMAT from httplib.message import ServerMessage as Message from httplib.retriever import Retriever, PreambleRetriever from server import command +from server.serversocket import ServerSocket METHODS = ("GET", "HEAD", "PUT", "POST") @@ -25,7 +25,7 @@ class RequestHandler: root = os.path.join(os.path.dirname(sys.argv[0]), "public") def __init__(self, conn: socket, host): - self.conn = HTTPSocket(conn, host) + self.conn = ServerSocket(conn, host) def listen(self): @@ -68,6 +68,7 @@ class RequestHandler: cmd = command.create(message) msg = cmd.execute() + logging.debug("---response begin---\r\n%s---response end---", msg) self.conn.conn.sendall(msg) def _check_request_line(self, method: str, target: Union[ParseResultBytes, ParseResult], version): @@ -119,4 +120,4 @@ class RequestHandler: message += "\r\n" logging.debug("Sending: %r", message) - client.sendall(message.encode(FORMAT)) + client.sendall(message.encode(FORMAT)) \ No newline at end of file diff --git a/server/serversocket.py b/server/serversocket.py new file mode 100644 index 0000000..adad5eb --- /dev/null +++ b/server/serversocket.py @@ -0,0 +1,18 @@ +import socket + +from httplib.exceptions import BadRequest +from httplib.httpsocket import HTTPSocket + +BUFSIZE = 4096 +TIMEOUT = 3 +FORMAT = "UTF-8" +MAXLINE = 4096 + + +class ServerSocket(HTTPSocket): + + def read_line(self): + try: + return super().read_line() + except UnicodeDecodeError: + raise BadRequest() diff --git a/server/worker.py b/server/worker.py index 28f6ec9..6b3eca5 100644 --- a/server/worker.py +++ b/server/worker.py @@ -4,7 +4,7 @@ import socket import threading from concurrent.futures import ThreadPoolExecutor -from httplib.exceptions import HTTPServerException, InternalServerError +from httplib.exceptions import HTTPServerException, InternalServerError, HTTPServerCloseException from server.requesthandler import RequestHandler THREAD_LIMIT = 128 @@ -61,17 +61,25 @@ class Worker: self.shutdown() def _handle_client(self, conn: socket.socket, addr): - try: - handler = RequestHandler(conn, self.host) - handler.listen() - except HTTPServerException as e: - logging.debug("HTTP Exception:", exc_info=e) - RequestHandler.send_error(conn, e.status_code, e.message) - except socket.timeout: - logging.debug("Socket for client %s timed out", addr) - except Exception as e: - logging.debug("Internal error", exc_info=e) - RequestHandler.send_error(conn, InternalServerError.status_code, InternalServerError.message) + + while True: + try: + handler = RequestHandler(conn, self.host) + handler.listen() + except HTTPServerCloseException as e: + logging.debug("HTTP Exception:", exc_info=e) + RequestHandler.send_error(conn, e.status_code, e.message) + break + except HTTPServerException as e: + logging.debug("HTTP Exception:", exc_info=e) + RequestHandler.send_error(conn, e.status_code, e.message) + except socket.timeout: + logging.debug("Socket for client %s timed out", addr) + break + except Exception as e: + logging.debug("Internal error", exc_info=e) + RequestHandler.send_error(conn, InternalServerError.status_code, InternalServerError.message) + break conn.shutdown(socket.SHUT_RDWR) conn.close()