fix buffer in PreambleRetriever

Log more as info
change default server port
2021-03-28 20:27:40 +02:00 · 2021-03-28 20:21:53 +02:00 · 2021-03-28 20:11:16 +02:00 · 2021-03-28 19:53:14 +02:00 · 2021-03-28 18:55:00 +02:00 · 2021-03-28 17:57:08 +02:00
21 changed files with 1497 additions and 868 deletions
--- a/client.py
+++ b/client.py
@@ -4,18 +4,20 @@ import logging
 import sys

 from client import command as cmd
+from httplib.exceptions import UnhandledHTTPCode


 def main():
    parser = argparse.ArgumentParser(description='HTTP Client')
    parser.add_argument("--verbose", "-v", action='count', default=0, help="Increase verbosity level of logging")
    parser.add_argument("--command", "-c", help="HEAD, GET, PUT or POST", default="GET")
-    parser.add_argument("--port", "-p", help="The port used to connect with the server", default=80)
+    parser.add_argument("--port", "-p", help="The port used to connect with the server", default=80, type=int)
    parser.add_argument("URI", help="The URI to connect to")

    arguments = parser.parse_args()

-    logging.basicConfig(level=logging.ERROR - (10 * arguments.verbose))
+    # Setup logging
+    logging.basicConfig(level=logging.INFO - (10 * arguments.verbose), format="[%(levelname)s] %(message)s")
    logging.debug("Arguments: %s", arguments)

    command = cmd.create(arguments.command, arguments.URI, arguments.port)
@@ -24,7 +26,10 @@ def main():

 try:
    main()
+except UnhandledHTTPCode as e:
+    logging.info(f"[{e.status_code}] {e.cause}:\r\n{e.headers}")
+    sys.exit(2)
 except Exception as e:
-    print("[ABRT] Internal error: " + str(e), file=sys.stderr)
+    logging.info("[ABRT] Internal error: %s", e)
    logging.debug("Internal error", exc_info=e)
-    sys.exit(70)
+    sys.exit(1)
--- a/client/command.py
+++ b/client/command.py
@@ -1,63 +1,103 @@
 import logging
 from abc import ABC, abstractmethod
-from typing import Dict, Tuple
-from urllib.parse import urlparse
+from typing import Dict

-from client.httpclient import FORMAT, HTTPClient
+from client.httpclient import HTTPClient
 from httplib import parser
-from httplib.exceptions import InvalidResponse, InvalidStatusLine, UnsupportedEncoding
-from httplib.message import Message
+from httplib.exceptions import InvalidResponse, InvalidStatusLine, UnsupportedEncoding, UnsupportedProtocol
+from httplib.httpsocket import FORMAT
+from httplib.message import ResponseMessage as Message
 from httplib.retriever import PreambleRetriever

 sockets: Dict[str, HTTPClient] = {}


-def create(command: str, url: str, port):
-    if command == "GET":
-        return GetCommand(url, port)
-    elif command == "HEAD":
-        return HeadCommand(url, port)
-    elif command == "POST":
-        return PostCommand(url, port)
-    elif command == "PUT":
-        return PutCommand(url, port)
+def create(method: str, url: str, port):
+    """
+    Create a corresponding Command instance of the specified HTTP `method` with the specified `url` and `port`.
+    @param method: The command type to create
+    @param url: The url for the command
+    @param port: The port for the command
+    """
+
+    uri = parser.uri_from_url(url)
+    if method == "GET":
+        return GetCommand(uri, port)
+    elif method == "HEAD":
+        return HeadCommand(uri, port)
+    elif method == "POST":
+        return PostCommand(uri, port)
+    elif method == "PUT":
+        return PutCommand(uri, port)
    else:
-        raise ValueError()
+        raise ValueError("Unknown HTTP method")


 class AbstractCommand(ABC):
-    uri: str
-    host: str
-    path: str
-    port: Tuple[str, int]
+    """
+    A class representing the command for sending an HTTP request.
+    """
+    _uri: str
+    _host: str
+    _path: str
+    _port: int

    def __init__(self, uri: str, port):
        self.uri = uri
-        self.host, _, self.path = parser.parse_uri(uri)
-        self.port = port
+        self._port = int(port)
+
+    @property
+    def uri(self):
+        return self._uri
+
+    @uri.setter
+    def uri(self, value):
+        self._uri = value
+        self._host, self._port, self._path = parser.parse_uri(value)
+
+    @property
+    def host(self):
+        return self._host
+
+    @property
+    def path(self):
+        return self._path
+
+    @property
+    def port(self):
+        return self._port

    @property
    @abstractmethod
-    def command(self):
+    def method(self):
        pass

    def execute(self, sub_request=False):
-        (host, path) = self.parse_uri()
+        """
+        Creates and sends the HTTP message for this Command.

-        client = sockets.get(host)
+        @param sub_request: If this execution is in function of a prior command.
+        """
+
+        client = sockets.get(self.host)

        if client and client.is_closed():
            sockets.pop(self.host)
            client = None

        if not client:
-            client = HTTPClient(host)
-            client.conn.connect((host, self.port))
-            sockets[host] = client
+            logging.info("Connecting to %s", self.host)
+            client = HTTPClient(self.host)
+            client.conn.connect((self.host, self.port))
+            logging.info("Connected.")
+            sockets[self.host] = client
+        else:
+            logging.info("Reusing socket for %s", self.host)

-        message = f"{self.command} {path} HTTP/1.1\r\n"
-        message += f"Host: {host}:{self.port}\r\n"
-        message += "Accept: */*\r\nAccept-Encoding: identity\r\n"
+        message = f"{self.method} {self.path} HTTP/1.1\r\n"
+        message += f"Host: {self.host}:{self.port}\r\n"
+        message += "Accept: */*\r\n"
+        message += "Accept-Encoding: identity\r\n"
        encoded_msg = self._build_message(message)

        logging.debug("---request begin---\r\n%s---request end---", encoded_msg.decode(FORMAT))
@@ -67,53 +107,98 @@ class AbstractCommand(ABC):
        logging.info("HTTP request sent, awaiting response...")

        try:
-            retriever = PreambleRetriever(client)
-            self._await_response(client, retriever)
+            self._await_response(client)
        except InvalidResponse as e:
-            logging.debug("Internal error: Response could not be parsed", exc_info=e)
-            return
+            logging.error("Response could not be parsed")
+            logging.debug("", exc_info=e)
        except InvalidStatusLine as e:
-            logging.debug("Internal error: Invalid status-line in response", exc_info=e)
-            return
+            logging.error("Invalid status-line in response")
+            logging.debug("", exc_info=e)
        except UnsupportedEncoding as e:
-            logging.debug("Internal error: Unsupported encoding in response", exc_info=e)
+            logging.error("Unsupported encoding in response")
+            logging.debug("", exc_info=e)
+        except UnsupportedProtocol as e:
+            logging.error("Unsupported protocol: %s", e.protocol)
+            logging.debug("", exc_info=e)
        finally:
            if not sub_request:
                client.close()

-    def _await_response(self, client, retriever):
-        while True:
-            line = client.read_line()
-            print(line, end="")
-            if line in ("\r\n", "\n", ""):
-                break
+    def _get_preamble(self, client):
+        """
+        Returns the preamble (start-line and headers) of the response of this command.
+        @param client: the client object to retrieve from
+        @return: A Message object containing the HTTP-version, status code, status message, headers and buffer
+        """
+        retriever = PreambleRetriever(client)
+        lines = retriever.retrieve()
+        (version, status, msg) = parser.parse_status_line(next(lines))
+        headers = parser.parse_headers(lines)
+
+        buffer = retriever.buffer
+        logging.debug("---response begin---\r\n%s---response end---", "".join(buffer))
+
+        return Message(version, status, msg, headers, buffer)
+
+    def _await_response(self, client):
+        """
+        Simple response method.
+
+        Receives the response and prints to stdout.
+        """
+
+        msg = self._get_preamble(client)
+
+        print("".join(msg.raw))

    def _build_message(self, message: str) -> bytes:
        return (message + "\r\n").encode(FORMAT)

-    def parse_uri(self):
-        parsed = urlparse(self.uri)

-        # If there is no netloc, the url is invalid, so prepend `//` and try again
-        if parsed.netloc == "":
-            parsed = urlparse("//" + self.uri)
+class HeadCommand(AbstractCommand):
+    """
+    A Command for sending a `HEAD` request.
+    """

-        host = parsed.netloc
-        path = parsed.path
-        if len(path) == 0 or path[0] != '/':
-            path = "/" + path
+    @property
+    def method(self):
+        return "HEAD"

-        port_pos = host.find(":")
-        if port_pos >= 0:
-            host = host[:port_pos]

-        return host, path
+class GetCommand(AbstractCommand):
+    """
+    A Command for sending a `GET` request.
+    """
+    dir: str
+
+    def __init__(self, uri: str, port, directory=None):
+        super().__init__(uri, port)
+        self.dir = directory
+        self.filename = None
+
+    @property
+    def method(self):
+        return "GET"
+
+    def _await_response(self, client):
+        """
+        Handles the response of this command.
+        """
+        msg = self._get_preamble(client)
+
+        from client import responsehandler
+        self.filename = responsehandler.handle(client, msg, self, self.dir)


 class AbstractWithBodyCommand(AbstractCommand, ABC):
+    """
+    The building block for creating an HTTP message for an HTTP method with a body (POST and PUT).
+    """

    def _build_message(self, message: str) -> bytes:
-        body = input(f"Enter {self.command} data: ").encode(FORMAT)
+        input_line = input(f"Enter {self.method} data: ")
+        input_line += "\r\n"
+        body = input_line.encode(FORMAT)
        print()

        message += "Content-Type: text/plain\r\n"
@@ -126,46 +211,21 @@ class AbstractWithBodyCommand(AbstractCommand, ABC):
        return message


-class HeadCommand(AbstractCommand):
-    @property
-    def command(self):
-        return "HEAD"
-
-
-class GetCommand(AbstractCommand):
-
-    def __init__(self, uri: str, port, dir=None):
-        super().__init__(uri, port)
-        self.dir = dir
-        self.filename = None
-
-    @property
-    def command(self):
-        return "GET"
-
-    def _get_preamble(self, retriever):
-        lines = retriever.retrieve()
-        (version, status, msg) = parser.parse_status_line(next(lines))
-        headers = parser.parse_headers(lines)
-
-        logging.debug("---response begin---\r\n%s--- response end---", "".join(retriever.buffer))
-
-        return Message(version, status, msg, headers)
-
-    def _await_response(self, client, retriever):
-        msg = self._get_preamble(retriever)
-
-        from client import response_handler
-        self.filename = response_handler.handle(client, msg, self, self.dir)
-
-
 class PostCommand(AbstractWithBodyCommand):
+    """
+    A command for sending a `POST` request.
+    """
+
    @property
-    def command(self):
+    def method(self):
        return "POST"


 class PutCommand(AbstractWithBodyCommand):
+    """
+    A command for sending a `PUT` request.
+    """
+
    @property
-    def command(self):
+    def method(self):
        return "PUT"
--- a/client/htmlparser.py
+++ b/client/htmlparser.py
@@ -1,6 +0,0 @@
-from bs4 import BeautifulSoup
-
-
-class HTMLParser:
-    def __init__(self, soup: BeautifulSoup):
-        pass
--- a/client/httpclient.py
+++ b/client/httpclient.py
@@ -1,15 +1,27 @@
 import socket

-from httplib.httpsocket import HTTPSocket
-
-BUFSIZE = 4096
-TIMEOUT = 3
-FORMAT = "UTF-8"
-MAXLINE = 4096
+from httplib.httpsocket import HTTPSocket, InvalidResponse


 class HTTPClient(HTTPSocket):
+    """
+    Wrapper class for a socket. Represents a client which connects to a server.
+    """
+
    host: str

    def __init__(self, host: str):
-        super().__init__(socket.socket(socket.AF_INET, socket.SOCK_STREAM), host)
+        super().__init__(socket.socket(socket.AF_INET, socket.SOCK_STREAM))
+        self.host = host
+
+    def read_line(self):
+        """
+        Reads the next line decoded as `httpsocket.FORMAT`
+
+        @return: the decoded next line retrieved from the socket
+        @raise InvalidResponse: If the next line couldn't be decoded, but was expected to
+        """
+        try:
+            return super().read_line()
+        except UnicodeDecodeError:
+            raise InvalidResponse("Unexpected decoding error")
--- a/client/response_handler.py
+++ b/client/response_handler.py
@@ -1,274 +0,0 @@
-import logging
-import os
-import re
-from abc import ABC, abstractmethod
-from urllib.parse import urlsplit, unquote
-
-from bs4 import BeautifulSoup, Tag
-
-from client.command import AbstractCommand, GetCommand
-from client.httpclient import HTTPClient, FORMAT
-from httplib import parser
-from httplib.exceptions import InvalidResponse
-from httplib.message import Message
-from httplib.retriever import Retriever
-
-
-def handle(client: HTTPClient, msg: Message, command: AbstractCommand, dir=None):
-    handler = BasicResponseHandler(client, msg, command)
-    retriever = handler.handle()
-
-    if retriever is None:
-        return
-
-    content_type = msg.headers.get("content-type")
-    if content_type and "text/html" in content_type:
-        handler = HTMLDownloadHandler(retriever, client, msg, command, dir)
-    else:
-        handler = RawDownloadHandler(retriever, client, msg, command, dir)
-
-    return handler.handle()
-
-
-class ResponseHandler(ABC):
-    client: HTTPClient
-    retriever: Retriever
-    msg: Message
-    cmd: AbstractCommand
-
-    def __init__(self, retriever: Retriever, client: HTTPClient, msg, cmd):
-        self.client = client
-        self.retriever = retriever
-        self.msg = msg
-        self.cmd = cmd
-
-    @abstractmethod
-    def handle(self):
-        pass
-
-    @staticmethod
-    def parse_uri(uri: str):
-        parsed = urlsplit(uri)
-
-        # If there is no netloc, the url is invalid, so prepend `//` and try again
-        if parsed.netloc == "":
-            parsed = urlsplit("//" + uri)
-
-        host = parsed.netloc
-        path = parsed.path
-        if len(path) == 0 or path[0] != '/':
-            path = "/" + path
-        return host, path
-
-
-class BasicResponseHandler(ResponseHandler):
-    """ Response handler which throws away the body and only shows the headers.
-    In case of a redirect, it will process it and pass it to the appropriate response handler.
-     """
-
-    def __init__(self, client: HTTPClient, msg: Message, cmd: AbstractCommand):
-        retriever = Retriever.create(client, msg.headers)
-        super().__init__(retriever, client, msg, cmd)
-
-    def handle(self):
-        return self._handle_status()
-
-    def _skip_body(self):
-        logging.debug("Skipping body: [")
-        for line in self.retriever.retrieve():
-            try:
-                logging.debug("%s", line.decode(FORMAT))
-            except Exception:
-                logging.debug("%r", line)
-
-        logging.debug("] done.")
-
-    def _handle_status(self):
-        logging.info("%d %s", self.msg.status, self.msg.msg)
-
-        if self.msg.status == 101:
-            # Switching protocols is not supported
-            print(f"{self.msg.version} {self.msg.status} {self.msg.msg}")
-            print(self.msg.headers)
-            return
-
-        if 200 <= self.msg.status < 300:
-            return self.retriever
-
-        if 300 <= self.msg.status < 400:
-            # Redirect
-            return self._do_handle_redirect()
-        if 400 <= self.msg.status < 500:
-            # Dump headers and exit with error
-            print(f"{self.msg.version} {self.msg.status} {self.msg.msg}")
-            print(self.msg.headers)
-            return None
-
-    def _do_handle_redirect(self):
-        self._skip_body()
-
-        location = self.msg.headers.get("location")
-        if not location:
-            raise InvalidResponse("No location in redirect")
-
-        parsed_location = urlsplit(location)
-        if not parsed_location.hostname:
-            raise InvalidResponse("Invalid location")
-
-        if not parsed_location.scheme == "http":
-            raise InvalidResponse("Only http is supported")
-
-        self.cmd.uri = location
-        self.cmd.host, self.cmd.port, self.cmd.path = parser.parse_uri(location)
-
-        if self.msg.status == 301:
-            logging.info("Status 301. Closing socket [%s]", self.cmd.host)
-            self.client.close()
-
-        self.cmd.execute()
-
-        return None
-
-
-class DownloadHandler(ResponseHandler, ABC):
-
-    def __init__(self, retriever: Retriever, client: HTTPClient, msg, cmd, dir=None):
-        super().__init__(retriever, client, msg, cmd)
-
-        if not dir:
-            dir = self._create_directory()
-
-        self.path = self._get_duplicate_name(os.path.join(dir, self.get_filename()))
-
-    @staticmethod
-    def create(retriever: Retriever, client: HTTPClient, msg, cmd, dir=None):
-        content_type = msg.headers.get("content-type")
-        if content_type and "text/html" in content_type:
-            return HTMLDownloadHandler(retriever, client, msg, cmd, dir)
-        return RawDownloadHandler(retriever, client, msg, cmd, dir)
-
-    def _create_directory(self):
-        path = self._get_duplicate_name(os.path.abspath(self.client.host))
-        os.mkdir(path)
-        return path
-
-    def _get_duplicate_name(self, path):
-        tmp_path = path
-        i = 0
-        while os.path.exists(tmp_path):
-            i += 1
-            tmp_path = "{path}.{counter}".format(path=path, counter=i)
-
-        return tmp_path
-
-    def get_filename(self):
-        """Returns the filename to download the payload to.
-        """
-        filename = os.path.basename(self.cmd.path)
-        if filename == '':
-            return "index.html"
-
-        while "%" in filename:
-            filename = unquote(filename)
-
-        filename = re.sub(r"[^\w.+-]+[.]*", '', filename)
-        result = os.path.basename(filename).strip()
-        if any(letter.isalnum() for letter in result):
-            return result
-
-        return "index.html"
-
-
-class RawDownloadHandler(DownloadHandler):
-
-    def __init__(self, retriever: Retriever, client: HTTPClient, msg: Message, cmd: AbstractCommand, dir=None):
-        super().__init__(retriever, client, msg, cmd, dir)
-
-    def handle(self) -> str:
-        logging.debug("Retrieving payload")
-        file = open(self.path, "wb")
-
-        for buffer in self.retriever.retrieve():
-            file.write(buffer)
-        file.close()
-
-        return self.path
-
-
-class HTMLDownloadHandler(DownloadHandler):
-    def __init__(self, retriever: Retriever, client: HTTPClient, msg: Message, cmd: AbstractCommand, dir=None):
-        super().__init__(retriever, client, msg, cmd, dir)
-
-    def handle(self) -> str:
-
-        (dir, file) = os.path.split(self.path)
-        tmp_filename = f".{file}.tmp"
-        tmp_path = os.path.join(dir, tmp_filename)
-        file = open(tmp_path, "wb")
-
-        for buffer in self.retriever.retrieve():
-            file.write(buffer)
-        file.close()
-
-        self._download_images(tmp_path, self.path)
-        os.remove(tmp_path)
-        return self.path
-
-    def _download_images(self, tmp_filename, target_filename):
-
-        (host, path) = ResponseHandler.parse_uri(self.cmd.uri)
-        with open(tmp_filename, "rb") as fp:
-            soup = BeautifulSoup(fp, 'lxml')
-
-            base_url = self.cmd.uri
-            base_element = soup.find("base")
-
-            if base_element:
-                base_url = base_element["href"]
-
-            processed = {}
-            tag: Tag
-            for tag in soup.find_all("img"):
-                try:
-                    if not tag.has_attr("src"):
-                        continue
-
-                    if tag["src"] in processed:
-                        new_url = processed.get(tag["src"])
-                    else:
-                        new_url = self.__download_image(tag["src"], host, base_url)
-                        processed[tag["src"]] = new_url
-                    if new_url:
-                        tag["src"] = os.path.basename(new_url)
-                except Exception as e:
-                    logging.error("Failed to download image: %s, skipping...", tag["src"], exc_info=e)
-
-        with open(target_filename, 'w') as file:
-            file.write(str(soup))
-
-    def __download_image(self, img_src, host, base_url):
-        logging.debug("Downloading image: %s", img_src)
-
-        parsed = urlsplit(img_src)
-
-        if parsed.scheme not in ("", "http", "https"):
-            # Not a valid url
-            return None
-
-        if parsed.hostname is None:
-            if img_src[0] == "/":
-                img_src = host + img_src
-            else:
-                img_src = os.path.join(os.path.dirname(base_url), img_src)
-
-        if parsed.hostname is None or parsed.hostname == host:
-            port = self.cmd.port
-        elif ":" in parsed.netloc:
-            port = parsed.netloc.split(":", 1)[1]
-        else:
-            port = 80
-
-        command = GetCommand(img_src, port, os.path.dirname(self.path))
-        command.execute(True)
-
-        return command.filename
--- a/client/responsehandler.py
+++ b/client/responsehandler.py
@@ -0,0 +1,308 @@
+import logging
+import os
+import re
+from abc import ABC, abstractmethod
+from urllib.parse import urlsplit, unquote
+
+from client.command import AbstractCommand, GetCommand
+from client.httpclient import HTTPClient
+from httplib import parser
+from httplib.exceptions import InvalidResponse, UnhandledHTTPCode, UnsupportedProtocol
+from httplib.httpsocket import FORMAT
+from httplib.message import ResponseMessage as Message
+from httplib.retriever import Retriever
+
+BASE_REGEX = re.compile(r"<\s*base[^>]*\shref\s*=\s*['\"]([^\"']+)['\"][^>]*>", re.M | re.I)
+IMG_REGEX = re.compile(r"<\s*img[^>]*\ssrc\s*=\s*['\"]([^\"']+)['\"][^>]*>", re.M | re.I)
+
+
+def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory=None):
+    """
+    Handle the response of the request message
+
+    @param client: the client which sent the request.
+    @param msg: the response message
+    @param command: the command of the sent request-message
+    @param directory: the directory to download the response to (if available)
+    """
+    handler = BasicResponseHandler(client, msg, command)
+    retriever = handler.handle()
+
+    if retriever is None:
+        return
+
+    content_type = msg.headers.get("content-type")
+    if content_type and "text/html" in content_type:
+        handler = HTMLDownloadHandler(retriever, client, msg, command, directory)
+    else:
+        handler = RawDownloadHandler(retriever, client, msg, command, directory)
+
+    return handler.handle()
+
+
+class ResponseHandler(ABC):
+    """
+    Helper class for handling response messages.
+    """
+    client: HTTPClient
+    retriever: Retriever
+    msg: Message
+    cmd: AbstractCommand
+
+    def __init__(self, retriever: Retriever, client: HTTPClient, msg, cmd):
+        self.client = client
+        self.retriever = retriever
+        self.msg = msg
+        self.cmd = cmd
+
+    @abstractmethod
+    def handle(self):
+        """
+        Handle the response.
+        """
+        pass
+
+
+class BasicResponseHandler(ResponseHandler):
+    """
+    Response handler which will handle redirects and other HTTP status codes.
+    In case of a redirect, it will process it and pass it to the appropriate response handler.
+    """
+
+    def __init__(self, client: HTTPClient, msg: Message, cmd: AbstractCommand):
+        retriever = Retriever.create(client, msg.headers)
+        super().__init__(retriever, client, msg, cmd)
+
+    def handle(self):
+        return self._handle_status()
+
+    def _skip_body(self):
+        logging.debug("Skipping body: [")
+        for line in self.retriever.retrieve():
+            try:
+                logging.debug("%s", line.decode(FORMAT))
+            except UnicodeDecodeError:
+                logging.debug("%r", line)
+
+        logging.debug("] done.")
+
+    def _handle_status(self):
+        logging.info("%d %s", self.msg.status, self.msg.msg)
+
+        if self.msg.status == 101:
+            # Switching protocols is not supported
+            raise UnhandledHTTPCode(self.msg.status, "".join(self.msg.raw), "Switching protocols is not supported")
+
+        if 200 <= self.msg.status < 300:
+            return self.retriever
+
+        if 300 <= self.msg.status < 400:
+            # Redirect
+            self._skip_body()
+            return self._handle_redirect()
+
+        if 400 <= self.msg.status < 600:
+            self._skip_body()
+            # Dump headers and exit with error
+            raise UnhandledHTTPCode(self.msg.status, "".join(self.msg.raw), self.msg.msg)
+
+        return None
+
+    def _handle_redirect(self):
+        if self.msg.status == 304:
+            raise UnhandledHTTPCode(self.msg.status, "".join(self.msg.raw), self.msg.msg)
+
+        location = self.msg.headers.get("location")
+        if not location or len(location.strip()) == 0:
+            raise InvalidResponse("No location in redirect")
+
+        location = parser.urljoin(self.cmd.uri, location)
+        parsed_location = urlsplit(location)
+        if not parsed_location.hostname:
+            raise InvalidResponse("Invalid location")
+
+        if not parsed_location.scheme == "http":
+            raise UnsupportedProtocol(parsed_location.scheme)
+
+        self.cmd.uri = location
+
+        if self.msg.status == 301:
+            logging.info("Status 301. Closing socket [%s]", self.cmd.host)
+            self.client.close()
+
+        self.cmd.execute()
+
+        return None
+
+
+class DownloadHandler(ResponseHandler, ABC):
+
+    def __init__(self, retriever: Retriever, client: HTTPClient, msg, cmd, directory=None):
+        super().__init__(retriever, client, msg, cmd)
+
+        if not directory:
+            directory = self._create_directory()
+
+        self.path = self._get_duplicate_name(os.path.join(directory, self.get_filename()))
+
+    @staticmethod
+    def create(retriever: Retriever, client: HTTPClient, msg, cmd, directory=None):
+        content_type = msg.headers.get("content-type")
+        if content_type and "text/html" in content_type:
+            return HTMLDownloadHandler(retriever, client, msg, cmd, directory)
+        return RawDownloadHandler(retriever, client, msg, cmd, directory)
+
+    def _create_directory(self):
+        path = self._get_duplicate_name(os.path.abspath(self.client.host))
+        os.mkdir(path)
+        return path
+
+    def _get_duplicate_name(self, path):
+        tmp_path = path
+        i = 0
+        while os.path.exists(tmp_path):
+            i += 1
+            tmp_path = "{path}.{counter}".format(path=path, counter=i)
+
+        return tmp_path
+
+    def get_filename(self):
+        """
+        Returns the filename to download the payload to.
+        """
+        filename = os.path.basename(self.cmd.path)
+        if filename == '':
+            return "index.html"
+
+        while "%" in filename:
+            filename = unquote(filename)
+
+        filename = re.sub(r"[^\w.+-]+[.]*", '', filename)
+        result = os.path.basename(filename).strip()
+        if any(letter.isalnum() for letter in result):
+            return result
+
+        return "index.html"
+
+
+class RawDownloadHandler(DownloadHandler):
+
+    def __init__(self, retriever: Retriever, client: HTTPClient, msg: Message, cmd: AbstractCommand, directory=None):
+        super().__init__(retriever, client, msg, cmd, directory)
+
+    def handle(self) -> str:
+        logging.info("Saving to '%s'", parser.get_relative_save_path(self.path))
+        file = open(self.path, "wb")
+
+        for buffer in self.retriever.retrieve():
+            file.write(buffer)
+        file.close()
+
+        return self.path
+
+
+class HTMLDownloadHandler(DownloadHandler):
+    def __init__(self, retriever: Retriever, client: HTTPClient, msg: Message, cmd: AbstractCommand, directory=None):
+        super().__init__(retriever, client, msg, cmd, directory)
+
+    def handle(self) -> str:
+
+        (directory, file) = os.path.split(self.path)
+        tmp_filename = f".{file}.tmp"
+        tmp_path = os.path.join(directory, tmp_filename)
+        file = open(tmp_path, "wb")
+
+        for buffer in self.retriever.retrieve():
+            file.write(buffer)
+        file.close()
+
+        charset = parser.get_charset(self.msg.headers)
+        self._download_images(tmp_path, self.path, charset)
+        os.remove(tmp_path)
+        return self.path
+
+    def _download_images(self, tmp_path, target_path, charset=FORMAT):
+        """
+        Download images referenced in the html of `tmp_filename` and replaces the references in the html
+        and writes it to `target_filename`.
+        @param tmp_path: the path to the temporary html file
+        @param target_path: the path for the final html file
+        @param charset: the charset to decode `tmp_filename`
+        """
+
+        try:
+            fp = open(tmp_path, "r", encoding=charset)
+            html = fp.read()
+        except UnicodeDecodeError or LookupError:
+            fp = open(tmp_path, "r", encoding=FORMAT, errors="replace")
+            html = fp.read()
+
+        fp.close()
+
+        base_element = BASE_REGEX.search(html)
+        base_url = self.cmd.uri
+        if base_element:
+            base_url = parser.urljoin(self.cmd.uri, base_element.group(1))
+
+        processed = {}
+        to_replace = []
+
+        # Find all <img> tags, and the urls from the corresponding `src` fields
+        for m in IMG_REGEX.finditer(html):
+            url_start = m.start(1)
+            url_end = m.end(1)
+            target = m.group(1)
+
+            try:
+                if len(target) == 0:
+                    continue
+
+                if target in processed:
+                    # url is already processed
+                    new_url = processed.get(target)
+                else:
+                    new_url = self.__download_image(target, base_url)
+                    processed[target] = new_url
+
+                if new_url:
+                    local_path = os.path.basename(new_url)
+                    to_replace.append((url_start, url_end, local_path))
+
+            except Exception as e:
+                logging.error("Failed to download image: %s, skipping...", target)
+                logging.debug("", exc_info=e)
+                processed[target] = None
+
+        # reverse the list so urls at the bottom of the html file are processed first.
+        # Otherwise, our start and end positions won't be correct.
+        to_replace.reverse()
+        for (start, end, path) in to_replace:
+            html = html[:start] + path + html[end:]
+
+        logging.info("Saving HTML to '%s'", parser.get_relative_save_path(target_path))
+        with open(target_path, 'w', encoding=FORMAT) as file:
+            file.write(html)
+
+    def __download_image(self, img_src, base_url):
+        """
+        Download image from the specified `img_src` and `base_url`.
+        If the image is available, it will be downloaded to the directory of `self.path`
+        """
+
+        logging.info("Downloading image: %s", img_src)
+
+        parsed = urlsplit(img_src)
+        img_src = parser.urljoin(base_url, img_src)
+
+        # Check if the port of the image sh
+        if parsed.hostname is None or parsed.hostname == self.cmd.host:
+            port = self.cmd.port
+        elif ":" in parsed.netloc:
+            port = parsed.netloc.split(":", 1)[1]
+        else:
+            port = 80
+
+        command = GetCommand(img_src, port, os.path.dirname(self.path))
+        command.execute(True)
+
+        return command.filename
--- a/httplib/exceptions.py
+++ b/httplib/exceptions.py
@@ -1,68 +1,163 @@
 class HTTPException(Exception):
-    """ Base class for HTTP exceptions """
+    """
+    Base class for HTTP exceptions
+    """
+
+
+class UnhandledHTTPCode(Exception):
+    """
+    Exception thrown if HTTP codes are not further processed.
+    """
+    status_code: str
+    headers: str
+    cause: str
+
+    def __init__(self, status, headers, cause):
+        self.status_code = status
+        self.headers = headers
+        self.cause = cause


 class InvalidResponse(HTTPException):
-    """ Response message cannot be parsed """
+    """
+    Response message cannot be parsed
+    """

-    def __init(self, message):
+    def __init__(self, message):
        self.message = message


 class InvalidStatusLine(HTTPException):
-    """ Response status line is invalid """
+    """
+    Response status line is invalid
+    """

-    def __init(self, line):
+    def __init__(self, line):
        self.line = line


 class UnsupportedEncoding(HTTPException):
-    """ Reponse Encoding not support """
+    """
+    Encoding not supported
+    """

-    def __init(self, enc_type, encoding):
+    def __init__(self, enc_type, encoding):
        self.enc_type = enc_type
        self.encoding = encoding


+class UnsupportedProtocol(HTTPException):
+    """
+    Protocol is not supported
+    """
+
+    def __init__(self, protocol):
+        self.protocol = protocol
+
+
 class IncompleteResponse(HTTPException):
-    def __init(self, cause):
+    def __init__(self, cause):
        self.cause = cause


-class HTTPServerException(Exception):
-    """ Base class for HTTP Server exceptions """
+class HTTPServerException(HTTPException):
+    """
+    Base class for HTTP Server exceptions
+    """
    status_code: str
    message: str
+    arg: str
+
+    def __init__(self, arg):
+        self.arg = arg


-class BadRequest(HTTPServerException):
-    """ Malformed HTTP request"""
+class HTTPServerCloseException(HTTPServerException):
+    """
+    When raised, the connection should be closed
+    """
+
+
+class BadRequest(HTTPServerCloseException):
+    """
+    Malformed HTTP request
+    """
    status_code = 400
    message = "Bad Request"


+class Forbidden(HTTPServerException):
+    """
+    Request not allowed
+    """
+    status_code = 403
+    message = "Forbidden"
+
+
+class NotFound(HTTPServerException):
+    """
+    Resource not found
+    """
+    status_code = 404
+    message = "Not Found"
+
+
 class MethodNotAllowed(HTTPServerException):
-    """ Method is not allowed """
+    """
+    Method is not allowed
+    """
    status_code = 405
    message = "Method Not Allowed"

-    def __init(self, allowed_methods):
+    def __init__(self, allowed_methods):
        self.allowed_methods = allowed_methods


-class InternalServerError(HTTPServerException):
-    """ Internal Server Error """
+class InternalServerError(HTTPServerCloseException):
+    """
+    Internal Server Error
+    """
    status_code = 500
    message = "Internal Server Error"


 class NotImplemented(HTTPServerException):
-    """ Functionality not implemented """
+    """
+    Functionality not implemented
+    """
    status_code = 501
    message = "Not Implemented"


-class NotFound(HTTPServerException):
-    """ Resource not found """
-    status_code = 404
-    message = "Not Found"
+class HTTPVersionNotSupported(HTTPServerCloseException):
+    """
+    The server does not support the major version HTTP used in the request message
+    """
+    status_code = 505
+    message = "HTTP Version Not Supported"
+
+
+class Conflict(HTTPServerException):
+    """
+    Conflict in the current state of the target resource
+    """
+    status_code = 409
+    message = "Conflict"
+
+
+class NotModified(HTTPServerException):
+    """
+    Requested resource was not modified
+    """
+    status_code = 304
+    message = "Not Modified"
+
+
+class InvalidRequestLine(BadRequest):
+    """
+    Request start-line is invalid
+    """
+
+    def __init__(self, line, arg):
+        super().__init__(arg)
+        self.request_line = line
--- a/httplib/httpsocket.py
+++ b/httplib/httpsocket.py
@@ -1,4 +1,3 @@
-import logging
 import socket
 from io import BufferedReader
 from typing import Tuple
@@ -10,13 +9,20 @@ MAXLINE = 4096


 class HTTPSocket:
-    host: str
+    """
+    Wrapper class for a socket. Represents an HTTP connection.
+
+    This class adds helper methods to read the underlying socket as a file.
+    """
    conn: socket.socket
-    file: Tuple[BufferedReader, None]
+    file: BufferedReader

-    def __init__(self, conn: socket.socket, host: str):
+    def __init__(self, conn: socket.socket):
+        """
+        Initialize an HTTPSocket with the given socket and host.
+        @param conn: the socket object
+        """

-        self.host = host
        self.conn = conn
        self.conn.settimeout(TIMEOUT)
        self.conn.setblocking(True)
@@ -24,64 +30,67 @@ class HTTPSocket:
        self.file = self.conn.makefile("rb")

    def close(self):
+        """
+        Close this socket
+        """
        self.file.close()
-        # self.conn.shutdown(socket.SHUT_RDWR)
        self.conn.close()

    def is_closed(self):
        return self.file is None

    def reset_request(self):
+        """
+        Close the file handle of this socket and create a new one.
+        """
        self.file.close()
        self.file = self.conn.makefile("rb")

-    def __do_receive(self):
-        if self.conn.fileno() == -1:
-            raise Exception("Connection closed")
-
-        result = self.conn.recv(BUFSIZE)
-        return result
-
-    def receive(self):
-        """Receive data from the client up to BUFSIZE
-        """
-        count = 0
-        while True:
-            count += 1
-            try:
-                return self.__do_receive()
-            except socket.timeout:
-                logging.debug("Socket receive timed out after %s seconds", TIMEOUT)
-                if count == 3:
-                    break
-                logging.debug("Retrying %s", count)
-
-        logging.debug("Timed out after waiting %s seconds for response", TIMEOUT * count)
-        raise TimeoutError("Request timed out")
-
    def read(self, size=BUFSIZE, blocking=True) -> bytes:
+        """
+        Read bytes up to the specified buffer size. This method will block when `blocking` is set to True (Default).
+        """
        if blocking:
-            return self.file.read(size)
+            buffer = self.file.read(size)
+        else:
+            buffer = self.file.read1(size)

-        return self.file.read1(size)
+        if len(buffer) == 0:
+            raise ConnectionAbortedError
+        return buffer

    def read_line(self):
+        """
+        Read a line decoded as `httpsocket.FORMAT`.
+        @return: the decoded line
+        @raise: UnicodeDecodeError
+        """
        return str(self.read_bytes_line(), FORMAT)

    def read_bytes_line(self) -> bytes:
+        """
+        Read a line as bytes.
+        """
+
        line = self.file.readline(MAXLINE + 1)
        if len(line) > MAXLINE:
            raise InvalidResponse("Line too long")
+        elif len(line) == 0:
+            raise ConnectionAbortedError

        return line


 class HTTPException(Exception):
-    """ Base class for HTTP exceptions """
+    """
+    Base class for HTTP exceptions
+    """


 class InvalidResponse(HTTPException):
-    """ Response message cannot be parsed """
+    """
+    Response message cannot be parsed
+    """

    def __init(self, message):
        self.message = message
--- a/httplib/message.py
+++ b/httplib/message.py
@@ -1,16 +1,36 @@
+from abc import ABC
 from typing import Dict
+from urllib.parse import SplitResult


-class Message:
+class Message(ABC):
    version: str
-    status: int
-    msg: str
    headers: Dict[str, str]
+    raw: [str]
    body: bytes

-    def __init__(self, version: str, status: int, msg: str, headers: Dict[str, str], body: bytes = None):
+    def __init__(self, version: str, headers: Dict[str, str], raw=None, body: bytes = None):
        self.version = version
+        self.headers = headers
+        self.raw = raw
+        self.body = body
+
+
+class ResponseMessage(Message):
+    status: int
+    msg: str
+
+    def __init__(self, version: str, status: int, msg: str, headers: Dict[str, str], raw=None, body: bytes = None):
+        super().__init__(version, headers, raw, body)
        self.status = status
        self.msg = msg
-        self.headers = headers
-        self.body = body
+
+
+class RequestMessage(Message):
+    method: str
+    target: SplitResult
+
+    def __init__(self, version: str, method: str, target, headers: Dict[str, str], raw=None, body: bytes = None):
+        super().__init__(version, headers, raw, body)
+        self.method = method
+        self.target = target
--- a/httplib/parser.py
+++ b/httplib/parser.py
@@ -1,21 +1,25 @@
 import logging
+import os
+import pathlib
 import re
+import urllib
+from datetime import datetime
+from time import mktime
+from typing import Dict
 from urllib.parse import urlparse, urlsplit
+from wsgiref.handlers import format_date_time

-from httplib.exceptions import InvalidStatusLine, InvalidResponse, BadRequest
-from httplib.httpsocket import HTTPSocket
-
-
-def _get_start_line(client: HTTPSocket):
-    line = client.read_line().strip()
-    split = list(filter(None, line.split(" ", 2)))
-    if len(split) < 3:
-        raise InvalidStatusLine(line)  # TODO fix exception
-
-    return line, split
+from httplib.exceptions import InvalidStatusLine, InvalidResponse, BadRequest, InvalidRequestLine
+from httplib.httpsocket import FORMAT


 def _is_valid_http_version(http_version: str):
+    """
+    Returns True if the specified HTTP-version is valid.
+
+    @param http_version: the string to be checked
+    @return: True if the specified HTTP-version is valid.
+    """
    if len(http_version) < 8 or http_version[4] != "/":
        return False

@@ -26,28 +30,21 @@ def _is_valid_http_version(http_version: str):
    return True


-def get_status_line(client: HTTPSocket):
-    line, (http_version, status, reason) = _get_start_line(client)
-
-    if not _is_valid_http_version(http_version):
-        raise InvalidStatusLine(line)
-    version = http_version[:4]
-
-    if not re.match(r"\d{3}", status):
-        raise InvalidStatusLine(line)
-    status = int(status)
-    if status < 100 or status > 999:
-        raise InvalidStatusLine(line)
-
-    return version, status, reason
-
-
 def parse_status_line(line: str):
+    """
+    Parses the specified line as an HTTP status-line.
+
+    @param line: the status-line to be parsed
+    @raise InvalidStatusLine: if the line couldn't be parsed, if the HTTP-version is invalid or if the status code
+    is invalid
+    @return: tuple of the HTTP-version, status and reason
+    """
+
    split = list(filter(None, line.strip().split(" ", 2)))
    if len(split) < 3:
-        raise InvalidStatusLine(line)  # TODO fix exception
+        raise InvalidStatusLine(line)

-    (http_version, status, reason) = split
+    http_version, status, reason = split

    if not _is_valid_http_version(http_version):
        raise InvalidStatusLine(line)
@@ -62,140 +59,66 @@ def parse_status_line(line: str):
    return version, status, reason


-def parse_request_line(client: HTTPSocket):
-    line, (method, target, version) = _get_start_line(client)
+def parse_request_line(line: str):
+    """
+    Parses the specified line as an HTTP request-line.
+    Returns the method, target as ParseResult and HTTP version from the request-line.

-    logging.debug("Parsed request-line=%r, method=%r, target=%r, version=%r", line, method, target, version)
+    @param line: the request-line to be parsed
+    @raise InvalidRequestLine: if the line couldn't be parsed.
+    @raise BadRequest: Invalid HTTP method, Invalid HTTP-version or Invalid target
+    @return: tuple of the method, target and HTTP-version
+    """

+    split = list(filter(None, line.rstrip().split(" ", 2)))
+    if len(split) < 3:
+        raise InvalidRequestLine(line, "missing argument in request-line")
+
+    method, target, version = split
    if method not in ("CONNECT", "DELETE", "GET", "HEAD", "OPTIONS", "POST", "PUT", "TRACE"):
-        raise BadRequest()
+        raise BadRequest(f"Invalid method: {method}")

    if not _is_valid_http_version(version):
        logging.debug("[ABRT] request: invalid http-version=%r", version)
-        raise BadRequest()
+        raise BadRequest(f"Invalid HTTP-version: {version}")

    if len(target) == "":
-        raise BadRequest()
-    parsed_target = urlparse(target)
-    if len(parsed_target.path) > 0 and parsed_target.path[0] != "/" and parsed_target.netloc != "":
-        parsed_target = urlparse(f"//{target}")
+        raise BadRequest("request-target not specified")
+    parsed_target = urlsplit(target)

    return method, parsed_target, version.split("/")[1]


-def retrieve_headers(client: HTTPSocket):
-    raw_headers = []
-    # first header after the status-line may not contain a space
-    while True:
-        line = client.read_line()
-        if line[0].isspace():
-            continue
-        else:
-            break
-
-    while True:
-        if line in ("\r\n", "\n", " "):
-            break
-
-        if line[0].isspace():
-            raw_headers[-1] = raw_headers[-1].rstrip("\r\n")
-
-        raw_headers.append(line.lstrip())
-        line = client.read_line()
-
-    result = []
-    header_str = "".join(raw_headers)
-    for line in header_str.splitlines():
-        pos = line.find(":")
-
-        if pos <= 0 or pos >= len(line) - 1:
-            continue
-
-        (header, value) = line.split(":", 1)
-        result.append((header.lower(), value.strip().lower()))
-
-    return result
-
-
-def parse_request_headers(client: HTTPSocket):
-    raw_headers = retrieve_headers(client)
-    logging.debug("Received headers: %r", raw_headers)
-    headers = {}
-
-    key: str
-    for (key, value) in raw_headers:
-        if any((c.isspace()) for c in key):
-            raise BadRequest()
-
-        if key == "content-length":
-            if key in headers:
-                logging.error("Multiple content-length headers specified")
-                raise BadRequest()
-            if not value.isnumeric() or int(value) <= 0:
-                logging.error("Invalid content-length value: %r", value)
-                raise BadRequest()
-        elif key == "host":
-            if value != client.host and value != client.host.split(":")[0] or key in headers:
-                raise BadRequest()
-
-        headers[key] = value
-
-    return headers
-
-
-def get_headers(client: HTTPSocket):
-    headers = []
-    # first header after the status-line may not contain a space
-    while True:
-        line = client.read_line()
-        if line[0].isspace():
-            continue
-        else:
-            break
-
-    while True:
-        if line in ("\r\n", "\n", " "):
-            break
-
-        if line[0].isspace():
-            headers[-1] = headers[-1].rstrip("\r\n")
-
-        headers.append(line.lstrip())
-        line = client.read_line()
-
-    result = {}
-    header_str = "".join(headers)
-    for line in header_str.splitlines():
-        pos = line.find(":")
-
-        if pos <= 0 or pos >= len(line) - 1:
-            continue
-
-        (header, value) = map(str.strip, line.split(":", 1))
-        check_next_header(result, header, value)
-        result[header.lower()] = value.lower()
-
-    return result
-
-
 def parse_headers(lines):
+    """
+    Parses the lines from the `lines` iterator as headers.
+
+    @param lines: iterator to retrieve the lines from.
+    @return: A dictionary with header as key and value as value.
+    """
    headers = []
-    # first header after the status-line may not contain a space
-    for line in lines:
+
+    try:
+        # first header after the start-line may not start with a space
        line = next(lines)
+        while True:
            if line[0].isspace():
                continue
            else:
                break

-    for line in lines:
-        if line in ("\r\n", "\n", " "):
+        while True:
+            if line in ("\r\n", "\r", "\n", ""):
                break

            if line[0].isspace():
                headers[-1] = headers[-1].rstrip("\r\n")

            headers.append(line.lstrip())
+            line = next(lines)
+    except StopIteration:
+        # No more lines to be parsed
+        pass

    result = {}
    header_str = "".join(headers)
@@ -215,17 +138,21 @@ def parse_headers(lines):
 def check_next_header(headers, next_header: str, next_value: str):
    if next_header == "content-length":
        if "content-length" in headers:
-            logging.error("Multiple content-length headers specified")
-            raise InvalidResponse()
+            raise InvalidResponse("Multiple content-length headers specified")
        if not next_value.isnumeric() or int(next_value) <= 0:
-            logging.error("Invalid content-length value: %r", next_value)
-            raise InvalidResponse()
+            raise InvalidResponse(f"Invalid content-length value: {next_value}")


 def parse_uri(uri: str):
+    """
+    Parse the specified URI into the host, port and path.
+    If the URI is invalid, this method will try to create one.
+    @param uri: the URI to be parsed
+    @return: A tuple with the host, port and path
+    """
    parsed = urlsplit(uri)

-    # If there is no netloc, the given string is not a valid URI, so split on /
+    # If there is no hostname, the given string is not a valid URI, so split on /
    if parsed.hostname:
        host = parsed.hostname
        path = parsed.path
@@ -245,3 +172,70 @@ def parse_uri(uri: str):
        port = 80

    return host, port, path
+
+
+def uri_from_url(url: str):
+    """
+    Returns a valid URI of the specified URL.
+    """
+    parsed = urlsplit(url)
+
+    if parsed.hostname is None:
+        url = f"http://{url}"
+        parsed = urlsplit(url)
+
+    path = parsed.path
+    if path == "":
+        path = "/"
+
+    result = f"http://{parsed.netloc}{path}"
+    if parsed.query != "":
+        result = f"{result}?{parsed.query}"
+
+    return result
+
+
+def urljoin(base, url):
+    """
+    Join a base url, and a URL to form an absolute url.
+    """
+    return urllib.parse.urljoin(base, url)
+
+
+def get_charset(headers: Dict[str, str]):
+    """
+    Returns the charset of the content from the headers if found. Otherwise, returns `FORMAT`
+
+    @param headers: the headers to retrieve the charset from
+    @return: A charset
+    """
+    if "content-type" in headers:
+        content_type = headers["content-type"]
+        match = re.search(r"charset\s*=\s*([a-z\-0-9]*)", content_type, re.I)
+        if match:
+            return match.group(1)
+
+    return FORMAT
+
+
+def get_relative_save_path(path: str):
+    """
+    Returns the specified path relative to the working directory.
+
+    @param path: the path to compute
+    @return: the relative path
+    """
+
+    path_obj = pathlib.PurePath(path)
+    root = pathlib.PurePath(os.getcwd())
+    rel = path_obj.relative_to(root)
+    return str(rel)
+
+
+def get_date():
+    """
+    Returns a string representation of the current date according to RFC 1123.
+    """
+    now = datetime.now()
+    stamp = mktime(now.timetuple())
+    return format_date_time(stamp)
--- a/httplib/retriever.py
+++ b/httplib/retriever.py
@@ -7,6 +7,9 @@ from httplib.httpsocket import HTTPSocket, BUFSIZE


 class Retriever(ABC):
+    """
+    This is a helper class for retrieving HTTP messages.
+    """
    client: HTTPSocket

    def __init__(self, client: HTTPSocket):
@@ -14,10 +17,23 @@ class Retriever(ABC):

    @abstractmethod
    def retrieve(self):
+        """
+        Creates an iterator of the retrieved message content.
+        """
        pass

    @staticmethod
    def create(client: HTTPSocket, headers: Dict[str, str]):
+        """
+        Creates a Retriever instance depending on the give headers.
+
+        @param client: the socket to retrieve from
+        @param headers: the message headers for choosing the retriever instance
+        @return: ChunkedRetriever if the message uses chunked encoding, ContentLengthRetriever if the message
+        specifies a content-length, RawRetriever if none of the above is True.
+        @raise UnsupportedEncoding: if the `transfer-encoding` is not supported or if the `content-encoding` is not
+        supported.
+        """

        # only chunked transfer-encoding is supported
        transfer_encoding = headers.get("transfer-encoding")
@@ -32,7 +48,7 @@ class Retriever(ABC):

        if chunked:
            return ChunkedRetriever(client)
-        else:
+
        content_length = headers.get("content-length")

        if not content_length:
@@ -43,28 +59,56 @@ class Retriever(ABC):


 class PreambleRetriever(Retriever):
+    """
+    Retriever instance for retrieving the start-line and headers of an HTTP message.
+    """
+
    client: HTTPSocket
-    buffer: []
+    _buffer: []
+
+    @property
+    def buffer(self):
+        """
+        Returns a copy of the internal buffer.
+        Clears the internal buffer afterwards.
+
+        @return: A list of the buffered lines.
+        """
+        tmp_buffer = self._buffer
+        self._buffer = []
+
+        return tmp_buffer

    def __init__(self, client: HTTPSocket):
        super().__init__(client)
        self.client = client
-        self.buffer = []
+        self._buffer = []

    def retrieve(self):
+        """
+        Returns an iterator of the retrieved lines.
+        @return:
+        """

        line = self.client.read_line()
        while True:
-            self.buffer.append(line)
+            self._buffer.append(line)

-            if line in ("\r\n", "\n", " "):
-                break
+            if line in ("\r\n", "\r", "\n", ""):
+                return line

            yield line
            line = self.client.read_line()

+    def reset_buffer(self, line):
+        self._buffer.clear()
+        self._buffer.append(line)
+

 class ContentLengthRetriever(Retriever):
+    """
+    Retriever instance for retrieving a message body with a given content-length.
+    """
    length: int

    def __init__(self, client: HTTPSocket, length: int):
@@ -72,6 +116,11 @@ class ContentLengthRetriever(Retriever):
        self.length = length

    def retrieve(self):
+        """
+        Returns an iterator of the received message bytes.
+        The size of each iteration is not necessarily constant.
+        @raise IncompleteResponse: if the connection is closed or timed out before receiving the complete payload.
+        """

        cur_payload_size = 0
        read_size = BUFSIZE
@@ -84,11 +133,9 @@ class ContentLengthRetriever(Retriever):
            try:
                buffer = self.client.read(remaining)
            except TimeoutError:
-                logging.error("Timed out before receiving complete payload")
                raise IncompleteResponse("Timed out before receiving complete payload")
            except ConnectionError:
-                logging.error("Timed out before receiving complete payload")
-                raise IncompleteResponse("Connection closed before receiving complete payload")
+                raise IncompleteResponse("Connection closed before receiving the complete payload")

            if len(buffer) == 0:
                logging.warning("Received payload length %s less than expected %s", cur_payload_size, self.length)
@@ -99,6 +146,10 @@ class ContentLengthRetriever(Retriever):


 class RawRetriever(Retriever):
+    """
+    Retriever instance for retrieving a message body without any length specifier or encoding.
+    This retriever will keep waiting until a timeout occurs, or the connection is disconnected.
+    """

    def retrieve(self):
        while True:
@@ -109,22 +160,44 @@ class RawRetriever(Retriever):


 class ChunkedRetriever(Retriever):
+    """
+    Retriever instance for retrieving a message body with chunked encoding.
+    """

    def retrieve(self):
+        """
+        Returns an iterator of the received message bytes.
+        The size of each iteration is not necessarily constant.
+
+        @raise IncompleteResponse: if the connection is closed or timed out before receiving the complete payload.
+        @raise InvalidResponse: if the length of a chunk could not be determined.
+        """
+        try:
            while True:
                chunk_size = self.__get_chunk_size()
                logging.debug("chunk-size: %s", chunk_size)
                if chunk_size == 0:
+                    # remove all trailing lines
                    self.client.reset_request()
                    break

                buffer = self.client.read(chunk_size)
-            logging.debug("chunk: %r", buffer)
                yield buffer

-            self.client.read_line()  # remove CRLF
+                self.client.read_line()  # remove trailing CRLF
+
+        except TimeoutError:
+            raise IncompleteResponse("Timed out before receiving the complete payload!")
+        except ConnectionError:
+            raise IncompleteResponse("Connection closed before receiving the complete payload!")

    def __get_chunk_size(self):
+        """
+        Returns the next chunk size.
+
+        @return: The chunk size in bytes
+        @raise InvalidResponse: If an error occured when parsing the chunk size.
+        """
        line = self.client.read_line()
        sep_pos = line.find(";")
        if sep_pos >= 0:
@@ -133,4 +206,4 @@ class ChunkedRetriever(Retriever):
        try:
            return int(line, 16)
        except ValueError:
-            raise InvalidResponse()
+            raise InvalidResponse("Failed to parse chunk size")
--- a/public/index.html
+++ b/public/index.html
@@ -48,6 +48,7 @@
 <div>
 	<h2>Local image</h2>
 	<img width="200px" src="ulyssis.png">
+</div>
 </body>
 </html>

--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +0,0 @@
-beautifulsoup4~=4.9.3
-lxml~=4.6.2
-cssutils~=2.2.0
--- a/server.py
+++ b/server.py
@@ -14,7 +14,7 @@ def main():
    parser.add_argument("--workers", "-w",
                        help="The amount of worker processes. This is by default based on the number of cpu threads.",
                        type=int)
-    parser.add_argument("--port", "-p", help="The port to listen on", default=8000)
+    parser.add_argument("--port", "-p", help="The port to listen on", default=5055)
    arguments = parser.parse_args()

    logging_level = logging.ERROR - (10 * arguments.verbose)
@@ -47,44 +47,3 @@ except Exception as e:
    print("[ABRT] Internal error: " + str(e), file=sys.stderr)
    logging.debug("Internal error", exc_info=e)
    sys.exit(70)
-
-# import socket
-#
-# # Get hostname and address
-# hostname = socket.gethostname()
-# address = socket.gethostbyname(hostname)
-#
-# # socket heeft een listening and accept method
-#
-# SERVER = "127.0.0.1"  # dynamisch fixen in project
-# PORT = 5055
-# server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-#
-# ADDR = (SERVER, PORT)  # hier wordt de socket gebonden aan mijn IP adres, dit moet wel anders
-# server.bind(ADDR)  # in het project gebeuren
-#
-# HEADER = 64  # maximum size messages
-# FORMAT = 'utf-8'
-# DISCONNECT_MESSAGE = "DISCONNECT!"  # special message for disconnecting client and server
-#
-#
-# # function for starting server
-# def start():
-#     pass
-#     server.listen()
-#     while True:  # infinite loop in which server accept incoming connections, we want to run it forever
-#         conn, addr = server.accept()  # Server blocks untill a client connects
-#         print("new connection: ", addr[0], " connected.")
-#         connected = True
-#         while connected:  # while client is connected, we want to recieve messages
-#             msg = conn.recv(HEADER).decode(
-#                 FORMAT).rstrip()  # Argument is maximum size of msg (in project look into details of accp), decode is for converting bytes to strings, rstrip is for stripping messages for special hidden characters
-#             print("message: ", msg)
-#             if msg == DISCONNECT_MESSAGE:
-#                 connected = False
-#         print("close connection ", addr[0], " disconnected.")
-#         conn.close()
-#
-#
-# print("server is starting ... ")
-# start()
--- a/server/RequestHandler.py
+++ b/server/RequestHandler.py
@@ -1,127 +0,0 @@
-import logging
-import mimetypes
-import os
-import sys
-from datetime import datetime
-from socket import socket
-from time import mktime
-from typing import Union
-from urllib.parse import ParseResultBytes, ParseResult
-from wsgiref.handlers import format_date_time
-
-from httplib import parser
-from httplib.exceptions import MethodNotAllowed, BadRequest, UnsupportedEncoding, NotImplemented, NotFound
-from httplib.httpsocket import HTTPSocket, FORMAT
-from httplib.retriever import Retriever
-
-METHODS = ("GET", "HEAD", "PUT", "POST")
-
-
-class RequestHandler:
-    conn: HTTPSocket
-    root = os.path.join(os.path.dirname(sys.argv[0]), "public")
-
-    def __init__(self, conn: socket, host):
-        self.conn = HTTPSocket(conn, host)
-
-    def listen(self):
-        logging.debug("Parsing request line")
-        (method, target, version) = parser.parse_request_line(self.conn)
-        headers = parser.parse_request_headers(self.conn)
-
-        self._validate_request(method, target, version, headers)
-
-        logging.debug("Parsed request-line: method: %s, target: %r", method, target)
-
-        body = b""
-        if self._has_body(headers):
-            try:
-                retriever = Retriever.create(self.conn, headers)
-            except UnsupportedEncoding as e:
-                logging.error("Encoding not supported: %s=%s", e.enc_type, e.encoding)
-                raise NotImplemented()
-
-            for buffer in retriever.retrieve():
-                body += buffer
-
-        # completed message
-        self._handle_message(method, target.path, body)
-
-    def _check_request_line(self, method: str, target: Union[ParseResultBytes, ParseResult], version):
-
-        if method not in METHODS:
-            raise MethodNotAllowed(METHODS)
-
-        if version not in ("1.0", "1.1"):
-            raise BadRequest()
-
-        # only origin-form and absolute-form are allowed
-        if target.scheme not in ("", "http"):
-            # Only http is supported...
-            raise BadRequest()
-        if target.netloc != "" and target.netloc != self.conn.host and target.netloc != self.conn.host.split(":")[0]:
-            raise NotFound()
-
-        if target.path == "" or target.path[0] != "/":
-            raise NotFound()
-
-        norm_path = os.path.normpath(target.path)
-        if not os.path.exists(self.root + norm_path):
-            raise NotFound()
-
-    def _validate_request(self, method, target, version, headers):
-        if version == "1.1" and "host" not in headers:
-            raise BadRequest()
-
-        self._check_request_line(method, target, version)
-
-    def _has_body(self, headers):
-        return "transfer-encoding" in headers or "content-encoding" in headers
-
-    @staticmethod
-    def _get_date():
-        now = datetime.now()
-        stamp = mktime(now.timetuple())
-        return format_date_time(stamp)
-
-    def _handle_message(self, method: str, target, body: bytes):
-        date = self._get_date()
-
-        if method == "GET":
-            if target == "/":
-                path = self.root + "/index.html"
-            else:
-                path = self.root + target
-            mime = mimetypes.guess_type(path)[0]
-            if mime.startswith("text"):
-                file = open(path, "rb", FORMAT)
-            else:
-                file = open(path, "rb")
-            buffer = file.read()
-            file.close()
-
-            message = "HTTP/1.1 200 OK\r\n"
-            message += date + "\r\n"
-            if mime:
-                message += f"Content-Type: {mime}"
-                if mime.startswith("text"):
-                    message += "; charset=UTF-8"
-                message += "\r\n"
-            message += f"Content-Length: {len(buffer)}\r\n"
-            message += "\r\n"
-            message = message.encode(FORMAT)
-            message += buffer
-            message += b"\r\n"
-
-            logging.debug("Sending: %r", message)
-            self.conn.conn.sendall(message)
-
-    @staticmethod
-    def send_error(client: socket, code, message):
-        message = f"HTTP/1.1 {code} {message}\r\n"
-        message += RequestHandler._get_date() + "\r\n"
-        message += "Content-Length: 0\r\n"
-        message += "\r\n"
-
-        logging.debug("Sending: %r", message)
-        client.sendall(message.encode(FORMAT))
--- a/server/command.py
+++ b/server/command.py
@@ -1,34 +1,53 @@
-import logging
+import mimetypes
+import os
+import sys
 from abc import ABC, abstractmethod
-from typing import Dict, Tuple
-from urllib.parse import urlparse
+from datetime import datetime

-from client.httpclient import FORMAT, HTTPClient
 from httplib import parser
-from httplib.exceptions import InvalidResponse, InvalidStatusLine, UnsupportedEncoding
-from httplib.message import Message
-from httplib.retriever import PreambleRetriever
+from httplib.exceptions import NotFound, Forbidden, NotModified, BadRequest
+from httplib.httpsocket import FORMAT
+from httplib.message import RequestMessage as Message
+
+CONTENT_ROOT = os.path.join(os.path.dirname(sys.argv[0]), "public")
+
+status_message = {
+    200: "OK",
+    201: "Created",
+    202: "Accepted",
+    204: "No Content",
+    304: "Not Modified",
+    400: "Bad Request",
+    404: "Not Found",
+    500: "Internal Server Error",
+}


-def create(method: str, message: Message):
+def create(message: Message):
+    """
+    Creates a Command based on the specified message
+    @param message: the message to create the Command with.
+    @return: An instance of `AbstractCommand`
+    """

-    if method == "GET":
-        return GetCommand(url, port)
-    elif method == "HEAD":
-        return HeadCommand(url, port)
-    elif method == "POST":
-        return PostCommand(url, port)
-    elif method == "PUT":
-        return PutCommand(url, port)
+    if message.method == "GET":
+        return GetCommand(message)
+    elif message.method == "HEAD":
+        return HeadCommand(message)
+    elif message.method == "POST":
+        return PostCommand(message)
+    elif message.method == "PUT":
+        return PutCommand(message)
    else:
        raise ValueError()


 class AbstractCommand(ABC):
    path: str
-    headers: Dict[str, str]
+    msg: Message

-    def __init(self):
+    def __init__(self, message: Message):
+        self.msg = message
        pass

    @property
@@ -36,63 +55,253 @@ class AbstractCommand(ABC):
    def command(self):
        pass

+    @property
+    @abstractmethod
+    def _conditional_headers(self):
+        """
+        The conditional headers specific to this command instance.
+        """
+        pass

-class AbstractWithBodyCommand(AbstractCommand, ABC):
+    @abstractmethod
+    def execute(self):
+        """
+        Execute the command
+        """
+        pass

-    def _build_message(self, message: str) -> bytes:
-        body = input(f"Enter {self.command} data: ").encode(FORMAT)
-        print()
+    def _build_message(self, status: int, content_type: str, body: bytes, extra_headers=None):
+        """
+        Build the response message.
+
+        @param status: The response status code
+        @param content_type: The response content-type header
+        @param body: The response body, may be empty.
+        @param extra_headers: Extra headers needed in the response message
+        @return: The encoded response message
+        """
+
+        if extra_headers is None:
+            extra_headers = {}
+
+        self._process_conditional_headers()
+
+        message = f"HTTP/1.1 {status} {status_message[status]}\r\n"
+        message += f"Date: {parser.get_date()}\r\n"
+
+        content_length = len(body)
+        message += f"Content-Length: {content_length}\r\n"
+
+        if content_type:
+            message += f"Content-Type: {content_type}"
+            if content_type.startswith("text"):
+                message += f"; charset={FORMAT}"
+            message += "\r\n"
+        elif content_length > 0:
+            message += f"Content-Type: application/octet-stream\r\n"
+
+        for header in extra_headers:
+            message += f"{header}: {extra_headers[header]}\r\n"

-        message += "Content-Type: text/plain\r\n"
-        message += f"Content-Length: {len(body)}\r\n"
        message += "\r\n"
        message = message.encode(FORMAT)
+        if content_length > 0:
            message += body
-        message += b"\r\n"

        return message

+    def _get_path(self, check=True):
+        """
+        Returns the absolute file system path of the resource in the request.
+
+        @param check: If True, throws an error if the file doesn't exist
+        @raise NotFound: if `check` is True and the path doesn't exist
+        """
+
+        norm_path = os.path.normpath(self.msg.target.path)
+
+        if norm_path == "/":
+            path = CONTENT_ROOT + "/index.html"
+        else:
+            path = CONTENT_ROOT + norm_path
+
+        if check and not os.path.exists(path):
+            raise NotFound(path)
+
+        return path
+
+    def _process_conditional_headers(self):
+        """
+        Processes the conditional headers for this command instance.
+        """
+
+        for header in self._conditional_headers:
+            tmp = self.msg.headers.get(header)
+
+            if not tmp:
+                continue
+            self._conditional_headers[header]()
+
+    def _if_modified_since(self):
+        """
+        Processes the if-modified-since header.
+        @return: True if the header is invalid, and thus shouldn't be taken into account, throws NotModified
+        if the content isn't modified since the given date.
+
+        @raise NotModified: If the date of if-modified-since greater than the modify-date of the resource.
+        """
+        date_val = self.msg.headers.get("if-modified-since")
+        if not date_val:
+            return True
+        modified = datetime.utcfromtimestamp(os.path.getmtime(self._get_path(False)))
+        try:
+            min_date = datetime.strptime(date_val, '%a, %d %b %Y %H:%M:%S GMT')
+        except ValueError:
+            return True
+
+        if modified <= min_date:
+            raise NotModified(f"{modified} <= {min_date}")
+
+        return True
+
+    @staticmethod
+    def get_mimetype(path):
+        """
+        Guess the type of file.
+        @param path: the path to the file to guess the type of
+        @return: The mimetype based on the extension, or if that fails, returns "text/plain" if the file is text,
+        otherwise returns "application/octet-stream"
+        """
+        mime = mimetypes.guess_type(path)[0]
+
+        if mime:
+            return mime
+
+        try:
+            file = open(path, "r", encoding=FORMAT)
+            file.readline()
+            file.close()
+            return "text/plain"
+        except UnicodeDecodeError:
+            return "application/octet-stream"
+
+
+class AbstractModifyCommand(AbstractCommand, ABC):
+    """
+    Base class for commands which modify a resource based on the request.
+    """
+
+    @property
+    @abstractmethod
+    def _file_mode(self):
+        """
+        The mode to open the target resource with. (e.a. 'a' or 'w')
+        """
+        pass
+
+    @property
+    def _conditional_headers(self):
+        return {}
+
+    def execute(self):
+        path = self._get_path(False)
+        directory = os.path.dirname(path)
+
+        if not os.path.exists(directory):
+            raise Forbidden("Target directory does not exists!")
+        if os.path.exists(directory) and not os.path.isdir(directory):
+            raise Forbidden("Target directory is an existing file!")
+
+        exists = os.path.exists(path)
+
+        try:
+            with open(path, mode=f"{self._file_mode}b") as file:
+                file.write(self.msg.body)
+        except IsADirectoryError:
+            raise Forbidden("The target resource is a directory!")
+
+        if exists:
+            status = 204
+        else:
+            status = 201
+
+        location = parser.urljoin("/", os.path.relpath(path, CONTENT_ROOT))
+        return self._build_message(status, "text/plain", b"", {"Location": location})
+

 class HeadCommand(AbstractCommand):
+    """
+    A Command instance which represents an HEAD request
+    """
+
    @property
    def command(self):
        return "HEAD"

+    @property
+    def _conditional_headers(self):
+        return {'if-modified-since': self._if_modified_since}
+
+    def execute(self):
+        path = self._get_path()
+        mime = self.get_mimetype(path)
+
+        return self._build_message(200, mime, b"")
+

 class GetCommand(AbstractCommand):
-
-    def __init__(self, uri: str, port, dir=None):
-        super().__init__(uri, port)
-        self.dir = dir
-        self.filename = None
+    """
+    A Command instance which represents a GET request
+    """

    @property
    def command(self):
        return "GET"

-    def _get_preamble(self, retriever):
-        lines = retriever.retrieve()
-        (version, status, msg) = parser.parse_status_line(next(lines))
-        headers = parser.parse_headers(lines)
+    @property
+    def _conditional_headers(self):
+        return {'if-modified-since': self._if_modified_since}

-        logging.debug("---response begin---\r\n%s--- response end---", "".join(retriever.buffer))
+    def execute(self):
+        path = self._get_path()
+        mime = self.get_mimetype(path)

-        return Message(version, status, msg, headers)
+        file = open(path, "rb")
+        buffer = file.read()
+        file.close()

-    def _await_response(self, client, retriever):
-        msg = self._get_preamble(retriever)
-
-        from client import response_handler
-        self.filename = response_handler.handle(client, msg, self, self.dir)
+        return self._build_message(200, mime, buffer)


-class PostCommand(AbstractWithBodyCommand):
+class PostCommand(AbstractModifyCommand):
+    """
+    A Command instance which represents a POST request
+    """
+
    @property
    def command(self):
        return "POST"

+    @property
+    def _file_mode(self):
+        return "a"
+
+
+class PutCommand(AbstractModifyCommand):
+    """
+    A Command instance which represents a PUT request
+    """

-class PutCommand(AbstractWithBodyCommand):
    @property
    def command(self):
        return "PUT"
+
+    @property
+    def _file_mode(self):
+        return "w"
+
+    def execute(self):
+        if "content-range" in self.msg.headers:
+            raise BadRequest("PUT request contains a Content-Range header")
+
+        super().execute()
--- a/server/httpserver.py
+++ b/server/httpserver.py
@@ -1,7 +1,6 @@
 import logging
 import multiprocessing as mp
 import socket
-import time
 from multiprocessing.context import Process
 from multiprocessing.queues import Queue
 from multiprocessing.synchronize import Event
@@ -20,54 +19,81 @@ class HTTPServer:
    _stop_event: Event

    def __init__(self, address: str, port: int, worker_count, logging_level):
+        """
+        Initialize an HTTP server with the specified address, port, worker_count and logging_level
+        @param address: the address to listen on for connections
+        @param port: the port to listen on for connections
+        @param worker_count: The amount of worker processes to create
+        @param logging_level: verbosity level for the logger
+        """
        self.address = address
        self.port = port
        self.worker_count = worker_count
        self.logging_level = logging_level

        mp.set_start_method("spawn")
-        self._dispatch_queue = mp.Queue()
+
+        # Create a queue with maximum size of worker_count.
+        self._dispatch_queue = mp.Queue(worker_count)
        self._stop_event = mp.Event()

    def start(self):
+        """
+        Start the HTTP server.
+        """
        try:
            self.__do_start()
        except KeyboardInterrupt:
            self.__shutdown()

    def __do_start(self):
+        """
+        Internal method to start the server.
+        """
        # Create socket
-
        self.server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.server.bind((self.address, self.port))

+        # Create workers processes to handle requests
        self.__create_workers()

        self.__listen()

    def __listen(self):
+        """
+        Start listening for new connections
+
+        If a connection is received, it will be dispatched to the worker queue, and picked up by a worker process.
+        """

        self.server.listen()
-        logging.debug("Listening for connections")
+        logging.info("Listening on %s:%d", self.address, self.port)

        while True:
-            if self._dispatch_queue.qsize() > self.worker_count:
-                time.sleep(0.01)
-                continue
-
            conn, addr = self.server.accept()
+            conn.settimeout(5)
+
            logging.info("New connection: %s", addr[0])
+
+            # blocks when the queue is full (contains self.worker_count items).
            self._dispatch_queue.put((conn, addr))
            logging.debug("Dispatched connection %s", addr)

    def __shutdown(self):
+        """
+        Cleanly shutdown the server
+
+        Notifies the worker processes to shut down and eventually closes the server socket
+        """
+
+        logging.info("Shutting down server...")

        # Set stop event
        self._stop_event.set()

        # Wake up workers
        logging.debug("Waking up workers")
-        for p in self.workers:
+        for _ in self.workers:
            self._dispatch_queue.put((None, None))

        logging.debug("Closing dispatch queue")
@@ -84,12 +110,16 @@ class HTTPServer:
        self.server.close()

    def __create_workers(self):
+        """
+        Create worker processes up to `self.worker_count`.
+
+        A worker process is created with start method "spawn", target `worker.worker`, and the `self.logging_level`
+        is passed along with the `self.dispatch_queue` and `self._stop_event`
+        """
        for i in range(self.worker_count):
            logging.debug("Creating worker: %d", i + 1)
            p = mp.Process(target=worker.worker,
-                           args=(f"{self.address}:{self.port}", i + 1, self.logging_level, self._dispatch_queue, self._stop_event))
+                           args=(f"{self.address}:{self.port}", i + 1, self.logging_level, self._dispatch_queue,
+                                 self._stop_event))
            p.start()
            self.workers.append(p)
-
-            time.sleep(0.2)
-        time.sleep(1)
--- a/server/requesthandler.py
+++ b/server/requesthandler.py
@@ -0,0 +1,168 @@
+import logging
+from socket import socket
+from typing import Union
+from urllib.parse import ParseResultBytes, ParseResult
+
+from httplib import parser
+from httplib.exceptions import MethodNotAllowed, BadRequest, UnsupportedEncoding, NotImplemented, NotFound, \
+    HTTPVersionNotSupported
+from httplib.httpsocket import FORMAT
+from httplib.message import RequestMessage as Message
+from httplib.retriever import Retriever, PreambleRetriever
+from server import command
+from server.serversocket import ServerSocket
+
+METHODS = ("GET", "HEAD", "PUT", "POST")
+
+
+class RequestHandler:
+    """
+    A RequestHandler instance processes incoming HTTP requests messages from a single client.
+
+    RequestHandler instances are created everytime a client connects. They will read the incoming
+    messages, parse, verify them and send a response.
+    """
+
+    conn: ServerSocket
+    host: str
+
+    def __init__(self, conn: socket, host):
+        self.conn = ServerSocket(conn)
+        self.host = host
+
+    def listen(self):
+        """
+        Listen to incoming messages and process them.
+        """
+
+        retriever = PreambleRetriever(self.conn)
+
+        while True:
+            line = self.conn.read_line()
+
+            if line in ("\r\n", "\r", "\n"):
+                continue
+
+            retriever.reset_buffer(line)
+            self._handle_message(retriever, line)
+
+    def _handle_message(self, retriever, line):
+        """
+        Retrieves and processes the request message.
+
+        @param retriever: the retriever instance to retrieve the lines.
+        @param line: the first received line.
+        """
+        lines = retriever.retrieve()
+
+        # Parse the request-line and headers
+        (method, target, version) = parser.parse_request_line(line)
+        headers = parser.parse_headers(lines)
+
+        # Create the response message object
+        message = Message(version, method, target, headers, retriever.buffer)
+
+        logging.debug("---request begin---\r\n%s---request end---", "".join(message.raw))
+
+        # validate if the request is valid
+        self._validate_request(message)
+
+        # The body (if available) hasn't been retrieved up till now.
+        body = b""
+        if self._has_body(headers):
+            try:
+                retriever = Retriever.create(self.conn, headers)
+            except UnsupportedEncoding as e:
+                logging.error("Encoding not supported: %s=%s", e.enc_type, e.encoding)
+                raise NotImplemented(f"{e.enc_type}={e.encoding}")
+
+            for buffer in retriever.retrieve():
+                body += buffer
+
+        message.body = body
+
+        # message completed
+        cmd = command.create(message)
+        msg = cmd.execute()
+
+        logging.debug("---response begin---\r\n%s\r\n---response end---", msg.split(b"\r\n\r\n", 1)[0].decode(FORMAT))
+        # Send the response message
+        self.conn.conn.sendall(msg)
+
+    def _check_request_line(self, method: str, target: Union[ParseResultBytes, ParseResult], version):
+        """
+        Checks if the request-line is valid. Throws an appropriate exception if not.
+
+        @param method: HTTP request method
+        @param target: The request target
+        @param version: The HTTP version
+        @raise MethodNotAllowed: if the method is not any of the allowed methods in `METHODS`
+        @raise HTTPVersionNotSupported: If the HTTP version is not supported by this server
+        @raise BadRequest: If the scheme of the target is not supported
+        @raise NotFound: If the target is not found on this server
+        """
+
+        if method not in METHODS:
+            raise MethodNotAllowed(METHODS)
+
+        if version not in ("1.0", "1.1"):
+            raise HTTPVersionNotSupported(version)
+
+        # only origin-form and absolute-form are allowed
+        if target.scheme not in ("", "http"):
+            # Only http is supported...
+            raise BadRequest(f"scheme={target.scheme}")
+
+        if target.netloc != "" and target.netloc != self.host and target.netloc != self.host.split(":")[0]:
+            raise NotFound(str(target))
+
+        if target.path == "" or target.path[0] != "/":
+            raise NotFound(str(target))
+
+    def _validate_request(self, msg):
+        """
+        Validates the message request-line and headers. Throws an error if the message is invalid.
+
+        @see: _check_request_line for exceptions raised when validating the request-line.
+        @param msg: the message to validate
+        @raise BadRequest: if HTTP 1.1, and the Host header is missing
+        """
+
+        if msg.version == "1.1" and "host" not in msg.headers:
+            raise BadRequest("Missing host header")
+
+        self._check_request_line(msg.method, msg.target, msg.version)
+
+    def _has_body(self, headers):
+        """
+        Check if the headers notify the existing of a message body.
+
+        @param headers: the headers to check
+        @return: True if the message has a body. False otherwise.
+        """
+
+        if "transfer-encoding" in headers:
+            return True
+
+        if "content-length" in headers and int(headers["content-length"]) > 0:
+            return True
+
+        return False
+
+    @staticmethod
+    def send_error(client: socket, code, message):
+        """
+        Send and HTTP error response to the client
+
+        @param client: the client to send the response to
+        @param code: the HTTP status code
+        @param message: the status code message
+        """
+
+        message = f"HTTP/1.1 {code} {message}\r\n"
+        message += parser.get_date() + "\r\n"
+        message += "Content-Length: 0\r\n"
+        message += "\r\n"
+
+        logging.debug("---response begin---\r\n%s---response end---", message)
+        client.sendall(message.encode(FORMAT))
--- a/server/serversocket.py
+++ b/server/serversocket.py
@@ -0,0 +1,20 @@
+from httplib.exceptions import BadRequest
+from httplib.httpsocket import HTTPSocket
+
+
+class ServerSocket(HTTPSocket):
+    """
+    Wrapper class for a socket. Represents a client connected to this server.
+    """
+
+    """
+    Reads the next line decoded as `httpsocket.FORMAT`
+
+    @return: the decoded next line retrieved from the socket
+    @raise InvalidResponse: If the next line couldn't be decoded, but was expected to
+    """
+    def read_line(self):
+        try:
+            return super().read_line()
+        except UnicodeDecodeError:
+            raise BadRequest("UnicodeDecodeError")
--- a/server/worker.py
+++ b/server/worker.py
@@ -3,9 +3,10 @@ import multiprocessing as mp
 import socket
 import threading
 from concurrent.futures import ThreadPoolExecutor
+from typing import Dict

-from httplib.exceptions import HTTPServerException, InternalServerError
-from server.RequestHandler import RequestHandler
+from httplib.exceptions import HTTPServerException, InternalServerError, HTTPServerCloseException
+from server.requesthandler import RequestHandler

 THREAD_LIMIT = 128

@@ -18,11 +19,19 @@ def worker(address, name, logging_level, queue: mp.Queue, stop_event: mp.Event):
    try:
        runner.run()
    except KeyboardInterrupt:
+        # Catch exit signals and close the threads appropriately.
        logging.debug("Ctrl+C pressed, terminating")
        runner.shutdown()


 class Worker:
+    """
+    A Worker instance represents a parallel execution process to handle incoming connections.
+
+    Worker instances are created when the HTTP server starts. They are used to handle many incoming connections
+    asynchronously.
+    """
+
    host: str
    name: str
    queue: mp.Queue
@@ -30,24 +39,40 @@ class Worker:
    stop_event: mp.Event

    finished_queue: mp.Queue
+    dispatched_sockets: Dict[int, socket.socket]

    def __init__(self, host, name, queue: mp.Queue, stop_event: mp.Event):
+        """
+        Create a new Worker instance
+
+        @param host: The hostname of the HTTP server
+        @param name: The name of this Worker instance
+        @param queue: The dispatch queue for incoming socket connections
+        @param stop_event: The Event that signals when to shut down this worker.
+        """
        self.host = host
        self.name = name
        self.queue = queue
        self.executor = ThreadPoolExecutor(THREAD_LIMIT)
        self.stop_event = stop_event
        self.finished_queue = mp.Queue()
+        self.dispatched_sockets = {}

        for i in range(THREAD_LIMIT):
            self.finished_queue.put(i)

    def run(self):
+        """
+        Run this worker.
+
+        The worker will start waiting for incoming clients being added to the queue and submit them to
+        the executor.
+        """
        while not self.stop_event.is_set():

-            # Blocks until thread is free
+            # Blocks until the thread is free
            self.finished_queue.get()
-            # Blocks until new client connects
+            # Blocks until a new client connects
            conn, addr = self.queue.get()

            if conn is None or addr is None:
@@ -55,28 +80,83 @@ class Worker:

            logging.debug("Processing new client: %s", addr)

-            # submit client to thread
+            # submit the client to the executor
            self.executor.submit(self._handle_client, conn, addr)

        self.shutdown()

    def _handle_client(self, conn: socket.socket, addr):
+        """
+        Target method for the worker threads.
+        Creates a RequestHandler and handles any exceptions which may occur.
+
+        @param conn: The client socket
+        @param addr: The address of the client.
+        """
+
+        self.dispatched_sockets[threading.get_ident()] = conn
        try:
-            logging.debug("Handling client: %s", addr)
+            self.__do_handle_client(conn, addr)
+        except Exception:
+            if not self.stop_event:
+                logging.debug("Internal error in thread:", exc_info=True)

-            handler = RequestHandler(conn, self.host)
-            handler.listen()
-        except HTTPServerException as e:
-            RequestHandler.send_error(conn, e.status_code, e.message)
-        except Exception as e:
-            RequestHandler.send_error(conn, InternalServerError.status_code, InternalServerError.message)
-            logging.debug("Internal error", exc_info=e)
-
-        conn.shutdown(socket.SHUT_RDWR)
-        conn.close()
+        self.dispatched_sockets.pop(threading.get_ident())
        # Finished, put back into queue
        self.finished_queue.put(threading.get_ident())

+    def __do_handle_client(self, conn: socket.socket, addr):
+
+        handler = RequestHandler(conn, self.host)
+
+        while True:
+            try:
+                handler.listen()
+            except HTTPServerCloseException as e:
+                # Exception raised after which the client should be disconnected.
+                logging.warning("[HTTP: %s] %s. Reason: %s", e.status_code, e.message, e.arg)
+                RequestHandler.send_error(conn, e.status_code, e.message)
+
+                break
+            except HTTPServerException as e:
+                # Normal HTTP exception raised (e.a. 404) continue listening.
+                logging.debug("[HTTP: %s] %s. Reason: %s", e.status_code, e.message, e.arg)
+                RequestHandler.send_error(conn, e.status_code, e.message)
+            except socket.timeout:
+                # socket timed out, disconnect.
+                logging.info("Socket for client %s timed out.", addr)
+                break
+            except ConnectionAbortedError:
+                # Client aborted connection
+                logging.info("Socket for client %s disconnected.", addr)
+                break
+            except Exception as e:
+                # Unexpected exception raised. Send 500 and disconnect.
+                logging.error("Internal error", exc_info=e)
+                RequestHandler.send_error(conn, InternalServerError.status_code, InternalServerError.message)
+                break
+
+        conn.shutdown(socket.SHUT_RDWR)
+        conn.close()
+
    def shutdown(self):
        logging.info("shutting down")
+        # shutdown executor, but do not wait
+        self.executor.shutdown(False)
+
+        logging.info("Closing sockets")
+
+        # Copy dictionary to prevent issues with concurrency
+        clients = self.dispatched_sockets.copy().values()
+
+        for client in clients:
+            client: socket.socket
+            try:
+                client.shutdown(socket.SHUT_RDWR)
+                client.close()
+            except OSError:
+                # Ignore exception due to already closed sockets
+                pass
+
+        # Call shutdown again and wait this time
        self.executor.shutdown()
--- a/server_flow.md
+++ b/server_flow.md
@@ -1,4 +0,0 @@
-# Flow
- listen
- dispatch asap
- throw error if too full
Author	SHA1	Message	Date
Arthur Bols	baaa3941d6	fix buffer in PreambleRetriever	2021-03-28 20:27:40 +02:00
Arthur Bols	6fd015c770	Log more as info	2021-03-28 20:21:53 +02:00
Arthur Bols	032c71144d	change default server port	2021-03-28 20:11:16 +02:00
Arthur Bols	8eae777265	Fix issues	2021-03-28 19:53:14 +02:00
Arthur Bols	b7315c2348	Improve documentation	2021-03-28 18:55:00 +02:00
Arthur Bols	c748387b48	Improve documentation	2021-03-28 17:57:08 +02:00
Arthur Bols	0f2b039e71	Fix issues with shutdown, improve documentation	2021-03-28 17:12:07 +02:00
Arthur Bols	210c03b73f	Improve logging, fix small issues	2021-03-28 15:58:07 +02:00
Arthur Bols	cd053bc74e	Improve documentation, cleanup duplicated code	2021-03-28 15:20:28 +02:00
Arthur Bols	07b018d2ab	Fix small issues, improve error handling and documentation	2021-03-28 14:04:39 +02:00
Arthur Bols	850535a060	Improve documentation	2021-03-28 03:33:00 +02:00
Arthur Bols	b42c17c420	Rename response_handler to responsehandler	2021-03-28 03:00:53 +02:00
Arthur Bols	7ecfedbec7	Command add properties for fields	2021-03-28 03:00:04 +02:00
Arthur Bols	48c4f207a8	Rename Server- and ClientMessage Renamed ServerMessage and ClientMessage to respectively ResponseMessage and RequestMessage to make it more clear.	2021-03-28 01:59:08 +01:00
Arthur Bols	1f0ade0f09	Parse port argument as int	2021-03-28 01:58:16 +01:00
Arthur Bols	07023f2837	Remove server_flow.md	2021-03-28 01:17:30 +01:00
Arthur Bols	7329a2b9a5	Fix recreating ServerSocket on error	2021-03-28 01:16:40 +01:00
Arthur Bols	0ffdc73a6d	server: use queue blocking instead of sleep	2021-03-28 01:09:11 +01:00
Arthur Bols	a3ce68330f	small update	2021-03-28 00:35:58 +01:00
Arthur Bols	5b5a821522	fix base regex	2021-03-27 23:52:16 +01:00
Arthur Bols	4473d1bec9	Parse html with regex, fix small issues	2021-03-27 23:41:28 +01:00
Arthur Bols	bbca6f603b	Improve ChunkedRetriever error handling and documentation	2021-03-27 19:08:06 +01:00
Arthur Bols	9036755a62	Fix some issues, improve documentation	2021-03-27 19:05:09 +01:00
Arthur Bols	0f7d67c98d	small fixes	2021-03-27 17:32:16 +01:00
Arthur Bols	ff32ce9b39	Cleanup parser and add documentation	2021-03-27 16:58:48 +01:00
Arthur Bols	3615c56152	update	2021-03-27 16:30:53 +01:00
Arthur Bols	fdbd865889	Update	2021-03-26 18:25:03 +01:00
Arthur Bols	7476870acc	client: fix relative paths	2021-03-25 18:26:50 +01:00
Arthur Bols	f15ff38f69	client: fix image url parsing	2021-03-25 17:56:21 +01:00