From d14252f707e8b43119d13f186535c91f4d3a6529 Mon Sep 17 00:00:00 2001
From: Arthur Bols
Date: Wed, 24 Mar 2021 16:35:12 +0100
Subject: [PATCH] Update

---
 client.py                  |   4 +-
 client/command.py          | 102 +++++++++-----
 client/htmlparser.py       |   6 +
 client/response_handler.py | 266 ++++++++++++++++++-------------------
 httplib/MessageParser.py   |   0
 httplib/httpsocket.py      |   7 +-
 httplib/message.py         |  16 +++
 httplib/parser.py          |  78 ++++++++++-
 httplib/retriever.py       |  28 +++-
 server/worker.py           |   3 +-
 10 files changed, 325 insertions(+), 185 deletions(-)
 create mode 100644 client/htmlparser.py
 delete mode 100644 httplib/MessageParser.py
 create mode 100644 httplib/message.py

diff --git a/client.py b/client.py
index 988a4d0..86338ac 100644
--- a/client.py
+++ b/client.py
@@ -3,7 +3,7 @@
 import argparse
 import logging
 import sys
-from client.command import AbstractCommand
+from client import command as cmd
 
 
 def main():
@@ -18,7 +18,7 @@ def main():
     logging.basicConfig(level=logging.ERROR - (10 * arguments.verbose))
     logging.debug("Arguments: %s", arguments)
 
-    command = AbstractCommand.create(arguments.command, arguments.URI, arguments.port)
+    command = cmd.create(arguments.command, arguments.URI, arguments.port)
 
     command.execute()
 
diff --git a/client/command.py b/client/command.py
index bce53d8..817ad1b 100644
--- a/client/command.py
+++ b/client/command.py
@@ -1,16 +1,39 @@
 import logging
 from abc import ABC, abstractmethod
+from typing import Dict, Tuple
 from urllib.parse import urlparse
 
-from client.response_handler import ResponseHandler
 from client.httpclient import FORMAT, HTTPClient
 from httplib import parser
 from httplib.exceptions import InvalidResponse, InvalidStatusLine, UnsupportedEncoding
+from httplib.message import Message
+from httplib.retriever import PreambleRetriever
+
+sockets: Dict[str, HTTPClient] = {}
+
+
+def create(command: str, url: str, port):
+    if command == "GET":
+        return GetCommand(url, port)
+    elif command == "HEAD":
+        return HeadCommand(url, port)
+    elif command == "POST":
+        return PostCommand(url, port)
+    elif command == "PUT":
+        return PutCommand(url, port)
+    else:
+        raise ValueError()
 
 
 class AbstractCommand(ABC):
+    uri: str
+    host: str
+    path: str
+    port: Tuple[str, int]
 
-    def __init__(self, url: str, port: str):
-        self.url = url
+    def __init__(self, uri: str, port):
+        self.uri = uri
+        self.host, _, self.path = parser.parse_uri(uri)
         self.port = port
 
     @property
@@ -18,20 +41,6 @@ class AbstractCommand(ABC):
     def command(self):
         pass
 
-    @staticmethod
-    def create(command: str, url: str, port: str):
-        if command == "GET":
-            return GetCommand(url, port)
-        elif command == "HEAD":
-            return HeadCommand(url, port)
-        elif command == "POST":
-            return PostCommand(url, port)
-        elif command == "PUT":
-            return PutCommand(url, port)
-        else:
-            raise ValueError()
-
-
     @staticmethod
     def build_message(command, host, path):
         message = f"{command} {path} HTTP/1.1\r\n"
@@ -40,26 +49,34 @@ class AbstractCommand(ABC):
 
         return message.encode(FORMAT)
 
-    def execute(self):
+    def execute(self, sub_request=False):
         (host, path) = self.parse_uri()
-        client = HTTPClient(host)
-        client.conn.connect((host, int(self.port)))
+        client = sockets.get(host)
+
+        if client and client.is_closed():
+            sockets.pop(self.host)
+            client = None
+
+        if not client:
+            client = HTTPClient(host)
+            client.conn.connect((host, self.port))
+            sockets[host] = client
 
         message = f"{self.command} {path} HTTP/1.1\r\n"
-        message += f"Host: {host}\r\n"
+        message += f"Host: {host}:{self.port}\r\n"
         message += "Accept: */*\r\nAccept-Encoding: identity\r\n"
 
         encoded_msg = self._build_message(message)
-        logging.info("---request begin---\r\n%s---request end---", encoded_msg.decode(FORMAT))
+        logging.debug("---request begin---\r\n%s---request end---", encoded_msg.decode(FORMAT))
 
-        logging.debug("Sending HTTP message: %r", encoded_msg)
         client.conn.sendall(encoded_msg)
         logging.info("HTTP request sent, awaiting response...")
 
         try:
-            self._await_response(client)
+            retriever = PreambleRetriever(client)
+            self._await_response(client, retriever)
         except InvalidResponse as e:
             logging.debug("Internal error: Response could not be parsed", exc_info=e)
             return
@@ -69,9 +86,10 @@ class AbstractCommand(ABC):
         except UnsupportedEncoding as e:
             logging.debug("Internal error: Unsupported encoding in response", exc_info=e)
         finally:
-            client.close()
+            if not sub_request:
+                client.close()
 
-    def _await_response(self, client):
+    def _await_response(self, client, retriever):
         while True:
             line = client.read_line()
             print(line, end="")
@@ -82,11 +100,11 @@ class AbstractCommand(ABC):
         return (message + "\r\n").encode(FORMAT)
 
     def parse_uri(self):
-        parsed = urlparse(self.url)
+        parsed = urlparse(self.uri)
 
         # If there is no netloc, the url is invalid, so prepend `//` and try again
         if parsed.netloc == "":
-            parsed = urlparse("//" + self.url)
+            parsed = urlparse("//" + self.uri)
 
         host = parsed.netloc
         path = parsed.path
@@ -105,6 +123,7 @@ class AbstractWithBodyCommand(AbstractCommand, ABC):
     @staticmethod
     def build_message(command, host, path):
         message = AbstractCommand.build_message()
+
     def _build_message(self, message: str) -> bytes:
         body = input(f"Enter {self.command} data: ").encode(FORMAT)
         print()
@@ -126,18 +145,31 @@ class HeadCommand(AbstractCommand):
 
 
 class GetCommand(AbstractCommand):
+
+    def __init__(self, uri: str, port, dir=None):
+        super().__init__(uri, port)
+        self.dir = dir
+        self.filename = None
+
     @property
     def command(self):
         return "GET"
 
-    def _await_response(self, client):
-        (version, status, msg) = parser.get_status_line(client)
-        logging.debug("Parsed status-line: version: %s, status: %s", version, status)
-        headers = parser.get_headers(client)
-        logging.debug("Parsed headers: %r", headers)
+    def _get_preamble(self, retriever):
+        lines = retriever.retrieve()
+        (version, status, msg) = parser.parse_status_line(next(lines))
+        headers = parser.parse_headers(lines)
 
-        handler = ResponseHandler.create(client, headers, status, self.url)
-        handler.handle()
+        logging.debug("---response begin---\r\n%s--- response end---", "".join(retriever.buffer))
+
+        return Message(version, status, msg, headers)
+
+    def _await_response(self, client, retriever) -> str:
+        msg = self._get_preamble(retriever)
+
+        from client import response_handler
+        self.filename = response_handler.handle(client, msg, self, self.dir)
+        return
 
 
 class PostCommand(AbstractWithBodyCommand):
diff --git a/client/htmlparser.py b/client/htmlparser.py
new file mode 100644
index 0000000..ebd91da
--- /dev/null
+++ b/client/htmlparser.py
@@ -0,0 +1,6 @@
+from bs4 import BeautifulSoup
+
+
+class HTMLParser:
+    def __init__(self, soup: BeautifulSoup):
+        pass
\ No newline at end of file
diff --git a/client/response_handler.py b/client/response_handler.py
index 1ab4247..43e3839 100644
--- a/client/response_handler.py
+++ b/client/response_handler.py
@@ -2,52 +2,57 @@
 import logging
 import os
 import re
 from abc import ABC, abstractmethod
-from typing import Dict
-from urllib.parse import urlparse, unquote
+from urllib.parse import urlsplit, unquote
 
-import cssutils
 from bs4 import BeautifulSoup, Tag
 
+from client.command import AbstractCommand, GetCommand
 from client.httpclient import HTTPClient, FORMAT
 from httplib import parser
 from httplib.exceptions import InvalidResponse
+from httplib.message import Message
 from httplib.retriever import Retriever
 
 
+def handle(client: HTTPClient, msg: Message, command: AbstractCommand, dir=None):
+    handler = BasicResponseHandler(client, msg, command)
+    retriever = handler.handle()
+
+    if retriever is None:
+        return
+
+    content_type = msg.headers.get("content-type")
+    if content_type and "text/html" in content_type:
+        handler = HTMLDownloadHandler(retriever, client, msg, command, dir)
+    else:
+        handler = RawDownloadHandler(retriever, client, msg, command, dir)
+
+    return handler.handle()
+
+
 class ResponseHandler(ABC):
     client: HTTPClient
-    headers: Dict[str, str]
-    status_code: int
-    url: str
     retriever: Retriever
+    msg: Message
+    cmd: AbstractCommand
 
-    def __init__(self, retriever: Retriever, client: HTTPClient, headers: Dict[str, str], url: str):
+    def __init__(self, retriever: Retriever, client: HTTPClient, msg, cmd):
         self.client = client
-        self.headers = headers
-        self.url = url
         self.retriever = retriever
-        pass
+        self.msg = msg
+        self.cmd = cmd
 
     @abstractmethod
     def handle(self):
         pass
 
-    @staticmethod
-    def create(client: HTTPClient, headers, status_code, url):
-        retriever = Retriever.create(client, headers)
-
-        content_type = headers.get("content-type")
-        if content_type and "text/html" in content_type:
-            return HTMLDownloadHandler(retriever, client, headers, url)
-        return RawDownloadHandler(retriever, client, headers, url)
-
     @staticmethod
     def parse_uri(uri: str):
-        parsed = urlparse(uri)
+        parsed = urlsplit(uri)
 
         # If there is no netloc, the url is invalid, so prepend `//` and try again
         if parsed.netloc == "":
-            parsed = urlparse("//" + uri)
+            parsed = urlsplit("//" + uri)
 
         host = parsed.netloc
         path = parsed.path
@@ -56,11 +61,79 @@ class ResponseHandler(ABC):
 
         return host, path
 
 
-class DownloadHandler(ResponseHandler, ABC):
-    path: str
+class BasicResponseHandler(ResponseHandler):
+    """ Response handler which throws away the body and only shows the headers.
+    In case of a redirect, it will process it and pass it to the appropriate response handler.
+    """
 
-    def __init__(self, retriever: Retriever, client: HTTPClient, headers: Dict[str, str], url: str, dir=None):
-        super().__init__(retriever, client, headers, url)
+    def __init__(self, client: HTTPClient, msg: Message, cmd: AbstractCommand):
+        retriever = Retriever.create(client, msg.headers)
+        super().__init__(retriever, client, msg, cmd)
+
+    def handle(self):
+        return self._handle_status()
+
+    def _skip_body(self):
+        logging.debug("Skipping body: [")
+        for line in self.retriever.retrieve():
+            try:
+                logging.debug("%s", line.decode(FORMAT))
+            except Exception:
+                logging.debug("%r", line)
+
+        logging.debug("] done.")
+
+    def _handle_status(self):
+        logging.info("%d %s", self.msg.status, self.msg.msg)
+
+        if self.msg.status == 101:
+            # Switching protocols is not supported
+            print(f"{self.msg.version} {self.msg.status} {self.msg.msg}")
+            print(self.msg.headers)
+            return
+
+        if 200 <= self.msg.status < 300:
+            return self.retriever
+
+        if 300 <= self.msg.status < 400:
+            # Redirect
+            return self._do_handle_redirect()
+        if 400 <= self.msg.status < 500:
+            # Dump headers and exit with error
+            print(f"{self.msg.version} {self.msg.status} {self.msg.msg}")
+            print(self.msg.headers)
+        return None
+
+    def _do_handle_redirect(self):
+        self._skip_body()
+
+        location = self.msg.headers.get("location")
+        if not location:
+            raise InvalidResponse("No location in redirect")
+
+        parsed_location = urlsplit(location)
+        if not parsed_location.hostname:
+            raise InvalidResponse("Invalid location")
+
+        if not parsed_location.scheme == "http":
+            raise InvalidResponse("Only http is supported")
+
+        self.cmd.uri = location
+        self.cmd.host, self.cmd.port, self.cmd.path = parser.parse_uri(location)
+
+        if self.msg.status == 301:
+            logging.info("Status 301. Closing socket [%s]", self.cmd.host)
+            self.client.close()
+
+        self.cmd.execute()
+
+        return None
+
+
+class DownloadHandler(ResponseHandler, ABC):
+
+    def __init__(self, retriever: Retriever, client: HTTPClient, msg, cmd, dir=None):
+        super().__init__(retriever, client, msg, cmd)
 
         if not dir:
             dir = self._create_directory()
@@ -68,11 +141,11 @@ class DownloadHandler(ResponseHandler, ABC):
         self.path = self._get_duplicate_name(os.path.join(dir, self.get_filename()))
 
     @staticmethod
-    def create(retriever: Retriever, client: HTTPClient, headers: Dict[str, str], url: str, dir=None):
-        content_type = headers.get("content-type")
+    def create(retriever: Retriever, client: HTTPClient, msg, cmd, dir=None):
+        content_type = msg.headers.get("content-type")
         if content_type and "text/html" in content_type:
-            return HTMLDownloadHandler(retriever, client, headers, url, dir)
-        return RawDownloadHandler(retriever, client, headers, url, dir)
+            return HTMLDownloadHandler(retriever, client, msg, cmd, dir)
+        return RawDownloadHandler(retriever, client, msg, cmd, dir)
 
     def _create_directory(self):
         path = self._get_duplicate_name(os.path.abspath(self.client.host))
@@ -91,54 +164,25 @@ class DownloadHandler(ResponseHandler, ABC):
     def get_filename(self):
         """Returns the filename to download the payload to.
         """
-        filename = "index.html"
-
-        parsed = urlparse(self.url)
-
-        # If there is no netloc, the url is invalid, so prepend `//` and try again
-        if parsed.netloc == "":
-            parsed = urlparse("//" + self.url)
-
-        # If the path contains a `/` get only the last part and use it as filename
-        # If the path end with a `/`, it's a directory so ignore it.
-        if len(parsed.path) != 0:
-            index = parsed.path.rfind("/")
-            if index == -1:
-                filename = parsed.path
-            elif parsed.path[-1] != "/":
-                filename = parsed.path[index:]
+        filename = os.path.basename(self.cmd.path)
+        if filename == '':
+            return "index.html"
 
         while "%" in filename:
             filename = unquote(filename)
 
         filename = re.sub(r"[^\w.+-]+[.]*", '', filename)
-
         result = os.path.basename(filename).strip()
         if any(letter.isalnum() for letter in result):
            return result
 
         return "index.html"
 
-    def _handle_sub_request(self, client, url):
-
-        (version, status, _) = parser.get_status_line(client)
-        logging.debug("Parsed status-line: version: %s, status: %s", version, status)
-        headers = parser.get_headers(client)
-        logging.debug("Parsed headers: %r", headers)
-
-        if status != 200:
-            raise InvalidResponse("Status not expected 200: " + str(status))
-
-        retriever = Retriever.create(client, headers)
-        handler = RawDownloadHandler(retriever, client, headers, url, os.path.dirname(self.path))
-
-        return handler.handle()
-
 
 class RawDownloadHandler(DownloadHandler):
 
-    def __init__(self, retriever: Retriever, client: HTTPClient, headers: Dict[str, str], url: str, dir=None):
-        super().__init__(retriever, client, headers, url, dir)
+    def __init__(self, retriever: Retriever, client: HTTPClient, msg: Message, cmd: AbstractCommand, dir=None):
+        super().__init__(retriever, client, msg, cmd, dir)
 
     def handle(self) -> str:
         logging.debug("Retrieving payload")
@@ -152,8 +196,8 @@ class RawDownloadHandler(DownloadHandler):
 
 class HTMLDownloadHandler(DownloadHandler):
 
-    def __init__(self, retriever: Retriever, client: HTTPClient, headers: Dict[str, str], url: str, dir=None):
-        super().__init__(retriever, client, headers, url, dir)
+    def __init__(self, retriever: Retriever, client: HTTPClient, msg: Message, cmd: AbstractCommand, dir=None):
+        super().__init__(retriever, client, msg, cmd, dir)
 
     def handle(self) -> str:
 
@@ -172,11 +216,11 @@ class HTMLDownloadHandler(DownloadHandler):
 
     def _download_images(self, tmp_filename, target_filename):
 
-        (host, path) = ResponseHandler.parse_uri(self.url)
+        (host, path) = ResponseHandler.parse_uri(self.cmd.uri)
 
         with open(tmp_filename, "rb") as fp:
             soup = BeautifulSoup(fp, 'lxml')
-            base_url = self.url
+            base_url = self.cmd.uri
 
             base_element = soup.find("base")
             if base_element:
@@ -186,58 +230,24 @@ class HTMLDownloadHandler(DownloadHandler):
             tag: Tag
             for tag in soup.find_all("img"):
                 try:
-                    if tag.has_attr("src"):
-                        el_name = "src"
-                    elif tag.has_attr("data-src"):
-                        el_name = "data-src"
-                    else:
+                    if not tag.has_attr("src"):
                         continue
 
-                    if tag[el_name] in processed:
-                        new_url = processed.get(tag[el_name])
+                    if tag["src"] in processed:
+                        new_url = processed.get(tag["src"])
                     else:
-                        new_url = self.__download_image(tag[el_name], host, base_url)
-                        processed[tag[el_name]] = new_url
+                        new_url = self.__download_image(tag["src"], host, base_url)
+                        processed[tag["src"]] = new_url
 
                     if new_url:
-                        tag[el_name] = new_url
+                        tag["src"] = new_url
                 except Exception as e:
-                    logging.error("Failed to download image: %s, skipping...", tag[el_name], exc_info=e)
-
-            for tag in soup.find_all("div"):
-                if not tag.has_attr("style"):
-                    continue
-                style = cssutils.parseStyle(tag["style"])
-
-                if "background" in style and "url(" in style["background"]:
-                    el_name = "background"
-                elif "background-image" in style and "url(" in style["background-image"]:
-                    el_name = "background-image"
-                else:
-                    continue
-                el = style[el_name]
-                start = el.find("url(") + 4
-                end = el.find(")", start)
-                url = el[start:end].strip()
-
-                try:
-                    if url in processed:
-                        new_url = url
-                    else:
-                        new_url = self.__download_image(url, host, base_url)
-                        processed[url] = new_url
-                    if new_url:
-                        el = el[:start] + new_url + el[end:]
-                        style[el_name] = el
-                        tag["style"] = style.cssText
-                except Exception as e:
-                    logging.debug("Internal error", exc_info=e)
-                    logging.error("Failed to download image: %s, skipping...", tag["src"])
+                    logging.error("Failed to download image: %s, skipping...", tag["src"], exc_info=e)
 
         with open(target_filename, 'w') as file:
             file.write(str(soup))
 
     def __download_image(self, img_src, host, base_url):
-        parsed = urlparse(img_src)
+        parsed = urlsplit(img_src)
 
         logging.debug("Downloading image: %s", img_src)
 
@@ -245,36 +255,18 @@ class HTMLDownloadHandler(DownloadHandler):
             # Not a valid url
             return None
 
+        if parsed.hostname == host:
+            port = self.cmd.port
+        elif ":" in parsed.netloc:
+            port = parsed.netloc.split(":", 1)[1]
+        else:
+            port = 80
+
         if len(parsed.netloc) == 0 and parsed.path != "/":
             # relative url, append base_url
             img_src = os.path.join(os.path.dirname(base_url), parsed.path)
-            parsed = urlparse(img_src)
 
+        command = GetCommand(img_src, port, os.path.dirname(self.path))
+        command.execute(True)
 
-        # Check if the image is located on the same server
-        if len(parsed.netloc) == 0 or parsed.netloc == host:
-            same_host = True
-            img_host = host
-            img_path = parsed.path
-        else:
-            same_host = False
-            (img_host, img_path) = ResponseHandler.parse_uri(img_src)
-
-        message = f"GET {img_path} HTTP/1.1\r\n"
-        message += "Accept: */*\r\nAccept-Encoding: identity\r\n"
-        message += f"Host: {img_host}\r\n\r\n"
-        message = message.encode(FORMAT)
-
-        if same_host:
-            client = self.client
-            client.reset_request()
-        else:
-            client = HTTPClient(img_src)
-            client.conn.connect((img_host, 80))
-        client.conn.sendall(message)
-        filename = self._handle_sub_request(client, img_host + img_path)
-
-        if not same_host:
-            client.close()
-
-        return filename
+        return command.filename
diff --git a/httplib/MessageParser.py b/httplib/MessageParser.py
deleted file mode 100644
index e69de29..0000000
diff --git a/httplib/httpsocket.py b/httplib/httpsocket.py
index 7df1fd8..a881760 100644
--- a/httplib/httpsocket.py
+++ b/httplib/httpsocket.py
@@ -1,6 +1,7 @@
 import logging
 import socket
 from io import BufferedReader
+from typing import Tuple
 
 BUFSIZE = 4096
 TIMEOUT = 3
@@ -11,7 +12,7 @@ MAXLINE = 4096
 class HTTPSocket:
     host: str
     conn: socket.socket
-    file: BufferedReader
+    file: Tuple[BufferedReader, None]
 
     def __init__(self, conn: socket.socket, host: str):
 
@@ -24,8 +25,12 @@ class HTTPSocket:
 
     def close(self):
         self.file.close()
+        # self.conn.shutdown(socket.SHUT_RDWR)
         self.conn.close()
 
+    def is_closed(self):
+        return self.file is None
+
     def reset_request(self):
         self.file.close()
         self.file = self.conn.makefile("rb")
diff --git a/httplib/message.py b/httplib/message.py
new file mode 100644
index 0000000..a773368
--- /dev/null
+++ b/httplib/message.py
@@ -0,0 +1,16 @@
+from typing import Dict
+
+
+class Message:
+    version: str
+    status: int
+    msg: str
+    headers: Dict[str, str]
+    body: bytes
+
+    def __init__(self, version: str, status: int, msg: str, headers: Dict[str, str], body: bytes = None):
+        self.version = version
+        self.status = status
+        self.msg = msg
+        self.headers = headers
+        self.body = body
diff --git a/httplib/parser.py b/httplib/parser.py
index 366db86..91c621c 100644
--- a/httplib/parser.py
+++ b/httplib/parser.py
@@ -1,6 +1,6 @@
 import logging
 import re
-from urllib.parse import urlparse
+from urllib.parse import urlparse, urlsplit
 
 from httplib.exceptions import InvalidStatusLine, InvalidResponse, BadRequest
 from httplib.httpsocket import HTTPSocket
@@ -42,6 +42,26 @@ def get_status_line(client: HTTPSocket):
     return version, status, reason
 
 
+def parse_status_line(line: str):
+    split = list(filter(None, line.strip().split(" ", 2)))
+    if len(split) < 3:
+        raise InvalidStatusLine(line)  # TODO fix exception
+
+    (http_version, status, reason) = split
+
+    if not _is_valid_http_version(http_version):
+        raise InvalidStatusLine(line)
+    version = http_version[:4]
+
+    if not re.match(r"\d{3}", status):
+        raise InvalidStatusLine(line)
+    status = int(status)
+    if status < 100 or status > 999:
+        raise InvalidStatusLine(line)
+
+    return version, status, reason
+
+
 def parse_request_line(client: HTTPSocket):
     line, (method, target, version) = _get_start_line(client)
 
@@ -119,7 +139,7 @@ def parse_request_headers(client: HTTPSocket):
             raise BadRequest()
 
         headers[key] = value
-        
+
     return headers
 
 
@@ -157,6 +177,38 @@ def get_headers(client: HTTPSocket):
 
     return result
 
 
+def parse_headers(lines):
+    headers = []
+    # first header after the status-line may not contain a space
+    for line in lines:
+        line = next(lines)
+        if line[0].isspace():
+            continue
+        else:
+            break
+
+    for line in lines:
+        if line in ("\r\n", "\n", " "):
+            break
+
+        if line[0].isspace():
+            headers[-1] = headers[-1].rstrip("\r\n")
+
+        headers.append(line.lstrip())
+
+    result = {}
+    header_str = "".join(headers)
+    for line in header_str.splitlines():
+        pos = line.find(":")
+
+        if pos <= 0 or pos >= len(line) - 1:
+            continue
+
+        (header, value) = map(str.strip, line.split(":", 1))
+        check_next_header(result, header, value)
+        result[header.lower()] = value.lower()
+
+    return result
+
+
 def check_next_header(headers, next_header: str, next_value: str):
     if next_header == "content-length":
@@ -166,3 +218,25 @@ def check_next_header(headers, next_header: str, next_value: str):
         if not next_value.isnumeric() or int(next_value) <= 0:
             logging.error("Invalid content-length value: %r", next_value)
             raise InvalidResponse()
+
+
+def parse_uri(uri: str):
+    parsed = urlsplit(uri)
+
+    # If there is no netloc, the given string is not a valid URI, so split on /
+    if parsed.hostname:
+        host = parsed.hostname
+        path = parsed.path
+        if parsed.query != '':
+            path = f"{path}?{parsed.query}"
+    else:
+        (host, path) = uri.split("/", 1)
+
+    if ":" in host:
+        host, port = host.split(":", 1)
+    elif parsed.scheme == "https":
+        port = 443
+    else:
+        port = 80
+
+    return host, port, path
diff --git a/httplib/retriever.py b/httplib/retriever.py
index 280a3d6..eaee0cf 100644
--- a/httplib/retriever.py
+++ b/httplib/retriever.py
@@ -42,6 +42,28 @@ class Retriever(ABC):
 
         return ContentLengthRetriever(client, int(content_length))
 
 
+class PreambleRetriever(Retriever):
+    client: HTTPSocket
+    buffer: []
+
+    def __init__(self, client: HTTPSocket):
+        super().__init__(client)
+        self.client = client
+        self.buffer = []
+
+    def retrieve(self):
+
+        line = self.client.read_line()
+        while True:
+            self.buffer.append(line)
+
+            if line in ("\r\n", "\n", " "):
+                break
+
+            yield line
+            line = self.client.read_line()
+
+
 class ContentLengthRetriever(Retriever):
     length: int
 
@@ -63,21 +85,16 @@ class ContentLengthRetriever(Retriever):
                 buffer = self.client.read(remaining)
             except TimeoutError:
                 logging.error("Timed out before receiving complete payload")
-                self.client.close()
                 raise IncompleteResponse("Timed out before receiving complete payload")
             except ConnectionError:
                 logging.error("Timed out before receiving complete payload")
-                self.client.close()
                 raise IncompleteResponse("Connection closed before receiving complete payload")
 
-            logging.debug("Received payload length: %s", len(buffer))
-
             if len(buffer) == 0:
                 logging.warning("Received payload length %s less than expected %s", cur_payload_size, self.length)
                 break
 
             cur_payload_size += len(buffer)
-            logging.debug("Processed payload: %r", cur_payload_size)
 
             yield buffer
 
         return b""
@@ -108,7 +125,6 @@ class ChunkedRetriever(Retriever):
 
             yield buffer
             self.client.read_line()  # remove CRLF
-
         return b""
 
     def __get_chunk_size(self):
         line = self.client.read_line()
diff --git a/server/worker.py b/server/worker.py
index 497d270..36b2c5d 100644
--- a/server/worker.py
+++ b/server/worker.py
@@ -1,5 +1,4 @@
 import logging
-import multiprocessing
 import multiprocessing as mp
 import threading
 from concurrent.futures import ThreadPoolExecutor
@@ -69,7 +68,7 @@ class Worker:
 
                 handler = RequestHandler(conn, self.host)
                 handler.listen()
-            except Exception as e:
+            except Exception:
                 logging.debug("Internal error")
 
                 conn.shutdown(socket.SHUT_RDWR)