Rename response_handler to responsehandler

2021-03-28 03:00:53 +02:00
parent 7ecfedbec7
commit b42c17c420
2 changed files with 2 additions and 2 deletions
--- a/client/responsehandler.py
+++ b/client/responsehandler.py
@@ -0,0 +1,293 @@
+import logging
+import os
+import re
+from abc import ABC, abstractmethod
+from urllib.parse import urlsplit, unquote
+
+from client.command import AbstractCommand, GetCommand
+from client.httpclient import HTTPClient
+from httplib import parser
+from httplib.exceptions import InvalidResponse
+from httplib.httpsocket import FORMAT
+from httplib.message import ResponseMessage as Message
+from httplib.retriever import Retriever
+
+# Matches a `<base ...>` tag with a quoted `href` attribute; group 1 captures the href value.
+# The pattern is case-insensitive and requires whitespace before the attribute name.
+BASE_REGEX = re.compile(r"<\s*base[^>]*\shref\s*=\s*['\"]([^\"']+)['\"][^>]*>", re.M | re.I)
+# Matches an `<img ...>` tag with a quoted `src` attribute; group 1 captures the (non-empty) src value.
+IMG_REGEX = re.compile(r"<\s*img[^>]*\ssrc\s*=\s*['\"]([^\"']+)['\"][^>]*>", re.M | re.I)
+
+
+def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory=None):
+    """
+    Process the response `msg` which was received for `command`.
+
+    The status is handled first by `BasicResponseHandler`. When that yields a retriever
+    (which it does for 2xx responses), the body is downloaded by the handler selected
+    by `DownloadHandler.create`.
+
+    @param client: the client on which the response was received
+    @param msg: the parsed response message
+    @param command: the command which triggered the response
+    @param directory: the directory to download to; when not set, the download handler creates one
+    @return: the result of the download handler, or None when there is no body to download
+    """
+    retriever = BasicResponseHandler(client, msg, command).handle()
+    if retriever is None:
+        return None
+
+    # Delegate to the factory so the choice between HTML and raw downloads lives in one place.
+    return DownloadHandler.create(retriever, client, msg, command, directory).handle()
+
+
+class ResponseHandler(ABC):
+    """
+    Base class of all response handlers.
+    Stores the client, the retriever, the response message and the command which
+    triggered the response; subclasses implement `handle`.
+    """
+    client: HTTPClient
+    retriever: Retriever
+    msg: Message
+    cmd: AbstractCommand
+
+    def __init__(self, retriever: Retriever, client: HTTPClient, msg, cmd):
+        self.retriever = retriever
+        self.client = client
+        self.cmd = cmd
+        self.msg = msg
+
+    @abstractmethod
+    def handle(self):
+        """
+        Handle the response. Has to be implemented by subclasses.
+        """
+
+
+class BasicResponseHandler(ResponseHandler):
+    """
+    Response handler which looks at the status of the response only.
+    Successful responses hand back the retriever so the body can be processed by another handler,
+    redirects are followed by re-executing the command, and for errors the body is thrown away.
+    """
+
+    def __init__(self, client: HTTPClient, msg: Message, cmd: AbstractCommand):
+        super().__init__(Retriever.create(client, msg.headers), client, msg, cmd)
+
+    def handle(self):
+        """
+        Handle the status of the response.
+        @return: the retriever for the body if the status is 2xx, otherwise None
+        """
+        return self._handle_status()
+
+    def _dump_raw(self):
+        """
+        Print the raw message to stdout without adding a trailing newline.
+        """
+        print("".join(self.msg.raw), end="")
+
+    def _skip_body(self):
+        """
+        Retrieve the complete body and throw it away; every chunk is logged at debug level.
+        """
+        logging.debug("Skipping body: [")
+        for chunk in self.retriever.retrieve():
+            try:
+                logging.debug("%s", chunk.decode(FORMAT))
+            except Exception:
+                # Chunk could not be decoded, log its representation instead
+                logging.debug("%r", chunk)
+
+        logging.debug("] done.")
+
+    def _handle_status(self):
+        """
+        Dispatch on the status code of the response.
+        @return: the retriever for 2xx responses, the result of the redirect handling for 3xx
+                 responses and None in all other cases
+        """
+        status = self.msg.status
+        logging.info("%d %s", status, self.msg.msg)
+
+        if status == 101:
+            # Switching protocols is not supported
+            self._dump_raw()
+            return None
+
+        if 200 <= status < 300:
+            return self.retriever
+
+        if 300 <= status < 400:
+            # Redirect
+            self._skip_body()
+            return self._handle_redirect()
+
+        if 400 <= status < 600:
+            self._skip_body()
+            # Dump the message, unless this response belongs to a sub request
+            if not self.cmd.sub_request:
+                self._dump_raw()
+
+        return None
+
+    def _handle_redirect(self):
+        """
+        Follow the redirect by pointing the command to the new location and executing it again.
+        A 304 response is only printed. For a 301 response the client is closed before the command is re-executed.
+        @return: always None
+        @raise InvalidResponse: if the location is missing, has no hostname or is not a http location
+        """
+        if self.msg.status == 304:
+            self._dump_raw()
+            return None
+
+        target = self.msg.headers.get("location")
+        if not target or not target.strip():
+            raise InvalidResponse("No location in redirect")
+
+        # The location may be relative to the uri of the current command
+        target = parser.urljoin(self.cmd.uri, target)
+        parts = urlsplit(target)
+        if not parts.hostname:
+            raise InvalidResponse("Invalid location")
+        if parts.scheme != "http":
+            raise InvalidResponse("Only http is supported")
+
+        self.cmd.uri = target
+
+        if self.msg.status == 301:
+            logging.info("Status 301. Closing socket [%s]", self.cmd.host)
+            self.client.close()
+
+        self.cmd.execute()
+
+        return None
+
+
+class DownloadHandler(ResponseHandler, ABC):
+    """
+    Base class for handlers which download the body of a response to a file.
+    The target path is determined on construction and stored in `self.path`.
+    """
+
+    def __init__(self, retriever: Retriever, client: HTTPClient, msg, cmd, directory=None):
+        """
+        @param directory: the directory to download to; when not set, a new directory is created
+        """
+        super().__init__(retriever, client, msg, cmd)
+
+        if not directory:
+            directory = self._create_directory()
+
+        self.path = self._get_duplicate_name(os.path.join(directory, self.get_filename()))
+
+    @staticmethod
+    def create(retriever: Retriever, client: HTTPClient, msg, cmd, directory=None):
+        """
+        Create the download handler matching the content type of `msg`.
+        @return: a `HTMLDownloadHandler` for `text/html` responses, otherwise a `RawDownloadHandler`
+        """
+        content_type = msg.headers.get("content-type")
+        # Media types are case-insensitive, so `Text/HTML` has to be recognised as well
+        if content_type and "text/html" in content_type.lower():
+            return HTMLDownloadHandler(retriever, client, msg, cmd, directory)
+        return RawDownloadHandler(retriever, client, msg, cmd, directory)
+
+    def _create_directory(self):
+        """
+        Creates a new directory named after `self.client.host`, resolved against the current working directory.
+        @return: the absolute path of the created directory
+        """
+        path = self._get_duplicate_name(os.path.abspath(self.client.host))
+        os.mkdir(path)
+        return path
+
+    def _get_duplicate_name(self, path):
+        """
+        Returns `path` if nothing exists at that location, otherwise the first of
+        `path.1`, `path.2`, ... which does not exist yet.
+        """
+        tmp_path = path
+        i = 0
+        while os.path.exists(tmp_path):
+            i += 1
+            tmp_path = "{path}.{counter}".format(path=path, counter=i)
+
+        return tmp_path
+
+    def get_filename(self):
+        """
+        Returns the filename to download the payload to.
+
+        The name is the last component of the path of the command, percent-decoded, with every
+        character other than word characters, `.`, `+` and `-` removed.
+        Falls back to `index.html` if the path has no last component or if the cleaned
+        name contains no alphanumeric character.
+        """
+        filename = os.path.basename(self.cmd.path)
+        if filename == '':
+            return "index.html"
+
+        # Undo (possibly nested) percent-encoding. `unquote` leaves a `%` which is not part of a
+        # valid escape untouched, so stop as soon as decoding no longer changes the name;
+        # looping on the mere presence of `%` would never terminate for names like `50%25`.
+        while "%" in filename:
+            decoded = unquote(filename)
+            if decoded == filename:
+                break
+            filename = decoded
+
+        # Drop unsafe characters, together with any dots which directly follow them
+        filename = re.sub(r"[^\w.+-]+[.]*", '', filename)
+        result = os.path.basename(filename).strip()
+        if any(letter.isalnum() for letter in result):
+            return result
+
+        return "index.html"
+
+
+class RawDownloadHandler(DownloadHandler):
+    """
+    Download handler which writes the body to `self.path` without modifying it.
+    """
+
+    def __init__(self, retriever: Retriever, client: HTTPClient, msg: Message, cmd: AbstractCommand, directory=None):
+        super().__init__(retriever, client, msg, cmd, directory)
+
+    def handle(self) -> str:
+        """
+        Writes every buffer of the retriever to `self.path`.
+        @return: the path of the written file
+        """
+        logging.debug("Retrieving payload")
+
+        # The context manager closes the file even if retrieving or writing fails halfway
+        with open(self.path, "wb") as file:
+            for buffer in self.retriever.retrieve():
+                file.write(buffer)
+
+        return self.path
+
+
+class HTMLDownloadHandler(DownloadHandler):
+    """
+    Download handler for html responses.
+    Besides the html itself, the images referenced by `<img>` tags are downloaded as well and
+    the references in the stored html are replaced by the names of the downloaded files.
+    """
+
+    def __init__(self, retriever: Retriever, client: HTTPClient, msg: Message, cmd: AbstractCommand, directory=None):
+        super().__init__(retriever, client, msg, cmd, directory)
+
+    def handle(self) -> str:
+        """
+        Writes the body to a temporary file next to `self.path`, downloads the referenced images
+        and writes the html with updated references to `self.path`.
+        @return: the path of the final html file
+        """
+        (directory, filename) = os.path.split(self.path)
+        tmp_path = os.path.join(directory, f".{filename}.tmp")
+
+        try:
+            with open(tmp_path, "wb") as tmp_file:
+                for buffer in self.retriever.retrieve():
+                    tmp_file.write(buffer)
+
+            charset = parser.get_charset(self.msg.headers)
+            self._download_images(tmp_path, self.path, charset)
+        finally:
+            # Never leave the temporary file behind, also not when a step above failed
+            if os.path.exists(tmp_path):
+                os.remove(tmp_path)
+
+        return self.path
+
+    def _download_images(self, tmp_filename, target_filename, charset=FORMAT):
+        """
+        Downloads images referenced in the html of `tmp_filename` and replaces the references in the html
+        and writes it to `target_filename`.
+        @param tmp_filename: the path to the temporary html file
+        @param target_filename: the path for the final html file
+        @param charset: the charset to decode `tmp_filename`
+        """
+
+        try:
+            with open(tmp_filename, "r", encoding=charset) as fp:
+                html = fp.read()
+        except (UnicodeDecodeError, LookupError):
+            # The charset is wrong (UnicodeDecodeError) or unknown (LookupError):
+            # fall back to the default format and replace whatever cannot be decoded.
+            with open(tmp_filename, "r", encoding=FORMAT, errors="replace") as fp:
+                html = fp.read()
+
+        # A <base> element changes the url against which relative references are resolved
+        base_element = BASE_REGEX.search(html)
+        base_url = self.cmd.uri
+        if base_element:
+            base_url = parser.urljoin(self.cmd.uri, base_element.group(1))
+
+        # Maps each successfully downloaded image reference to the value returned for it,
+        # so an image which is referenced several times is downloaded only once.
+        processed = {}
+        # (start, end, replacement) of every reference to rewrite, in document order
+        to_replace = []
+
+        for m in IMG_REGEX.finditer(html):
+            target = m.group(1)
+            if not target:
+                continue
+
+            try:
+                if target not in processed:
+                    new_url = self.__download_image(target, base_url)
+                    if not new_url:
+                        # Image failed to download
+                        continue
+
+                    processed[target] = new_url
+
+                local_path = os.path.basename(processed[target])
+                to_replace.append((m.start(1), m.end(1), local_path))
+
+            except Exception as e:
+                # A single broken image should not prevent the page from being saved
+                logging.error("Failed to download image: %s, skipping...", target, exc_info=e)
+
+        # The matches do not overlap and are in document order, so the new html can be
+        # assembled in a single pass instead of re-slicing the whole string per image.
+        pieces = []
+        position = 0
+        for (start, end, path) in to_replace:
+            pieces.append(html[position:start])
+            pieces.append(path)
+            position = end
+        pieces.append(html[position:])
+
+        with open(target_filename, 'w', encoding=FORMAT) as file:
+            file.write("".join(pieces))
+
+    def __download_image(self, img_src, base_url):
+        """
+        Download image from the specified `img_src` and `base_url`.
+        If the image is available, it will be downloaded to the directory of `self.path`
+        @param img_src: the reference to the image as found in the html
+        @param base_url: the url against which `img_src` is resolved
+        @return: the `filename` attribute of the executed command
+        """
+
+        logging.info("Downloading image: %s", img_src)
+
+        parsed = urlsplit(img_src)
+        img_src = parser.urljoin(base_url, img_src)
+
+        if parsed.hostname is None or parsed.hostname == self.cmd.host:
+            # Relative reference or same host: reuse the port of the current command
+            port = self.cmd.port
+        else:
+            # `port` is an int (or None when absent) and copes with userinfo and IPv6 literals,
+            # unlike splitting the netloc on ":"
+            port = parsed.port or 80
+
+        command = GetCommand(img_src, port, os.path.dirname(self.path))
+        command.execute(True)
+
+        return command.filename