Files
CN2021/client/response_handler.py
Arthur Bols 48c4f207a8 Rename Server- and ClientMessage
Renamed ServerMessage and ClientMessage to respectively ResponseMessage
and RequestMessage to make it more clear.
2021-03-28 01:59:08 +01:00

294 lines
9.3 KiB
Python

import logging
import os
import re
from abc import ABC, abstractmethod
from urllib.parse import urlsplit, unquote
from client.command import AbstractCommand, GetCommand
from client.httpclient import HTTPClient
from httplib import parser
from httplib.exceptions import InvalidResponse
from httplib.httpsocket import FORMAT
from httplib.message import ResponseMessage as Message
from httplib.retriever import Retriever
# Patterns locating the first quoted `href` of a <base> element and the quoted `src`
# of <img> elements. Group 1 holds the attribute value without its quotes.
BASE_REGEX = re.compile(
    r"<\s*base[^>]*\shref\s*=\s*['\"]([^\"']+)['\"][^>]*>",
    flags=re.MULTILINE | re.IGNORECASE,
)
IMG_REGEX = re.compile(
    r"<\s*img[^>]*\ssrc\s*=\s*['\"]([^\"']+)['\"][^>]*>",
    flags=re.MULTILINE | re.IGNORECASE,
)
def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory=None):
    """
    Process a response: evaluate the status first, then download the payload.

    The status is evaluated by `BasicResponseHandler`. When that yields no retriever,
    nothing is downloaded. Otherwise the payload is stored by `HTMLDownloadHandler`
    when the content type contains "text/html" and by `RawDownloadHandler` in all
    other cases.

    @param client: the client the response was received on
    @param msg: the response message
    @param command: the command which issued the request
    @param directory: the download directory, passed on to the download handler
    @return: the result of the download handler, or None when there is nothing to download
    """
    retriever = BasicResponseHandler(client, msg, command).handle()
    if retriever is None:
        return None

    content_type = msg.headers.get("content-type")
    is_html = bool(content_type) and "text/html" in content_type
    handler_class = HTMLDownloadHandler if is_html else RawDownloadHandler
    return handler_class(retriever, client, msg, command, directory).handle()
class ResponseHandler(ABC):
    """
    Abstract base class of all response handlers.

    It only stores the objects a handler works with; subclasses implement `handle`.
    """
    client: HTTPClient
    retriever: Retriever
    msg: Message
    cmd: AbstractCommand

    def __init__(self, retriever: Retriever, client: HTTPClient, msg, cmd):
        """
        @param retriever: the retriever used to read the response body
        @param client: the client the response was received on
        @param msg: the response message
        @param cmd: the command which issued the request
        """
        self.retriever = retriever
        self.client = client
        self.msg, self.cmd = msg, cmd

    @abstractmethod
    def handle(self):
        """Process the response. Must be implemented by every concrete handler."""
class BasicResponseHandler(ResponseHandler):
    """
    Response handler that looks at the status only.

    For a successful response it hands back the retriever so the caller can download
    the body. For redirects and errors the body is read and discarded; redirects are
    followed by re-executing the command with the new location.
    """

    def __init__(self, client: HTTPClient, msg: Message, cmd: AbstractCommand):
        """Build the retriever from the response headers and store the collaborators."""
        super().__init__(Retriever.create(client, msg.headers), client, msg, cmd)

    def handle(self):
        """
        Handle the response according to its status.

        @return: the retriever for a 2xx response, otherwise None
        """
        return self._handle_status()

    def _skip_body(self):
        """Read the whole body and only write it to the debug log."""
        logging.debug("Skipping body: [")
        for chunk in self.retriever.retrieve():
            try:
                logging.debug("%s", chunk.decode(FORMAT))
            except Exception:
                # Not decodable with FORMAT: log the raw representation instead
                logging.debug("%r", chunk)
        logging.debug("] done.")

    def _handle_status(self):
        """
        Dispatch on the status code.

        @return: the retriever for 2xx, the result of the redirect handling for 3xx
                 and None for every other status
        """
        status = self.msg.status
        logging.info("%d %s", status, self.msg.msg)

        if status == 101:
            # Switching protocols is not supported: just dump the response head
            print("".join(self.msg.raw), end="")
            return None

        if 200 <= status < 300:
            return self.retriever

        if 300 <= status < 400:
            # Redirect: the body is of no use, drop it before following the location
            self._skip_body()
            return self._handle_redirect()

        if 400 <= status < 600:
            self._skip_body()
            # Errors of sub requests are not dumped, only those of the main request
            if not self.cmd.sub_request:
                print("".join(self.msg.raw), end="")

        return None

    def _handle_redirect(self):
        """
        Follow a redirect by pointing the command to the new location and executing it again.

        A 304 response is not followed; its head is printed instead.

        @return: always None
        @raise InvalidResponse: if the location is missing, has no hostname or is not http
        """
        if self.msg.status == 304:
            print("".join(self.msg.raw), end="")
            return None

        location = self.msg.headers.get("location")
        if not location or not location.strip():
            raise InvalidResponse("No location in redirect")

        # The location may be relative to the requested uri
        location = parser.urljoin(self.cmd.uri, location)
        target = urlsplit(location)
        if not target.hostname:
            raise InvalidResponse("Invalid location")
        if target.scheme != "http":
            raise InvalidResponse("Only http is supported")

        self.cmd.uri = location
        self.cmd.host, self.cmd.port, self.cmd.path = parser.parse_uri(location)

        if self.msg.status == 301:
            logging.info("Status 301. Closing socket [%s]", self.cmd.host)
            self.client.close()

        self.cmd.execute()
        return None
class DownloadHandler(ResponseHandler, ABC):
    """
    Base class of the handlers which store the response payload in a file.

    On construction the target path is determined and stored in `self.path`.
    """

    def __init__(self, retriever: Retriever, client: HTTPClient, msg, cmd, directory=None):
        """
        @param retriever: the retriever used to read the response body
        @param client: the client the response was received on
        @param msg: the response message
        @param cmd: the command which issued the request
        @param directory: the directory to download to; when empty or None a new
                          directory named after the host of the client is created
        """
        super().__init__(retriever, client, msg, cmd)
        if not directory:
            directory = self._create_directory()
        self.path = self._get_duplicate_name(os.path.join(directory, self.get_filename()))

    @staticmethod
    def create(retriever: Retriever, client: HTTPClient, msg, cmd, directory=None):
        """
        Create the download handler matching the content type of `msg`.

        @return: an `HTMLDownloadHandler` when the content type contains "text/html",
                 otherwise a `RawDownloadHandler`
        """
        content_type = msg.headers.get("content-type")
        if content_type and "text/html" in content_type:
            return HTMLDownloadHandler(retriever, client, msg, cmd, directory)
        return RawDownloadHandler(retriever, client, msg, cmd, directory)

    def _create_directory(self):
        """
        Create a new directory named after the host of the client, relative to the
        current working directory.

        @return: the absolute path of the created directory
        """
        path = self._get_duplicate_name(os.path.abspath(self.client.host))
        os.mkdir(path)
        return path

    def _get_duplicate_name(self, path):
        """
        Return `path` if nothing exists there yet, otherwise the first of
        `path.1`, `path.2`, ... which does not exist.
        """
        tmp_path = path
        i = 0
        while os.path.exists(tmp_path):
            i += 1
            tmp_path = "{path}.{counter}".format(path=path, counter=i)
        return tmp_path

    def get_filename(self):
        """
        Returns the filename to download the payload to.

        The name is derived from the last segment of the path of the command:
        percent-encoding is undone and every character which is not a word character,
        ".", "+" or "-" is removed. When nothing usable remains, "index.html" is returned.
        """
        filename = os.path.basename(self.cmd.path)
        if filename == '':
            return "index.html"

        # Undo (possibly nested) percent-encoding. Decoding stops as soon as a pass
        # changes nothing: a "%" which is not part of a valid escape (e.g. the decoded
        # form of "50%25off.png") is left untouched by unquote, so looping purely on
        # the presence of "%" would never terminate.
        while "%" in filename:
            decoded = unquote(filename)
            if decoded == filename:
                break
            filename = decoded

        # Drop everything that is not safe in a file name, including path separators
        filename = re.sub(r"[^\w.+-]+[.]*", '', filename)
        result = os.path.basename(filename).strip()

        # Names without a single letter or digit (e.g. "..") are not usable
        if any(letter.isalnum() for letter in result):
            return result
        return "index.html"
class RawDownloadHandler(DownloadHandler):
    """
    Download handler which writes the payload to `self.path` unmodified.
    """

    def __init__(self, retriever: Retriever, client: HTTPClient, msg: Message, cmd: AbstractCommand, dir=None):
        # NOTE: `dir` shadows the builtin, but the name is part of the signature
        # and is therefore kept for callers passing it by keyword.
        super().__init__(retriever, client, msg, cmd, dir)

    def handle(self) -> str:
        """
        Write every buffer produced by the retriever to `self.path`.

        @return: the path of the written file
        """
        logging.debug("Retrieving payload")
        # The context manager closes the file even when retrieving or writing fails
        with open(self.path, "wb") as file:
            for buffer in self.retriever.retrieve():
                file.write(buffer)
        return self.path
class HTMLDownloadHandler(DownloadHandler):
    """
    Download handler for html payloads.

    Besides storing the page, it downloads the images referenced by the page and
    rewrites the references to point to the downloaded files.
    """

    def __init__(self, retriever: Retriever, client: HTTPClient, msg: Message, cmd: AbstractCommand, directory=None):
        super().__init__(retriever, client, msg, cmd, directory)

    def handle(self) -> str:
        """
        Download the page to a temporary file, process its images and write the
        final html to `self.path`.

        @return: the path of the final html file
        """
        (directory, file) = os.path.split(self.path)
        tmp_path = os.path.join(directory, f".{file}.tmp")

        try:
            with open(tmp_path, "wb") as tmp_file:
                for buffer in self.retriever.retrieve():
                    tmp_file.write(buffer)

            charset = parser.get_charset(self.msg.headers)
            self._download_images(tmp_path, self.path, charset)
        finally:
            # Do not leave the temporary file behind when retrieving or processing
            # fails. It may not exist at all when it could not be opened.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

        return self.path

    def _download_images(self, tmp_filename, target_filename, charset=FORMAT):
        """
        Downloads images referenced in the html of `tmp_filename` and replaces the references in the html
        and writes it to `target_filename`.

        Images which fail to download are logged and keep their original reference.

        @param tmp_filename: the path to the temporary html file
        @param target_filename: the path for the final html file
        @param charset: the charset to decode `tmp_filename`
        """
        try:
            with open(tmp_filename, "r", encoding=charset) as fp:
                html = fp.read()
        except (UnicodeDecodeError, LookupError):
            # Either the content does not match the charset or the charset is unknown
            # to Python (LookupError): fall back to FORMAT and replace what is undecodable.
            with open(tmp_filename, "r", encoding=FORMAT, errors="replace") as fp:
                html = fp.read()

        # A <base> element changes the url which relative references are resolved against
        base_url = self.cmd.uri
        base_element = BASE_REGEX.search(html)
        if base_element:
            base_url = parser.urljoin(self.cmd.uri, base_element.group(1))

        processed = {}  # original reference -> name reported by the download command
        to_replace = []  # (start, end, replacement) in document order

        for m in IMG_REGEX.finditer(html):
            # Group 1 is never empty: the pattern requires at least one character
            target = m.group(1)
            try:
                if target in processed:
                    new_url = processed[target]
                else:
                    new_url = self.__download_image(target, base_url)
                    if not new_url:
                        # Image failed to download
                        continue
                    processed[target] = new_url

                # Images are stored next to the html file, so the bare name suffices
                to_replace.append((m.start(1), m.end(1), os.path.basename(new_url)))
            except Exception as e:
                logging.error("Failed to download image: %s, skipping...", target, exc_info=e)

        # The matches are ordered and do not overlap, so the result can be assembled
        # in a single pass instead of copying the whole document per replacement.
        parts = []
        position = 0
        for (start, end, path) in to_replace:
            parts.append(html[position:start])
            parts.append(path)
            position = end
        parts.append(html[position:])

        with open(target_filename, 'w', encoding=FORMAT) as file:
            file.write("".join(parts))

    def __download_image(self, img_src, base_url):
        """
        Download image from the specified `img_src` and `base_url`.
        If the image is available, it will be downloaded to the directory of `self.path`

        @param img_src: the reference as found in the html
        @param base_url: the url to resolve `img_src` against
        @return: the `filename` attribute of the executed command
        """
        logging.info("Downloading image: %s", img_src)

        # Split before joining: only the reference itself tells whether a host was given
        parsed = urlsplit(img_src)
        img_src = parser.urljoin(base_url, img_src)

        if parsed.hostname is None or parsed.hostname == self.cmd.host:
            port = self.cmd.port
        elif parsed.port is not None:
            # `port` copes with userinfo and IPv6 literals and yields an int, like
            # the default below. It raises ValueError for an invalid port, which the
            # caller logs before skipping the image.
            port = parsed.port
        else:
            port = 80

        command = GetCommand(img_src, port, os.path.dirname(self.path))
        # NOTE(review): the argument presumably marks this as a sub request - confirm
        command.execute(True)
        return command.filename