Rename response_handler to responsehandler
This commit is contained in:
293
client/responsehandler.py
Normal file
293
client/responsehandler.py
Normal file
@@ -0,0 +1,293 @@
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from abc import ABC, abstractmethod
|
||||
from urllib.parse import urlsplit, unquote
|
||||
|
||||
from client.command import AbstractCommand, GetCommand
|
||||
from client.httpclient import HTTPClient
|
||||
from httplib import parser
|
||||
from httplib.exceptions import InvalidResponse
|
||||
from httplib.httpsocket import FORMAT
|
||||
from httplib.message import ResponseMessage as Message
|
||||
from httplib.retriever import Retriever
|
||||
|
||||
# Matches an HTML <base ...> tag (case-insensitive); group 1 captures the quoted `href` value.
BASE_REGEX = re.compile(r"<\s*base[^>]*\shref\s*=\s*['\"]([^\"']+)['\"][^>]*>", re.M | re.I)
|
||||
# Matches an HTML <img ...> tag (case-insensitive); group 1 captures the quoted `src` value.
IMG_REGEX = re.compile(r"<\s*img[^>]*\ssrc\s*=\s*['\"]([^\"']+)['\"][^>]*>", re.M | re.I)
|
||||
|
||||
|
||||
def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory=None):
    """
    Process a response: handle its status first, then download the body if one is available.

    The status is handled by `BasicResponseHandler`. When that handler returns no
    retriever, nothing is downloaded and `None` is returned. Otherwise the body is
    saved by the download handler selected by `DownloadHandler.create`.

    @param client: the client the response was received on
    @param msg: the response message
    @param command: the command which issued the request
    @param directory: the directory to download to, passed on to the download handler
    @return: the result of the download handler's `handle()`, or `None` when no body is downloaded
    """
    retriever = BasicResponseHandler(client, msg, command).handle()
    if retriever is None:
        return None

    # The HTML-vs-raw decision lives in DownloadHandler.create; reuse it instead of
    # duplicating the content-type check here.
    return DownloadHandler.create(retriever, client, msg, command, directory).handle()
|
||||
|
||||
|
||||
class ResponseHandler(ABC):
    """
    Abstract base class of the response handlers.

    Holds the retriever, client, message and command a handler works with;
    concrete handlers implement `handle`.
    """
    client: HTTPClient
    retriever: Retriever
    msg: Message
    cmd: AbstractCommand

    def __init__(self, retriever: Retriever, client: HTTPClient, msg, cmd):
        """
        Store the objects this handler operates on.

        @param retriever: the retriever used to read the response body
        @param client: the client the response was received on
        @param msg: the response message
        @param cmd: the command which issued the request
        """
        self.client, self.retriever = client, retriever
        self.msg, self.cmd = msg, cmd

    @abstractmethod
    def handle(self):
        """Process the response; every concrete handler provides its own implementation."""
|
||||
|
||||
|
||||
class BasicResponseHandler(ResponseHandler):
    """
    Response handler which throws away the body and only shows the headers.
    In case of a redirect, it will process it and pass it to the appropriate response handler.
    """

    def __init__(self, client: HTTPClient, msg: Message, cmd: AbstractCommand):
        """
        Create the handler together with a retriever built from the response headers.

        @param client: the client the response was received on
        @param msg: the response message
        @param cmd: the command which issued the request
        """
        retriever = Retriever.create(client, msg.headers)
        super().__init__(retriever, client, msg, cmd)

    def handle(self):
        """
        Handle the response according to its status code.

        @return: the retriever for the body when the status is 2xx, otherwise `None`
        """
        return self._handle_status()

    def _skip_body(self):
        """Read the body to the end and discard it, logging its content at debug level."""
        logging.debug("Skipping body: [")
        for line in self.retriever.retrieve():
            try:
                logging.debug("%s", line.decode(FORMAT))
            except Exception:
                # Best effort: this is only debug output, so fall back to the repr
                # of whatever could not be decoded instead of failing the request.
                logging.debug("%r", line)

        logging.debug("] done.")

    def _print_raw_message(self):
        """Print the content of `msg.raw` to stdout without a trailing newline."""
        print("".join(self.msg.raw), end="")

    def _handle_status(self):
        """
        Dispatch on the status code of the response.

        @return: the retriever when the status is 2xx; `None` in every other case
        @raise InvalidResponse: when a redirect has a missing or unsupported location
        """
        status = self.msg.status
        logging.info("%d %s", status, self.msg.msg)

        if status == 101:
            # Switching protocols is not supported
            self._print_raw_message()
            return None

        if 200 <= status < 300:
            return self.retriever

        if status == 304:
            # A 304 response never carries a body, even when it has a Content-Length
            # header, so trying to skip the body could wait for data that never comes.
            return self._handle_redirect()

        if 300 <= status < 400:
            # Redirect
            self._skip_body()
            return self._handle_redirect()

        if 400 <= status < 600:
            self._skip_body()
            # Dump the raw message, except for sub requests
            if not self.cmd.sub_request:
                self._print_raw_message()
            return None

        return None

    def _handle_redirect(self):
        """
        Follow a redirect by pointing the command at the new location and executing it again.

        A 304 is not followed; its raw message is printed instead.

        @return: always `None`
        @raise InvalidResponse: when the location header is missing or blank, has no
            hostname, or does not use the http scheme
        """
        if self.msg.status == 304:
            self._print_raw_message()
            return None

        location = self.msg.headers.get("location")
        if not location or not location.strip():
            raise InvalidResponse("No location in redirect")

        # The location may be relative; resolve it against the URI of the request.
        location = parser.urljoin(self.cmd.uri, location)
        parsed_location = urlsplit(location)
        if not parsed_location.hostname:
            raise InvalidResponse("Invalid location")

        if parsed_location.scheme != "http":
            raise InvalidResponse("Only http is supported")

        self.cmd.uri = location

        if self.msg.status == 301:
            logging.info("Status 301. Closing socket [%s]", self.cmd.host)
            self.client.close()

        self.cmd.execute()

        return None
|
||||
|
||||
|
||||
class DownloadHandler(ResponseHandler, ABC):
    """
    Base class of the handlers which save the response body to a file.

    The target file path is computed on construction and stored in `self.path`.
    """

    def __init__(self, retriever: Retriever, client: HTTPClient, msg, cmd, directory=None):
        """
        Determine the path the body will be saved to.

        @param retriever: the retriever used to read the response body
        @param client: the client the response was received on
        @param msg: the response message
        @param cmd: the command which issued the request
        @param directory: the directory to download to; when falsy, a new directory
            named after the client's host is created in the current working directory
        """
        super().__init__(retriever, client, msg, cmd)

        if not directory:
            directory = self._create_directory()

        self.path = self._get_duplicate_name(os.path.join(directory, self.get_filename()))

    @staticmethod
    def create(retriever: Retriever, client: HTTPClient, msg, cmd, directory=None):
        """
        Create the download handler matching the content type of the response.

        @return: an `HTMLDownloadHandler` when the content-type header contains
            "text/html", otherwise a `RawDownloadHandler`
        """
        content_type = msg.headers.get("content-type")
        if content_type and "text/html" in content_type:
            return HTMLDownloadHandler(retriever, client, msg, cmd, directory)
        return RawDownloadHandler(retriever, client, msg, cmd, directory)

    def _create_directory(self):
        """
        Create a new directory named after the client's host.

        @return: the absolute path of the created directory
        """
        path = self._get_duplicate_name(os.path.abspath(self.client.host))
        os.mkdir(path)
        return path

    def _get_duplicate_name(self, path):
        """
        Return `path` when nothing exists there yet, otherwise the first of
        `path.1`, `path.2`, ... which does not exist.
        """
        tmp_path = path
        i = 0
        while os.path.exists(tmp_path):
            i += 1
            tmp_path = "{path}.{counter}".format(path=path, counter=i)

        return tmp_path

    def get_filename(self):
        """
        Returns the filename to download the payload to.

        The name is the last component of the command's path, percent-decoded and
        stripped of every character other than word characters, `.`, `+` and `-`.
        "index.html" is returned when no usable name remains.
        """
        filename = os.path.basename(self.cmd.path)
        if not filename:
            return "index.html"

        # Decode repeatedly to undo nested percent-encoding. Stop as soon as decoding
        # no longer changes the name: a literal "%" can survive decoding (e.g. "a%25b"
        # becomes "a%b"), so looping while a "%" is present would never terminate.
        while True:
            decoded = unquote(filename)
            if decoded == filename:
                break
            filename = decoded

        filename = re.sub(r"[^\w.+-]+[.]*", '', filename)
        result = os.path.basename(filename).strip()
        if any(letter.isalnum() for letter in result):
            return result

        return "index.html"
|
||||
|
||||
|
||||
class RawDownloadHandler(DownloadHandler):
    """Download handler which writes the response body to `self.path` unmodified."""

    def __init__(self, retriever: Retriever, client: HTTPClient, msg: Message, cmd: AbstractCommand, directory=None):
        super().__init__(retriever, client, msg, cmd, directory)

    def handle(self) -> str:
        """
        Write every buffer produced by the retriever to the target file.

        @return: the path of the written file
        """
        logging.debug("Retrieving payload")

        # The context manager closes the file even when retrieving or writing fails.
        with open(self.path, "wb") as file:
            for buffer in self.retriever.retrieve():
                file.write(buffer)

        return self.path
|
||||
|
||||
|
||||
class HTMLDownloadHandler(DownloadHandler):
    """
    Download handler for html documents.

    Saves the document, downloads the images it references and rewrites the
    `src` of those images to the downloaded files.
    """

    def __init__(self, retriever: Retriever, client: HTTPClient, msg: Message, cmd: AbstractCommand, directory=None):
        super().__init__(retriever, client, msg, cmd, directory)

    def handle(self) -> str:
        """
        Download the html to a temporary file, then write the version with rewritten
        image references to `self.path`.

        @return: the path of the final html file
        """
        directory, filename = os.path.split(self.path)
        tmp_path = os.path.join(directory, f".{filename}.tmp")

        try:
            with open(tmp_path, "wb") as tmp_file:
                for buffer in self.retriever.retrieve():
                    tmp_file.write(buffer)

            charset = parser.get_charset(self.msg.headers)
            self._download_images(tmp_path, self.path, charset)
        finally:
            # Never leave the temporary file behind, also not when processing failed.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

        return self.path

    def _download_images(self, tmp_filename, target_filename, charset=FORMAT):
        """
        Downloads images referenced in the html of `tmp_filename` and replaces the references in the html
        and writes it to `target_filename`.

        Images which fail to download are logged and keep their original reference.

        @param tmp_filename: the path to the temporary html file
        @param target_filename: the path for the final html file
        @param charset: the charset to decode `tmp_filename`
        """
        try:
            with open(tmp_filename, "r", encoding=charset) as fp:
                html = fp.read()
        except (UnicodeDecodeError, LookupError):
            # The given charset is unknown or does not match the content: fall back
            # to the default format and replace whatever cannot be decoded.
            with open(tmp_filename, "r", encoding=FORMAT, errors="replace") as fp:
                html = fp.read()

        # A <base href> in the document overrides the url relative references resolve against.
        base_url = self.cmd.uri
        base_element = BASE_REGEX.search(html)
        if base_element:
            base_url = parser.urljoin(self.cmd.uri, base_element.group(1))

        processed = {}  # image reference -> result of its successful download
        to_replace = []  # (start, end, replacement) for each reference, in document order

        for m in IMG_REGEX.finditer(html):
            # The pattern requires at least one character, so `target` is never empty.
            target = m.group(1)

            try:
                if target in processed:
                    new_url = processed[target]
                else:
                    new_url = self.__download_image(target, base_url)
                    if not new_url:
                        # Image failed to download
                        continue

                    processed[target] = new_url

                to_replace.append((m.start(1), m.end(1), os.path.basename(new_url)))

            except Exception as e:
                # One broken image must not abort the whole page.
                logging.error("Failed to download image: %s, skipping...", target, exc_info=e)

        # The matches are ordered and do not overlap, so the document can be rebuilt
        # in a single pass instead of copying the whole string for every replacement.
        pieces = []
        cursor = 0
        for start, end, local_path in to_replace:
            pieces.append(html[cursor:start])
            pieces.append(local_path)
            cursor = end
        pieces.append(html[cursor:])

        with open(target_filename, 'w', encoding=FORMAT) as file:
            file.write("".join(pieces))

    def __download_image(self, img_src, base_url):
        """
        Download image from the specified `img_src` and `base_url`.
        If the image is available, it will be downloaded to the directory of `self.path`

        @param img_src: the image reference as found in the html
        @param base_url: the url to resolve `img_src` against
        @return: the `filename` attribute of the executed command
        """
        logging.info("Downloading image: %s", img_src)

        # The port is derived from the reference as written, before it is resolved.
        parsed = urlsplit(img_src)
        img_src = parser.urljoin(base_url, img_src)

        if parsed.hostname is None or parsed.hostname == self.cmd.host:
            port = self.cmd.port
        else:
            # `port` is the explicit port as an int, or None when absent. Unlike
            # splitting the netloc on ":", it copes with userinfo and IPv6 hosts.
            port = parsed.port or 80

        command = GetCommand(img_src, port, os.path.dirname(self.path))
        command.execute(True)

        return command.filename
|
Reference in New Issue
Block a user