From 76393837823d4b76daced3c2e275d51574935323 Mon Sep 17 00:00:00 2001 From: Arthur Bols Date: Wed, 24 Mar 2021 17:20:40 +0100 Subject: [PATCH] "Fix img url parsing" --- client/command.py | 3 +-- client/response_handler.py | 16 +++++++++------- httplib/parser.py | 7 ++++++- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/client/command.py b/client/command.py index 817ad1b..7eae322 100644 --- a/client/command.py +++ b/client/command.py @@ -164,12 +164,11 @@ class GetCommand(AbstractCommand): return Message(version, status, msg, headers) - def _await_response(self, client, retriever) -> str: + def _await_response(self, client, retriever): msg = self._get_preamble(retriever) from client import response_handler self.filename = response_handler.handle(client, msg, self, self.dir) - return class PostCommand(AbstractWithBodyCommand): diff --git a/client/response_handler.py b/client/response_handler.py index 43e3839..85acc10 100644 --- a/client/response_handler.py +++ b/client/response_handler.py @@ -247,25 +247,27 @@ class HTMLDownloadHandler(DownloadHandler): file.write(str(soup)) def __download_image(self, img_src, host, base_url): - parsed = urlsplit(img_src) - logging.debug("Downloading image: %s", img_src) + parsed = urlsplit(img_src) + if parsed.scheme not in ("", "http", "https"): # Not a valid url return None - if parsed.hostname == host: + if parsed.hostname is None: + if img_src[0] == "/": + img_src = host + img_src + else: + img_src = os.path.join(os.path.dirname(base_url), img_src) + + if parsed.hostname is None or parsed.hostname == host: port = self.cmd.port elif ":" in parsed.netloc: port = parsed.netloc.split(":", 1)[1] else: port = 80 - if len(parsed.netloc) == 0 and parsed.path != "/": - # relative url, append base_url - img_src = os.path.join(os.path.dirname(base_url), parsed.path) - command = GetCommand(img_src, port, os.path.dirname(self.path)) command.execute(True) diff --git a/httplib/parser.py b/httplib/parser.py index 91c621c..0404421 100644 --- a/httplib/parser.py +++ b/httplib/parser.py @@ -177,6 +177,7 @@ def get_headers(client: HTTPSocket): return result + def parse_headers(lines): headers = [] # first header after the status-line may not contain a space @@ -210,6 +211,7 @@ def parse_headers(lines): return result + def check_next_header(headers, next_header: str, next_value: str): if next_header == "content-length": if "content-length" in headers: @@ -229,8 +231,11 @@ def parse_uri(uri: str): path = parsed.path if parsed.query != '': path = f"{path}?{parsed.query}" - else: + elif "/" in uri: (host, path) = uri.split("/", 1) + else: + host = uri + path = "/" if ":" in host: host, port = host.split(":", 1)