From 8eae7772653daec53d6ad6ee98f0be9e8c9c5857 Mon Sep 17 00:00:00 2001 From: Arthur Bols Date: Sun, 28 Mar 2021 19:53:14 +0200 Subject: [PATCH] Fix issues --- client.py | 1 + client/command.py | 27 +-------------------------- client/responsehandler.py | 6 ++++-- httplib/httpsocket.py | 8 ++++++-- httplib/parser.py | 12 ++++++++++-- 5 files changed, 22 insertions(+), 32 deletions(-) diff --git a/client.py b/client.py index 9d19d7c..d0af628 100644 --- a/client.py +++ b/client.py @@ -16,6 +16,7 @@ def main(): arguments = parser.parse_args() + # Setup logging logging.basicConfig(level=logging.INFO - (10 * arguments.verbose), format="[%(levelname)s] %(message)s") logging.debug("Arguments: %s", arguments) diff --git a/client/command.py b/client/command.py index 3774f52..caf0ac8 100644 --- a/client/command.py +++ b/client/command.py @@ -1,7 +1,6 @@ import logging from abc import ABC, abstractmethod from typing import Dict -from urllib.parse import urlparse from client.httpclient import HTTPClient from httplib import parser @@ -21,7 +20,7 @@ def create(method: str, url: str, port): @param port: The port for the command """ - uri = parser.get_uri(url) + uri = parser.uri_from_url(url) if method == "GET": return GetCommand(uri, port) elif method == "HEAD": @@ -125,8 +124,6 @@ class AbstractCommand(ABC): if not sub_request: client.close() - - def _get_preamble(self, client): """ Returns the preamble (start-line and headers) of the response of this command. @@ -157,28 +154,6 @@ class AbstractCommand(ABC): def _build_message(self, message: str) -> bytes: return (message + "\r\n").encode(FORMAT) - def parse_uri(self): - """ - Parses the URI and returns the hostname and path. - @return: A tuple of the hostname and path. - """ - parsed = urlparse(self.uri) - - # If there is no netloc, the url is invalid, so prepend `//` and try again - if parsed.netloc == "": - parsed = urlparse("http://" + self.uri) - - host = parsed.netloc - path = parsed.path - if len(path) == 0 or path[0] != '/': - path = "/" + path - - port_pos = host.find(":") - if port_pos >= 0: - host = host[:port_pos] - - return host, path - class HeadCommand(AbstractCommand): """ diff --git a/client/responsehandler.py b/client/responsehandler.py index 841a1de..6d87ccb 100644 --- a/client/responsehandler.py +++ b/client/responsehandler.py @@ -269,7 +269,9 @@ class HTMLDownloadHandler(DownloadHandler): to_replace.append((url_start, url_end, local_path)) except Exception as e: - logging.error("Failed to download image: %s, skipping...", target, exc_info=e) + logging.error("Failed to download image: %s, skipping...", target) + logging.debug("", exc_info=e) + processed[target] = None # reverse the list so urls at the bottom of the html file are processed first. # Otherwise, our start and end positions won't be correct. @@ -277,7 +279,7 @@ class HTMLDownloadHandler(DownloadHandler): for (start, end, path) in to_replace: html = html[:start] + path + html[end:] - logging.info("Saving to HTML '%s'", parser.get_relative_save_path(target_path)) + logging.info("Saving HTML to '%s'", parser.get_relative_save_path(target_path)) with open(target_path, 'w', encoding=FORMAT) as file: file.write(html) diff --git a/httplib/httpsocket.py b/httplib/httpsocket.py index 25cd80f..ad6544a 100644 --- a/httplib/httpsocket.py +++ b/httplib/httpsocket.py @@ -82,11 +82,15 @@ class HTTPSocket: class HTTPException(Exception): - """ Base class for HTTP exceptions """ + """ + Base class for HTTP exceptions + """ class InvalidResponse(HTTPException): - """ Response message cannot be parsed """ + """ + Response message cannot be parsed + """ def __init(self, message): self.message = message diff --git a/httplib/parser.py b/httplib/parser.py index 781b7fd..12520ed 100644 --- a/httplib/parser.py +++ b/httplib/parser.py @@ -174,13 +174,21 @@ def parse_uri(uri: str): return host, port, path -def get_uri(url: str): +def uri_from_url(url: str): """ Returns a valid URI of the specified URL. """ parsed = urlsplit(url) - result = f"http://{parsed.netloc}{parsed.path}" + if parsed.hostname is None: + url = f"http://{url}" + parsed = urlsplit(url) + + path = parsed.path + if path == "": + path = "/" + + result = f"http://{parsed.netloc}{path}" if parsed.query != "": result = f"{result}?{parsed.query}"