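Fix issues: rename get_uri to uri_from_url (now handles a missing scheme and an empty path), remove the old parse_uri command method, log download-failure tracebacks at debug level, and tidy log messages and docstrings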

This commit is contained in:
2021-03-28 19:53:14 +02:00
parent b7315c2348
commit 8eae777265
5 changed files with 22 additions and 32 deletions

View File

@@ -16,6 +16,7 @@ def main():
arguments = parser.parse_args() arguments = parser.parse_args()
# Setup logging
logging.basicConfig(level=logging.INFO - (10 * arguments.verbose), format="[%(levelname)s] %(message)s") logging.basicConfig(level=logging.INFO - (10 * arguments.verbose), format="[%(levelname)s] %(message)s")
logging.debug("Arguments: %s", arguments) logging.debug("Arguments: %s", arguments)

View File

@@ -1,7 +1,6 @@
import logging import logging
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Dict from typing import Dict
from urllib.parse import urlparse
from client.httpclient import HTTPClient from client.httpclient import HTTPClient
from httplib import parser from httplib import parser
@@ -21,7 +20,7 @@ def create(method: str, url: str, port):
@param port: The port for the command @param port: The port for the command
""" """
uri = parser.get_uri(url) uri = parser.uri_from_url(url)
if method == "GET": if method == "GET":
return GetCommand(uri, port) return GetCommand(uri, port)
elif method == "HEAD": elif method == "HEAD":
@@ -125,8 +124,6 @@ class AbstractCommand(ABC):
if not sub_request: if not sub_request:
client.close() client.close()
def _get_preamble(self, client): def _get_preamble(self, client):
""" """
Returns the preamble (start-line and headers) of the response of this command. Returns the preamble (start-line and headers) of the response of this command.
@@ -157,28 +154,6 @@ class AbstractCommand(ABC):
def _build_message(self, message: str) -> bytes: def _build_message(self, message: str) -> bytes:
return (message + "\r\n").encode(FORMAT) return (message + "\r\n").encode(FORMAT)
def parse_uri(self):
    """
    Parses the URI and returns the hostname and path.

    A URI without a scheme (e.g. `example.com/index.html`) has no network
    location when parsed as-is, so it is parsed again with `http://` prepended.

    @return: A tuple of the hostname and path. The hostname carries neither
        userinfo nor port (a bracketed IPv6 literal keeps its brackets); the
        path always starts with `/`.
    """
    parsed = urlparse(self.uri)

    # Without a scheme, urlparse puts everything in the path (or mistakes the
    # host for a scheme), so prepend `http://` and try again.
    if parsed.netloc == "":
        parsed = urlparse("http://" + self.uri)

    # Drop any `user:password@` prefix; the host is what follows the last `@`.
    host = parsed.netloc.rpartition("@")[2]

    if host.startswith("["):
        # Bracketed IPv6 literal: the colons inside the brackets belong to the
        # address, so cut after the closing bracket instead of at the first colon.
        closing = host.find("]")
        if closing >= 0:
            host = host[:closing + 1]
    else:
        # Strip the port, if one is present.
        host = host.partition(":")[0]

    path = parsed.path
    if not path.startswith("/"):
        path = "/" + path

    return host, path
class HeadCommand(AbstractCommand): class HeadCommand(AbstractCommand):
""" """

View File

@@ -269,7 +269,9 @@ class HTMLDownloadHandler(DownloadHandler):
to_replace.append((url_start, url_end, local_path)) to_replace.append((url_start, url_end, local_path))
except Exception as e: except Exception as e:
logging.error("Failed to download image: %s, skipping...", target, exc_info=e) logging.error("Failed to download image: %s, skipping...", target)
logging.debug("", exc_info=e)
processed[target] = None
# reverse the list so urls at the bottom of the html file are processed first. # reverse the list so urls at the bottom of the html file are processed first.
# Otherwise, our start and end positions won't be correct. # Otherwise, our start and end positions won't be correct.
@@ -277,7 +279,7 @@ class HTMLDownloadHandler(DownloadHandler):
for (start, end, path) in to_replace: for (start, end, path) in to_replace:
html = html[:start] + path + html[end:] html = html[:start] + path + html[end:]
logging.info("Saving to HTML '%s'", parser.get_relative_save_path(target_path)) logging.info("Saving HTML to '%s'", parser.get_relative_save_path(target_path))
with open(target_path, 'w', encoding=FORMAT) as file: with open(target_path, 'w', encoding=FORMAT) as file:
file.write(html) file.write(html)

View File

@@ -82,11 +82,15 @@ class HTTPSocket:
class HTTPException(Exception): class HTTPException(Exception):
""" Base class for HTTP exceptions """ """
Base class for HTTP exceptions
"""
class InvalidResponse(HTTPException): class InvalidResponse(HTTPException):
""" Response message cannot be parsed """ """
Response message cannot be parsed
"""
def __init(self, message): def __init(self, message):
self.message = message self.message = message

View File

@@ -174,13 +174,21 @@ def parse_uri(uri: str):
return host, port, path return host, port, path
def get_uri(url: str): def uri_from_url(url: str):
""" """
Returns a valid URI of the specified URL. Returns a valid URI of the specified URL.
""" """
parsed = urlsplit(url) parsed = urlsplit(url)
result = f"http://{parsed.netloc}{parsed.path}" if parsed.hostname is None:
url = f"http://{url}"
parsed = urlsplit(url)
path = parsed.path
if path == "":
path = "/"
result = f"http://{parsed.netloc}{path}"
if parsed.query != "": if parsed.query != "":
result = f"{result}?{parsed.query}" result = f"{result}?{parsed.query}"