Fix issues

This commit is contained in:
2021-03-28 19:53:14 +02:00
parent b7315c2348
commit 8eae777265
5 changed files with 22 additions and 32 deletions

View File

@@ -16,6 +16,7 @@ def main():
arguments = parser.parse_args()
# Setup logging
logging.basicConfig(level=logging.INFO - (10 * arguments.verbose), format="[%(levelname)s] %(message)s")
logging.debug("Arguments: %s", arguments)

View File

@@ -1,7 +1,6 @@
import logging
from abc import ABC, abstractmethod
from typing import Dict
from urllib.parse import urlparse
from client.httpclient import HTTPClient
from httplib import parser
@@ -21,7 +20,7 @@ def create(method: str, url: str, port):
@param port: The port for the command
"""
uri = parser.get_uri(url)
uri = parser.uri_from_url(url)
if method == "GET":
return GetCommand(uri, port)
elif method == "HEAD":
@@ -125,8 +124,6 @@ class AbstractCommand(ABC):
if not sub_request:
client.close()
def _get_preamble(self, client):
"""
Returns the preamble (start-line and headers) of the response of this command.
@@ -157,28 +154,6 @@ class AbstractCommand(ABC):
def _build_message(self, message: str) -> bytes:
    """
    Appends the CRLF line terminator to the message and encodes the result with FORMAT.

    @param message: The text to terminate and encode
    @return: The encoded bytes of the terminated message
    """
    terminated = "".join((message, "\r\n"))
    return bytes(terminated, FORMAT)
def parse_uri(self):
    """
    Parses the URI and returns the hostname and path.

    The port and any `user:password@` prefix are stripped from the hostname. A bracketed
    IPv6 literal is returned intact, brackets included. The path always starts with `/`.

    @return: A tuple of the hostname and path.
    """
    parsed = urlparse(self.uri)

    # Without a scheme urlparse leaves netloc empty, so prepend `http://` and try again
    if parsed.netloc == "":
        parsed = urlparse("http://" + self.uri)

    # Drop optional userinfo first: its `:` must not be mistaken for the port separator
    host = parsed.netloc.rpartition("@")[2]
    if host.startswith("["):
        # IPv6 literal: the colons inside the brackets are part of the address
        closing = host.find("]")
        if closing >= 0:
            host = host[:closing + 1]
    else:
        # Everything after the first colon is the port
        host = host.partition(":")[0]

    path = parsed.path
    if not path.startswith("/"):
        path = "/" + path

    return host, path
class HeadCommand(AbstractCommand):
"""

View File

@@ -269,7 +269,9 @@ class HTMLDownloadHandler(DownloadHandler):
to_replace.append((url_start, url_end, local_path))
except Exception as e:
logging.error("Failed to download image: %s, skipping...", target, exc_info=e)
logging.error("Failed to download image: %s, skipping...", target)
logging.debug("", exc_info=e)
processed[target] = None
# reverse the list so urls at the bottom of the html file are processed first.
# Otherwise, our start and end positions won't be correct.
@@ -277,7 +279,7 @@ class HTMLDownloadHandler(DownloadHandler):
for (start, end, path) in to_replace:
html = html[:start] + path + html[end:]
logging.info("Saving to HTML '%s'", parser.get_relative_save_path(target_path))
logging.info("Saving HTML to '%s'", parser.get_relative_save_path(target_path))
with open(target_path, 'w', encoding=FORMAT) as file:
file.write(html)

View File

@@ -82,11 +82,15 @@ class HTTPSocket:
class HTTPException(Exception):
    """
    Base class for HTTP exceptions
    """


class InvalidResponse(HTTPException):
    """
    Response message cannot be parsed
    """

    def __init__(self, message=""):
        """
        Creates the exception and stores the message describing the parse failure.

        The method was previously spelled `__init`, which Python name-mangles and never
        calls as a constructor, so `self.message` was never set. `message` defaults to an
        empty string so that raising the exception without arguments keeps working.

        @param message: Description of why the response could not be parsed
        """
        super().__init__(message)
        self.message = message

View File

@@ -174,13 +174,21 @@ def parse_uri(uri: str):
return host, port, path
def get_uri(url: str):
def uri_from_url(url: str):
"""
Returns a valid URI of the specified URL.
"""
parsed = urlsplit(url)
result = f"http://{parsed.netloc}{parsed.path}"
if parsed.hostname is None:
url = f"http://{url}"
parsed = urlsplit(url)
path = parsed.path
if path == "":
path = "/"
result = f"http://{parsed.netloc}{path}"
if parsed.query != "":
result = f"{result}?{parsed.query}"