Compare commits

...

2 Commits

Author SHA1 Message Date
7476870acc client: fix relative paths 2021-03-25 18:26:50 +01:00
f15ff38f69 client: fix image url parsing 2021-03-25 17:56:21 +01:00
8 changed files with 32 additions and 62 deletions

View File

@@ -15,7 +15,7 @@ def main():
arguments = parser.parse_args() arguments = parser.parse_args()
logging.basicConfig(level=logging.ERROR - (10 * arguments.verbose)) logging.basicConfig(level=logging.ERROR - (10 * arguments.verbose), format="[%(levelname)s] %(message)s")
logging.debug("Arguments: %s", arguments) logging.debug("Arguments: %s", arguments)
command = cmd.create(arguments.command, arguments.URI, arguments.port) command = cmd.create(arguments.command, arguments.URI, arguments.port)

View File

@@ -148,9 +148,9 @@ class GetCommand(AbstractCommand):
(version, status, msg) = parser.parse_status_line(next(lines)) (version, status, msg) = parser.parse_status_line(next(lines))
headers = parser.parse_headers(lines) headers = parser.parse_headers(lines)
logging.debug("---response begin---\r\n%s--- response end---", "".join(retriever.buffer)) logging.debug("---response begin---\r\n%s---response end---", "".join(retriever.buffer))
return Message(version, status, msg, headers) return Message(version, status, msg, headers, retriever.buffer)
def _await_response(self, client, retriever): def _await_response(self, client, retriever):
msg = self._get_preamble(retriever) msg = self._get_preamble(retriever)

View File

@@ -88,8 +88,7 @@ class BasicResponseHandler(ResponseHandler):
if self.msg.status == 101: if self.msg.status == 101:
# Switching protocols is not supported # Switching protocols is not supported
print(f"{self.msg.version} {self.msg.status} {self.msg.msg}") print("".join(self.msg.raw), end="")
print(self.msg.headers)
return return
if 200 <= self.msg.status < 300: if 200 <= self.msg.status < 300:
@@ -100,8 +99,7 @@ class BasicResponseHandler(ResponseHandler):
return self._do_handle_redirect() return self._do_handle_redirect()
if 400 <= self.msg.status < 500: if 400 <= self.msg.status < 500:
# Dump headers and exit with error # Dump headers and exit with error
print(f"{self.msg.version} {self.msg.status} {self.msg.msg}") print("".join(self.msg.raw), end="")
print(self.msg.headers)
return None return None
def _do_handle_redirect(self): def _do_handle_redirect(self):
@@ -216,15 +214,14 @@ class HTMLDownloadHandler(DownloadHandler):
def _download_images(self, tmp_filename, target_filename): def _download_images(self, tmp_filename, target_filename):
(host, path) = ResponseHandler.parse_uri(self.cmd.uri)
with open(tmp_filename, "rb") as fp: with open(tmp_filename, "rb") as fp:
soup = BeautifulSoup(fp, 'lxml') soup = BeautifulSoup(fp, 'lxml')
base_url = self.cmd.uri base_url = parser.base_url(self.cmd.uri)
base_element = soup.find("base") base_element = soup.find("base")
if base_element: if base_element:
base_url = base_element["href"] base_url = f"http://{self.cmd.host}" + base_element["href"]
processed = {} processed = {}
tag: Tag tag: Tag
@@ -236,7 +233,7 @@ class HTMLDownloadHandler(DownloadHandler):
if tag["src"] in processed: if tag["src"] in processed:
new_url = processed.get(tag["src"]) new_url = processed.get(tag["src"])
else: else:
new_url = self.__download_image(tag["src"], host, base_url) new_url = self.__download_image(tag["src"], base_url)
processed[tag["src"]] = new_url processed[tag["src"]] = new_url
if new_url: if new_url:
tag["src"] = os.path.basename(new_url) tag["src"] = os.path.basename(new_url)
@@ -246,8 +243,8 @@ class HTMLDownloadHandler(DownloadHandler):
with open(target_filename, 'w') as file: with open(target_filename, 'w') as file:
file.write(str(soup)) file.write(str(soup))
def __download_image(self, img_src, host, base_url): def __download_image(self, img_src, base_url):
logging.debug("Downloading image: %s", img_src) logging.info("Downloading image: %s", img_src)
parsed = urlsplit(img_src) parsed = urlsplit(img_src)
@@ -257,11 +254,11 @@ class HTMLDownloadHandler(DownloadHandler):
if parsed.hostname is None: if parsed.hostname is None:
if img_src[0] == "/": if img_src[0] == "/":
img_src = host + img_src img_src = f"http://{self.cmd.host}{img_src}"
else: else:
img_src = os.path.join(os.path.dirname(base_url), img_src) img_src = parser.absolute_url(base_url, img_src)
if parsed.hostname is None or parsed.hostname == host: if parsed.hostname is None or parsed.hostname == self.cmd.host:
port = self.cmd.port port = self.cmd.port
elif ":" in parsed.netloc: elif ":" in parsed.netloc:
port = parsed.netloc.split(":", 1)[1] port = parsed.netloc.split(":", 1)[1]

View File

@@ -6,11 +6,13 @@ class Message:
status: int status: int
msg: str msg: str
headers: Dict[str, str] headers: Dict[str, str]
raw: str
body: bytes body: bytes
def __init__(self, version: str, status: int, msg: str, headers: Dict[str, str], body: bytes = None): def __init__(self, version: str, status: int, msg: str, headers: Dict[str, str], raw=None, body: bytes = None):
self.version = version self.version = version
self.status = status self.status = status
self.msg = msg self.msg = msg
self.headers = headers self.headers = headers
self.raw = raw
self.body = body self.body = body

View File

@@ -1,4 +1,5 @@
import logging import logging
import os.path
import re import re
from urllib.parse import urlparse, urlsplit from urllib.parse import urlparse, urlsplit
@@ -182,7 +183,6 @@ def parse_headers(lines):
headers = [] headers = []
# first header after the status-line may not contain a space # first header after the status-line may not contain a space
for line in lines: for line in lines:
line = next(lines)
if line[0].isspace(): if line[0].isspace():
continue continue
else: else:
@@ -245,3 +245,15 @@ def parse_uri(uri: str):
port = 80 port = 80
return host, port, path return host, port, path
def base_url(uri: str):
    """Return the "directory" URL of *uri*, always terminated by a slash.

    The last path segment (everything after the final "/") is dropped.
    Only the scheme, hostname and path of *uri* are used, so a port,
    query string or fragment present in *uri* does not appear in the result.
    """
    parts = urlsplit(uri)
    full_path = parts.path
    # Cut at the last slash; a path without any slash is kept as-is.
    cut = full_path.rfind("/")
    directory = full_path if cut < 0 else full_path[:cut]
    return "{}://{}{}/".format(parts.scheme, parts.hostname, directory)
def absolute_url(uri: str, rel_path: str):
    """Resolve *rel_path* against the path of *uri* and return an absolute URL.

    The path of *uri* is treated as a directory: *rel_path* is appended to it
    and the result is normalised (``.`` and ``..`` segments are collapsed).
    Only the scheme and hostname of *uri* are carried over into the result.

    :param uri: absolute URL that serves as the base
    :param rel_path: path relative to the base
    :return: the absolute URL as a string
    """
    # URL paths always use "/" separators, whatever the host OS is, so the
    # POSIX flavour is used explicitly instead of the platform's os.path.
    import posixpath

    parsed = urlsplit(uri)
    # A base without any path (e.g. "http://host") must still be anchored at
    # the root, otherwise the hostname and the relative path are glued together.
    base_path = parsed.path or "/"
    path = posixpath.normpath(posixpath.join(base_path, rel_path))
    return f"{parsed.scheme}://{parsed.hostname}{path}"

View File

@@ -57,7 +57,7 @@ class PreambleRetriever(Retriever):
while True: while True:
self.buffer.append(line) self.buffer.append(line)
if line in ("\r\n", "\n", " "): if line in ("\r\n", "\n", ""):
break break
yield line yield line

View File

@@ -47,44 +47,3 @@ except Exception as e:
print("[ABRT] Internal error: " + str(e), file=sys.stderr) print("[ABRT] Internal error: " + str(e), file=sys.stderr)
logging.debug("Internal error", exc_info=e) logging.debug("Internal error", exc_info=e)
sys.exit(70) sys.exit(70)
# import socket
#
# # Get hostname and address
# hostname = socket.gethostname()
# address = socket.gethostbyname(hostname)
#
# # socket heeft een listening and accept method
#
# SERVER = "127.0.0.1" # dynamisch fixen in project
# PORT = 5055
# server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
#
# ADDR = (SERVER, PORT) # hier wordt de socket gebonden aan mijn IP adres, dit moet wel anders
# server.bind(ADDR) # in het project gebeuren
#
# HEADER = 64 # maximum size messages
# FORMAT = 'utf-8'
# DISCONNECT_MESSAGE = "DISCONNECT!" # special message for disconnecting client and server
#
#
# # function for starting server
# def start():
# pass
# server.listen()
# while True: # infinite loop in which server accept incoming connections, we want to run it forever
# conn, addr = server.accept() # Server blocks until a client connects
# print("new connection: ", addr[0], " connected.")
# connected = True
# while connected: # while client is connected, we want to receive messages
# msg = conn.recv(HEADER).decode(
# FORMAT).rstrip() # Argument is maximum size of msg (in project look into details of accp), decode is for converting bytes to strings, rstrip is for stripping messages for special hidden characters
# print("message: ", msg)
# if msg == DISCONNECT_MESSAGE:
# connected = False
# print("close connection ", addr[0], " disconnected.")
# conn.close()
#
#
# print("server is starting ... ")
# start()

View File

@@ -77,7 +77,7 @@ class GetCommand(AbstractCommand):
logging.debug("---response begin---\r\n%s--- response end---", "".join(retriever.buffer)) logging.debug("---response begin---\r\n%s--- response end---", "".join(retriever.buffer))
return Message(version, status, msg, headers) return Message(version, status, msg, headers, retriever.buffer)
def _await_response(self, client, retriever): def _await_response(self, client, retriever):
msg = self._get_preamble(retriever) msg = self._get_preamble(retriever)