Compare commits
2 Commits
b37aa33131
...
7476870acc
Author | SHA1 | Date | |
---|---|---|---|
7476870acc | |||
f15ff38f69 |
@@ -15,7 +15,7 @@ def main():
|
|||||||
|
|
||||||
arguments = parser.parse_args()
|
arguments = parser.parse_args()
|
||||||
|
|
||||||
logging.basicConfig(level=logging.ERROR - (10 * arguments.verbose))
|
logging.basicConfig(level=logging.ERROR - (10 * arguments.verbose), format="[%(levelname)s] %(message)s")
|
||||||
logging.debug("Arguments: %s", arguments)
|
logging.debug("Arguments: %s", arguments)
|
||||||
|
|
||||||
command = cmd.create(arguments.command, arguments.URI, arguments.port)
|
command = cmd.create(arguments.command, arguments.URI, arguments.port)
|
||||||
|
@@ -150,7 +150,7 @@ class GetCommand(AbstractCommand):
|
|||||||
|
|
||||||
logging.debug("---response begin---\r\n%s---response end---", "".join(retriever.buffer))
|
logging.debug("---response begin---\r\n%s---response end---", "".join(retriever.buffer))
|
||||||
|
|
||||||
return Message(version, status, msg, headers)
|
return Message(version, status, msg, headers, retriever.buffer)
|
||||||
|
|
||||||
def _await_response(self, client, retriever):
|
def _await_response(self, client, retriever):
|
||||||
msg = self._get_preamble(retriever)
|
msg = self._get_preamble(retriever)
|
||||||
|
@@ -88,8 +88,7 @@ class BasicResponseHandler(ResponseHandler):
|
|||||||
|
|
||||||
if self.msg.status == 101:
|
if self.msg.status == 101:
|
||||||
# Switching protocols is not supported
|
# Switching protocols is not supported
|
||||||
print(f"{self.msg.version} {self.msg.status} {self.msg.msg}")
|
print("".join(self.msg.raw), end="")
|
||||||
print(self.msg.headers)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
if 200 <= self.msg.status < 300:
|
if 200 <= self.msg.status < 300:
|
||||||
@@ -100,8 +99,7 @@ class BasicResponseHandler(ResponseHandler):
|
|||||||
return self._do_handle_redirect()
|
return self._do_handle_redirect()
|
||||||
if 400 <= self.msg.status < 500:
|
if 400 <= self.msg.status < 500:
|
||||||
# Dump headers and exit with error
|
# Dump headers and exit with error
|
||||||
print(f"{self.msg.version} {self.msg.status} {self.msg.msg}")
|
print("".join(self.msg.raw), end="")
|
||||||
print(self.msg.headers)
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _do_handle_redirect(self):
|
def _do_handle_redirect(self):
|
||||||
@@ -216,15 +214,14 @@ class HTMLDownloadHandler(DownloadHandler):
|
|||||||
|
|
||||||
def _download_images(self, tmp_filename, target_filename):
|
def _download_images(self, tmp_filename, target_filename):
|
||||||
|
|
||||||
(host, path) = ResponseHandler.parse_uri(self.cmd.uri)
|
|
||||||
with open(tmp_filename, "rb") as fp:
|
with open(tmp_filename, "rb") as fp:
|
||||||
soup = BeautifulSoup(fp, 'lxml')
|
soup = BeautifulSoup(fp, 'lxml')
|
||||||
|
|
||||||
base_url = self.cmd.uri
|
base_url = parser.base_url(self.cmd.uri)
|
||||||
base_element = soup.find("base")
|
base_element = soup.find("base")
|
||||||
|
|
||||||
if base_element:
|
if base_element:
|
||||||
base_url = base_element["href"]
|
base_url = f"http://{self.cmd.host}" + base_element["href"]
|
||||||
|
|
||||||
processed = {}
|
processed = {}
|
||||||
tag: Tag
|
tag: Tag
|
||||||
@@ -236,7 +233,7 @@ class HTMLDownloadHandler(DownloadHandler):
|
|||||||
if tag["src"] in processed:
|
if tag["src"] in processed:
|
||||||
new_url = processed.get(tag["src"])
|
new_url = processed.get(tag["src"])
|
||||||
else:
|
else:
|
||||||
new_url = self.__download_image(tag["src"], host, base_url)
|
new_url = self.__download_image(tag["src"], base_url)
|
||||||
processed[tag["src"]] = new_url
|
processed[tag["src"]] = new_url
|
||||||
if new_url:
|
if new_url:
|
||||||
tag["src"] = os.path.basename(new_url)
|
tag["src"] = os.path.basename(new_url)
|
||||||
@@ -246,8 +243,8 @@ class HTMLDownloadHandler(DownloadHandler):
|
|||||||
with open(target_filename, 'w') as file:
|
with open(target_filename, 'w') as file:
|
||||||
file.write(str(soup))
|
file.write(str(soup))
|
||||||
|
|
||||||
def __download_image(self, img_src, host, base_url):
|
def __download_image(self, img_src, base_url):
|
||||||
logging.debug("Downloading image: %s", img_src)
|
logging.info("Downloading image: %s", img_src)
|
||||||
|
|
||||||
parsed = urlsplit(img_src)
|
parsed = urlsplit(img_src)
|
||||||
|
|
||||||
@@ -257,11 +254,11 @@ class HTMLDownloadHandler(DownloadHandler):
|
|||||||
|
|
||||||
if parsed.hostname is None:
|
if parsed.hostname is None:
|
||||||
if img_src[0] == "/":
|
if img_src[0] == "/":
|
||||||
img_src = host + img_src
|
img_src = f"http://{self.cmd.host}{img_src}"
|
||||||
else:
|
else:
|
||||||
img_src = os.path.join(os.path.dirname(base_url), img_src)
|
img_src = parser.absolute_url(base_url, img_src)
|
||||||
|
|
||||||
if parsed.hostname is None or parsed.hostname == host:
|
if parsed.hostname is None or parsed.hostname == self.cmd.host:
|
||||||
port = self.cmd.port
|
port = self.cmd.port
|
||||||
elif ":" in parsed.netloc:
|
elif ":" in parsed.netloc:
|
||||||
port = parsed.netloc.split(":", 1)[1]
|
port = parsed.netloc.split(":", 1)[1]
|
||||||
|
@@ -6,11 +6,13 @@ class Message:
|
|||||||
status: int
|
status: int
|
||||||
msg: str
|
msg: str
|
||||||
headers: Dict[str, str]
|
headers: Dict[str, str]
|
||||||
|
raw: str
|
||||||
body: bytes
|
body: bytes
|
||||||
|
|
||||||
def __init__(self, version: str, status: int, msg: str, headers: Dict[str, str], body: bytes = None):
|
def __init__(self, version: str, status: int, msg: str, headers: Dict[str, str], raw=None, body: bytes = None):
|
||||||
self.version = version
|
self.version = version
|
||||||
self.status = status
|
self.status = status
|
||||||
self.msg = msg
|
self.msg = msg
|
||||||
self.headers = headers
|
self.headers = headers
|
||||||
|
self.raw = raw
|
||||||
self.body = body
|
self.body = body
|
||||||
|
@@ -1,4 +1,5 @@
|
|||||||
import logging
|
import logging
|
||||||
|
import os.path
|
||||||
import re
|
import re
|
||||||
from urllib.parse import urlparse, urlsplit
|
from urllib.parse import urlparse, urlsplit
|
||||||
|
|
||||||
@@ -182,7 +183,6 @@ def parse_headers(lines):
|
|||||||
headers = []
|
headers = []
|
||||||
# first header after the status-line may not contain a space
|
# first header after the status-line may not contain a space
|
||||||
for line in lines:
|
for line in lines:
|
||||||
line = next(lines)
|
|
||||||
if line[0].isspace():
|
if line[0].isspace():
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
@@ -245,3 +245,15 @@ def parse_uri(uri: str):
|
|||||||
port = 80
|
port = 80
|
||||||
|
|
||||||
return host, port, path
|
return host, port, path
|
||||||
|
|
||||||
|
|
||||||
|
def base_url(uri: str):
|
||||||
|
parsed = urlsplit(uri)
|
||||||
|
path = parsed.path.rsplit("/", 1)[0]
|
||||||
|
return f"{parsed.scheme}://{parsed.hostname}{path}/"
|
||||||
|
|
||||||
|
|
||||||
|
def absolute_url(uri: str, rel_path: str):
|
||||||
|
parsed = urlsplit(uri)
|
||||||
|
path = os.path.normpath(os.path.join(parsed.path, rel_path))
|
||||||
|
return f"{parsed.scheme}://{parsed.hostname}{path}"
|
||||||
|
41
server.py
41
server.py
@@ -47,44 +47,3 @@ except Exception as e:
|
|||||||
print("[ABRT] Internal error: " + str(e), file=sys.stderr)
|
print("[ABRT] Internal error: " + str(e), file=sys.stderr)
|
||||||
logging.debug("Internal error", exc_info=e)
|
logging.debug("Internal error", exc_info=e)
|
||||||
sys.exit(70)
|
sys.exit(70)
|
||||||
|
|
||||||
# import socket
|
|
||||||
#
|
|
||||||
# # Get hostname and address
|
|
||||||
# hostname = socket.gethostname()
|
|
||||||
# address = socket.gethostbyname(hostname)
|
|
||||||
#
|
|
||||||
# # socket heeft een listening and accept method
|
|
||||||
#
|
|
||||||
# SERVER = "127.0.0.1" # dynamisch fixen in project
|
|
||||||
# PORT = 5055
|
|
||||||
# server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
||||||
#
|
|
||||||
# ADDR = (SERVER, PORT) # hier wordt de socket gebonden aan mijn IP adres, dit moet wel anders
|
|
||||||
# server.bind(ADDR) # in het project gebeuren
|
|
||||||
#
|
|
||||||
# HEADER = 64 # maximum size messages
|
|
||||||
# FORMAT = 'utf-8'
|
|
||||||
# DISCONNECT_MESSAGE = "DISCONNECT!" # special message for disconnecting client and server
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# # function for starting server
|
|
||||||
# def start():
|
|
||||||
# pass
|
|
||||||
# server.listen()
|
|
||||||
# while True: # infinite loop in which server accept incoming connections, we want to run it forever
|
|
||||||
# conn, addr = server.accept() # Server blocks untill a client connects
|
|
||||||
# print("new connection: ", addr[0], " connected.")
|
|
||||||
# connected = True
|
|
||||||
# while connected: # while client is connected, we want to recieve messages
|
|
||||||
# msg = conn.recv(HEADER).decode(
|
|
||||||
# FORMAT).rstrip() # Argument is maximum size of msg (in project look into details of accp), decode is for converting bytes to strings, rstrip is for stripping messages for special hidden characters
|
|
||||||
# print("message: ", msg)
|
|
||||||
# if msg == DISCONNECT_MESSAGE:
|
|
||||||
# connected = False
|
|
||||||
# print("close connection ", addr[0], " disconnected.")
|
|
||||||
# conn.close()
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# print("server is starting ... ")
|
|
||||||
# start()
|
|
||||||
|
@@ -77,7 +77,7 @@ class GetCommand(AbstractCommand):
|
|||||||
|
|
||||||
logging.debug("---response begin---\r\n%s--- response end---", "".join(retriever.buffer))
|
logging.debug("---response begin---\r\n%s--- response end---", "".join(retriever.buffer))
|
||||||
|
|
||||||
return Message(version, status, msg, headers)
|
return Message(version, status, msg, headers, retriever.buffer)
|
||||||
|
|
||||||
def _await_response(self, client, retriever):
|
def _await_response(self, client, retriever):
|
||||||
msg = self._get_preamble(retriever)
|
msg = self._get_preamble(retriever)
|
||||||
|
Reference in New Issue
Block a user