This commit is contained in:
2021-03-27 16:30:53 +01:00
parent fdbd865889
commit 3615c56152
14 changed files with 280 additions and 110 deletions

View File

@@ -13,19 +13,30 @@ sockets: Dict[str, HTTPClient] = {}
def create(command: str, url: str, port):
"""
Create a corresponding Command instance of the specified HTTP `command` with the specified `url` and `port`.
@param command: The command type to create
@param url: The url for the command
@param port: The port for the command
"""
uri = parser.get_uri(url)
if command == "GET":
return GetCommand(url, port)
return GetCommand(uri, port)
elif command == "HEAD":
return HeadCommand(url, port)
return HeadCommand(uri, port)
elif command == "POST":
return PostCommand(url, port)
return PostCommand(uri, port)
elif command == "PUT":
return PutCommand(url, port)
return PutCommand(uri, port)
else:
raise ValueError()
class AbstractCommand(ABC):
"""
A class representing the command for sending an HTTP command.
"""
uri: str
host: str
path: str
@@ -111,6 +122,9 @@ class AbstractCommand(ABC):
class AbstractWithBodyCommand(AbstractCommand, ABC):
"""
The building block for creating an HTTP message for an HTTP command with a body.
"""
def _build_message(self, message: str) -> bytes:
body = input(f"Enter {self.command} data: ").encode(FORMAT)
@@ -127,12 +141,19 @@ class AbstractWithBodyCommand(AbstractCommand, ABC):
class HeadCommand(AbstractCommand):
    """
    Command used to send a `HEAD` message.
    """

    @property
    def command(self):
        # Name of the HTTP method this command type stands for.
        return "HEAD"
class GetCommand(AbstractCommand):
"""
A Command for sending a `GET` message.
"""
def __init__(self, uri: str, port, dir=None):
super().__init__(uri, port)
@@ -160,12 +181,20 @@ class GetCommand(AbstractCommand):
class PostCommand(AbstractWithBodyCommand):
    """
    Command used to send a `POST` message.
    """

    @property
    def command(self):
        # Name of the HTTP method; the inherited `_build_message` also
        # interpolates it into its "Enter ... data" prompt.
        return "POST"
class PutCommand(AbstractWithBodyCommand):
    """
    Command used to send a `PUT` message.
    """

    @property
    def command(self):
        # Name of the HTTP method; the inherited `_build_message` also
        # interpolates it into its "Enter ... data" prompt.
        return "PUT"

View File

@@ -1,6 +1,6 @@
import socket
from httplib.httpsocket import HTTPSocket
from httplib.httpsocket import HTTPSocket, InvalidResponse
BUFSIZE = 4096
TIMEOUT = 3
@@ -13,3 +13,9 @@ class HTTPClient(HTTPSocket):
def __init__(self, host: str):
    """
    Create a client backed by a new IPv4 stream socket.

    @param host: passed through unchanged to the parent constructor
    """
    # AF_INET + SOCK_STREAM: an IPv4, stream-oriented socket.
    stream_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    super().__init__(stream_socket, host)
def read_line(self):
    """
    Read a line through the parent implementation, translating decoding failures.

    @return: whatever the parent `read_line` returns
    @raise InvalidResponse: if the parent `read_line` raises `UnicodeDecodeError`
    """
    try:
        return super().read_line()
    except UnicodeDecodeError as err:
        # Chain explicitly so the decoding error is kept as the direct cause
        # of the InvalidResponse instead of only as implicit context.
        raise InvalidResponse("Unexpected decoding error") from err

View File

@@ -14,7 +14,7 @@ from httplib.message import ClientMessage as Message
from httplib.retriever import Retriever
def handle(client: HTTPClient, msg: Message, command: AbstractCommand, dir=None):
def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory=None):
handler = BasicResponseHandler(client, msg, command)
retriever = handler.handle()
@@ -23,9 +23,9 @@ def handle(client: HTTPClient, msg: Message, command: AbstractCommand, dir=None)
content_type = msg.headers.get("content-type")
if content_type and "text/html" in content_type:
handler = HTMLDownloadHandler(retriever, client, msg, command, dir)
handler = HTMLDownloadHandler(retriever, client, msg, command, directory)
else:
handler = RawDownloadHandler(retriever, client, msg, command, dir)
handler = RawDownloadHandler(retriever, client, msg, command, directory)
return handler.handle()
@@ -130,20 +130,20 @@ class BasicResponseHandler(ResponseHandler):
class DownloadHandler(ResponseHandler, ABC):
def __init__(self, retriever: Retriever, client: HTTPClient, msg, cmd, dir=None):
def __init__(self, retriever: Retriever, client: HTTPClient, msg, cmd, directory=None):
super().__init__(retriever, client, msg, cmd)
if not dir:
dir = self._create_directory()
if not directory:
directory = self._create_directory()
self.path = self._get_duplicate_name(os.path.join(dir, self.get_filename()))
self.path = self._get_duplicate_name(os.path.join(directory, self.get_filename()))
@staticmethod
def create(retriever: Retriever, client: HTTPClient, msg, cmd, dir=None):
def create(retriever: Retriever, client: HTTPClient, msg, cmd, directory=None):
content_type = msg.headers.get("content-type")
if content_type and "text/html" in content_type:
return HTMLDownloadHandler(retriever, client, msg, cmd, dir)
return RawDownloadHandler(retriever, client, msg, cmd, dir)
return HTMLDownloadHandler(retriever, client, msg, cmd, directory)
return RawDownloadHandler(retriever, client, msg, cmd, directory)
def _create_directory(self):
path = self._get_duplicate_name(os.path.abspath(self.client.host))
@@ -194,14 +194,14 @@ class RawDownloadHandler(DownloadHandler):
class HTMLDownloadHandler(DownloadHandler):
def __init__(self, retriever: Retriever, client: HTTPClient, msg: Message, cmd: AbstractCommand, dir=None):
super().__init__(retriever, client, msg, cmd, dir)
def __init__(self, retriever: Retriever, client: HTTPClient, msg: Message, cmd: AbstractCommand, directory=None):
super().__init__(retriever, client, msg, cmd, directory)
def handle(self) -> str:
(dir, file) = os.path.split(self.path)
(directory, file) = os.path.split(self.path)
tmp_filename = f".{file}.tmp"
tmp_path = os.path.join(dir, tmp_filename)
tmp_path = os.path.join(directory, tmp_filename)
file = open(tmp_path, "wb")
for buffer in self.retriever.retrieve():
@@ -217,11 +217,11 @@ class HTMLDownloadHandler(DownloadHandler):
with open(tmp_filename, "rb") as fp:
soup = BeautifulSoup(fp, 'lxml')
base_url = parser.base_url(self.cmd.uri)
base_element = soup.find("base")
base_url = self.cmd.uri
if base_element:
base_url = f"http://{self.cmd.host}" + base_element["href"]
base_url = parser.urljoin(self.cmd.uri, base_element["href"])
processed = {}
tag: Tag
@@ -241,22 +241,18 @@ class HTMLDownloadHandler(DownloadHandler):
logging.error("Failed to download image: %s, skipping...", tag["src"], exc_info=e)
with open(target_filename, 'w') as file:
file.write(str(soup))
file.write(soup.prettify(formatter="minimal"))
def __download_image(self, img_src, base_url):
"""
Download image from the specified `img_src` and `base_url`.
If the image is available, it will be downloaded to the directory of `self.path`
"""
logging.info("Downloading image: %s", img_src)
parsed = urlsplit(img_src)
if parsed.scheme not in ("", "http", "https"):
# Not a valid url
return None
if parsed.hostname is None:
if img_src[0] == "/":
img_src = f"http://{self.cmd.host}{img_src}"
else:
img_src = parser.absolute_url(base_url, img_src)
img_src = parser.urljoin(base_url, img_src)
if parsed.hostname is None or parsed.hostname == self.cmd.host:
port = self.cmd.port