update
This commit is contained in:
@@ -13,19 +13,30 @@ sockets: Dict[str, HTTPClient] = {}
|
||||
|
||||
|
||||
def create(command: str, url: str, port):
    """
    Create a corresponding Command instance of the specified HTTP `command` with the specified `url` and `port`.

    The `url` is first passed through `parser.get_uri`; the resulting uri is what the command receives.

    @param command: The command type to create; one of "GET", "HEAD", "POST" or "PUT"
    @param url: The url for the command
    @param port: The port for the command
    @return: a new GetCommand, HeadCommand, PostCommand or PutCommand instance
    @raise ValueError: if `command` is not one of the supported command types
    """
    # Reject unknown commands up front, before any work is done on the url.
    if command not in ("GET", "HEAD", "POST", "PUT"):
        raise ValueError(f"Unsupported HTTP command: {command!r}")

    uri = parser.get_uri(url)

    command_types = {
        "GET": GetCommand,
        "HEAD": HeadCommand,
        "POST": PostCommand,
        "PUT": PutCommand,
    }
    return command_types[command](uri, port)
|
||||
|
||||
|
||||
class AbstractCommand(ABC):
|
||||
"""
|
||||
A class representing the command for sending an HTTP command.
|
||||
"""
|
||||
uri: str
|
||||
host: str
|
||||
path: str
|
||||
@@ -111,6 +122,9 @@ class AbstractCommand(ABC):
|
||||
|
||||
|
||||
class AbstractWithBodyCommand(AbstractCommand, ABC):
|
||||
"""
|
||||
The building block for creating an HTTP message for an HTTP command with a body.
|
||||
"""
|
||||
|
||||
def _build_message(self, message: str) -> bytes:
|
||||
body = input(f"Enter {self.command} data: ").encode(FORMAT)
|
||||
@@ -127,12 +141,19 @@ class AbstractWithBodyCommand(AbstractCommand, ABC):
|
||||
|
||||
|
||||
class HeadCommand(AbstractCommand):
    """
    Command implementation used to send a `HEAD` message.
    """

    @property
    def command(self):
        # Name of the HTTP command this class represents.
        name = "HEAD"
        return name
|
||||
|
||||
|
||||
class GetCommand(AbstractCommand):
|
||||
"""
|
||||
A Command for sending a `GET` message.
|
||||
"""
|
||||
|
||||
def __init__(self, uri: str, port, dir=None):
|
||||
super().__init__(uri, port)
|
||||
@@ -160,12 +181,20 @@ class GetCommand(AbstractCommand):
|
||||
|
||||
|
||||
class PostCommand(AbstractWithBodyCommand):
    """
    Command implementation used to send a `POST` message.
    """

    @property
    def command(self):
        # Name of the HTTP command this class represents.
        name = "POST"
        return name
|
||||
|
||||
|
||||
class PutCommand(AbstractWithBodyCommand):
    """
    Command implementation used to send a `PUT` message.
    """

    @property
    def command(self):
        # Name of the HTTP command this class represents.
        name = "PUT"
        return name
|
||||
|
@@ -1,6 +1,6 @@
|
||||
import socket
|
||||
|
||||
from httplib.httpsocket import HTTPSocket
|
||||
from httplib.httpsocket import HTTPSocket, InvalidResponse
|
||||
|
||||
BUFSIZE = 4096
|
||||
TIMEOUT = 3
|
||||
@@ -13,3 +13,9 @@ class HTTPClient(HTTPSocket):
|
||||
|
||||
def __init__(self, host: str):
    """
    Initialise the client with a newly created IPv4 stream socket.

    @param host: forwarded, together with the new socket, to the parent initialiser
    """
    stream_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    super().__init__(stream_socket, host)
|
||||
|
||||
def read_line(self):
    """
    Read a line through the parent `read_line`, reporting undecodable data as an invalid response.

    @return: whatever the parent `read_line` returns
    @raise InvalidResponse: if the parent `read_line` raised a UnicodeDecodeError
    """
    try:
        return super().read_line()
    except UnicodeDecodeError as err:
        # Chain explicitly so the original decoding failure is recorded as the cause.
        raise InvalidResponse("Unexpected decoding error") from err
|
||||
|
@@ -14,7 +14,7 @@ from httplib.message import ClientMessage as Message
|
||||
from httplib.retriever import Retriever
|
||||
|
||||
|
||||
def handle(client: HTTPClient, msg: Message, command: AbstractCommand, dir=None):
|
||||
def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory=None):
|
||||
handler = BasicResponseHandler(client, msg, command)
|
||||
retriever = handler.handle()
|
||||
|
||||
@@ -23,9 +23,9 @@ def handle(client: HTTPClient, msg: Message, command: AbstractCommand, dir=None)
|
||||
|
||||
content_type = msg.headers.get("content-type")
|
||||
if content_type and "text/html" in content_type:
|
||||
handler = HTMLDownloadHandler(retriever, client, msg, command, dir)
|
||||
handler = HTMLDownloadHandler(retriever, client, msg, command, directory)
|
||||
else:
|
||||
handler = RawDownloadHandler(retriever, client, msg, command, dir)
|
||||
handler = RawDownloadHandler(retriever, client, msg, command, directory)
|
||||
|
||||
return handler.handle()
|
||||
|
||||
@@ -130,20 +130,20 @@ class BasicResponseHandler(ResponseHandler):
|
||||
|
||||
class DownloadHandler(ResponseHandler, ABC):
|
||||
|
||||
def __init__(self, retriever: Retriever, client: HTTPClient, msg, cmd, directory=None):
    """
    Initialise the handler and determine `self.path`, the path this handler stores for the download.

    @param retriever: forwarded to the parent initialiser
    @param client: forwarded to the parent initialiser
    @param msg: forwarded to the parent initialiser
    @param cmd: forwarded to the parent initialiser
    @param directory: directory to place the file in; when empty or None,
        the result of `self._create_directory()` is used instead
    """
    super().__init__(retriever, client, msg, cmd)

    # Any falsy value (None or an empty string) means "no directory given".
    target_directory = directory or self._create_directory()

    candidate = os.path.join(target_directory, self.get_filename())
    # NOTE(review): `_get_duplicate_name` presumably avoids clashing with an
    # existing name - confirm against its definition.
    self.path = self._get_duplicate_name(candidate)
|
||||
|
||||
@staticmethod
def create(retriever: Retriever, client: HTTPClient, msg, cmd, directory=None):
    """
    Create the download handler matching the "content-type" header of `msg`.

    @param retriever: passed on to the created handler
    @param client: passed on to the created handler
    @param msg: message whose `headers` are inspected; also passed on to the created handler
    @param cmd: passed on to the created handler
    @param directory: passed on to the created handler
    @return: an HTMLDownloadHandler when the content type contains "text/html",
        otherwise (including when the header is missing) a RawDownloadHandler
    """
    content_type = msg.headers.get("content-type")
    is_html = bool(content_type) and "text/html" in content_type

    handler_type = HTMLDownloadHandler if is_html else RawDownloadHandler
    return handler_type(retriever, client, msg, cmd, directory)
|
||||
|
||||
def _create_directory(self):
|
||||
path = self._get_duplicate_name(os.path.abspath(self.client.host))
|
||||
@@ -194,14 +194,14 @@ class RawDownloadHandler(DownloadHandler):
|
||||
|
||||
|
||||
class HTMLDownloadHandler(DownloadHandler):
|
||||
def __init__(self, retriever: Retriever, client: HTTPClient, msg: Message, cmd: AbstractCommand, directory=None):
    """
    Initialise the handler by forwarding every argument unchanged to the parent initialiser.

    @param retriever: forwarded to the parent initialiser
    @param client: forwarded to the parent initialiser
    @param msg: forwarded to the parent initialiser
    @param cmd: forwarded to the parent initialiser
    @param directory: forwarded to the parent initialiser
    """
    super().__init__(retriever, client, msg, cmd, directory)
|
||||
|
||||
def handle(self) -> str:
|
||||
|
||||
(dir, file) = os.path.split(self.path)
|
||||
(directory, file) = os.path.split(self.path)
|
||||
tmp_filename = f".{file}.tmp"
|
||||
tmp_path = os.path.join(dir, tmp_filename)
|
||||
tmp_path = os.path.join(directory, tmp_filename)
|
||||
file = open(tmp_path, "wb")
|
||||
|
||||
for buffer in self.retriever.retrieve():
|
||||
@@ -217,11 +217,11 @@ class HTMLDownloadHandler(DownloadHandler):
|
||||
with open(tmp_filename, "rb") as fp:
|
||||
soup = BeautifulSoup(fp, 'lxml')
|
||||
|
||||
base_url = parser.base_url(self.cmd.uri)
|
||||
base_element = soup.find("base")
|
||||
|
||||
base_url = self.cmd.uri
|
||||
if base_element:
|
||||
base_url = f"http://{self.cmd.host}" + base_element["href"]
|
||||
base_url = parser.urljoin(self.cmd.uri, base_element["href"])
|
||||
|
||||
processed = {}
|
||||
tag: Tag
|
||||
@@ -241,22 +241,18 @@ class HTMLDownloadHandler(DownloadHandler):
|
||||
logging.error("Failed to download image: %s, skipping...", tag["src"], exc_info=e)
|
||||
|
||||
with open(target_filename, 'w') as file:
|
||||
file.write(str(soup))
|
||||
file.write(soup.prettify(formatter="minimal"))
|
||||
|
||||
def __download_image(self, img_src, base_url):
|
||||
"""
|
||||
Download image from the specified `img_src` and `base_url`.
|
||||
If the image is available, it will be downloaded to the directory of `self.path`
|
||||
"""
|
||||
|
||||
logging.info("Downloading image: %s", img_src)
|
||||
|
||||
parsed = urlsplit(img_src)
|
||||
|
||||
if parsed.scheme not in ("", "http", "https"):
|
||||
# Not a valid url
|
||||
return None
|
||||
|
||||
if parsed.hostname is None:
|
||||
if img_src[0] == "/":
|
||||
img_src = f"http://{self.cmd.host}{img_src}"
|
||||
else:
|
||||
img_src = parser.absolute_url(base_url, img_src)
|
||||
img_src = parser.urljoin(base_url, img_src)
|
||||
|
||||
if parsed.hostname is None or parsed.hostname == self.cmd.host:
|
||||
port = self.cmd.port
|
||||
|
Reference in New Issue
Block a user