small fixes
This commit is contained in:
@@ -31,6 +31,15 @@ class AbstractCommand(ABC):
|
||||
else:
|
||||
raise ValueError()
|
||||
|
||||
|
||||
@staticmethod
def build_message(command, host, path):
    """Build the request line and default headers as encoded bytes.

    The result consists of the ``<command> <path> HTTP/1.1`` request line
    followed by the ``Host``, ``Accept`` and ``Accept-Encoding`` headers,
    each terminated by CRLF. No terminating blank line is appended here.

    :param command: HTTP method placed at the start of the request line.
    :param host: value written into the ``Host`` header.
    :param path: request target written into the request line.
    :return: the header text encoded with ``FORMAT``.
    """
    header_lines = (
        f"{command} {path} HTTP/1.1",
        f"Host: {host}",
        "Accept: */*",
        "Accept-Encoding: identity",
    )
    # Every line, including the last one, is followed by CRLF.
    request_head = "".join(f"{line}\r\n" for line in header_lines)
    return request_head.encode(FORMAT)
|
||||
|
||||
def execute(self):
|
||||
(host, path) = self.parse_uri()
|
||||
|
||||
@@ -40,7 +49,6 @@ class AbstractCommand(ABC):
|
||||
message = f"{self.command} {path} HTTP/1.1\r\n"
|
||||
message += f"Host: {host}\r\n"
|
||||
message += "Accept: */*\r\nAccept-Encoding: identity\r\n"
|
||||
message += "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0\r\n"
|
||||
encoded_msg = self._build_message(message)
|
||||
|
||||
logging.info("---request begin---\r\n%s---request end---", encoded_msg.decode(FORMAT))
|
||||
@@ -94,6 +102,9 @@ class AbstractCommand(ABC):
|
||||
|
||||
class AbstractWithBodyCommand(AbstractCommand, ABC):
|
||||
|
||||
@staticmethod
def build_message(command, host, path):
    """Build the encoded request head for a command that carries a body.

    Delegates to ``AbstractCommand.build_message`` with the same arguments.
    Previously the base method was called without its three required
    arguments (a ``TypeError``) and its result was never returned.

    :param command: HTTP method placed at the start of the request line.
    :param host: value written into the ``Host`` header.
    :param path: request target written into the request line.
    :return: whatever ``AbstractCommand.build_message`` returns for these
        arguments (the encoded request line and default headers).
    """
    # Forward all arguments and hand the result back to the caller.
    return AbstractCommand.build_message(command, host, path)
|
||||
def _build_message(self, message: str) -> bytes:
|
||||
body = input(f"Enter {self.command} data: ").encode(FORMAT)
|
||||
print()
|
||||
|
@@ -1,8 +1,9 @@
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlparse, unquote
|
||||
|
||||
import cssutils
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
@@ -107,6 +108,11 @@ class DownloadHandler(ResponseHandler, ABC):
|
||||
elif parsed.path[-1] != "/":
|
||||
filename = parsed.path[index:]
|
||||
|
||||
while "%" in filename:
|
||||
filename = unquote(filename)
|
||||
|
||||
filename = re.sub(r"[^\w.+-]+[.]*", '', filename)
|
||||
|
||||
result = os.path.basename(filename).strip()
|
||||
if any(letter.isalnum() for letter in result):
|
||||
return result
|
||||
@@ -152,7 +158,7 @@ class HTMLDownloadHandler(DownloadHandler):
|
||||
def handle(self) -> str:
|
||||
|
||||
(dir, file) = os.path.split(self.path)
|
||||
tmp_filename = ".{file}.tmp".format(file=file)
|
||||
tmp_filename = f".{file}.tmp"
|
||||
tmp_path = os.path.join(dir, tmp_filename)
|
||||
file = open(tmp_path, "wb")
|
||||
|
||||
@@ -180,16 +186,22 @@ class HTMLDownloadHandler(DownloadHandler):
|
||||
tag: Tag
|
||||
for tag in soup.find_all("img"):
|
||||
try:
|
||||
if tag["src"] in processed:
|
||||
new_url = processed.get(tag["src"])
|
||||
if tag.has_attr("src"):
|
||||
el_name = "src"
|
||||
elif tag.has_attr("data-src"):
|
||||
el_name = "data-src"
|
||||
else:
|
||||
new_url = self.__download_image(tag["src"], host, base_url)
|
||||
processed[tag["src"]] = new_url
|
||||
continue
|
||||
|
||||
if tag[el_name] in processed:
|
||||
new_url = processed.get(tag[el_name])
|
||||
else:
|
||||
new_url = self.__download_image(tag[el_name], host, base_url)
|
||||
processed[tag[el_name]] = new_url
|
||||
if new_url:
|
||||
tag["src"] = new_url
|
||||
tag[el_name] = new_url
|
||||
except Exception as e:
|
||||
logging.debug(e)
|
||||
logging.error("Failed to download image: %s, skipping...", tag["src"])
|
||||
logging.error("Failed to download image: %s, skipping...", tag[el_name], exc_info=e)
|
||||
|
||||
for tag in soup.find_all("div"):
|
||||
if not tag.has_attr("style"):
|
||||
@@ -229,7 +241,7 @@ class HTMLDownloadHandler(DownloadHandler):
|
||||
|
||||
logging.debug("Downloading image: %s", img_src)
|
||||
|
||||
if parsed.scheme not in ("", "http"):
|
||||
if parsed.scheme not in ("", "http", "https"):
|
||||
# Not a valid url
|
||||
return None
|
||||
|
||||
@@ -248,9 +260,9 @@ class HTMLDownloadHandler(DownloadHandler):
|
||||
same_host = False
|
||||
(img_host, img_path) = ResponseHandler.parse_uri(img_src)
|
||||
|
||||
message = "GET {path} HTTP/1.1\r\n".format(path=img_path)
|
||||
message = f"GET {img_path} HTTP/1.1\r\n"
|
||||
message += "Accept: */*\r\nAccept-Encoding: identity\r\n"
|
||||
message += "Host: {host}\r\n\r\n".format(host=host)
|
||||
message += f"Host: {img_host}\r\n\r\n"
|
||||
message = message.encode(FORMAT)
|
||||
|
||||
if same_host:
|
||||
|
Reference in New Issue
Block a user