Improve documentation
This commit is contained in:
@@ -17,6 +17,14 @@ IMG_REGEX = re.compile(r"<\s*img[^>]*\ssrc\s*=\s*['\"]([^\"']+)['\"][^>]*>", re.
|
|||||||
|
|
||||||
|
|
||||||
def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory=None):
|
def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory=None):
|
||||||
|
"""
|
||||||
|
Handle the response of the request message
|
||||||
|
|
||||||
|
@param client: the client which sent the request.
|
||||||
|
@param msg: the response message
|
||||||
|
@param command: the command of the sent request message
|
||||||
|
@param directory: the directory to download the response to (if available)
|
||||||
|
"""
|
||||||
handler = BasicResponseHandler(client, msg, command)
|
handler = BasicResponseHandler(client, msg, command)
|
||||||
retriever = handler.handle()
|
retriever = handler.handle()
|
||||||
|
|
||||||
@@ -33,6 +41,9 @@ def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory
|
|||||||
|
|
||||||
|
|
||||||
class ResponseHandler(ABC):
|
class ResponseHandler(ABC):
|
||||||
|
"""
|
||||||
|
Helper class for handling response messages.
|
||||||
|
"""
|
||||||
client: HTTPClient
|
client: HTTPClient
|
||||||
retriever: Retriever
|
retriever: Retriever
|
||||||
msg: Message
|
msg: Message
|
||||||
@@ -46,12 +57,15 @@ class ResponseHandler(ABC):
|
|||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def handle(self):
|
def handle(self):
|
||||||
|
"""
|
||||||
|
Handle the response.
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class BasicResponseHandler(ResponseHandler):
|
class BasicResponseHandler(ResponseHandler):
|
||||||
"""
|
"""
|
||||||
Response handler which throws away the body and only shows the headers.
|
Response handler which skips the body of the message and only shows the headers.
|
||||||
In case of a redirect, it will process it and pass it to the appropriate response handler.
|
In case of a redirect, it will process it and pass it to the appropriate response handler.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -216,7 +230,7 @@ class HTMLDownloadHandler(DownloadHandler):
|
|||||||
Downloads images referenced in the html of `tmp_filename` and replaces the references in the html
|
Downloads images referenced in the html of `tmp_filename` and replaces the references in the html
|
||||||
and writes it to `target_filename`.
|
and writes it to `target_filename`.
|
||||||
@param tmp_filename: the path to the temporary html file
|
@param tmp_filename: the path to the temporary html file
|
||||||
@param target_filename: the path for the final html fil
|
@param target_filename: the path for the final html file
|
||||||
@param charset: the charset to decode `tmp_filename`
|
@param charset: the charset to decode `tmp_filename`
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -237,6 +251,7 @@ class HTMLDownloadHandler(DownloadHandler):
|
|||||||
processed = {}
|
processed = {}
|
||||||
to_replace = []
|
to_replace = []
|
||||||
|
|
||||||
|
# Find all <img> tags and extract the urls from their `src` attributes
|
||||||
for m in IMG_REGEX.finditer(html):
|
for m in IMG_REGEX.finditer(html):
|
||||||
url_start = m.start(1)
|
url_start = m.start(1)
|
||||||
url_end = m.end(1)
|
url_end = m.end(1)
|
||||||
@@ -245,14 +260,12 @@ class HTMLDownloadHandler(DownloadHandler):
|
|||||||
try:
|
try:
|
||||||
if len(target) == 0:
|
if len(target) == 0:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if target in processed:
|
if target in processed:
|
||||||
|
# url is already processed
|
||||||
new_url = processed.get(target)
|
new_url = processed.get(target)
|
||||||
else:
|
else:
|
||||||
new_url = self.__download_image(target, base_url)
|
new_url = self.__download_image(target, base_url)
|
||||||
if not new_url:
|
|
||||||
# Image failed to download
|
|
||||||
continue
|
|
||||||
|
|
||||||
processed[target] = new_url
|
processed[target] = new_url
|
||||||
|
|
||||||
if new_url:
|
if new_url:
|
||||||
@@ -262,6 +275,8 @@ class HTMLDownloadHandler(DownloadHandler):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error("Failed to download image: %s, skipping...", target, exc_info=e)
|
logging.error("Failed to download image: %s, skipping...", target, exc_info=e)
|
||||||
|
|
||||||
|
# Reverse the list so urls at the bottom of the html file are replaced first.
|
||||||
|
# Otherwise each replacement would shift the start and end positions of the urls that follow it.
|
||||||
to_replace.reverse()
|
to_replace.reverse()
|
||||||
for (start, end, path) in to_replace:
|
for (start, end, path) in to_replace:
|
||||||
html = html[:start] + path + html[end:]
|
html = html[:start] + path + html[end:]
|
||||||
|
Reference in New Issue
Block a user