From 850535a06030eeaae67c5b8e26cda6577e693390 Mon Sep 17 00:00:00 2001 From: Arthur Bols Date: Sun, 28 Mar 2021 03:33:00 +0200 Subject: [PATCH] Improve documentation --- client/responsehandler.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/client/responsehandler.py b/client/responsehandler.py index 69343b3..2dd0c25 100644 --- a/client/responsehandler.py +++ b/client/responsehandler.py @@ -17,6 +17,14 @@ IMG_REGEX = re.compile(r"<\s*img[^>]*\ssrc\s*=\s*['\"]([^\"']+)['\"][^>]*>", re. def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory=None): + """ + Handle the response of the request message + + @param client: the client which sent the request. + @param msg: the response message + @param command: the command of the sent request message + @param directory: the directory to download the response to (if available) + """ handler = BasicResponseHandler(client, msg, command) retriever = handler.handle() @@ -33,6 +41,9 @@ def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory class ResponseHandler(ABC): + """ + Helper class for handling response messages. + """ client: HTTPClient retriever: Retriever msg: Message @@ -46,12 +57,15 @@ class ResponseHandler(ABC): @abstractmethod def handle(self): + """ + Handle the response. + """ pass class BasicResponseHandler(ResponseHandler): """ - Response handler which throws away the body and only shows the headers. + Response handler which skips the body of the message and only shows the headers. In case of a redirect, it will process it and pass it to the appropriate response handler. """ @@ -216,7 +230,7 @@ class HTMLDownloadHandler(DownloadHandler): Downloads images referenced in the html of `tmp_filename` and replaces the references in the html and writes it to `target_filename`. 
@param tmp_filename: the path to the temporary html file - @param target_filename: the path for the final html fil + @param target_filename: the path for the final html file @param charset: the charset to decode `tmp_filename` """ @@ -237,6 +251,7 @@ class HTMLDownloadHandler(DownloadHandler): processed = {} to_replace = [] + # Find all `<img>` tags and the urls from the corresponding `src` fields for m in IMG_REGEX.finditer(html): url_start = m.start(1) url_end = m.end(1) @@ -245,14 +260,12 @@ class HTMLDownloadHandler(DownloadHandler): try: if len(target) == 0: continue + if target in processed: + # url is already processed new_url = processed.get(target) else: new_url = self.__download_image(target, base_url) - if not new_url: - # Image failed to download - continue - processed[target] = new_url if new_url: @@ -262,6 +275,8 @@ class HTMLDownloadHandler(DownloadHandler): except Exception as e: logging.error("Failed to download image: %s, skipping...", target, exc_info=e) + # reverse the list so urls at the bottom of the html file are processed first. + # Otherwise our start and end positions won't be correct. to_replace.reverse() for (start, end, path) in to_replace: html = html[:start] + path + html[end:]