From 850535a06030eeaae67c5b8e26cda6577e693390 Mon Sep 17 00:00:00 2001
From: Arthur Bols <arthur@bols.dev>
Date: Sun, 28 Mar 2021 03:33:00 +0200
Subject: [PATCH] Improve documentation

---
 client/responsehandler.py | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)
diff --git a/client/responsehandler.py b/client/responsehandler.py
index 69343b3..2dd0c25 100644
--- a/client/responsehandler.py
+++ b/client/responsehandler.py
@@ -17,6 +17,14 @@ IMG_REGEX = re.compile(r"<\s*img[^>]*\ssrc\s*=\s*['\"]([^\"']+)['\"][^>]*>", re.
 
 
 def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory=None):
+    """
+    Handle the response to the request message.
+
+    @param client: the client which sent the request.
+    @param msg: the response message
+    @param command: the command of the sent request message
+    @param directory: the directory to download the response to (if available)
+    """
     handler = BasicResponseHandler(client, msg, command)
     retriever = handler.handle()
 
@@ -33,6 +41,9 @@ def handle(client: HTTPClient, msg: Message, command: AbstractCommand, directory
 
 
 class ResponseHandler(ABC):
+    """
+    Abstract base class for handlers of response messages.
+    """
     client: HTTPClient
     retriever: Retriever
     msg: Message
@@ -46,12 +57,15 @@ class ResponseHandler(ABC):
 
     @abstractmethod
     def handle(self):
+        """
+        Handle the response.
+        """
         pass
 
 
 class BasicResponseHandler(ResponseHandler):
     """
-    Response handler which throws away the body and only shows the headers.
+    Response handler which skips the body of the message and only shows the headers.
     In case of a redirect, it will process it and pass it to the appropriate response handler.
     """
 
@@ -216,7 +230,7 @@ class HTMLDownloadHandler(DownloadHandler):
         Downloads images referenced in the html of `tmp_filename` and replaces the references in the html
         and writes it to `target_filename`.
         @param tmp_filename: the path to the temporary html file
-        @param target_filename: the path for the final html fil
+        @param target_filename: the path for the final html file
         @param charset: the charset to decode `tmp_filename`
         """
 
@@ -237,6 +251,7 @@ class HTMLDownloadHandler(DownloadHandler):
         processed = {}
         to_replace = []
 
+        # Find all <img> tags and extract the urls from their `src` attributes
         for m in IMG_REGEX.finditer(html):
             url_start = m.start(1)
             url_end = m.end(1)
@@ -245,14 +260,12 @@ class HTMLDownloadHandler(DownloadHandler):
             try:
                 if len(target) == 0:
                     continue
+
                 if target in processed:
+                    # url is already processed
                     new_url = processed.get(target)
                 else:
                     new_url = self.__download_image(target, base_url)
-                    if not new_url:
-                        # Image failed to download
-                        continue
-
                     processed[target] = new_url
 
                 if new_url:
@@ -262,6 +275,8 @@ class HTMLDownloadHandler(DownloadHandler):
             except Exception as e:
                 logging.error("Failed to download image: %s, skipping...", target, exc_info=e)
 
+        # Reverse the list so replacements are applied from the end of the html towards the start.
+        # Otherwise an earlier replacement would shift the recorded start and end positions of later ones.
         to_replace.reverse()
         for (start, end, path) in to_replace:
             html = html[:start] + path + html[end:]