"Fix img url parsing"

This commit is contained in:
2021-03-24 17:20:40 +01:00
parent d14252f707
commit 7639383782
3 changed files with 16 additions and 10 deletions

View File

@@ -247,25 +247,27 @@ class HTMLDownloadHandler(DownloadHandler):
file.write(str(soup))
def __download_image(self, img_src, host, base_url):
parsed = urlsplit(img_src)
logging.debug("Downloading image: %s", img_src)
parsed = urlsplit(img_src)
if parsed.scheme not in ("", "http", "https"):
# Not a valid url
return None
if parsed.hostname == host:
if parsed.hostname is None:
if img_src[0] == "/":
img_src = host + img_src
else:
img_src = os.path.join(os.path.dirname(base_url), img_src)
if parsed.hostname is None or parsed.hostname == host:
port = self.cmd.port
elif ":" in parsed.netloc:
port = parsed.netloc.split(":", 1)[1]
else:
port = 80
if len(parsed.netloc) == 0 and parsed.path != "/":
# relative url, append base_url
img_src = os.path.join(os.path.dirname(base_url), parsed.path)
command = GetCommand(img_src, port, os.path.dirname(self.path))
command.execute(True)