"Fix img url parsing"
This commit is contained in:
@@ -247,25 +247,27 @@ class HTMLDownloadHandler(DownloadHandler):
|
||||
file.write(str(soup))
|
||||
|
||||
def __download_image(self, img_src, host, base_url):
|
||||
parsed = urlsplit(img_src)
|
||||
|
||||
logging.debug("Downloading image: %s", img_src)
|
||||
|
||||
parsed = urlsplit(img_src)
|
||||
|
||||
if parsed.scheme not in ("", "http", "https"):
|
||||
# Not a valid url
|
||||
return None
|
||||
|
||||
if parsed.hostname == host:
|
||||
if parsed.hostname is None:
|
||||
if img_src[0] == "/":
|
||||
img_src = host + img_src
|
||||
else:
|
||||
img_src = os.path.join(os.path.dirname(base_url), img_src)
|
||||
|
||||
if parsed.hostname is None or parsed.hostname == host:
|
||||
port = self.cmd.port
|
||||
elif ":" in parsed.netloc:
|
||||
port = parsed.netloc.split(":", 1)[1]
|
||||
else:
|
||||
port = 80
|
||||
|
||||
if len(parsed.netloc) == 0 and parsed.path != "/":
|
||||
# relative url, append base_url
|
||||
img_src = os.path.join(os.path.dirname(base_url), parsed.path)
|
||||
|
||||
command = GetCommand(img_src, port, os.path.dirname(self.path))
|
||||
command.execute(True)
|
||||
|
||||
|
Reference in New Issue
Block a user