137 lines
3.8 KiB
Python
137 lines
3.8 KiB
Python
import logging
|
|
from abc import ABC, abstractmethod
|
|
from typing import Dict
|
|
|
|
from httplib.exceptions import IncompleteResponse, InvalidResponse, UnsupportedEncoding
|
|
from httplib.httpsocket import HTTPSocket, BUFSIZE
|
|
|
|
|
|
class Retriever(ABC):
    """Base class for strategies that read data from an ``HTTPSocket``.

    Subclasses implement :meth:`retrieve`; :meth:`create` selects the
    subclass that matches a set of message headers.
    """

    client: HTTPSocket

    def __init__(self, client: HTTPSocket):
        """Keep a reference to the socket wrapper the retriever reads from."""
        self.client = client

    @abstractmethod
    def retrieve(self):
        """Produce the retrieved data; every subclass provides its own version."""
        pass

    @staticmethod
    def create(client: HTTPSocket, headers: Dict[str, str]):
        """Return the retriever matching the given message headers.

        The headers are looked up by their lowercase names
        (``transfer-encoding``, ``content-encoding``, ``content-length``).

        :param client: socket wrapper handed to the created retriever.
        :param headers: header names mapped to their values.
        :return: a ``ChunkedRetriever`` for chunked transfer-encoding, a
            ``ContentLengthRetriever`` when a content-length is present,
            otherwise a ``RawRetriever``.
        :raises UnsupportedEncoding: for any transfer-encoding other than
            ``chunked`` and for any content-encoding.
        :raises ValueError: when the content-length value is not an integer.
        """
        # only chunked transfer-encoding is supported
        transfer_encoding = headers.get("transfer-encoding")
        chunked = False
        if transfer_encoding:
            # Transfer-coding names are case-insensitive, so "Chunked" or
            # " chunked" must be accepted just like "chunked".
            if transfer_encoding.strip().lower() != "chunked":
                raise UnsupportedEncoding("transfer-encoding", transfer_encoding)
            chunked = True

        # content-encoding is not supported
        content_encoding = headers.get("content-encoding")
        if content_encoding:
            raise UnsupportedEncoding("content-encoding", content_encoding)

        if chunked:
            return ChunkedRetriever(client)

        content_length = headers.get("content-length")
        if not content_length:
            logging.warning("Transfer-encoding and content-length not specified, trying without")
            return RawRetriever(client)

        return ContentLengthRetriever(client, int(content_length))
|
|
|
|
|
|
class PreambleRetriever(Retriever):
    """Retriever that reads a message line by line up to the blank line.

    Every line read, including the terminating one, is also recorded in
    ``self.buffer``.
    """

    client: HTTPSocket
    # Lines read so far, in the order they were received.
    buffer: list

    def __init__(self, client: HTTPSocket):
        """Set up the retriever with an empty line buffer."""
        # The base class already stores ``client``; no need to assign it again.
        super().__init__(client)
        self.buffer = []

    def retrieve(self):
        """Yield lines from the client until a terminating line is read.

        The terminating line is appended to ``self.buffer`` but not yielded.
        """
        while True:
            line = self.client.read_line()
            self.buffer.append(line)

            # NOTE(review): a single space is treated as a terminator here;
            # confirm whether an empty string was intended instead.
            if line in ("\r\n", "\n", " "):
                break

            yield line
|
|
|
|
|
|
class ContentLengthRetriever(Retriever):
    """Retriever for a payload whose total length is known in advance."""

    # Expected total payload length, as passed to the constructor.
    length: int

    def __init__(self, client: HTTPSocket, length: int):
        """Remember the client and the expected payload length."""
        super().__init__(client)
        self.length = length

    def retrieve(self):
        """Yield pieces of the payload until ``self.length`` has been received.

        Each read asks the client for everything that is still missing. If
        the client returns an empty result before the expected length is
        reached, a warning is logged and the generator ends early.

        :raises IncompleteResponse: when the client raises ``TimeoutError``
            or ``ConnectionError`` while the payload is still incomplete.
        """
        cur_payload_size = 0
        while cur_payload_size < self.length:
            remaining = self.length - cur_payload_size

            try:
                buffer = self.client.read(remaining)
            except TimeoutError as err:
                logging.error("Timed out before receiving complete payload")
                raise IncompleteResponse("Timed out before receiving complete payload") from err
            except ConnectionError as err:
                # Log the actual cause instead of repeating the timeout message.
                logging.error("Connection closed before receiving complete payload")
                raise IncompleteResponse("Connection closed before receiving complete payload") from err

            if not buffer:
                logging.warning("Received payload length %s less than expected %s", cur_payload_size, self.length)
                break

            cur_payload_size += len(buffer)
            yield buffer
|
|
|
|
|
|
class RawRetriever(Retriever):
    """Retriever used when no length information is available.

    It keeps reading from the client until the read fails or returns
    nothing.
    """

    def retrieve(self):
        """Yield whatever the client returns until no more data arrives.

        The generator ends when the client raises ``TimeoutError`` or
        ``ConnectionError``, or when a read returns an empty result.
        """
        while True:
            try:
                data = self.client.read()
            # A tuple is required to catch both types: the expression
            # ``TimeoutError or ConnectionError`` evaluates to TimeoutError
            # alone, which let ConnectionError propagate to the caller.
            except (TimeoutError, ConnectionError):
                return b""

            # An empty read is treated as the end of the data (the same way
            # ContentLengthRetriever stops on it); without this check the
            # loop would yield empty chunks forever.
            if not data:
                return b""

            yield data
|
|
|
|
|
|
class ChunkedRetriever(Retriever):
    """Retriever for a payload sent as a sequence of size-prefixed chunks."""

    def retrieve(self):
        """Yield the data of each chunk until a chunk of size zero is read.

        On the zero-size chunk the client's ``reset_request`` is called and
        the generator ends.
        """
        while True:
            size = self.__get_chunk_size()
            logging.debug("chunk-size: %s", size)

            if size != 0:
                data = self.client.read(size)
                logging.debug("chunk: %r", data)
                yield data

                # Consume the line terminator that follows the chunk data.
                self.client.read_line()  # remove CRLF
                continue

            self.client.reset_request()
            return

    def __get_chunk_size(self):
        """Read one line from the client and parse it as a hexadecimal size.

        Anything from the first ``;`` onwards is ignored.

        :raises InvalidResponse: when the remaining text is not valid hex.
        """
        # Keep only the part before the first ';' (the whole line if absent).
        size_text = self.client.read_line().partition(";")[0]

        try:
            return int(size_text, 16)
        except ValueError:
            raise InvalidResponse()
|