210 lines
6.4 KiB
Python
210 lines
6.4 KiB
Python
import logging
|
|
from abc import ABC, abstractmethod
|
|
from typing import Dict
|
|
|
|
from httplib.exceptions import IncompleteResponse, InvalidResponse, UnsupportedEncoding
|
|
from httplib.httpsocket import HTTPSocket, BUFSIZE
|
|
|
|
|
|
class Retriever(ABC):
    """
    This is a helper class for retrieving HTTP messages.

    Subclasses implement `retrieve`; `create` picks the body retriever that matches the message headers.
    """
    client: HTTPSocket

    def __init__(self, client: HTTPSocket):
        """
        @param client: the socket to retrieve from
        """
        self.client = client

    @abstractmethod
    def retrieve(self):
        """
        Creates an iterator of the retrieved message content.
        """
        pass

    @staticmethod
    def create(client: HTTPSocket, headers: Dict[str, str]):
        """
        Creates a Retriever instance depending on the given headers.

        The headers are looked up with the lower-case keys `transfer-encoding`, `content-encoding` and
        `content-length`.

        @param client: the socket to retrieve from
        @param headers: the message headers for choosing the retriever instance
        @return: ChunkedRetriever if the message uses chunked encoding, ContentLengthRetriever if the message
                 specifies a content-length, RawRetriever if none of the above is True.
        @raise UnsupportedEncoding: if the `transfer-encoding` is not supported or if the `content-encoding` is not
                                    supported.
        @raise ValueError: if the `content-length` value is not a valid integer.
        """

        # only chunked transfer-encoding is supported
        transfer_encoding = headers.get("transfer-encoding")
        # Transfer-coding names are case-insensitive, so normalise the value before comparing it.
        chunked = bool(transfer_encoding) and transfer_encoding.strip().lower() == "chunked"
        if transfer_encoding and not chunked:
            raise UnsupportedEncoding("transfer-encoding", transfer_encoding)

        # content-encoding is not supported
        content_encoding = headers.get("content-encoding")
        if content_encoding:
            raise UnsupportedEncoding("content-encoding", content_encoding)

        if chunked:
            return ChunkedRetriever(client)

        content_length = headers.get("content-length")
        if not content_length:
            # Neither framing mechanism is present: read until the peer stops sending.
            logging.warning("Transfer-encoding and content-length not specified, trying without")
            return RawRetriever(client)

        return ContentLengthRetriever(client, int(content_length))
|
|
|
|
|
|
class PreambleRetriever(Retriever):
    """
    Retriever instance for retrieving the start-line and headers of an HTTP message.

    Every line read by `retrieve` is also recorded in an internal buffer that can be taken over
    through the `buffer` property.
    """

    client: HTTPSocket
    # Lines read so far, in the order they were received.
    _buffer: list

    @property
    def buffer(self):
        """
        Hands over the buffered lines and clears the internal buffer.

        The returned list is the one that was used internally (not a copy); a fresh empty list
        takes its place.

        @return: A list of the buffered lines.
        """
        tmp_buffer = self._buffer
        self._buffer = []

        return tmp_buffer

    def __init__(self, client: HTTPSocket):
        """
        @param client: the socket to retrieve from
        """
        # The base class already stores the socket as self.client.
        super().__init__(client)
        self._buffer = []

    def retrieve(self):
        """
        Returns an iterator of the retrieved lines.

        Each line read from the socket is appended to the internal buffer before it is yielded.
        Iteration ends at the first line that is empty or consists only of a line terminator;
        that line is buffered but not yielded, and is carried as the generator's return value.

        @return: an iterator over the lines preceding the terminating blank line
        """
        while True:
            line = self.client.read_line()
            self._buffer.append(line)

            if line in ("\r\n", "\r", "\n", ""):
                return line

            yield line

    def reset_buffer(self, line):
        """
        Replaces the content of the internal buffer with the single given line.

        @param line: the line to keep as the only buffered entry
        """
        self._buffer.clear()
        self._buffer.append(line)
|
|
|
|
|
|
class ContentLengthRetriever(Retriever):
    """
    Retriever instance for retrieving a message body with a given content-length.
    """
    # Expected size of the message body in bytes.
    length: int

    def __init__(self, client: HTTPSocket, length: int):
        """
        @param client: the socket to retrieve from
        @param length: the expected length of the message body
        """
        super().__init__(client)
        self.length = length

    def retrieve(self):
        """
        Returns an iterator of the received message bytes.
        The size of each iteration is not necessarily constant.

        If a read returns no data before the expected length is reached, a warning is logged and the
        iteration simply ends.

        @raise IncompleteResponse: if the connection is closed or timed out before receiving the complete payload.
        """

        cur_payload_size = 0
        while cur_payload_size < self.length:
            # Never ask for more than one buffer's worth, nor for more than what is still missing.
            read_size = min(BUFSIZE, self.length - cur_payload_size)

            try:
                buffer = self.client.read(read_size)
            except TimeoutError as err:
                raise IncompleteResponse("Timed out before receiving complete payload") from err
            except ConnectionError as err:
                raise IncompleteResponse("Connection closed before receiving the complete payload") from err

            if len(buffer) == 0:
                # Best effort: report the short payload but do not fail the caller.
                logging.warning("Received payload length %s less than expected %s", cur_payload_size, self.length)
                break

            cur_payload_size += len(buffer)
            yield buffer
|
|
|
|
|
|
class RawRetriever(Retriever):
    """
    Retriever instance for retrieving a message body without any length specifier or encoding.
    This retriever will keep waiting until a timeout occurs, or the connection is disconnected.
    """

    def retrieve(self):
        """
        Returns an iterator of the received message content.

        Keeps reading from the socket and yielding whatever was read. The iteration ends, without an
        error, as soon as a read raises TimeoutError or ConnectionError.
        """
        while True:
            try:
                buffer = self.client.read()
            # A tuple is required here: `TimeoutError or ConnectionError` evaluates to TimeoutError only,
            # which would let ConnectionError escape.
            except (TimeoutError, ConnectionError):
                return b""

            yield buffer
|
|
|
|
|
|
class ChunkedRetriever(Retriever):
    """
    Retriever instance for retrieving a message body with chunked encoding.
    """

    def retrieve(self):
        """
        Returns an iterator of the received message bytes.
        The size of each iteration is not necessarily constant.

        Reads a chunk-size line, then the chunk data, then the line following the data, and repeats
        until a chunk size of zero is read.

        @raise IncompleteResponse: if the connection is closed or timed out before receiving the complete payload.
        @raise InvalidResponse: if the length of a chunk could not be determined.
        """
        try:
            while True:
                chunk_size = self.__get_chunk_size()
                logging.debug("chunk-size: %s", chunk_size)
                if chunk_size == 0:
                    # Last chunk: per the original author's note, this removes all trailing lines.
                    self.client.reset_request()
                    break

                # NOTE(review): assumes read() delivers the whole chunk in one call - confirm against HTTPSocket.
                buffer = self.client.read(chunk_size)
                yield buffer

                self.client.read_line()  # remove trailing CRLF

        except TimeoutError as err:
            raise IncompleteResponse("Timed out before receiving the complete payload!") from err
        except ConnectionError as err:
            raise IncompleteResponse("Connection closed before receiving the complete payload!") from err

    def __get_chunk_size(self):
        """
        Returns the next chunk size.

        The size is parsed as a hexadecimal number from the next line; anything after the first `;`
        on that line is ignored.

        @return: The chunk size in bytes
        @raise InvalidResponse: If an error occurred when parsing the chunk size, or if the size is negative.
        """
        line = self.client.read_line()
        # Keep only the part before the first ";" (the whole line when there is none).
        size_field = line.partition(";")[0]

        try:
            chunk_size = int(size_field, 16)
        except ValueError as err:
            raise InvalidResponse("Failed to parse chunk size") from err

        # int() accepts a leading sign, but a negative length is never a valid chunk size.
        if chunk_size < 0:
            raise InvalidResponse("Failed to parse chunk size")

        return chunk_size
|