Files
CN2021/httplib/retriever.py

191 lines
5.8 KiB
Python

import logging
from abc import ABC, abstractmethod
from typing import Dict
from httplib.exceptions import IncompleteResponse, InvalidResponse, UnsupportedEncoding
from httplib.httpsocket import HTTPSocket, BUFSIZE
class Retriever(ABC):
    """
    This is a helper class for retrieving HTTP messages.

    Concrete subclasses implement `retrieve`, which reads (part of) a message from the wrapped socket.
    Use `Retriever.create` to pick the body retriever that matches a set of message headers.
    """
    client: HTTPSocket

    def __init__(self, client: HTTPSocket):
        """
        @param client: the socket to retrieve the message from
        """
        self.client = client

    @abstractmethod
    def retrieve(self):
        """
        Creates an iterator of the retrieved message content.
        """
        pass

    @staticmethod
    def create(client: HTTPSocket, headers: Dict[str, str]):
        """
        Creates a Retriever instance depending on the given headers.

        The headers are looked up with lower-case keys (`transfer-encoding`, `content-encoding`,
        `content-length`).

        @param client: the socket to retrieve from
        @param headers: the message headers for choosing the retriever instance
        @return: ChunkedRetriever if the message uses chunked encoding, ContentLengthRetriever if the message
                 specifies a content-length, RawRetriever if none of the above is True.
        @raise UnsupportedEncoding: if the `transfer-encoding` is not supported or if the `content-encoding` is not
                                    supported.
        @raise ValueError: if the `content-length` value is not a valid integer.
        """
        # Only chunked transfer-encoding is supported. Transfer-coding names are case-insensitive,
        # so normalise the value before comparing; the original value is reported on failure.
        transfer_encoding = headers.get("transfer-encoding")
        chunked = False
        if transfer_encoding:
            if transfer_encoding.strip().lower() != "chunked":
                raise UnsupportedEncoding("transfer-encoding", transfer_encoding)
            chunked = True

        # content-encoding is not supported
        content_encoding = headers.get("content-encoding")
        if content_encoding:
            raise UnsupportedEncoding("content-encoding", content_encoding)

        if chunked:
            return ChunkedRetriever(client)

        content_length = headers.get("content-length")
        if not content_length:
            # a missing or empty content-length leaves no way to know where the body ends
            logging.warning("Transfer-encoding and content-length not specified, trying without")
            return RawRetriever(client)

        return ContentLengthRetriever(client, int(content_length))
class PreambleRetriever(Retriever):
    """
    Retriever instance for retrieving the start-line and headers of an HTTP message.

    Every line read by `retrieve` is also recorded in an internal buffer, which can be collected
    through the `buffer` property.
    """
    client: HTTPSocket
    # lines read so far, including the terminating blank line
    _buffer: list

    @property
    def buffer(self):
        """
        Returns the lines recorded so far and starts a new, empty buffer.

        Note that reading this property has a side effect: a second read returns only the lines
        recorded after the first read.
        """
        tmp_buffer = self._buffer
        self._buffer = []
        return tmp_buffer

    def __init__(self, client: HTTPSocket):
        """
        @param client: the socket to retrieve the preamble from
        """
        # the base class stores the client
        super().__init__(client)
        self._buffer = []

    def retrieve(self):
        """
        Returns an iterator of the retrieved lines.

        Iteration stops at the first line that is "\\r\\n", "\\n" or empty. That terminating line is
        recorded in the buffer but is not yielded; it becomes the generator's return value.
        @return: an iterator of the lines preceding the terminating line
        """
        while True:
            line = self.client.read_line()
            self._buffer.append(line)

            if line in ("\r\n", "\n", ""):
                return line

            yield line

    def reset_buffer(self, line):
        """
        Discards all recorded lines and records the given line as the only entry.

        @param line: the line the buffer should contain afterwards
        """
        self._buffer.clear()
        self._buffer.append(line)
class ContentLengthRetriever(Retriever):
    """
    Retriever instance for retrieving a message body with a given content-length.
    """
    length: int

    def __init__(self, client: HTTPSocket, length: int):
        """
        @param client: the socket to retrieve the body from
        @param length: the expected size of the body in bytes
        """
        super().__init__(client)
        self.length = length

    def retrieve(self):
        """
        Returns an iterator of the received message bytes.
        The size of each iteration is not necessarily constant, but never exceeds BUFSIZE.

        If a read returns no data before the full length was received, a warning is logged and the
        iteration simply ends; no exception is raised in that case.

        @raise IncompleteResponse: if the read times out or the connection fails before receiving the
                                   complete payload.
        """
        cur_payload_size = 0

        while cur_payload_size < self.length:
            # request at most one buffer's worth, and never more than what is still outstanding
            read_size = min(BUFSIZE, self.length - cur_payload_size)

            try:
                buffer = self.client.read(read_size)
            except TimeoutError as err:
                logging.error("Timed out before receiving the complete payload")
                raise IncompleteResponse("Timed out before receiving complete payload") from err
            except ConnectionError as err:
                logging.error("Connection closed before receiving the complete payload")
                raise IncompleteResponse("Connection closed before receiving complete payload") from err

            if len(buffer) == 0:
                # nothing more to read: stop instead of looping forever on empty reads
                logging.warning("Received payload length %s less than expected %s", cur_payload_size, self.length)
                break

            cur_payload_size += len(buffer)
            yield buffer
class RawRetriever(Retriever):
    """
    Retriever instance for retrieving a message body without any length specifier or encoding.
    This retriever will keep waiting until a timeout occurs or the connection is disconnected.
    """

    def retrieve(self):
        """
        Returns an iterator of the received message bytes.

        Iteration ends quietly when reading raises TimeoutError or ConnectionError; neither is
        propagated to the caller.
        """
        while True:
            try:
                buffer = self.client.read()
            except (TimeoutError, ConnectionError):
                # A tuple is required here: `TimeoutError or ConnectionError` evaluates to just
                # `TimeoutError`, which would let connection errors escape.
                # The value only ends up as the generator's return value; plain iteration ignores it.
                return b""

            # NOTE(review): if `read()` can return empty bytes instead of raising on a closed
            # connection, this loop never ends — confirm against HTTPSocket.read.
            yield buffer
class ChunkedRetriever(Retriever):
    """
    Retriever instance for retrieving a message body with chunked encoding.
    """

    def retrieve(self):
        """
        Returns an iterator of the received message bytes, one iteration per chunk read.
        The size of each iteration is not necessarily constant.

        Errors raised by the socket while reading are not handled here and propagate to the caller.

        @raise InvalidResponse: if a chunk-size line is not a valid, non-negative hexadecimal number.
        """
        while True:
            chunk_size = self.__get_chunk_size()
            logging.debug("chunk-size: %s", chunk_size)

            if chunk_size == 0:
                # A zero-sized chunk marks the end of the body.
                # remove all trailing lines (presumably what reset_request does — confirm against HTTPSocket)
                self.client.reset_request()
                break

            buffer = self.client.read(chunk_size)
            yield buffer

            self.client.read_line()  # remove trailing CRLF

    def __get_chunk_size(self):
        """
        Reads the next chunk-size line and parses it.

        Anything after the first ";" (chunk extensions) is ignored.

        @return: the size of the next chunk, always >= 0
        @raise InvalidResponse: if the size is not a valid hexadecimal number or is negative.
        """
        line = self.client.read_line()
        size_field = line.partition(";")[0]

        try:
            chunk_size = int(size_field, 16)
        except ValueError as err:
            raise InvalidResponse() from err

        # `int` happily parses a leading minus sign; a negative size must never reach `read`
        if chunk_size < 0:
            raise InvalidResponse()

        return chunk_size