import logging
from abc import ABC, abstractmethod
from typing import Dict

from httplib.exceptions import IncompleteResponse, InvalidResponse, UnsupportedEncoding
from httplib.httpsocket import HTTPSocket, BUFSIZE


class Retriever(ABC):
    """
    This is a helper class for retrieving HTTP messages.
    """
    client: HTTPSocket

    def __init__(self, client: HTTPSocket):
        self.client = client

    @abstractmethod
    def retrieve(self):
        """
        Creates an iterator of the retrieved message content.
        """
        pass

    @staticmethod
    def create(client: HTTPSocket, headers: Dict[str, str]) -> "Retriever":
        """
        Creates a Retriever instance depending on the given headers.

        The headers are looked up by their lower-case names (`transfer-encoding`, `content-encoding`
        and `content-length`).

        @param client: the socket to retrieve from
        @param headers: the message headers for choosing the retriever instance
        @return: ChunkedRetriever if the message uses chunked encoding,
            ContentLengthRetriever if the message specifies a content-length,
            RawRetriever if none of the above is True.
        @raise UnsupportedEncoding: if the `transfer-encoding` is not supported or
            if the `content-encoding` is not supported.
        """
        # Only chunked transfer-encoding is supported. Transfer-coding names are
        # case-insensitive, so normalise the value before comparing it.
        transfer_encoding = headers.get("transfer-encoding")
        chunked = bool(transfer_encoding) and transfer_encoding.strip().lower() == "chunked"
        if transfer_encoding and not chunked:
            raise UnsupportedEncoding("transfer-encoding", transfer_encoding)

        # content-encoding is not supported
        content_encoding = headers.get("content-encoding")
        if content_encoding:
            raise UnsupportedEncoding("content-encoding", content_encoding)

        if chunked:
            return ChunkedRetriever(client)

        content_length = headers.get("content-length")
        if not content_length:
            logging.warning("Transfer-encoding and content-length not specified, trying without")
            return RawRetriever(client)

        # NOTE: a non-numeric content-length makes int() raise ValueError.
        return ContentLengthRetriever(client, int(content_length))


class PreambleRetriever(Retriever):
    """
    Retriever instance for retrieving the start-line and headers of an HTTP message.
    """
    client: HTTPSocket
    # Lines read since the buffer was last taken (see `buffer`) or reset.
    _buffer: list

    @property
    def buffer(self):
        """
        Returns the lines buffered so far and replaces the internal buffer with a new, empty one.
        """
        taken, self._buffer = self._buffer, []
        return taken

    def __init__(self, client: HTTPSocket):
        # The base class already stores the client.
        super().__init__(client)
        self._buffer = []

    def retrieve(self):
        """
        Returns an iterator of the retrieved lines.

        Every line that is read, including the terminating one, is appended to the internal buffer.
        Iteration stops at the first line that is `"\\r\\n"`, `"\\n"` or empty; that line is not yielded
        but is the return value of the generator.

        @return: iterator of the lines preceding the terminating line
        """
        while True:
            line = self.client.read_line()
            self._buffer.append(line)

            if line in ("\r\n", "\n", ""):
                return line

            yield line

    def reset_buffer(self, line):
        """
        Empties the internal buffer in place and stores the given line as its only element.

        @param line: the line to keep in the buffer
        """
        self._buffer.clear()
        self._buffer.append(line)


class ContentLengthRetriever(Retriever):
    """
    Retriever instance for retrieving a message body with a given content-length.
    """
    length: int

    def __init__(self, client: HTTPSocket, length: int):
        super().__init__(client)
        self.length = length

    def retrieve(self):
        """
        Returns an iterator of the received message bytes.
        The size of each iteration is not necessarily constant.

        If the socket returns an empty buffer before `length` bytes were received, a warning is
        logged and the iteration ends early.

        @raise IncompleteResponse: if the connection is closed or timed out before receiving
            the complete payload.
        """
        received = 0

        while received < self.length:
            # Never request more than BUFSIZE at once, nor more than what is still expected.
            read_size = min(BUFSIZE, self.length - received)

            try:
                buffer = self.client.read(read_size)
            except TimeoutError as err:
                raise IncompleteResponse("Timed out before receiving complete payload") from err
            except ConnectionError as err:
                raise IncompleteResponse("Connection closed before receiving the complete payload") from err

            if not buffer:
                logging.warning("Received payload length %s less than expected %s", received, self.length)
                break

            received += len(buffer)
            yield buffer


class RawRetriever(Retriever):
    """
    Retriever instance for retrieving a message body without any length specifier or encoding.
    This retriever will keep waiting until a timeout occurs or the connection is disconnected.
    """

    def retrieve(self):
        """
        Returns an iterator of whatever the socket delivers.

        The iteration ends, without raising, when reading times out or the connection fails.
        """
        while True:
            try:
                yield self.client.read()
            except (TimeoutError, ConnectionError):
                # A tuple is required here: `TimeoutError or ConnectionError` evaluates to
                # `TimeoutError` only, which would let connection errors escape.
                return b""


class ChunkedRetriever(Retriever):
    """
    Retriever instance for retrieving a message body with chunked encoding.
    """

    def retrieve(self):
        """
        Returns an iterator of the received message bytes.
        The size of each iteration is not necessarily constant.

        @raise IncompleteResponse: if the connection is closed or timed out before receiving
            the complete payload.
        @raise InvalidResponse: if the length of a chunk could not be determined.
        """
        try:
            while True:
                chunk_size = self.__get_chunk_size()
                logging.debug("chunk-size: %s", chunk_size)

                if chunk_size == 0:
                    # Last chunk: let the socket discard what trails the body
                    # (presumably the trailer lines - confirm against HTTPSocket.reset_request).
                    self.client.reset_request()
                    break

                buffer = self.client.read(chunk_size)
                yield buffer

                self.client.read_line()  # remove trailing CRLF
        except TimeoutError as err:
            raise IncompleteResponse("Timed out before receiving the complete payload!") from err
        except ConnectionError as err:
            raise IncompleteResponse("Connection closed before receiving the complete payload!") from err

    def __get_chunk_size(self):
        """
        Reads the next chunk-size line and parses it as a hexadecimal number.
        Anything from the first `;` on (chunk extensions) is ignored.

        @return: the size of the next chunk
        @raise InvalidResponse: if the line is not a valid, non-negative hexadecimal number.
        """
        size_field = self.client.read_line().partition(";")[0]

        try:
            chunk_size = int(size_field, 16)
        except ValueError as err:
            raise InvalidResponse() from err

        # int() accepts a sign, but a negative chunk length is meaningless.
        if chunk_size < 0:
            raise InvalidResponse()

        return chunk_size