import logging
from abc import ABC, abstractmethod
from typing import Dict, List

from httplib.exceptions import IncompleteResponse, InvalidResponse, UnsupportedEncoding
from httplib.httpsocket import HTTPSocket, BUFSIZE


class Retriever(ABC):
    """Base class for strategies that read data from an ``HTTPSocket``.

    Subclasses implement :meth:`retrieve` as a generator; :meth:`create`
    picks the subclass that matches a set of message headers.
    """

    client: HTTPSocket

    def __init__(self, client: HTTPSocket):
        self.client = client

    @abstractmethod
    def retrieve(self):
        """Yield the retrieved data piece by piece (subclass-specific)."""
        pass

    @staticmethod
    def create(client: HTTPSocket, headers: Dict[str, str]):
        """Return the retriever matching the given message headers.

        Headers are looked up under the lower-case names
        ``transfer-encoding``, ``content-encoding`` and ``content-length``.

        :param client: socket wrapper the retriever will read from.
        :param headers: header name to header value mapping.
        :return: a ``ChunkedRetriever`` when the transfer-encoding is
            chunked, a ``ContentLengthRetriever`` when a content-length is
            present, otherwise a ``RawRetriever``.
        :raises UnsupportedEncoding: for any transfer-encoding other than
            chunked, or for any content-encoding at all.
        :raises ValueError: if the content-length is not a decimal integer.
        """
        # only chunked transfer-encoding is supported
        transfer_encoding = headers.get("transfer-encoding")
        chunked = bool(transfer_encoding)
        # Transfer-coding names are case-insensitive, so "Chunked" must be
        # accepted as well.
        if chunked and transfer_encoding.strip().lower() != "chunked":
            raise UnsupportedEncoding("transfer-encoding", transfer_encoding)

        # content-encoding is not supported
        content_encoding = headers.get("content-encoding")
        if content_encoding:
            raise UnsupportedEncoding("content-encoding", content_encoding)

        if chunked:
            return ChunkedRetriever(client)

        content_length = headers.get("content-length")
        if not content_length:
            logging.warning("Transfer-encoding and content-length not specified, trying without")
            return RawRetriever(client)

        return ContentLengthRetriever(client, int(content_length))


class PreambleRetriever(Retriever):
    """Reads lines up to and including the first blank line.

    Every line read is also recorded in an internal buffer that can be
    collected through :attr:`buffer`.
    """

    client: HTTPSocket
    _buffer: List[str]

    @property
    def buffer(self):
        """Return the recorded lines and start a fresh, empty buffer."""
        tmp_buffer = self._buffer
        self._buffer = []
        return tmp_buffer

    def __init__(self, client: HTTPSocket):
        # The base initializer already stores the client.
        super().__init__(client)
        self._buffer = []

    def retrieve(self):
        """Yield lines from the client until a blank line is read.

        The terminating line (``"\\r\\n"``, ``"\\n"`` or ``""``) is recorded
        in the buffer but not yielded; it becomes the generator's return
        value instead.
        """
        line = self.client.read_line()
        while True:
            self._buffer.append(line)

            if line in ("\r\n", "\n", ""):
                return line

            yield line
            line = self.client.read_line()

    def reset_buffer(self, line):
        """Discard the recorded lines and restart the buffer with ``line``."""
        self._buffer.clear()
        self._buffer.append(line)


class ContentLengthRetriever(Retriever):
    """Reads a payload whose size is known in advance."""

    length: int

    def __init__(self, client: HTTPSocket, length: int):
        super().__init__(client)
        self.length = length

    def retrieve(self):
        """Yield payload pieces until ``self.length`` bytes were received.

        Each read asks for at most ``BUFSIZE`` bytes and never more than
        what is still missing. An empty read ends the generator early after
        logging a warning.

        :raises IncompleteResponse: if the client raises ``TimeoutError`` or
            ``ConnectionError`` before the payload is complete.
        """
        received = 0

        while received < self.length:
            remaining = self.length - received
            # Bound every read by BUFSIZE; the original computed this value
            # but then requested all remaining bytes at once.
            read_size = min(BUFSIZE, remaining)

            try:
                buffer = self.client.read(read_size)
            except TimeoutError as err:
                logging.error("Timed out before receiving complete payload")
                raise IncompleteResponse("Timed out before receiving complete payload") from err
            except ConnectionError as err:
                logging.error("Connection closed before receiving complete payload")
                raise IncompleteResponse("Connection closed before receiving complete payload") from err

            if len(buffer) == 0:
                logging.warning("Received payload length %s less than expected %s", received, self.length)
                break

            received += len(buffer)
            yield buffer


class RawRetriever(Retriever):
    """Reads whatever the client delivers, without a known payload size."""

    def retrieve(self):
        """Yield data from the client until it stops delivering.

        The generator ends when the client raises ``TimeoutError`` or
        ``ConnectionError``, or when a read returns no data.
        """
        while True:
            try:
                buffer = self.client.read()
            except (TimeoutError, ConnectionError):
                # ``except A or B`` only catches A; a tuple catches both.
                return b""

            if not buffer:
                # An empty read would otherwise be yielded forever; treat it
                # as the end of the data, like ContentLengthRetriever does.
                return b""

            yield buffer


class ChunkedRetriever(Retriever):
    """Reads a payload sent with chunked transfer-encoding."""

    def retrieve(self):
        """Yield the data of each chunk until a zero-sized chunk is read.

        On the zero-sized chunk ``client.reset_request()`` is called and the
        generator ends.

        :raises InvalidResponse: if a chunk-size line cannot be parsed.
        """
        while True:
            chunk_size = self.__get_chunk_size()
            logging.debug("chunk-size: %s", chunk_size)
            if chunk_size == 0:
                self.client.reset_request()
                break

            buffer = self.client.read(chunk_size)
            logging.debug("chunk: %r", buffer)
            yield buffer

            self.client.read_line()  # remove CRLF

    def __get_chunk_size(self):
        """Read one chunk-size line and return the size as an integer.

        Anything from the first ``;`` onwards is ignored; the rest is parsed
        as a hexadecimal number.

        :raises InvalidResponse: if the size is not valid hexadecimal or is
            negative.
        """
        line = self.client.read_line()
        size_field = line.partition(";")[0]

        try:
            chunk_size = int(size_field, 16)
        except ValueError as err:
            raise InvalidResponse() from err

        if chunk_size < 0:
            # int() accepts a leading minus sign; a negative size is invalid
            # and must not be passed on to read().
            raise InvalidResponse()

        return chunk_size