Files
CN2021/httplib/retriever.py
2021-03-24 16:35:12 +01:00

139 lines
3.9 KiB
Python

import logging
from abc import ABC, abstractmethod
from typing import Dict
from httplib.exceptions import IncompleteResponse, InvalidResponse, UnsupportedEncoding
from httplib.httpsocket import HTTPSocket, BUFSIZE
class Retriever(ABC):
    """Base class for strategies that read an HTTP message body from a socket.

    Subclasses implement :meth:`retrieve`; :meth:`create` picks the subclass
    that matches the framing announced in the response headers.
    """

    client: HTTPSocket

    def __init__(self, client: HTTPSocket):
        """Remember the socket the body will be read from."""
        self.client = client

    @abstractmethod
    def retrieve(self):
        """Read the message body; must be implemented by subclasses."""
        pass

    @staticmethod
    def create(client: HTTPSocket, headers: Dict[str, str]):
        """Return the retriever matching the body framing described by *headers*.

        Args:
            client: socket the body will be read from.
            headers: response headers, looked up by lower-case name.

        Returns:
            A ``ChunkedRetriever`` when the transfer-encoding is chunked, a
            ``ContentLengthRetriever`` when a content-length is present, and
            a ``RawRetriever`` otherwise.

        Raises:
            UnsupportedEncoding: for any transfer-encoding other than chunked,
                or for any content-encoding.
            InvalidResponse: if content-length is not a non-negative integer.
        """
        # only chunked transfer-encoding is supported
        transfer_encoding = headers.get("transfer-encoding")
        # Transfer-coding names are case-insensitive, so "Chunked" must be
        # accepted as well.
        chunked = bool(transfer_encoding) and transfer_encoding.strip().lower() == "chunked"
        if transfer_encoding and not chunked:
            raise UnsupportedEncoding("transfer-encoding", transfer_encoding)

        # content-encoding is not supported
        content_encoding = headers.get("content-encoding")
        if content_encoding:
            raise UnsupportedEncoding("content-encoding", content_encoding)

        if chunked:
            return ChunkedRetriever(client)

        content_length = headers.get("content-length")
        if not content_length:
            logging.warning("Transfer-encoding and content-length not specified, trying without")
            return RawRetriever(client)

        # A malformed header is a protocol error, not a programming error:
        # report it as InvalidResponse instead of leaking a bare ValueError.
        try:
            length = int(content_length)
        except ValueError as err:
            logging.error("Invalid content-length: %r", content_length)
            raise InvalidResponse() from err
        if length < 0:
            logging.error("Invalid content-length: %r", content_length)
            raise InvalidResponse()
        return ContentLengthRetriever(client, length)
class PreambleRetriever(Retriever):
    """Retriever that yields lines from the client up to the first blank line.

    Every line read, including the terminating blank line, is also appended
    to ``buffer``.
    """

    client: HTTPSocket
    # Lines read so far, terminator included.
    buffer: list

    def __init__(self, client: HTTPSocket):
        """Store the client (via the base class) and start with an empty buffer."""
        super().__init__(client)
        self.buffer = []

    def retrieve(self):
        """Yield each line read from the client until a blank line is seen.

        A line equal to CRLF, LF or a single space ends the iteration; it is
        recorded in ``buffer`` but not yielded.
        """
        while True:
            line = self.client.read_line()
            self.buffer.append(line)
            if line in ("\r\n", "\n", " "):
                break
            yield line
class ContentLengthRetriever(Retriever):
    """Retriever for a body whose total size is known in advance."""

    # Expected total payload length.
    length: int

    def __init__(self, client: HTTPSocket, length: int):
        """Store the client and the expected payload length."""
        super().__init__(client)
        self.length = length

    def retrieve(self):
        """Yield the payload piece by piece until ``length`` has been received.

        Each read asks for at most ``BUFSIZE`` and never for more than what
        is still missing. If the client returns an empty buffer before the
        full length arrived, a warning is logged and iteration stops early.

        Raises:
            IncompleteResponse: if the read times out or the connection
                fails before the payload is complete.
        """
        cur_payload_size = 0
        while cur_payload_size < self.length:
            # Cap each request at one buffer's worth (the original computed
            # this value but then requested the whole remainder instead).
            read_size = min(BUFSIZE, self.length - cur_payload_size)
            try:
                buffer = self.client.read(read_size)
            except TimeoutError as err:
                logging.error("Timed out before receiving complete payload")
                raise IncompleteResponse("Timed out before receiving complete payload") from err
            except ConnectionError as err:
                logging.error("Connection closed before receiving complete payload")
                raise IncompleteResponse("Connection closed before receiving complete payload") from err

            if len(buffer) == 0:
                # Best effort: hand back what was received instead of failing.
                logging.warning("Received payload length %s less than expected %s", cur_payload_size, self.length)
                break

            cur_payload_size += len(buffer)
            yield buffer

        # Kept for backward compatibility; in a generator this only sets
        # StopIteration.value.
        return b""
class RawRetriever(Retriever):
    """Retriever for a body with no declared framing.

    Data is read from the client until it stops delivering any.
    """

    def retrieve(self):
        """Yield whatever the client returns until a read fails or comes back empty.

        A ``TimeoutError`` or ``ConnectionError`` raised by the read ends the
        iteration silently.
        """
        while True:
            try:
                chunk = self.client.read()
            # A tuple is required here: `TimeoutError or ConnectionError`
            # evaluates to just `TimeoutError`, leaving ConnectionError uncaught.
            except (TimeoutError, ConnectionError):
                return b""
            if not chunk:
                # Assumed end of stream: stop rather than spin forever
                # yielding empty chunks.
                return b""
            yield chunk
class ChunkedRetriever(Retriever):
    """Retriever for a body sent with chunked transfer-encoding."""

    def retrieve(self):
        """Yield the data of each chunk until the zero-size chunk is reached.

        On the zero-size chunk ``reset_request()`` is called on the client
        and iteration ends. A chunk may be yielded in several pieces when the
        client returns less than was requested.

        Raises:
            InvalidResponse: if a chunk-size line cannot be parsed.
            IncompleteResponse: if the client returns no data in the middle
                of a chunk.
        """
        while True:
            chunk_size = self.__get_chunk_size()
            logging.debug("chunk-size: %s", chunk_size)
            if chunk_size == 0:
                self.client.reset_request()
                break

            # Keep reading until the whole chunk has arrived; a short read
            # would otherwise leave chunk data in front of the next
            # chunk-size line and desynchronise parsing.
            remaining = chunk_size
            while remaining > 0:
                buffer = self.client.read(remaining)
                if not buffer:
                    logging.error("Connection closed before receiving complete chunk")
                    raise IncompleteResponse("Connection closed before receiving complete chunk")
                logging.debug("chunk: %r", buffer)
                remaining -= len(buffer)
                yield buffer

            self.client.read_line()  # remove CRLF

    def __get_chunk_size(self):
        """Read one chunk-size line and return the size it encodes.

        Anything from the first ``;`` onwards (chunk extensions) is ignored
        and the rest is parsed as a hexadecimal integer.

        Raises:
            InvalidResponse: if the size is not valid hexadecimal or is negative.
        """
        line = self.client.read_line()
        size_field = line.partition(";")[0]
        try:
            size = int(size_field, 16)
        except ValueError as err:
            raise InvalidResponse() from err
        if size < 0:
            # int() accepts a leading minus sign; a chunk size cannot be negative.
            raise InvalidResponse()
        return size