Improve documentation

This commit is contained in:
2021-03-28 18:54:52 +02:00
parent c748387b48
commit b7315c2348
11 changed files with 79 additions and 38 deletions

View File

@@ -1,10 +1,7 @@
import logging
import socket
from io import BufferedReader
from typing import Tuple
from httplib.exceptions import BadRequest
BUFSIZE = 4096
TIMEOUT = 3
FORMAT = "UTF-8"
@@ -12,13 +9,20 @@ MAXLINE = 4096
class HTTPSocket:
host: str
"""
Wrapper class for a socket. Represents an HTTP connection.
This class adds helper methods to read the underlying socket as a file.
"""
conn: socket.socket
file: Tuple[BufferedReader, None]
file: BufferedReader
def __init__(self, conn: socket.socket, host: str):
def __init__(self, conn: socket.socket):
"""
Initialize an HTTPSocket with the given socket.
@param conn: the socket object
"""
self.host = host
self.conn = conn
self.conn.settimeout(TIMEOUT)
self.conn.setblocking(True)

View File

@@ -108,7 +108,7 @@ def parse_headers(lines):
break
while True:
if line in ("\r\n", "\n", ""):
if line in ("\r\n", "\r", "\n", ""):
break
if line[0].isspace():
@@ -189,14 +189,14 @@ def get_uri(url: str):
def urljoin(base, url):
"""
Join a base url and a URL to form an absolute url.
Join a base URL and a URL to form an absolute URL.
"""
return urllib.parse.urljoin(base, url)
def get_charset(headers: Dict[str, str]):
"""
Returns the charset of the content from the headers if found. Otherwise returns `FORMAT`
Returns the charset of the content from the headers if found. Otherwise, returns `FORMAT`
@param headers: the headers to retrieve the charset from
@return: A charset

View File

@@ -62,13 +62,20 @@ class PreambleRetriever(Retriever):
"""
Retriever instance for retrieving the start-line and headers of an HTTP message.
"""
client: HTTPSocket
_buffer: []
@property
def buffer(self):
"""
Returns a copy of the internal buffer.
Clears the internal buffer afterwards.
@return: A list of the buffered lines.
"""
tmp_buffer = self._buffer
self._buffer = []
self._buffer.clear()
return tmp_buffer
@@ -87,7 +94,7 @@ class PreambleRetriever(Retriever):
while True:
self._buffer.append(line)
if line in ("\r\n", "\n", ""):
if line in ("\r\n", "\r", "\n", ""):
return line
yield line
@@ -140,8 +147,8 @@ class ContentLengthRetriever(Retriever):
class RawRetriever(Retriever):
"""
Retriever instance for retrieve a message body without any length specifier or encoding.
This retriever will keep waiting until a timeout occurs or the connection is disconnected.
Retriever instance for retrieving a message body without any length specifier or encoding.
This retriever will keep waiting until a timeout occurs or the connection is closed.
"""
def retrieve(self):
@@ -161,6 +168,7 @@ class ChunkedRetriever(Retriever):
"""
Returns an iterator of the received message bytes.
The size of each iteration is not necessarily constant.
@raise IncompleteResponse: if the connection is closed or timed out before receiving the complete payload.
@raise InvalidResponse: if the length of a chunk could not be determined.
"""
@@ -184,6 +192,12 @@ class ChunkedRetriever(Retriever):
raise IncompleteResponse("Connection closed before receiving the complete payload!")
def __get_chunk_size(self):
"""
Returns the next chunk size.
@return: The chunk size in bytes
@raise InvalidResponse: If an error occurred when parsing the chunk size.
"""
line = self.client.read_line()
sep_pos = line.find(";")
if sep_pos >= 0:
@@ -192,4 +206,4 @@ class ChunkedRetriever(Retriever):
try:
return int(line, 16)
except ValueError:
raise InvalidResponse()
raise InvalidResponse("Failed to parse chunk size")