From 9036755a624633a83ff56f69da4a79ec6cc56ba4 Mon Sep 17 00:00:00 2001 From: Arthur Bols Date: Sat, 27 Mar 2021 19:05:09 +0100 Subject: [PATCH] Fix some issues, improve documentation --- client/command.py | 8 +++-- client/response_handler.py | 21 +++---------- httplib/exceptions.py | 4 ++- httplib/retriever.py | 63 ++++++++++++++++++++++++++++++++------ server/command.py | 12 ++++---- server/requesthandler.py | 16 +++++----- server/serversocket.py | 2 +- server/worker.py | 11 ++++--- 8 files changed, 89 insertions(+), 48 deletions(-) diff --git a/client/command.py b/client/command.py index 9fec1b9..15ea135 100644 --- a/client/command.py +++ b/client/command.py @@ -41,11 +41,13 @@ class AbstractCommand(ABC): host: str path: str port: int + sub_request: bool def __init__(self, uri: str, port): self.uri = uri self.host, _, self.path = parser.parse_uri(uri) self.port = int(port) + self.sub_request = False @property @abstractmethod @@ -53,6 +55,7 @@ class AbstractCommand(ABC): pass def execute(self, sub_request=False): + self.sub_request = sub_request (host, path) = self.parse_uri() client = sockets.get(host) @@ -169,9 +172,10 @@ class GetCommand(AbstractCommand): (version, status, msg) = parser.parse_status_line(next(lines)) headers = parser.parse_headers(lines) - logging.debug("---response begin---\r\n%s---response end---", "".join(retriever.buffer)) + buffer = retriever.buffer + logging.debug("---response begin---\r\n%s---response end---", "".join(buffer)) - return Message(version, status, msg, headers, retriever.buffer) + return Message(version, status, msg, headers, buffer) def _await_response(self, client, retriever): msg = self._get_preamble(retriever) diff --git a/client/response_handler.py b/client/response_handler.py index ab5ae5f..a98292f 100644 --- a/client/response_handler.py +++ b/client/response_handler.py @@ -46,20 +46,6 @@ class ResponseHandler(ABC): def handle(self): pass - @staticmethod - def parse_uri(uri: str): - parsed = urlsplit(uri) - - # If there is no netloc, the url is invalid, so prepend `//` and try again - if parsed.netloc == "": - parsed = urlsplit("//" + uri) - - host = parsed.netloc - path = parsed.path - if len(path) == 0 or path[0] != '/': - path = "/" + path - return host, path - class BasicResponseHandler(ResponseHandler): """ @@ -98,11 +84,14 @@ class BasicResponseHandler(ResponseHandler): if 300 <= self.msg.status < 400: # Redirect return self._do_handle_redirect() - if 400 <= self.msg.status < 500: + if 400 <= self.msg.status < 600: # Dump headers and exit with error - print("".join(self.msg.raw), end="") + if not self.cmd.sub_request: + print("".join(self.msg.raw), end="") return None + return None + def _do_handle_redirect(self): self._skip_body() diff --git a/httplib/exceptions.py b/httplib/exceptions.py index 1257718..0e19e1f 100644 --- a/httplib/exceptions.py +++ b/httplib/exceptions.py @@ -34,8 +34,10 @@ class HTTPServerException(Exception): status_code: str message: str body: str + arg: str - def __init__(self, body=""): + def __init__(self, arg, body=""): + self.arg = arg self.body = body diff --git a/httplib/retriever.py b/httplib/retriever.py index fbd4328..f0c009f 100644 --- a/httplib/retriever.py +++ b/httplib/retriever.py @@ -7,6 +7,9 @@ from httplib.httpsocket import HTTPSocket, BUFSIZE class Retriever(ABC): + """ + This is a helper class for retrieving HTTP messages. + """ client: HTTPSocket def __init__(self, client: HTTPSocket): @@ -14,10 +17,23 @@ class Retriever(ABC): @abstractmethod def retrieve(self): + """ + Creates an iterator of the retrieved message content. + """ pass @staticmethod def create(client: HTTPSocket, headers: Dict[str, str]): + """ + Creates a Retriever instance depending on the give headers. + + @param client: the socket to retrieve from + @param headers: the message headers for choosing the retriever instance + @return: ChunkedRetriever if the message uses chunked encoding, ContentLengthRetriever if the message + specifies a content-length, RawRetriever if none of the above is True. + @raise UnsupportedEncoding: if the `transfer-encoding` is not supported or if the `content-encoding` is not + supported. + """ # only chunked transfer-encoding is supported transfer_encoding = headers.get("transfer-encoding") @@ -32,17 +48,20 @@ class Retriever(ABC): if chunked: return ChunkedRetriever(client) - else: - content_length = headers.get("content-length") - if not content_length: - logging.warning("Transfer-encoding and content-length not specified, trying without") - return RawRetriever(client) + content_length = headers.get("content-length") - return ContentLengthRetriever(client, int(content_length)) + if not content_length: + logging.warning("Transfer-encoding and content-length not specified, trying without") + return RawRetriever(client) + + return ContentLengthRetriever(client, int(content_length)) class PreambleRetriever(Retriever): + """ + Retriever instance for retrieving the start-line and headers of an HTTP message. + """ client: HTTPSocket _buffer: [] @@ -59,6 +78,10 @@ class PreambleRetriever(Retriever): self._buffer = [] def retrieve(self): + """ + Returns an iterator of the retrieved lines. + @return: + """ line = self.client.read_line() while True: @@ -76,6 +99,9 @@ class PreambleRetriever(Retriever): class ContentLengthRetriever(Retriever): + """ + Retriever instance for retrieving a message body with a given content-length. + """ length: int def __init__(self, client: HTTPSocket, length: int): @@ -83,6 +109,11 @@ class ContentLengthRetriever(Retriever): self.length = length def retrieve(self): + """ + Returns an iterator of the received message bytes. + The size of each iteration is not necessarily constant. + @raise IncompleteResponse: if the connection is closed or timed out before receiving the complete payload. + """ cur_payload_size = 0 read_size = BUFSIZE @@ -95,10 +126,10 @@ class ContentLengthRetriever(Retriever): try: buffer = self.client.read(remaining) except TimeoutError: - logging.error("Timed out before receiving complete payload") + logging.error("Timed out before receiving the complete payload") raise IncompleteResponse("Timed out before receiving complete payload") except ConnectionError: - logging.error("Timed out before receiving complete payload") + logging.error("Connection closed before receiving the complete payload") raise IncompleteResponse("Connection closed before receiving complete payload") if len(buffer) == 0: @@ -110,6 +141,10 @@ class ContentLengthRetriever(Retriever): class RawRetriever(Retriever): + """ + Retriever instance for retrieve a message body without any length specifier or encoding. + This retriever will keep waiting until a timeout occurs or the connection is disconnected. + """ def retrieve(self): while True: @@ -120,20 +155,28 @@ class RawRetriever(Retriever): class ChunkedRetriever(Retriever): + """ + Retriever instance for retrieving a message body with chunked encoding. + """ def retrieve(self): + """ + Returns an iterator of the received message bytes. + The size of each iteration is not necessarily constant. + @raise IncompleteResponse: if the connection is closed or timed out before receiving the complete payload. + """ while True: chunk_size = self.__get_chunk_size() logging.debug("chunk-size: %s", chunk_size) if chunk_size == 0: + # remove all trailing lines self.client.reset_request() break buffer = self.client.read(chunk_size) - logging.debug("chunk: %r", buffer) yield buffer - self.client.read_line() # remove CRLF + self.client.read_line() # remove trailing CRLF def __get_chunk_size(self): line = self.client.read_line() diff --git a/server/command.py b/server/command.py index 64089cf..dbd8dec 100644 --- a/server/command.py +++ b/server/command.py @@ -75,7 +75,7 @@ class AbstractCommand(ABC): self._process_conditional_headers() message = f"HTTP/1.1 {status} {status_message[status]}\r\n" - message += self._get_date() + "\r\n" + message += f"Date: {self._get_date()}\r\n" content_length = len(body) message += f"Content-Length: {content_length}\r\n" @@ -107,7 +107,7 @@ class AbstractCommand(ABC): path = root + norm_path if check and not os.path.exists(path): - raise NotFound() + raise NotFound(path) return path @@ -131,7 +131,7 @@ class AbstractCommand(ABC): return True if modified <= min_date: - raise NotModified() + raise NotModified(f"{modified} <= {min_date}") return True @@ -149,11 +149,11 @@ class AbstractModifyCommand(AbstractCommand, ABC): def execute(self): path = self._get_path(False) - dir = os.path.dirname(path) + directory = os.path.dirname(path) - if not os.path.exists(dir): + if not os.path.exists(directory): raise Forbidden("Target directory does not exists!") - if os.path.exists(dir) and not os.path.isdir(dir): + if os.path.exists(directory) and not os.path.isdir(directory): raise Forbidden("Target directory is an existing file!") exists = os.path.exists(path) diff --git a/server/requesthandler.py b/server/requesthandler.py index ef02965..b8805fd 100644 --- a/server/requesthandler.py +++ b/server/requesthandler.py @@ -57,7 +57,7 @@ class RequestHandler: retriever = Retriever.create(self.conn, headers) except UnsupportedEncoding as e: logging.error("Encoding not supported: %s=%s", e.enc_type, e.encoding) - raise NotImplemented() + raise NotImplemented(f"{e.enc_type}={e.encoding}") for buffer in retriever.retrieve(): body += buffer @@ -68,7 +68,7 @@ class RequestHandler: cmd = command.create(message) msg = cmd.execute() - logging.debug("---response begin---\r\n%s---response end---", msg) + logging.debug("---response begin---\r\n%s\r\n---response end---", msg.split(b"\r\n\r\n", 1)[0].decode(FORMAT)) self.conn.conn.sendall(msg) def _check_request_line(self, method: str, target: Union[ParseResultBytes, ParseResult], version): @@ -77,22 +77,22 @@ class RequestHandler: raise MethodNotAllowed(METHODS) if version not in ("1.0", "1.1"): - raise HTTPVersionNotSupported() + raise HTTPVersionNotSupported(version) # only origin-form and absolute-form are allowed if target.scheme not in ("", "http"): # Only http is supported... - raise BadRequest() + raise BadRequest(f"scheme={target.scheme}") if target.netloc != "" and target.netloc != self.conn.host and target.netloc != self.conn.host.split(":")[0]: - raise NotFound() + raise NotFound(str(target)) if target.path == "" or target.path[0] != "/": - raise NotFound() + raise NotFound(str(target)) def _validate_request(self, msg): if msg.version == "1.1" and "host" not in msg.headers: - raise BadRequest() + raise BadRequest("Missing host header") self._check_request_line(msg.method, msg.target, msg.version) @@ -119,5 +119,5 @@ class RequestHandler: message += "Content-Length: 0\r\n" message += "\r\n" - logging.debug("Sending: %r", message) + logging.debug("---response begin---\r\n%s---response end---", message) client.sendall(message.encode(FORMAT)) diff --git a/server/serversocket.py b/server/serversocket.py index adad5eb..896b41d 100644 --- a/server/serversocket.py +++ b/server/serversocket.py @@ -15,4 +15,4 @@ class ServerSocket(HTTPSocket): try: return super().read_line() except UnicodeDecodeError: - raise BadRequest() + raise BadRequest("UnicodeDecodeError") diff --git a/server/worker.py b/server/worker.py index 6b3eca5..b4a7399 100644 --- a/server/worker.py +++ b/server/worker.py @@ -67,17 +67,20 @@ class Worker: handler = RequestHandler(conn, self.host) handler.listen() except HTTPServerCloseException as e: - logging.debug("HTTP Exception:", exc_info=e) + logging.warning("[HTTP: %s] %s. Reason: %s", e.status_code, e.message, e.arg) RequestHandler.send_error(conn, e.status_code, e.message) break except HTTPServerException as e: - logging.debug("HTTP Exception:", exc_info=e) + logging.debug("[HTTP: %s] %s. Reason: %s", e.status_code, e.message, e.arg) RequestHandler.send_error(conn, e.status_code, e.message) except socket.timeout: - logging.debug("Socket for client %s timed out", addr) + logging.info("Socket for client %s timed out.", addr) + break + except ConnectionAbortedError: + logging.info("Socket for client %s disconnected.", addr) break except Exception as e: - logging.debug("Internal error", exc_info=e) + logging.error("Internal error", exc_info=e) RequestHandler.send_error(conn, InternalServerError.status_code, InternalServerError.message) break