Parse html with regex, fix small issues

This commit is contained in:
2021-03-27 23:41:28 +01:00
parent bbca6f603b
commit 4473d1bec9
7 changed files with 134 additions and 80 deletions

View File

@@ -3,39 +3,40 @@ from abc import ABC, abstractmethod
from typing import Dict, Tuple
from urllib.parse import urlparse
from client.httpclient import FORMAT, HTTPClient
from client.httpclient import HTTPClient
from httplib import parser
from httplib.exceptions import InvalidResponse, InvalidStatusLine, UnsupportedEncoding
from httplib.httpsocket import FORMAT
from httplib.message import ClientMessage as Message
from httplib.retriever import PreambleRetriever
sockets: Dict[str, HTTPClient] = {}
def create(command: str, url: str, port):
def create(method: str, url: str, port):
"""
Create a corresponding Command instance of the specified HTTP `command` with the specified `url` and `port`.
@param command: The command type to create
Create a corresponding Command instance of the specified HTTP `method` with the specified `url` and `port`.
@param method: The command type to create
@param url: The url for the command
@param port: The port for the command
"""
uri = parser.get_uri(url)
if command == "GET":
if method == "GET":
return GetCommand(uri, port)
elif command == "HEAD":
elif method == "HEAD":
return HeadCommand(uri, port)
elif command == "POST":
elif method == "POST":
return PostCommand(uri, port)
elif command == "PUT":
elif method == "PUT":
return PutCommand(uri, port)
else:
raise ValueError()
raise ValueError("Unknown HTTP method")
class AbstractCommand(ABC):
"""
A class representing the command for sending an HTTP command.
A class representing the command for sending an HTTP request.
"""
uri: str
host: str
@@ -51,10 +52,15 @@ class AbstractCommand(ABC):
@property
@abstractmethod
def command(self):
def method(self):
pass
def execute(self, sub_request=False):
"""
Creates and sends the HTTP message for this Command.
@param sub_request: Whether this execution is performed as part of a prior command.
"""
self.sub_request = sub_request
(host, path) = self.parse_uri()
@@ -69,9 +75,10 @@ class AbstractCommand(ABC):
client.conn.connect((host, self.port))
sockets[host] = client
message = f"{self.command} {path} HTTP/1.1\r\n"
message = f"{self.method} {path} HTTP/1.1\r\n"
message += f"Host: {host}:{self.port}\r\n"
message += "Accept: */*\r\nAccept-Encoding: identity\r\n"
message += "Accept: */*\r\n"
message += "Accept-Encoding: identity\r\n"
encoded_msg = self._build_message(message)
logging.debug("---request begin---\r\n%s---request end---", encoded_msg.decode(FORMAT))
@@ -81,8 +88,7 @@ class AbstractCommand(ABC):
logging.info("HTTP request sent, awaiting response...")
try:
retriever = PreambleRetriever(client)
self._await_response(client, retriever)
self._await_response(client)
except InvalidResponse as e:
logging.debug("Internal error: Response could not be parsed", exc_info=e)
return
@@ -95,7 +101,12 @@ class AbstractCommand(ABC):
if not sub_request:
client.close()
def _await_response(self, client, retriever):
def _await_response(self, client):
"""
Simple response method.
Receives the response and prints it to stdout.
"""
while True:
line = client.read_line()
print(line, end="")
@@ -106,11 +117,15 @@ class AbstractCommand(ABC):
return (message + "\r\n").encode(FORMAT)
def parse_uri(self):
"""
Parses the URI and returns the hostname and path.
@return: A tuple of the hostname and path.
"""
parsed = urlparse(self.uri)
# If there is no netloc, the url was given without a scheme or `//` prefix, so prepend one and try again
if parsed.netloc == "":
parsed = urlparse("//" + self.uri)
parsed = urlparse("http://" + self.uri)
host = parsed.netloc
path = parsed.path
@@ -126,11 +141,11 @@ class AbstractCommand(ABC):
class AbstractWithBodyCommand(AbstractCommand, ABC):
"""
The building block for creating an HTTP message for an HTTP command with a body.
The building block for creating an HTTP message for an HTTP method with a body (POST and PUT).
"""
def _build_message(self, message: str) -> bytes:
body = input(f"Enter {self.command} data: ").encode(FORMAT)
body = input(f"Enter {self.method} data: ").encode(FORMAT)
print()
message += "Content-Type: text/plain\r\n"
@@ -145,29 +160,36 @@ class AbstractWithBodyCommand(AbstractCommand, ABC):
class HeadCommand(AbstractCommand):
"""
A Command for sending a `HEAD` message.
A Command for sending a `HEAD` request.
"""
@property
def command(self):
def method(self):
return "HEAD"
class GetCommand(AbstractCommand):
"""
A Command for sending a `GET` message.
A Command for sending a `GET` request.
"""
dir: str
def __init__(self, uri: str, port, dir=None):
def __init__(self, uri: str, port, directory=None):
super().__init__(uri, port)
self.dir = dir
self.dir = directory
self.filename = None
@property
def command(self):
def method(self):
return "GET"
def _get_preamble(self, retriever):
def _get_preamble(self, client):
"""
Returns the preamble (start-line and headers) of the response of this command.
@param client: the client object to retrieve from
@return: A Message object containing the HTTP-version, status code, status message, headers and buffer
"""
retriever = PreambleRetriever(client)
lines = retriever.retrieve()
(version, status, msg) = parser.parse_status_line(next(lines))
headers = parser.parse_headers(lines)
@@ -177,8 +199,11 @@ class GetCommand(AbstractCommand):
return Message(version, status, msg, headers, buffer)
def _await_response(self, client, retriever):
msg = self._get_preamble(retriever)
def _await_response(self, client):
"""
Handles the response of this command.
"""
msg = self._get_preamble(client)
from client import response_handler
self.filename = response_handler.handle(client, msg, self, self.dir)
@@ -186,19 +211,19 @@ class GetCommand(AbstractCommand):
class PostCommand(AbstractWithBodyCommand):
"""
A command for sending a `POST` command.
A command for sending a `POST` request.
"""
@property
def command(self):
def method(self):
return "POST"
class PutCommand(AbstractWithBodyCommand):
"""
A command for sending a `PUT` command.
A command for sending a `PUT` request.
"""
@property
def command(self):
def method(self):
return "PUT"