diff --git a/client.py b/client.py index 95f59f8..a9fc350 100644 --- a/client.py +++ b/client.py @@ -3,11 +3,20 @@ import argparse import logging import sys import socket +import re FORMAT = 'utf-8' BUFSIZE = 4096 +def receive_bytes_chunk(client: socket.socket): + buffering = True + buffer = b'' + while buffering: + received = client.recv(BUFSIZE) + received_size = len(received) + logging.debug("Received size: %s", received_size) + logging.debug("Received: %r", received) def receive_bytes(client: socket.socket): buffering = True buffer = b'' @@ -21,23 +30,83 @@ def receive_bytes(client: socket.socket): buffering = False buffer += received - buffer_split = buffer.split(b"\r\n\r\n") + lf_pos = buffer.find(b"\n\n") + crlf_pos = buffer.find(b"\r\n\r\n") + if lf_pos != -1 and lf_pos < crlf_pos: + buffer_split = buffer.split(b"\n\n") + else: + buffer_split = buffer.split(b"\r\n\r\n") buffer = buffer_split[-1] for part in buffer_split[:-1]: yield part + b"\r\n\r\n" + if buffer: + buffering = True + if buffer: yield buffer + +def parse_header(data: bytes): + headers = {} + + # decode bytes, split into lines and filter + header_split = list( + filter(lambda l: l is not "" and not l[0].isspace(), map(str.strip, data.decode("utf-8").split("\n")))) + + if len(header_split) == 0: + raise Exception("No start-line") + start_line = header_split.pop(0) + logging.debug("start-line: %r", start_line) + + for line in header_split: + pos = line.find(":") + + if pos <= 0 or pos >= len(line) - 1: + continue + + (header, value) = map(str.strip, line.split(":", 1)) + headers[header.upper()] = value + + logging.debug("Parsed headers: %r", headers) + + return start_line, headers + + +def validate_status_line(status_line: str): + split = list(filter(None, status_line.split(" "))) + if len(split) < 3: + return False + + # Check HTTP version + http_version = split.pop(0) + if len(http_version) < 8 or http_version[4] != "/": + return False + (name, version) = http_version[:4], http_version[5:] + if name != "HTTP" or not re.match(r"1\.[0|1]", version): + return False + + if not re.match(r"\d{3}", split[0]): + return False + + return True + + def http_parser(client: socket.socket): headers = {} start_line = "" - counter = 0 - for received in receive_bytes(client): - if counter == 0: + receiver = receive_bytes(client) + (status_line, headers) = parse_header(next(receiver)) + if not validate_status_line(status_line): + raise Exception("Invalid header") + + logging.debug("valid status-line: %r", status_line) + + for chunk in receiver: + logging.debug("chunk: %r", chunk) @@ -56,42 +125,43 @@ def main(): client = socket.socket(socket.AF_INET, socket.SOCK_STREAM) client.connect((arguments.URI, arguments.port)) - message = "GET /httpgallery/chunked/chunkedimage.aspx HTTP/1.1\r\nHost: www.httpwatch.com:80\r\n\r\n".encode(FORMAT) + message = "GET / HTTP/1.1\r\nHost: www.google.com:80\r\n\r\n".encode(FORMAT) client.sendall(message) - tmp = b'' - keep = False - count = 0 - for line in receive_lines(client): - - if count > 0: - tmp += line.rstrip(b"\r\n") - if keep: - count += 1 - - if line == b'\r\n': - keep = True - - logging.debug('end of part 1') - - logging.debug("attempt 2") - while True: - logging.debug("attempt") - keep = False - for line in receive_lines(client): - if line == b"0\r\n": - break - if keep: - tmp += line.rstrip(b"\r\n") - keep = True - - if b"0\r\n" == line: - break - logging.debug("content: %s", tmp) - # logging.debug("content: %r", tmp.replace(b"\r\n", b"").decode("utf-8")) - - f = open("test.jpeg", "wb") - f.write(tmp) + http_parser(client) + # tmp = b'' + # keep = False + # count = 0 + # for line in receive_bytes(client): + # + # if count > 0: + # tmp += line.rstrip(b"\r\n") + # if keep: + # count += 1 + # + # if line == b'\r\n': + # keep = True + # + # logging.debug('end of part 1') + # + # logging.debug("attempt 2") + # while True: + # logging.debug("attempt") + # keep = False + # for line in receive_bytes(client): + # if line == b"0\r\n": + # break + # if keep: + # tmp += line.rstrip(b"\r\n") + # keep = True + # + # if b"0\r\n" == line: + # break + # logging.debug("content: %s", tmp) + # # logging.debug("content: %r", tmp.replace(b"\r\n", b"").decode("utf-8")) + # + # f = open("test.jpeg", "wb") + # f.write(tmp) try: