#!/usr/bin/env python3
"""Minimal experimental HTTP/1.x client.

Connects to a host, sends a (currently hard-coded) GET request and parses /
validates the status line and header fields of the response.
"""
import argparse
import logging
import re
import socket
import sys
from typing import Dict, Iterator, Optional, Tuple

FORMAT = 'utf-8'
BUFSIZE = 4096

# Blank-line separators that terminate a header section.  CRLF CRLF is the
# standard form; bare LF LF is tolerated for lenient parsing.
_SEPARATORS = (b"\r\n\r\n", b"\n\n")

# Only HTTP/1.0 and HTTP/1.1 are accepted; the status code is exactly 3 digits.
_HTTP_VERSION_RE = re.compile(r"HTTP/1\.[01]")
_STATUS_CODE_RE = re.compile(r"[0-9]{3}")


def _first_separator(buffer: bytes) -> Optional[Tuple[int, bytes]]:
    """Return ``(position, separator)`` of the earliest blank line, or None."""
    hits = [(buffer.find(sep), sep) for sep in _SEPARATORS]
    hits = [hit for hit in hits if hit[0] != -1]
    if not hits:
        return None
    return min(hits, key=lambda hit: hit[0])


def receive_bytes_chunk(client: socket.socket) -> bytes:
    """Read from *client* until a short read and return everything received.

    A ``recv`` that returns fewer than ``BUFSIZE`` bytes (including zero bytes,
    i.e. the peer closed the connection) ends the loop.  Without that exit the
    loop would spin forever once the peer closes.
    """
    buffer = b''
    while True:
        received = client.recv(BUFSIZE)
        received_size = len(received)
        logging.debug("Received size: %s", received_size)
        logging.debug("Received: %r", received)
        buffer += received
        if received_size < BUFSIZE:
            return buffer


def receive_bytes(client: socket.socket) -> Iterator[bytes]:
    """Yield the blank-line separated parts of the data read from *client*.

    Every complete part is yielded with ``b"\\r\\n\\r\\n"`` appended, whatever
    separator was actually used on the wire.  Data after the last separator is
    kept and reading continues; it is yielded as-is once the peer closes the
    connection.  Reading also stops after a short read that leaves no pending
    data.
    """
    buffer = b''
    while True:
        received = client.recv(BUFSIZE)
        received_size = len(received)
        logging.debug("Received size: %s", received_size)
        logging.debug("Received: %r", received)

        if not received:
            # Peer closed the connection: nothing more will ever arrive.
            break

        buffer += received
        hit = _first_separator(buffer)
        if hit is not None:
            # Split on whichever separator style occurs first in the buffer.
            *parts, buffer = buffer.split(hit[1])
            for part in parts:
                yield part + b"\r\n\r\n"

        if received_size < BUFSIZE and not buffer:
            break

    if buffer:
        yield buffer


def parse_header(data: bytes) -> Tuple[str, Dict[str, str]]:
    """Split a raw header section into its start-line and header fields.

    Lines are stripped and empty lines dropped.  The first remaining line is
    the start-line; every following ``name: value`` line with a non-empty name
    and value is stored under its upper-cased name (later duplicates win).

    Raises:
        Exception: if *data* contains no non-empty line.
    """
    stripped = (raw.strip() for raw in data.decode(FORMAT).split("\n"))
    lines = [line for line in stripped if line]
    if not lines:
        raise Exception("No start-line")

    start_line, *field_lines = lines
    logging.debug("start-line: %r", start_line)

    headers = {}
    for line in field_lines:
        pos = line.find(":")
        # Skip lines without a colon, with an empty name or an empty value.
        if pos <= 0 or pos >= len(line) - 1:
            continue
        header, value = line[:pos].strip(), line[pos + 1:].strip()
        headers[header.upper()] = value

    logging.debug("Parsed headers: %r", headers)
    return start_line, headers


def validate_status_line(status_line: str) -> bool:
    """Return True if *status_line* looks like ``HTTP/1.x NNN reason``.

    The version must be exactly ``HTTP/1.0`` or ``HTTP/1.1``, the status code
    exactly three digits, and at least one reason-phrase word must follow.
    """
    fields = status_line.split()
    if len(fields) < 3:
        return False
    http_version, status_code = fields[0], fields[1]
    # fullmatch so that e.g. "HTTP/1.10" or a 4-digit code is rejected.
    if not _HTTP_VERSION_RE.fullmatch(http_version):
        return False
    return _STATUS_CODE_RE.fullmatch(status_code) is not None


def get_chunk(buffer: bytes) -> Tuple[bytes, bytes]:
    """Split *buffer* at its first blank line.

    Returns ``(before, after)`` with the separator removed.  When no blank
    line is present the whole buffer is returned as ``before`` and ``after``
    is empty.
    """
    hit = _first_separator(buffer)
    if hit is None:
        return buffer, b""
    position, separator = hit
    return buffer[:position], buffer[position + len(separator):]


def response_parser(client: socket.socket) -> None:
    """Read one buffer from *client* and validate the response status-line.

    Sets a 3 second timeout on the socket.  A timeout is logged and the
    function returns without parsing.

    Raises:
        Exception: if the header section is empty or the status-line invalid.
    """
    client.settimeout(3.0)
    try:
        buffer = client.recv(BUFSIZE)
    except socket.timeout as err:
        # socket.timeout is an alias of TimeoutError on Python >= 3.10 and a
        # separate OSError subclass before that.
        # TODO handle error appropriately
        logging.debug("[ERR] Socket timeout: %r", err, exc_info=err)
        return

    header_chunk, buffer = get_chunk(buffer)
    status_line, _headers = parse_header(header_chunk)
    if not validate_status_line(status_line):
        raise Exception("Invalid status-line")
    logging.debug("valid status-line: %r", status_line)


def http_parser(client: socket.socket) -> None:
    """Parse a response from *client* using the ``receive_bytes`` generator.

    The first part is treated as the header section and validated; every
    following part is only logged.

    Raises:
        Exception: if nothing was received or the status-line is invalid.
    """
    receiver = receive_bytes(client)
    first_part = next(receiver, None)
    if first_part is None:
        # Avoid leaking a bare StopIteration to the caller.
        raise Exception("No start-line")

    status_line, _headers = parse_header(first_part)
    if not validate_status_line(status_line):
        raise Exception("Invalid header")
    logging.debug("valid status-line: %r", status_line)

    for chunk in receiver:
        logging.debug("chunk: %r", chunk)


def main() -> None:
    """Parse the command line, send the request and parse the response."""
    parser = argparse.ArgumentParser(description='HTTP Client')
    parser.add_argument("--verbose", "-v", action='count', default=0,
                        help="Increase verbosity level of logging")
    parser.add_argument("--command", "-c", default="GET",
                        help="HEAD, GET, PUT or POST")
    # type=int: socket.connect() needs an integer port, also when it is
    # supplied on the command line.
    parser.add_argument("--port", "-p", type=int, default=80,
                        help="The port used to connect with the server")
    parser.add_argument("URI", help="The URI to connect to")

    arguments = parser.parse_args()

    # Each -v lowers the threshold by one level; never go below DEBUG.
    level = max(logging.DEBUG, logging.ERROR - (10 * arguments.verbose))
    logging.basicConfig(level=level)
    logging.debug("Arguments: %s", arguments)

    # The context manager closes the socket even when parsing raises.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client:
        client.connect((arguments.URI, arguments.port))

        # NOTE(review): the request is hard-coded; --command and the URI are
        # not used to build it yet.
        message = "GET /Protocols/HTTP/Performance/microscape/ HTTP/1.1\r\nHost: www.w3.org:80\r\n\r\n".encode(FORMAT)
        client.sendall(message)

        response_parser(client)
        http_parser(client)
        # TODO: read the message body (Content-Length / chunked
        # transfer-encoding) and store it.


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print("[ABRT] Internal error: " + str(e), file=sys.stderr)
        logging.debug("Internal error", exc_info=e)
        sys.exit(70)