#!/usr/bin/env python3
"""Small command-line HTTP client.

Connects to a host over TCP, sends a request, validates the status line of
the response and, for responses carrying a ``Content-Length`` header, stores
the payload in a local file.
"""
import argparse
import logging
import os
import re
import socket
import sys
import time

from client.ResponseHandler import ResponseHandler

FORMAT = 'utf-8'
BUFSIZE = 4096

# Byte sequences accepted as the blank line that ends a header section.
_SEPARATORS = (b"\r\n\r\n", b"\n\n")


def _first_separator(buffer: bytes):
    """Return ``(position, separator)`` of the earliest blank line in *buffer*.

    Returns ``(-1, None)`` when *buffer* contains neither ``\\r\\n\\r\\n`` nor
    ``\\n\\n``.
    """
    best_pos, best_sep = -1, None
    for separator in _SEPARATORS:
        pos = buffer.find(separator)
        if pos != -1 and (best_sep is None or pos < best_pos):
            best_pos, best_sep = pos, separator
    return best_pos, best_sep


def receive_bytes_chunk(client: socket.socket):
    """Read from *client* until the peer closes the connection.

    Every ``recv`` result is logged at debug level.

    :param client: connected socket to read from.
    :return: all bytes received before ``recv`` returned an empty result.
    """
    buffer = b''
    while True:
        received = client.recv(BUFSIZE)
        received_size = len(received)
        logging.debug("Received size: %s", received_size)
        logging.debug("Received: %r", received)
        if received_size == 0:
            # An empty read means the peer closed the connection; without
            # this check the loop would never terminate.
            break
        buffer += received
    return buffer


def receive_bytes(client: socket.socket):
    """Yield the blank-line separated parts received on *client*.

    Data is read in ``BUFSIZE`` pieces. Each complete part (terminated by
    ``\\r\\n\\r\\n`` or ``\\n\\n``) is yielded with a ``\\r\\n\\r\\n`` suffix.
    Reading stops when a short read leaves no partial data behind, or when
    the peer closes the connection; any trailing partial data is yielded
    last, without a suffix.

    :param client: connected socket to read from.
    """
    buffer = b''
    while True:
        received = client.recv(BUFSIZE)
        received_size = len(received)
        logging.debug("Received size: %s", received_size)
        logging.debug("Received: %r", received)

        buffer += received
        _, separator = _first_separator(buffer)
        if separator is not None:
            *parts, buffer = buffer.split(separator)
            for part in parts:
                yield part + b"\r\n\r\n"

        if received_size == 0:
            # Peer closed the connection: more recv() calls cannot complete
            # the partial part, so stop instead of spinning.
            break
        if received_size < BUFSIZE and not buffer:
            # Short read and nothing pending: assume the message is complete.
            break

    if buffer:
        yield buffer


def receive(client: socket.socket):
    """Receive up to ``BUFSIZE`` bytes, retrying once after an empty read.

    :param client: socket to read from.
    :return: the received bytes (empty if both attempts returned nothing).
    :raises Exception: if the socket has already been closed locally.
    """
    if client.fileno() == -1:
        raise Exception("Connection closed")

    result = client.recv(BUFSIZE)
    if not result:
        # Give the peer a brief moment before the second and last attempt.
        time.sleep(0.1)
        result = client.recv(BUFSIZE)
    return result


def parse_header(data: bytes):
    """Split a raw header section into its start-line and header fields.

    Empty lines and lines that begin with whitespace (folded continuation
    lines) are ignored. Header lines without a colon, with an empty name or
    with an empty value are skipped. Header names are upper-cased.

    :param data: header section bytes, decoded as UTF-8.
    :return: ``(start_line, headers)`` where *headers* maps upper-cased
        header names to their stripped values.
    :raises Exception: if no start-line is present.
    """
    lines = []
    for raw_line in data.decode("utf-8").split("\n"):
        # The whitespace test must look at the raw line: after stripping,
        # the first character can never be whitespace.
        if not raw_line.strip() or raw_line[0].isspace():
            continue
        lines.append(raw_line.strip())

    if not lines:
        raise Exception("No start-line")

    start_line = lines.pop(0)
    logging.debug("start-line: %r", start_line)

    headers = {}
    for line in lines:
        name, colon, value = line.partition(":")
        if not colon or not name or not value:
            continue
        headers[name.strip().upper()] = value.strip()

    logging.debug("Parsed headers: %r", headers)
    return start_line, headers


def validate_status_line(status_line: str):
    """Check that *status_line* looks like an HTTP/1.0 or HTTP/1.1 status line.

    The line must start with ``HTTP/1.0`` or ``HTTP/1.1`` followed by a
    three-digit status code. The reason phrase is optional.

    :param status_line: first line of the response, without line ending.
    :return: ``True`` if the line is acceptable, ``False`` otherwise.
    """
    parts = [part for part in status_line.split(" ") if part]
    if len(parts) < 2:
        return False

    # Check HTTP version
    http_version, status_code = parts[0], parts[1]
    if len(http_version) < 8 or http_version[4] != "/":
        return False
    name, version = http_version[:4], http_version[5:]
    # fullmatch: a prefix match would accept e.g. "1.10" or "2000".
    if name != "HTTP" or not re.fullmatch(r"1\.[01]", version):
        return False

    return re.fullmatch(r"\d{3}", status_code) is not None


def get_chunk(buffer: bytes):
    """Split *buffer* at the first blank line.

    :param buffer: raw bytes, typically header section plus start of body.
    :return: ``(before, after)`` around the first ``\\r\\n\\r\\n`` or
        ``\\n\\n``; ``(buffer, b"")`` if no blank line is present.
    """
    pos, separator = _first_separator(buffer)
    if separator is None:
        return buffer, b""
    return buffer[:pos], buffer[pos + len(separator):]


def get_html_filename(headers):
    """Derive the output file name from the parsed response headers.

    :param headers: mapping of upper-cased header names to values.
    :return: the base name of ``CONTENT-LOCATION`` when that header is
        present and non-blank, otherwise ``"index.html"``.
    """
    if "CONTENT-LOCATION" not in headers:
        return "index.html"

    result = os.path.basename(headers["CONTENT-LOCATION"]).strip()
    return result if result else 'index.html'


def response_parser(client: socket.socket):
    """Read one HTTP response from *client* and store its payload in a file.

    The header section is taken from the first ``recv`` only. When no
    ``Transfer-Encoding`` header is present and ``Content-Length`` is
    non-zero, the payload is written to the file named by
    :func:`get_html_filename` in the current working directory. A timeout on
    the first read is logged and the function returns without a result.

    :param client: connected socket on which a request was already sent.
    :raises Exception: if the response has no or an invalid status line.
    """
    client.settimeout(3.0)

    try:
        buffer = client.recv(BUFSIZE)
    except socket.timeout as err:
        # socket.timeout only became an alias of TimeoutError in Python 3.10;
        # catching it by this name works on older interpreters as well.
        # TODO handle error appropriately
        logging.debug("[ERR] Socket timeout: %r", err, exc_info=err)
        return

    header_chunk, buffer = get_chunk(buffer)
    status_line, headers = parse_header(header_chunk)

    if not validate_status_line(status_line):
        raise Exception("Invalid status-line")
    logging.debug("valid status-line: %r", status_line)

    encoding = headers.get("TRANSFER-ENCODING", "plain")
    if encoding != "plain" or "CONTENT-LENGTH" not in headers:
        return

    payload_size = int(headers["CONTENT-LENGTH"])
    if payload_size == 0:
        return

    filename = get_html_filename(headers)
    # The context manager closes the file even if receiving fails midway.
    with open(filename, "wb") as f:
        f.write(buffer)
        cur_payload_size = len(buffer)

        while cur_payload_size < payload_size:
            buffer = receive(client)
            logging.debug("Received payload: %r", buffer)

            if not buffer:
                logging.warning(
                    "Received payload length %s less than expected %s",
                    cur_payload_size, payload_size)
                break

            cur_payload_size += len(buffer)
            f.write(buffer)


def http_parser(client: socket.socket):
    """Validate the response status line and debug-log all remaining parts.

    :param client: connected socket on which a request was already sent.
    :raises Exception: if nothing was received or the status line is invalid.
    """
    receiver = receive_bytes(client)
    # Default to empty bytes so an empty stream surfaces as "No start-line"
    # from parse_header instead of a bare StopIteration.
    status_line, _headers = parse_header(next(receiver, b""))

    if not validate_status_line(status_line):
        raise Exception("Invalid header")
    logging.debug("valid status-line: %r", status_line)

    for chunk in receiver:
        logging.debug("chunk: %r", chunk)


def main():
    """Parse the command line, send the request and process the response.

    NOTE: the request sent is currently a fixed GET for a page on
    www.w3.org; the ``--command`` option is parsed but not used yet.
    """
    parser = argparse.ArgumentParser(description='HTTP Client')
    parser.add_argument("--verbose", "-v", action='count', default=0,
                        help="Increase verbosity level of logging")
    parser.add_argument("--command", "-c", help="HEAD, GET, PUT or POST",
                        default="GET")
    # type=int: socket.connect() requires an integer port, but argparse
    # yields strings for values given on the command line.
    parser.add_argument("--port", "-p", type=int,
                        help="The port used to connect with the server",
                        default=80)
    parser.add_argument("URI", help="The URI to connect to")

    arguments = parser.parse_args()

    # Each -v lowers the threshold by one level, but never below DEBUG.
    level = max(logging.DEBUG, logging.ERROR - (10 * arguments.verbose))
    logging.basicConfig(level=level)
    logging.debug("Arguments: %s", arguments)

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client:
        client.connect((arguments.URI, arguments.port))

        message = ("GET /Protocols/HTTP/Performance/microscape/ HTTP/1.1\r\n"
                   "Host: www.w3.org:80\r\n\r\n").encode(FORMAT)
        client.sendall(message)

        response_parser(client)
        # Alternative that only logs the response: http_parser(client)


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print("[ABRT] Internal error: " + str(e), file=sys.stderr)
        logging.debug("Internal error", exc_info=e)
        sys.exit(70)