#!/usr/bin/env python3
"""Read and benchmark large file I/O performance."""

import argparse
import hashlib
import os
import sys
import time

# Multipliers for the accepted size suffixes. Both the "KB" and "KiB"
# spellings are treated as binary (powers of 1024).
_SIZE_UNITS = {
    'B': 1,
    'KB': 1024,
    'MB': 1024**2,
    'GB': 1024**3,
    'TB': 1024**4,
    'KIB': 1024,
    'MIB': 1024**2,
    'GIB': 1024**3,
    'TIB': 1024**4,
}

# Suffixes ordered longest-first. Every unit ends in 'B', so testing 'B'
# before 'MB' would make '64MB' parse as the number '64M' and fail.
_SIZE_SUFFIXES = sorted(_SIZE_UNITS, key=len, reverse=True)

# Largest chunk size accepted on the command line (1 GiB).
_MAX_CHUNK_SIZE = 1024**3

# Progress is reported each time this many percentage points are crossed.
_PROGRESS_STEP = 5


def parse_size(size_str):
    """Parse a size string like '64MB' or '128KB' into a number of bytes.

    Matching is case-insensitive and surrounding whitespace is ignored.
    A string without a unit suffix is parsed as a raw integer byte count.

    Args:
        size_str: Size text, e.g. '64MB', '1.5GiB', '4096'.

    Returns:
        The size in bytes as an int (fractional results are truncated).

    Raises:
        ValueError: If the text is not a valid size.
    """
    text = size_str.upper().strip()

    for unit in _SIZE_SUFFIXES:
        if not text.endswith(unit):
            continue
        number = text[:-len(unit)]
        try:
            return int(float(number) * _SIZE_UNITS[unit])
        except (ValueError, OverflowError):
            # OverflowError covers 'inf'; report it like any other bad input
            # so callers only need to handle ValueError.
            raise ValueError(f"Invalid size format: {text}") from None

    # No recognised suffix: try parsing as raw bytes.
    try:
        return int(text)
    except ValueError:
        raise ValueError(
            f"Invalid size format: {text}. Use format like '64MB', '128KB'"
        ) from None


def format_bytes(bytes_val):
    """Format a byte count as a human-readable string with binary units.

    Args:
        bytes_val: Number of bytes (int or float).

    Returns:
        A string such as '1.50 KiB'; values of 1024 TiB or more are
        expressed in PiB.
    """
    value = bytes_val
    for unit in ['B', 'KiB', 'MiB', 'GiB', 'TiB']:
        if value < 1024.0:
            return f"{value:.2f} {unit}"
        value /= 1024.0
    return f"{value:.2f} PiB"


def read_file(input_path, chunk_size, compute_hash=False, quiet=False):
    """Read a file in chunks, optionally hashing it, and report throughput.

    Args:
        input_path: Path of the file to read.
        chunk_size: Number of bytes requested per read call.
        compute_hash: When true, compute the SHA256 digest of the content.
        quiet: When true, suppress the header, progress and summary output.

    Returns:
        A process exit status: 0 on success (including an empty file),
        1 on a missing/invalid path or read error, 130 when interrupted
        with Ctrl-C.
    """
    if not os.path.exists(input_path):
        print(f"Error: File '{input_path}' does not exist", file=sys.stderr)
        return 1

    if not os.path.isfile(input_path):
        print(f"Error: '{input_path}' is not a file", file=sys.stderr)
        return 1

    try:
        total_bytes = os.path.getsize(input_path)
    except OSError as e:
        print(f"Error: Cannot get file size: {e}", file=sys.stderr)
        return 1

    if total_bytes == 0:
        print("Warning: File is empty", file=sys.stderr)
        return 0

    if not quiet:
        print(f"Reading file: {input_path}")
        print(f"File size: {format_bytes(total_bytes)}")
        print(f"Chunk size: {format_bytes(chunk_size)}")
        if compute_hash:
            print("Computing: SHA256 hash")
        print()

    # perf_counter is monotonic, so the measured durations cannot go
    # negative or jump if the system clock is adjusted mid-run.
    start_time = time.perf_counter()
    last_log_time = start_time
    last_log_bytes = 0
    hash_obj = hashlib.sha256() if compute_hash else None

    try:
        with open(input_path, "rb") as f:
            bytes_read = 0
            next_percent = 0  # lowest percentage that triggers a report

            while True:
                chunk = f.read(chunk_size)
                if not chunk:
                    break

                bytes_read += len(chunk)

                if hash_obj is not None:
                    hash_obj.update(chunk)

                if quiet:
                    continue

                # Report whenever a 5% boundary has been crossed rather than
                # only when the percentage lands exactly on a multiple of 5;
                # large chunks can jump straight over those values. Capped at
                # 100 in case the file grew after its size was sampled.
                percent = min(100, bytes_read * 100 // total_bytes)
                if percent >= next_percent:
                    print(f"Progress: {percent}% ({format_bytes(bytes_read)} read)")
                    next_percent = (
                        percent - percent % _PROGRESS_STEP + _PROGRESS_STEP
                    )

                # Per-second throughput log.
                now = time.perf_counter()
                interval = now - last_log_time
                if interval >= 1.0:
                    gb_read = bytes_read / (1024**3)
                    gb_per_sec = (bytes_read - last_log_bytes) / (1024**3) / interval
                    print(f"Read: {gb_read:.2f} GiB, Speed: {gb_per_sec:.2f} GiB/s")
                    last_log_time = now
                    last_log_bytes = bytes_read

        elapsed = time.perf_counter() - start_time

        if not quiet:
            print()
            print(f"✓ Successfully read {format_bytes(bytes_read)}")
            if elapsed > 0:
                print(f"Time taken: {elapsed:.2f} seconds")
                print(f"Average speed: {format_bytes(bytes_read / elapsed)}/s")

        # The digest is the requested result rather than progress output, so
        # it is printed even with --quiet (the help epilog pairs the flags).
        if hash_obj is not None:
            print(f"SHA256: {hash_obj.hexdigest()}")

        return 0

    except KeyboardInterrupt:
        print("\n\nInterrupted by user", file=sys.stderr)
        return 130
    except OSError as e:
        print(f"Error reading file: {e}", file=sys.stderr)
        return 1


def main():
    """Parse command-line arguments and run the read benchmark.

    Returns:
        The process exit status produced by read_file, or 1 when the
        chunk size argument is invalid or out of range.
    """
    parser = argparse.ArgumentParser(
        description='Read and benchmark large file I/O performance.',
        epilog='Examples:\n'
               '  %(prog)s largefile.bin\n'
               '  %(prog)s test.dat --chunk-size 128MB\n'
               '  %(prog)s data.bin --hash --quiet',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        'input',
        help='Input file path to read'
    )
    parser.add_argument(
        '--chunk-size',
        default='64MB',
        help='Chunk size for reading (default: 64MB)'
    )
    parser.add_argument(
        '--hash',
        action='store_true',
        help='Compute SHA256 hash of the file'
    )
    parser.add_argument(
        '--quiet', '-q',
        action='store_true',
        help='Suppress progress output'
    )
    parser.add_argument(
        '--version',
        action='version',
        version='%(prog)s 1.0.0'
    )

    args = parser.parse_args()

    try:
        chunk_size = parse_size(args.chunk_size)
    except ValueError as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1

    if not 0 < chunk_size <= _MAX_CHUNK_SIZE:  # Max 1GB chunk
        print("Error: Chunk size must be between 1 byte and 1GB", file=sys.stderr)
        return 1

    return read_file(args.input, chunk_size, args.hash, args.quiet)


if __name__ == "__main__":
    sys.exit(main())