#!/usr/bin/env python3
|
|
"""Shared logic for generating and reading large files."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import hashlib
|
|
import os
|
|
import re
|
|
import shutil
|
|
import sys
|
|
import time
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Iterable, Optional
|
|
|
|
VERSION = "2.0.0"
|
|
MAX_CHUNK_SIZE = 1024 ** 3
|
|
_PROGRESS_STEP = 5
|
|
_SIZE_RE = re.compile(r"^\s*(?P<value>\d+(?:\.\d+)?)\s*(?P<unit>[A-Za-z]*)\s*$")
|
|
|
|
_BINARY_UNITS = {
|
|
"": 1,
|
|
"B": 1,
|
|
"K": 1024,
|
|
"KB": 1024,
|
|
"KIB": 1024,
|
|
"M": 1024 ** 2,
|
|
"MB": 1024 ** 2,
|
|
"MIB": 1024 ** 2,
|
|
"G": 1024 ** 3,
|
|
"GB": 1024 ** 3,
|
|
"GIB": 1024 ** 3,
|
|
"T": 1024 ** 4,
|
|
"TB": 1024 ** 4,
|
|
"TIB": 1024 ** 4,
|
|
"P": 1024 ** 5,
|
|
"PB": 1024 ** 5,
|
|
"PIB": 1024 ** 5,
|
|
}
|
|
|
|
|
|
@dataclass(frozen=True)
class ProgressState:
    """Immutable snapshot of progress-reporting state.

    Instances are replaced rather than mutated each time a progress or
    throughput line is logged. Field order matters: callers construct
    instances positionally.
    """

    # Total number of bytes the operation will process.
    total_bytes: int
    # Last whole percent printed via _write_progress; -1 means "none yet".
    last_reported_percent: int = -1
    # time.time() of the last throughput log line; 0.0 means "never logged".
    last_log_time: float = 0.0
    # Value of bytes_done at the time of the last throughput log line.
    last_log_bytes: int = 0


def parse_size(value: str) -> int:
    """Convert a human-readable size string such as "15GB" into bytes.

    Units are binary (see _BINARY_UNITS), so "1KB" is 1024 bytes.

    Raises ValueError for unparseable strings, unknown units, or a
    non-positive result.
    """
    parsed = _SIZE_RE.match(value)
    if parsed is None:
        raise ValueError(f"invalid size: {value!r}")

    unit = parsed.group("unit").upper()
    try:
        multiplier = _BINARY_UNITS[unit]
    except KeyError:
        raise ValueError(f"unknown size unit: {unit or 'bytes'}") from None

    total = int(float(parsed.group("value")) * multiplier)
    if total <= 0:
        raise ValueError("size must be greater than zero")
    return total


def format_bytes(value: int) -> str:
    """Render a byte count as a human-readable binary-unit string.

    Divides by 1024 until the value drops below 1024 or the largest unit
    (PiB) is reached, so values >= 1024 PiB are reported as many PiB.
    """
    units = ["B", "KiB", "MiB", "GiB", "TiB", "PiB"]
    size = float(value)
    for unit in units:
        # The last unit always matches, so the loop always returns;
        # the original trailing return after the loop was dead code.
        if size < 1024.0 or unit == units[-1]:
            return f"{size:.2f} {unit}"
        size /= 1024.0


def _disk_free_bytes(path: Path) -> int:
|
|
usage = shutil.disk_usage(path)
|
|
return usage.free
|
|
|
|
|
|
def _ensure_parent_dir(path: Path) -> None:
|
|
if path.parent and not path.parent.exists():
|
|
raise FileNotFoundError(f"directory does not exist: {path.parent}")
|
|
|
|
|
|
def _write_progress(prefix: str, current: int, total: int) -> None:
    """Print one progress line, e.g. "Progress: 40% (2.00 GiB of 5.00 GiB)".

    A zero *total* is reported as 100% to avoid division by zero.
    """
    if total:
        percent = min(100, int(current * 100 / total))
    else:
        percent = 100
    done, goal = format_bytes(current), format_bytes(total)
    print(f"{prefix}: {percent}% ({done} of {goal})")


def _maybe_log_throughput(prefix: str, bytes_done: int, started_at: float, state: ProgressState) -> ProgressState:
    """Log throughput at most once per second and return the updated state.

    NOTE: *started_at* is accepted for interface compatibility but is not
    used; the rate is computed from the previous log point in *state*.
    """
    now = time.time()
    # First call: record a baseline without logging anything.
    if state.last_log_time == 0.0:
        return ProgressState(state.total_bytes, state.last_reported_percent, now, bytes_done)
    interval = now - state.last_log_time
    # Throttle to at most one speed line per second.
    if interval < 1.0:
        return state
    rate = (bytes_done - state.last_log_bytes) / interval if interval > 0 else 0
    print(f"{prefix}: {format_bytes(bytes_done)} at {format_bytes(int(rate))}/s")
    return ProgressState(state.total_bytes, state.last_reported_percent, now, bytes_done)


def create_file(output: str | Path, total_bytes: int, chunk_size: int, quiet: bool = False, sparse: bool = False) -> int:
    """Create a file of *total_bytes* at *output* and return an exit code.

    Writes zero-filled chunks of *chunk_size* bytes, or truncates to the
    target size when *sparse* is true (no data blocks are written, so the
    free-space check is skipped). Progress and throughput lines are
    suppressed when *quiet* is true; the final summary is always printed.

    Returns 0 on success, 1 on any error, 130 on KeyboardInterrupt.
    A partially written file is removed on interrupt or write error.
    """
    output_path = Path(output)

    if output_path.exists():
        print(f"Error: file already exists: {output_path}", file=sys.stderr)
        return 1

    _ensure_parent_dir(output_path)

    if chunk_size <= 0 or chunk_size > MAX_CHUNK_SIZE:
        print("Error: chunk size must be between 1 byte and 1 GiB", file=sys.stderr)
        return 1

    # Path.parent is never the empty path (it normalizes to "."), so it is
    # always a usable argument for the disk-usage query.
    free_bytes = _disk_free_bytes(output_path.parent)
    if not sparse and free_bytes < total_bytes:
        print(
            f"Error: not enough disk space, need {format_bytes(total_bytes)}, have {format_bytes(free_bytes)}",
            file=sys.stderr,
        )
        return 1

    if not quiet:
        print(f"Creating: {output_path}")
        print(f"Size: {format_bytes(total_bytes)}")
        print(f"Chunk: {format_bytes(chunk_size)}")
        if sparse:
            print("Mode: sparse")
        print()

    started_at = time.time()
    progress = ProgressState(total_bytes=total_bytes, last_log_time=started_at, last_log_bytes=0)

    try:
        with output_path.open("wb") as handle:
            if sparse:
                # truncate() extends the file without writing data blocks;
                # on filesystems with sparse-file support no space is used.
                handle.truncate(total_bytes)
                written = total_bytes
            else:
                # Allocate the zero buffer only when it is actually needed;
                # it can be up to MAX_CHUNK_SIZE (1 GiB), so building it in
                # sparse mode was pure waste.
                buffer = b"\x00" * chunk_size
                written = 0
                while written < total_bytes:
                    step = min(chunk_size, total_bytes - written)
                    handle.write(buffer[:step])
                    written += step

                    if quiet:
                        continue

                    percent = int(written * 100 / total_bytes)
                    if percent >= progress.last_reported_percent + _PROGRESS_STEP:
                        _write_progress("Progress", written, total_bytes)
                        progress = ProgressState(total_bytes, percent, progress.last_log_time, progress.last_log_bytes)

                    progress = _maybe_log_throughput("Speed", written, started_at, progress)

        # The final summary is printed even in quiet mode; --quiet only
        # suppresses per-chunk progress output.
        elapsed = time.time() - started_at
        print(f"Done, wrote {format_bytes(written)} to {output_path}")
        print(f"Elapsed: {elapsed:.2f}s")
        if elapsed > 0:
            print(f"Average: {format_bytes(int(written / elapsed))}/s")
        return 0
    except KeyboardInterrupt:
        print("Interrupted, cleaning up partial file", file=sys.stderr)
        try:
            if output_path.exists():
                output_path.unlink()
        except OSError:
            pass
        return 130
    except OSError as exc:
        print(f"Error writing file: {exc}", file=sys.stderr)
        try:
            if output_path.exists():
                output_path.unlink()
        except OSError:
            pass
        return 1


def read_file(input_path: str | Path, chunk_size: int, compute_hash: bool = False, quiet: bool = False) -> int:
    """Read *input_path* in *chunk_size* chunks to benchmark I/O throughput.

    Optionally computes a SHA-256 digest while reading (*compute_hash*).
    Progress and throughput lines are suppressed when *quiet* is true; the
    final summary (and digest, if requested) is always printed.

    Returns 0 on success, 1 on any error, 130 on KeyboardInterrupt.
    """
    path = Path(input_path)

    if not path.exists():
        print(f"Error: file not found: {path}", file=sys.stderr)
        return 1
    if not path.is_file():
        print(f"Error: not a file: {path}", file=sys.stderr)
        return 1

    if chunk_size <= 0 or chunk_size > MAX_CHUNK_SIZE:
        print("Error: chunk size must be between 1 byte and 1 GiB", file=sys.stderr)
        return 1

    try:
        total_bytes = path.stat().st_size
    except OSError as exc:
        print(f"Error reading file metadata: {exc}", file=sys.stderr)
        return 1

    if not quiet:
        print(f"Reading: {path}")
        print(f"Size: {format_bytes(total_bytes)}")
        print(f"Chunk: {format_bytes(chunk_size)}")
        if compute_hash:
            print("Hash: sha256")
        print()

    started_at = time.time()
    progress = ProgressState(total_bytes=total_bytes, last_log_time=started_at, last_log_bytes=0)
    hasher = hashlib.sha256() if compute_hash else None
    bytes_read = 0

    try:
        with path.open("rb") as handle:
            while True:
                chunk = handle.read(chunk_size)
                # An empty read means EOF.
                if not chunk:
                    break
                bytes_read += len(chunk)

                if hasher is not None:
                    hasher.update(chunk)

                if quiet:
                    continue

                # Report every _PROGRESS_STEP percent; guard against a
                # zero-byte file when computing the percentage.
                percent = int(bytes_read * 100 / total_bytes) if total_bytes else 100
                if percent >= progress.last_reported_percent + _PROGRESS_STEP:
                    _write_progress("Progress", bytes_read, total_bytes)
                    progress = ProgressState(total_bytes, percent, progress.last_log_time, progress.last_log_bytes)

                progress = _maybe_log_throughput("Speed", bytes_read, started_at, progress)

        # Final summary is printed even in quiet mode.
        elapsed = time.time() - started_at
        print(f"Done, read {format_bytes(bytes_read)} from {path}")
        print(f"Elapsed: {elapsed:.2f}s")
        if elapsed > 0:
            print(f"Average: {format_bytes(int(bytes_read / elapsed))}/s")
        if hasher is not None:
            print(f"SHA256: {hasher.hexdigest()}")
        return 0
    except KeyboardInterrupt:
        # Unlike create_file, there is nothing to clean up: the file is
        # only read here.
        print("Interrupted", file=sys.stderr)
        return 130
    except OSError as exc:
        print(f"Error reading file: {exc}", file=sys.stderr)
        return 1


def build_create_parser(prog: str) -> argparse.ArgumentParser:
    """Build the argument parser for the file-creation command line."""
    examples = "\n".join(
        [
            "Examples:",
            f"  {prog} output.bin 15GB",
            f"  {prog} dump.dat 1.5TB --chunk-size 128MB",
            f"  {prog} test.bin 500MB --quiet",
        ]
    )
    parser = argparse.ArgumentParser(
        prog=prog,
        description="Create large binary files for storage and transfer testing.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=examples,
    )
    parser.add_argument("output", help="Path to the file to create")
    parser.add_argument("size", help="Target size, for example 15GB or 1.5TiB")
    parser.add_argument("--chunk-size", default="64MB", help="Write chunk size (default: 64MB)")
    parser.add_argument("--sparse", action="store_true", help="Create a sparse file instead of writing zeros")
    parser.add_argument("--quiet", "-q", action="store_true", help="Suppress progress output")
    parser.add_argument("--version", action="version", version=f"{prog} {VERSION}")
    return parser


def build_read_parser(prog: str) -> argparse.ArgumentParser:
    """Build the argument parser for the file-reading command line."""
    examples = "\n".join(
        [
            "Examples:",
            f"  {prog} largefile.bin",
            f"  {prog} test.dat --chunk-size 128MB --hash",
            f"  {prog} data.bin --quiet",
        ]
    )
    parser = argparse.ArgumentParser(
        prog=prog,
        description="Read large files and benchmark I/O throughput.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=examples,
    )
    parser.add_argument("input", help="Path to the file to read")
    parser.add_argument("--chunk-size", default="64MB", help="Read chunk size (default: 64MB)")
    parser.add_argument("--hash", action="store_true", help="Compute SHA256 while reading")
    parser.add_argument("--quiet", "-q", action="store_true", help="Suppress progress output")
    parser.add_argument("--version", action="version", version=f"{prog} {VERSION}")
    return parser


def create_main(argv: Optional[Iterable[str]] = None) -> int:
    """CLI entry point for file creation; returns a process exit code."""
    args = build_create_parser("make_big_file.py").parse_args(
        None if argv is None else list(argv)
    )
    try:
        total_bytes = parse_size(args.size)
        chunk_size = parse_size(args.chunk_size)
    except ValueError as exc:
        print(f"Error: {exc}", file=sys.stderr)
        return 1
    return create_file(args.output, total_bytes, chunk_size, args.quiet, args.sparse)


def read_main(argv: Optional[Iterable[str]] = None) -> int:
    """CLI entry point for file reading; returns a process exit code."""
    args = build_read_parser("read_big_file.py").parse_args(
        None if argv is None else list(argv)
    )
    try:
        chunk_size = parse_size(args.chunk_size)
    except ValueError as exc:
        print(f"Error: {exc}", file=sys.stderr)
        return 1
    return read_file(args.input, chunk_size, args.hash, args.quiet)


if __name__ == "__main__":
    # Running this shared module directly invokes the *create* tool;
    # read_main is exposed for the read-side wrapper script.
    raise SystemExit(create_main())