chore: initialize screenjob project baseline

This commit is contained in:
Space-Banane
2026-05-27 17:31:49 +02:00
commit 84b0df520c
9 changed files with 1045 additions and 0 deletions

151
src/cli.py Normal file
View File

@@ -0,0 +1,151 @@
from __future__ import annotations
import argparse
import json
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
from openai import OpenAI
from .agent import ScreenJobAgent
from .utils import setup_artifacts, setup_logger
# Import pyautogui eagerly so a broken GUI-automation setup is reported before
# any CLI work starts. Both failure modes surface as RuntimeError, chained to
# the original exception so the traceback keeps the real cause.
try:
    import pyautogui
except ImportError as import_exc:
    # The package (or one of the modules it imports) is not installed.
    raise RuntimeError(
        "pyautogui is required. Install dependencies with: pip install pyautogui pillow"
    ) from import_exc
except Exception as import_exc:  # noqa: BLE001
    # The package was found but raised something other than ImportError while
    # importing. NOTE(review): presumably this covers platform start-up
    # problems such as a missing display on Linux -- confirm on target hosts.
    # An "install it" hint would be misleading here, so report the real error.
    raise RuntimeError(
        "pyautogui is installed but could not be initialized: "
        f"{type(import_exc).__name__}: {import_exc}"
    ) from import_exc
def _positive_int(raw: str) -> int:
    """Convert *raw* to an int for argparse, rejecting values below 1."""
    try:
        value = int(raw)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid integer value: {raw!r}") from None
    if value < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {value}")
    return value


def _non_negative_float(raw: str) -> float:
    """Convert *raw* to a float for argparse, rejecting negative, NaN and infinite values."""
    try:
        value = float(raw)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid float value: {raw!r}") from None
    # A chained comparison is False for NaN, so this single test also rejects
    # NaN; the strict upper bound rejects +inf.
    if not 0 <= value < float("inf"):
        raise argparse.ArgumentTypeError(f"must be a finite number >= 0, got {raw!r}")
    return value


def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the ScreenJob CLI.

    The parser takes one positional ``job`` string plus tuning options:
    ``--model``, ``--max-steps``, ``--command-timeout``, ``--type-interval``,
    ``--click-pause`` and the ``--no-failsafe`` flag.

    Numeric options are validated while parsing: ``--max-steps`` and
    ``--command-timeout`` must be integers >= 1, and ``--type-interval`` and
    ``--click-pause`` must be finite floats >= 0. Invalid values produce the
    standard argparse usage error (exit status 2) instead of failing later
    during the run.

    Returns:
        A configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        description="Run an autonomous desktop task agent using OpenAI + UI tools.",
        # Raw formatter keeps the hand-laid-out line breaks of the epilog.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "Examples:\n"
            '  python main.py "Open amazon.de"\n'
            '  python main.py "Open amazon.de and search for mechanical keyboard" --max-steps 80\n\n'
            "Artifacts:\n"
            "  Each run stores logs/screens in ./screenjob_runs/run_YYYYMMDD_HHMMSS/"
        ),
    )
    parser.add_argument("job", type=str, help="Task objective for the agent.")
    parser.add_argument("--model", type=str, default="gpt-5.2", help="OpenAI model name.")
    parser.add_argument(
        "--max-steps",
        type=_positive_int,
        default=60,
        help="Max tool-iteration steps.",
    )
    parser.add_argument(
        "--command-timeout",
        type=_positive_int,
        default=45,
        help="Timeout (seconds) for execute_command tool.",
    )
    parser.add_argument(
        "--type-interval",
        type=_non_negative_float,
        default=0.02,
        help="Seconds between typed characters.",
    )
    parser.add_argument(
        "--click-pause",
        type=_non_negative_float,
        default=0.10,
        help="Mouse move duration before click (seconds).",
    )
    parser.add_argument(
        "--no-failsafe",
        action="store_true",
        help="Disable PyAutoGUI fail-safe. Not recommended.",
    )
    return parser
def _emit_summary(summary, *, stream=None) -> None:
    """Print *summary* as indented JSON (non-ASCII kept as-is).

    ``stream=None`` lets ``print`` fall back to the current ``sys.stdout``.
    """
    print(json.dumps(summary, ensure_ascii=False, indent=2), file=stream)


def main() -> int:
    """Run one ScreenJob task from the command line and return an exit status.

    Steps: load ``.env``, parse arguments, read ``OPENAI_API_KEY``, configure
    PyAutoGUI, create the run artifacts and logger, build the OpenAI client
    and the agent, run the job, and print a JSON summary.

    Returns:
        0 when the run reports completion; 1 when it does not complete or a
        fatal error is raised during the run; 2 when the API key is missing
        or the OpenAI client cannot be created; 130 when interrupted with
        Ctrl+C.
    """
    load_dotenv()
    args = build_parser().parse_args()

    api_key = os.getenv("OPENAI_API_KEY", "").strip()
    if not api_key:
        print("ERROR: Missing OPENAI_API_KEY (expected in environment or .env).", file=sys.stderr)
        return 2

    # Global PyAutoGUI settings: fail-safe stays on unless explicitly disabled.
    pyautogui.FAILSAFE = not args.no_failsafe
    pyautogui.PAUSE = 0.05

    artifacts = setup_artifacts(Path.cwd() / "screenjob_runs")
    logger = setup_logger(artifacts.log_file, verbose=True)
    logger.info("ScreenJob booting. Artifacts: %s", str(artifacts.root_dir.resolve()))
    logger.info("PyAutoGUI FAILSAFE=%s", pyautogui.FAILSAFE)

    try:
        client = OpenAI(api_key=api_key)
    except Exception as exc:  # noqa: BLE001
        logger.exception("Failed to create OpenAI client.")
        print(f"ERROR: Could not initialize OpenAI client: {exc}", file=sys.stderr)
        return 2

    agent_options = {
        "client": client,
        "logger": logger,
        "artifacts": artifacts,
        "model": args.model,
        "max_steps": args.max_steps,
        "command_timeout": args.command_timeout,
        "type_interval": args.type_interval,
        "click_pause": args.click_pause,
    }
    agent = ScreenJobAgent(**agent_options)

    try:
        outcome = agent.run(args.job)
        duration = outcome.ended_at - outcome.started_at
        logger.info("Run finished. completed=%s elapsed=%.2fs", outcome.completed, duration)
        # Success summary goes to stdout; printing stays inside the try so a
        # serialization failure is reported by the fatal-error handler below.
        _emit_summary(
            {
                "completed": outcome.completed,
                "result": outcome.result,
                "steps": outcome.steps,
                "elapsed_seconds": round(duration, 3),
                "artifacts_dir": str(artifacts.root_dir.resolve()),
            }
        )
        if outcome.completed:
            return 0
        return 1
    except KeyboardInterrupt:
        logger.warning("Interrupted by user.")
        _emit_summary(
            {
                "completed": False,
                "result": "Interrupted by user.",
                "steps": agent.step,
                "artifacts_dir": str(artifacts.root_dir.resolve()),
            }
        )
        return 130
    except Exception as exc:  # noqa: BLE001
        logger.exception("Fatal runtime error.")
        # Unlike the other summaries, the fatal-error summary goes to stderr.
        _emit_summary(
            {
                "completed": False,
                "result": f"Fatal error: {type(exc).__name__}: {exc}",
                "steps": agent.step,
                "artifacts_dir": str(artifacts.root_dir.resolve()),
            },
            stream=sys.stderr,
        )
        return 1