feat: finalize production cleanup with structured agent responses and project governance

2026-05-27 18:08:52 +02:00
parent a19b285232
commit c09f0ee9c0
17 changed files with 737 additions and 126 deletions
--- a/.gitea/workflows/ci.yml
+++ b/.gitea/workflows/ci.yml
@@ -21,7 +21,7 @@ jobs:
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
-          pip install openai pillow python-dotenv fastapi uvicorn pytest httpx
+          pip install -r requirements.txt

      - name: Compile check
        run: |
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,32 @@
+# Code of Conduct
+
+## Our Pledge
+
+We want ScreenJob to be an open, respectful, and harassment-free project for everyone.
+
+## Expected Behavior
+
+- Be respectful and constructive.
+- Assume good intent, ask clarifying questions, and focus on technical outcomes.
+- Accept feedback professionally.
+- Share credit and document decisions clearly.
+
+## Unacceptable Behavior
+
+- Harassment, threats, or intimidation.
+- Personal attacks, insults, or discriminatory language.
+- Publishing private information without permission.
+- Deliberate disruption of project collaboration.
+
+## Enforcement
+
+Project maintainers may remove or reject contributions and interactions that violate this Code of Conduct.
+
+## Reporting
+
+Report violations by opening a private (confidential) issue addressed to the maintainers, or by contacting the project maintainers directly.
+
+## Attribution
+
+This policy is adapted from the Contributor Covenant, version 2.1:  
+https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -0,0 +1,58 @@
+# Contributing to ScreenJob
+
+## Development Setup
+
+1. Use Python 3.11+.
+2. Install dependencies:
+
+```powershell
+pip install -r requirements.txt
+```
+
+3. Create `.env` with required keys:
+   - `OPENAI_API_KEY`
+   - `SCREENJOB_TOKEN`
+
+## Branch and PR Workflow
+
+1. Create a feature branch from `main`.
+2. Keep changes focused and modular.
+3. Add or update tests for behavior changes.
+4. Run checks locally before opening a PR:
+
+```powershell
+pytest -q
+```
+
+5. Open a PR with:
+   - problem statement
+   - approach summary
+   - test evidence
+
+## Code Guidelines
+
+- Favor small, single-purpose functions.
+- Keep runtime and API behavior deterministic where possible.
+- Preserve backward compatibility for external API contracts unless explicitly changing versioned behavior.
+- Use clear error messages and structured outputs.
+- Do not commit secrets, API keys, or runtime artifacts.
+
+## Testing Expectations
+
+- Unit tests for new logic paths.
+- API tests for request/response contract changes.
+- Regression coverage for bug fixes.
+
+## Security and Safety
+
+- Treat safety checks as part of core behavior.
+- Do not bypass auth/token requirements in server code.
+- Restrict file access to intended directories (for artifacts/endpoints).
+
+## Reporting Issues
+
+Use issues for:
+
+- reproducible bugs
+- security concerns (with minimal sensitive detail)
+- feature requests with concrete use cases
--- a/LICENSE
+++ b/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/README.md
+++ b/README.md
@@ -1,23 +1,66 @@
 # ScreenJob

-Desktop-and-terminal task agent with:
+ScreenJob is an autonomous desktop-and-terminal execution service.  
+It lets an LLM use controlled local tools (screen, click, type, shell) to complete GUI-heavy tasks on a real computer.

- CLI runner
- FastAPI job server
- SQLite task history
- WebSocket-powered monitoring UI
- Safety pre-check and per-job tool disable controls
- Live/final token and cost estimation
+## What It Solves

-## Install
+- Runs agent-driven tasks that require a graphical interface.
+- Exposes both CLI and HTTP API modes.
+- Stores job history and events in SQLite.
+- Streams live monitoring updates over WebSocket.
+- Returns structured agent output as:
+  - `return`: human-readable completion message
+  - `data`: structured payload (for example command output)

-```powershell
-pip install openai pillow pyautogui python-dotenv fastapi uvicorn
+## Core Features
+
+- Tool-based agent loop (`execute_command`, `see_screen`, `enhance`, `click`, `type`, `press_key`, `sleep`, `task_complete`)
+- Safety pre-check with override support
+- Per-job tool disable list
+- Live/final usage and cost estimates
+- Read-only Tailwind monitoring UI
+- Persistent job and event history
+
+## Project Layout
+
+```text
+main.py
+screenjob.py
+requirements.txt
+docker-compose.yml
+src/
+  agent.py
+  app_main.py
+  cli.py
+  config.py
+  models.py
+  pricing.py
+  runtime.py
+  safety.py
+  server.py
+  storage.py
+  task_manager.py
+  ui.py
+  utils.py
+tests/
+  test_agent_tools.py
+  test_pricing.py
+  test_server_api.py
+  test_storage.py
+.gitea/workflows/ci.yml
 ```

-## Environment
+## Setup

-Create `.env` in project root:
+1. Install Python 3.11+.
+2. Install dependencies:
+
+```powershell
+pip install -r requirements.txt
+```
+
+3. Create `.env` in project root:

 ```env
 OPENAI_API_KEY=...
@@ -31,44 +74,50 @@ SCREENJOB_PORT=8787
 DISABLE_UI=false
 ```

-## Entry Points
+## Usage

- `python main.py run "<job>"`
- `python main.py server`
- Backward-compatible wrapper: `python screenjob.py "<job>"`
-
-## CLI Usage
+### CLI

 ```powershell
 python main.py run "Open amazon.de and go to my orders"
 ```

-Useful flags:
+CLI JSON output includes both legacy and structured fields:

- `--model gpt-5.4-mini`
- `--disable-tool click --disable-tool type`
- `--skip-safety-check`
- `--max-steps 80`
+```json
+{
+  "completed": true,
+  "result": "Task completed successfully",
+  "response": {
+    "return": "Task completed successfully",
+    "data": "file1.txt\nfile2.txt"
+  },
+  "return": "Task completed successfully",
+  "data": "file1.txt\nfile2.txt"
+}
+```

-## HTTP API
+### Server

-All API routes require token auth using `SCREENJOB_TOKEN`:
+```powershell
+python main.py server
+```

- `Authorization: Bearer <token>` or
- `X-ScreenJob-Token: <token>`
- (for browser/image fetch) `?token=<token>` query parameter
+All API routes require token auth. Supply the token using any one of:
+
+- `Authorization: Bearer <SCREENJOB_TOKEN>`
+- `X-ScreenJob-Token: <SCREENJOB_TOKEN>`
+- Query-parameter fallback `?token=<SCREENJOB_TOKEN>` (mainly for UI, WebSocket, and artifact fetches)

 ### Create Job

 `POST /api/jobs`

-Body:
-
 ```json
 {
-  "job": "Open amazon.de and go to my orders",
+  "job": "run \"ls -a\" in C:/Users/username/Documents and return output",
  "model": "gpt-5.4-mini",
-  "disabled_tools": ["click"],
+  "disabled_tools": [],
  "safety_override": false
 }
 ```
@@ -79,103 +128,68 @@ Response:
 { "job_id": "job_..." }
 ```

-### Status / Output
+### Job Status / History

- `GET /api/jobs/{job_id}`: full status + output + live/final usage/cost
- `GET /api/jobs/{job_id}/status`: status alias
- `GET /api/jobs/{job_id}/events`: detailed timeline
- `GET /api/jobs/{job_id}/artifact?path=<absolute_path>&token=<token>`: authenticated artifact file fetch for screenshots/enhancements
- `GET /api/jobs`: list active + past jobs
- `POST /api/jobs/{job_id}/cancel`: graceful cancellation
- `GET /api/stats`: aggregate metrics
+- `GET /api/jobs/{job_id}`
+- `GET /api/jobs/{job_id}/status`
+- `GET /api/jobs/{job_id}/events`
+- `GET /api/jobs`
+- `POST /api/jobs/{job_id}/cancel`
+- `GET /api/stats`

-## Monitoring UI
+Each job payload includes:

- Served at `/` when `DISABLE_UI=false`
- Tailwind-based read-only dashboard
- Requires entering `SCREENJOB_TOKEN` in UI before data loads
- Uses WebSocket `/ws` for live updates (tool calls, step events, usage/cost updates)
- No task launch controls in UI (monitoring only)
+- `result` (legacy string, kept for backward compatibility)
+- `response.return`
+- `response.data`
+- top-level `return` and `data` aliases

-If `DISABLE_UI=true`, `/` returns `{ "ui_disabled": true }` and only API endpoints remain.
+### Monitoring UI

-## Safety
+- URL: `/`
+- Read-only dashboard (no run controls)
+- Requires token input
+- Live updates via `/ws`
+- Set `DISABLE_UI=true` to disable UI

-Before execution, each task is classified by a model safety gate:
+## Agent Instructions (Practical)

- Safe: task runs
- Unsafe: task is rejected and recorded
- Override: set `safety_override=true` (or `--skip-safety-check` in CLI)
+- Prefer `execute_command` for deterministic actions (opening URLs, filesystem checks).
+- Use `see_screen` before UI interaction.
+- Use `enhance` when text is unclear.
+- Use `press_key` for non-text keys (Enter, Tab, arrows, Escape).
+- Use `click` offsets via `offset_up/down/left/right` and optional `sleep_after_seconds`.
+- When done, call:
+  - `task_complete(return="...", data=...)`

-## Tool Controls
+`data` should contain useful structured output for the requester (text, object, list, etc.).

-Per-job tool allowlisting via disable list:
+## Docker Compose

- API: `disabled_tools: ["type", "click"]`
- CLI: `--disable-tool type --disable-tool click`
+Run server in container:

-Available tools:
-
- `execute_command(command)`
- `sleep(seconds)`
- `see_screen()`
- `enhance(coordinate)`
- `click(coordinate, offset_up/down/left/right, sleep_after_seconds)`
- `type(text)`
- `press_key(key, repeats=1)`
- `task_complete(result)`
-
-## Cost Estimation
-
-Live/final cost is computed from OpenAI response usage (`input`, `cached_input`, `output`) and model pricing rates in `src/pricing.py`.
-
- Live: exposed in `GET /api/jobs/{job_id}` during execution
- Final: persisted in SQLite and returned in status output
-
-## Persistence
-
- SQLite DB: `screenjob.db`
- Runs/artifacts: `screenjob_runs/run_YYYYMMDD_HHMMSS/...`
- Full event log per job (for history and UI)
-
-## Project Layout
-
-```text
-main.py
-screenjob.py
-src/
-  __init__.py
-  agent.py
-  app_main.py
-  cli.py
-  config.py
-  models.py
-  pricing.py
-  runtime.py
-  safety.py
-  server.py
-  storage.py
-  task_manager.py
-  ui.py
-tests/
-  conftest.py
-  test_pricing.py
-  test_server_api.py
-  test_storage.py
-.gitea/
-  workflows/
-    ci.yml
+```powershell
+docker compose up --build
 ```

+The service uses the official `python:3.11-slim` image, installs `requirements.txt` at container start, and reads `.env`.
+
 ## Verification

-Run local verification:
+Local:

 ```powershell
 pytest -q
 ```

-Gitea CI pipeline:
+CI:

- File: `.gitea/workflows/ci.yml`
- Runs compile checks + pytest on push and PR.
+- `.gitea/workflows/ci.yml` runs compile checks + tests on push/PR.
+
+## Compatibility Entry Point
+
+- `python screenjob.py "<job>"` remains supported as a wrapper around `main.py`.
+
+## License
+
+Apache License 2.0. See `LICENSE`.
--- a/SKILL.md
+++ b/SKILL.md
@@ -0,0 +1,33 @@
+# ScreenJob Skill (OpenClaw Agents)
+
+## What ScreenJob Solves
+
+ScreenJob lets an agent execute tasks that require a real desktop UI plus terminal access, with structured tool calls and job tracking.
+
+## Main Features
+
+- Screen perception (`see_screen`, `enhance`)
+- Mouse/keyboard control (`click`, `type`, `press_key`)
+- Terminal execution (`execute_command`, `sleep`)
+- Structured completion payload (`task_complete(return=..., data=...)`)
+- Safety gate, auth, history, and live monitoring
+
+## Important Environment Note
+
+ScreenJob runs on a separate computer (the human/operator machine), not inside the agent's own runtime environment.
+
+## Why It Is Useful
+
+Agents can use ScreenJob to launch and control GUI workflows, including orchestrating other GUI agents/tools on a human computer.
+
+## Example Tasks
+
+- Open amazon.de and buy a USB-C to USB-C cable for 10 EUR or less.
+- Open google.com, go to my account, and change my profile picture to a provided image URL.
+- Run `ls -a` in `C:/Users/username/Documents` and return the output in `data`.
+
+## Practical Usage
+
+1. Submit job via CLI or API.
+2. Agent performs tool loop.
+3. Read final `response.return` and `response.data` from job status.
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,17 @@
+services:
+  screenjob:
+    image: python:3.11-slim
+    working_dir: /app
+    env_file:
+      - .env
+    environment:
+      SCREENJOB_HOST: 0.0.0.0
+      SCREENJOB_PORT: 8787
+    volumes:
+      - ./:/app
+    ports:
+      - "8787:8787"
+    command: >
+      sh -c "pip install --no-cache-dir -r requirements.txt &&
+      python main.py server"
+    restart: unless-stopped
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+fastapi>=0.115,<1.0
+httpx>=0.27,<1.0
+openai>=1.0.0
+pillow>=10.0.0
+pyautogui>=0.9.54
+pytest>=8.0.0
+python-dotenv>=1.0.0
+uvicorn>=0.30.0
--- a/src/agent.py
+++ b/src/agent.py
@@ -39,7 +39,8 @@ Rules:
 7) You may call multiple tools in one step. If needed, do click then sleep.
 8) Never spam repeated clicks on the same coordinate; switch strategy.
 9) Keep tool arguments valid JSON and concise.
-10) When objective is fully complete, call task_complete(result="...").
+10) When objective is fully complete, call task_complete(return="...", data=...).
+11) The "data" field should contain structured output useful for the requester (for example command output text).
 """


@@ -69,6 +70,7 @@ class ScreenJobAgent:
        self.step = 0
        self.completed = False
        self.final_result = ""
+        self.final_data: Any | None = None
        self.previous_response_id: str | None = None
        self.usage = UsageSummary()

@@ -134,9 +136,11 @@ class ScreenJobAgent:
                "parameters": {
                    "type": "object",
                    "properties": {
+                        "return": {"type": "string"},
                        "result": {"type": "string"},
+                        "data": {},
                    },
-                    "required": ["result"],
+                    "required": [],
                    "additionalProperties": False,
                },
            },
@@ -551,10 +555,17 @@ class ScreenJobAgent:
            return {"ok": False, "command": command, "error": f"{type(exc).__name__}: {exc}"}

    def _tool_task_complete(self, args: dict[str, Any]) -> dict[str, Any]:
-        result = str(args.get("result", "")).strip() or "Task completed."
+        return_text = str(args.get("return", "")).strip()
+        if not return_text:
+            return_text = str(args.get("result", "")).strip()
+        if not return_text:
+            return_text = "Task completed."
+
+        data = args.get("data")
        self.completed = True
-        self.final_result = result
-        return {"ok": True, "result": result}
+        self.final_result = return_text
+        self.final_data = data
+        return {"ok": True, "return": return_text, "data": data}

    def _dispatch_tool(self, name: str, args: dict[str, Any]) -> dict[str, Any]:
        if name in self.disabled_tools:
@@ -620,7 +631,8 @@ class ScreenJobAgent:
                            f"JOB: {job}\n"
                            "You are in an action loop. Prefer execute_command for deterministic actions. "
                            "You can return multiple tool calls in one step (example: click then sleep). "
-                            "Call task_complete(result=...) only when truly done."
+                            "When done call task_complete(return=..., data=...). "
+                            "Include useful structured output in data."
                        ),
                    }
                ],
@@ -672,7 +684,7 @@ class ScreenJobAgent:
                                "text": (
                                    "No function call was returned. Continue by using tools. "
                                    "You may call multiple tools in one step. "
-                                    "When complete, call task_complete(result=...)."
+                                    "When complete, call task_complete(return=..., data=...)."
                                ),
                            }
                        ],
@@ -746,6 +758,8 @@ class ScreenJobAgent:
            return AgentResult(
                completed=True,
                result=self.final_result,
+                return_message=self.final_result,
+                data=self.final_data,
                steps=self.step,
                started_at=started_at,
                ended_at=ended_at,
@@ -758,6 +772,8 @@ class ScreenJobAgent:
            return AgentResult(
                completed=False,
                result="Cancelled by user request.",
+                return_message="Cancelled by user request.",
+                data=None,
                steps=self.step,
                started_at=started_at,
                ended_at=ended_at,
@@ -772,6 +788,8 @@ class ScreenJobAgent:
            return AgentResult(
                completed=False,
                result=error_text,
+                return_message=error_text,
+                data=None,
                steps=self.step,
                started_at=started_at,
                ended_at=ended_at,
@@ -785,6 +803,8 @@ class ScreenJobAgent:
        return AgentResult(
            completed=False,
            result=result_text,
+            return_message=result_text,
+            data=None,
            steps=self.step,
            started_at=started_at,
            ended_at=ended_at,
--- a/src/cli.py
+++ b/src/cli.py
@@ -61,6 +61,9 @@ def main(argv: list[str] | None = None) -> int:
                    {
                        "completed": False,
                        "result": f"Blocked by safety check: {reason}",
+                        "response": {"return": f"Blocked by safety check: {reason}", "data": parsed},
+                        "return": f"Blocked by safety check: {reason}",
+                        "data": parsed,
                        "safety": parsed,
                    },
                    ensure_ascii=False,
@@ -101,7 +104,10 @@ def main(argv: list[str] | None = None) -> int:

    payload = {
        "completed": result.completed,
-        "result": result.result,
+        "result": result.return_message,
+        "response": {"return": result.return_message, "data": result.data},
+        "return": result.return_message,
+        "data": result.data,
        "steps": result.steps,
        "elapsed_seconds": round(result.ended_at - result.started_at, 3),
        "artifacts_dir": str(artifacts.root_dir.resolve()),
@@ -111,4 +117,3 @@ def main(argv: list[str] | None = None) -> int:
    }
    print(json.dumps(payload, ensure_ascii=False, indent=2))
    return 0 if result.completed else 1
-
--- a/src/models.py
+++ b/src/models.py
@@ -19,6 +19,8 @@ class RunArtifacts:
 class AgentResult:
    completed: bool
    result: str
+    return_message: str
+    data: Any | None
    steps: int
    started_at: float
    ended_at: float
--- a/src/storage.py
+++ b/src/storage.py
@@ -31,6 +31,7 @@ class HistoryDB:
                    started_at TEXT,
                    ended_at TEXT,
                    result TEXT,
+                    response_json TEXT,
                    error TEXT,
                    steps INTEGER DEFAULT 0,
                    cancelled INTEGER DEFAULT 0,
@@ -65,6 +66,9 @@ class HistoryDB:
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_job_events_job_id_id ON job_events(job_id, id)"
            )
+            columns = {row[1] for row in conn.execute("PRAGMA table_info(jobs)").fetchall()}
+            if "response_json" not in columns:
+                conn.execute("ALTER TABLE jobs ADD COLUMN response_json TEXT")
            conn.commit()

    def create_job(
@@ -195,6 +199,7 @@ class HistoryDB:
            "started_at": row["started_at"],
            "ended_at": row["ended_at"],
            "result": row["result"],
+            "response": self._parse_response_payload(row["response_json"], row["result"]),
            "error": row["error"],
            "steps": row["steps"],
            "cancelled": bool(row["cancelled"]),
@@ -214,3 +219,17 @@ class HistoryDB:
            },
        }

+    def _parse_response_payload(self, response_json: str | None, result: str | None) -> dict[str, Any]:
+        fallback_return = str(result or "").strip()
+        if not response_json:
+            return {"return": fallback_return, "data": None}
+        try:
+            payload = json.loads(response_json)
+            if isinstance(payload, dict):
+                return {
+                    "return": str(payload.get("return") or fallback_return),
+                    "data": payload.get("data"),
+                }
+        except Exception:
+            pass
+        return {"return": fallback_return, "data": None}
--- a/src/task_manager.py
+++ b/src/task_manager.py
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import json
 import threading
 import time
 import uuid
@@ -159,6 +160,7 @@ class JobManager:
                    ended_at=ended_at,
                    error=error_text,
                    result=error_text,
+                    response_json=json.dumps({"return": error_text, "data": None}, ensure_ascii=False),
                )
                self._publish(
                    job_id,
@@ -237,6 +239,7 @@ class JobManager:
                ended_at=ended_at,
                error=err,
                result=err,
+                response_json=json.dumps({"return": err, "data": None}, ensure_ascii=False),
            )
            self._publish(job_id, {"ts": ended_at, "step": 0, "event_type": "job_failed", "payload": {"error": err}})
            with self._lock:
@@ -251,7 +254,14 @@ class JobManager:
            job_id,
            status=status,
            ended_at=ended_at,
-            result=result.result,
+            result=result.return_message,
+            response_json=json.dumps(
+                {
+                    "return": result.return_message,
+                    "data": result.data,
+                },
+                ensure_ascii=False,
+            ),
            error=result.error,
            steps=result.steps,
            cancelled=1 if result.cancelled else 0,
@@ -271,7 +281,8 @@ class JobManager:
                "event_type": "job_finished",
                "payload": {
                    "status": status,
-                    "result": result.result,
+                    "result": result.return_message,
+                    "response": {"return": result.return_message, "data": result.data},
                    "error": result.error,
                    "cancelled": result.cancelled,
                    "usage": result.usage.to_dict(),
@@ -318,10 +329,10 @@ class JobManager:
            job["is_running_thread"] = live.thread.is_alive()
        else:
            job["is_running_thread"] = False
-        return job
+        return self._normalize_job_payload(job)

    def list_jobs(self, limit: int = 100) -> list[dict[str, Any]]:
-        return self.db.list_jobs(limit=limit)
+        return [self._normalize_job_payload(job) for job in self.db.list_jobs(limit=limit)]

    def get_events(self, job_id: str, limit: int = 500) -> list[dict[str, Any]]:
        return self.db.get_job_events(job_id, limit=limit)
@@ -331,3 +342,12 @@ class JobManager:
        with self._lock:
            stats["live_running_threads"] = sum(1 for job in self._running.values() if job.thread.is_alive())
        return stats
+
+    def _normalize_job_payload(self, job: dict[str, Any]) -> dict[str, Any]:
+        """Mirror the ``response`` envelope into top-level ``return``/``data`` keys; mutates ``job``."""
+        envelope = job.get("response")
+        if not isinstance(envelope, dict):
+            # No usable envelope on the record: synthesize one from the plain ``result`` text.
+            envelope = job["response"] = {"return": str(job.get("result") or ""), "data": None}
+        job.update({"return": str(envelope.get("return") or ""), "data": envelope.get("data")})
+        return job
--- a/tests/test_agent_tools.py
+++ b/tests/test_agent_tools.py
@@ -0,0 +1,86 @@
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+
+from PIL import Image
+
+import src.agent as agent_module
+from src.models import RunArtifacts, RuntimeOptions
+
+
+class _DummyPyAutoGUI:
+    """Stand-in for ``pyautogui`` that remembers the last move/click target instead of acting."""
+    FAILSAFE, PAUSE = True, 0.0
+
+    def __init__(self) -> None:
+        self.last_move_to: tuple[int, int] | None = None
+        self.last_click: tuple[int, int] | None = None
+
+    def size(self) -> tuple[int, int]:
+        return 1280, 720
+
+    def screenshot(self) -> Image.Image:
+        return Image.new("RGB", (1280, 720), color=(24, 24, 24))
+
+    def moveTo(self, x: int, y: int, duration: float = 0.0) -> None:  # noqa: N802
+        self.last_move_to = x, y
+
+    def click(self, x: int, y: int) -> None:
+        self.last_click = x, y
+
+    def write(self, _: str, interval: float = 0.0) -> None:
+        """Accept text and do nothing."""
+
+    def press(self, _: str) -> None:
+        """Accept a key and do nothing."""
+
+
+def _build_agent(tmp_path: Path, monkeypatch) -> agent_module.ScreenJobAgent:
+    """Create a ScreenJobAgent wired to the dummy GUI, a no-op sleep and a run dir under ``tmp_path``."""
+    # Install the fake GUI and a no-op sleep on the agent module before building the agent.
+    monkeypatch.setattr(agent_module, "pyautogui", _DummyPyAutoGUI())
+    monkeypatch.setattr(agent_module.time, "sleep", lambda _: None)
+
+    workspace = tmp_path / "run"
+    workspace.mkdir(parents=True, exist_ok=True)
+    run_artifacts = RunArtifacts(
+        run_id="test_run",
+        root_dir=workspace,
+        logs_dir=workspace / "logs",
+        shots_dir=workspace / "shots",
+        enhance_dir=workspace / "enhance",
+        log_file=workspace / "screenjob.log",
+    )
+
+    return agent_module.ScreenJobAgent(
+        client=object(),  # type: ignore[arg-type]
+        logger=logging.getLogger("screenjob-test-agent"),
+        artifacts=run_artifacts,
+        options=RuntimeOptions(model="gpt-5.4-mini"),
+    )
+
+
+def test_task_complete_captures_return_and_data(tmp_path: Path, monkeypatch) -> None:
+    """task_complete echoes ``return``/``data`` in its result and records both on the agent."""
+    message, listing = "Task completed successfully", "file1\nfile2"
+    agent = _build_agent(tmp_path, monkeypatch)
+    outcome = agent._tool_task_complete({"return": message, "data": listing})
+    assert outcome["ok"] is True
+    assert (outcome["return"], outcome["data"]) == (message, listing)
+    assert (agent.final_result, agent.final_data) == (message, listing)
+
+
+def test_click_supports_directional_offsets(tmp_path: Path, monkeypatch) -> None:
+    """Directional offsets and the x/y ``offset`` both shift the clicked point."""
+    click_args = {
+        "coordinate": {"x": 100, "y": 100},
+        "offset_up": "2px",
+        "offset_right": 7,
+        "offset": {"x": 3, "y": 4},
+        "sleep_after_seconds": 0,
+    }
+    outcome = _build_agent(tmp_path, monkeypatch)._tool_click(click_args)
+    assert outcome["ok"] is True
+    # Expected point is consistent with x = 100 + 7 + 3 and y = 100 - 2 + 4.
+    assert outcome["clicked"] == {"x": 110, "y": 102}
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -0,0 +1,68 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+import src.cli as cli_module
+from src.config import AppConfig
+from src.models import AgentResult, RunArtifacts, UsageSummary
+
+
+def test_cli_emits_structured_return_and_data(monkeypatch: Any, capsys, tmp_path: Path) -> None:
+    """The CLI's JSON output exposes ``return``/``data`` under ``response`` and at the top level."""
+    config = AppConfig(
+        openai_api_key="test_key",
+        screenjob_token="test_token",
+        disable_ui=False,
+        default_model="gpt-5.4-mini",
+        safety_model="gpt-5.4-mini",
+        host="127.0.0.1",
+        port=8787,
+        runs_dir=tmp_path / "runs",
+        db_path=tmp_path / "screenjob.db",
+    )
+    config.runs_dir.mkdir(parents=True, exist_ok=True)
+
+    def fake_load_app_config(_: Path) -> AppConfig:
+        return config
+
+    def fake_assess_task_safety(*_args, **_kwargs):
+        return True, "safe", {"safe": True}
+
+    def fake_run_job(*_args, **_kwargs):
+        run_root = config.runs_dir / "run_20260527_000001"
+        agent_result = AgentResult(
+            completed=True,
+            result="Done",
+            return_message="Task completed successfully",
+            data="file1.txt\nfile2.txt",
+            steps=3,
+            started_at=10.0,
+            ended_at=12.5,
+            usage=UsageSummary(total_tokens=123),
+            error=None,
+            cancelled=False,
+        )
+        run_artifacts = RunArtifacts(
+            run_id="20260527_000001",
+            root_dir=run_root,
+            logs_dir=run_root / "logs",
+            shots_dir=run_root / "shots",
+            enhance_dir=run_root / "enhance",
+            log_file=run_root / "screenjob.log",
+        )
+        return agent_result, run_artifacts
+
+    monkeypatch.setattr(cli_module, "load_app_config", fake_load_app_config)
+    monkeypatch.setattr(cli_module, "assess_task_safety", fake_assess_task_safety)
+    monkeypatch.setattr(cli_module, "run_job", fake_run_job)
+    monkeypatch.setattr(cli_module, "create_openai_client", lambda *_args, **_kwargs: object())
+
+    assert cli_module.main(["Open amazon.de"]) == 0
+
+    payload = json.loads(capsys.readouterr().out)
+    expected = {"return": "Task completed successfully", "data": "file1.txt\nfile2.txt"}
+    for key, value in expected.items():
+        assert payload["response"][key] == value
+        assert payload[key] == value
--- a/tests/test_server_api.py
+++ b/tests/test_server_api.py
@@ -49,6 +49,10 @@ class FakeJobManager:
            "objective": objective,
            "model": selected_model,
            "status": "running",
+            "result": "Running",
+            "response": {"return": "Running", "data": None},
+            "return": "Running",
+            "data": None,
            "usage": {
                "input_tokens": 10,
                "cached_input_tokens": 2,
@@ -145,6 +149,8 @@ def test_create_job_returns_only_job_id_and_defaults_model(tmp_path: Path, monke
    status_res = client.get(f"/api/jobs/{job_id}/status", headers=headers)
    assert status_res.status_code == 200
    assert status_res.json()["job_id"] == job_id
+    assert status_res.json()["response"]["return"] == "Running"
+    assert "data" in status_res.json()["response"]


 def test_cancel_endpoint_and_events(tmp_path: Path, monkeypatch: Any) -> None:
@@ -164,6 +170,8 @@ def test_cancel_endpoint_and_events(tmp_path: Path, monkeypatch: Any) -> None:

    status_after = client.get(f"/api/jobs/{job_id}", headers=headers).json()
    assert status_after["status"] == "cancelling"
+    assert status_after["return"] == "Running"
+    assert status_after["data"] is None


 def test_ui_toggle(tmp_path: Path, monkeypatch: Any) -> None:
@@ -178,4 +186,3 @@ def test_ui_toggle(tmp_path: Path, monkeypatch: Any) -> None:
    root_disabled = client_disabled.get("/")
    assert root_disabled.status_code == 200
    assert root_disabled.json()["ui_disabled"] is True
-
--- a/tests/test_storage.py
+++ b/tests/test_storage.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+import json

 from src.storage import HistoryDB

@@ -26,6 +27,7 @@ def test_history_db_job_and_events_roundtrip(tmp_path: Path) -> None:
        status="completed",
        ended_at="2026-05-27T00:00:02Z",
        result="Done",
+        response_json=json.dumps({"return": "Done", "data": {"files": ["a.txt", "b.txt"]}}, ensure_ascii=False),
        steps=2,
        estimated_cost_usd=0.1234,
    )
@@ -35,6 +37,8 @@ def test_history_db_job_and_events_roundtrip(tmp_path: Path) -> None:
    assert job["status"] == "completed"
    assert job["model"] == "gpt-5.4-mini"
    assert job["disabled_tools"] == ["click"]
+    assert job["response"]["return"] == "Done"
+    assert job["response"]["data"]["files"] == ["a.txt", "b.txt"]
    assert job["usage"]["estimated_cost_usd"] == 0.1234

    events = db.get_job_events(job_id, limit=10)
@@ -51,3 +55,20 @@ def test_history_db_job_and_events_roundtrip(tmp_path: Path) -> None:
    assert stats["completed_jobs"] == 1
    assert abs(stats["total_estimated_cost"] - 0.1234) < 1e-9

+
+def test_storage_response_fallback_uses_result_when_json_missing(tmp_path: Path) -> None:
+    """A job updated without ``response_json`` reports its ``result`` text as the response."""
+    history = HistoryDB(tmp_path / "screenjob_test_fallback.db")
+    history.create_job(
+        job_id="job_test_002",
+        objective="Fallback check",
+        model="gpt-5.4-mini",
+        created_at="2026-05-27T00:00:00Z",
+        safety_override=False,
+        disabled_tools=[],
+    )
+    history.update_job("job_test_002", status="completed", result="Legacy result string")
+    stored = history.get_job("job_test_002")
+    assert stored is not None
+    assert stored["response"]["return"] == "Legacy result string"
+    assert stored["response"]["data"] is None