Compare commits
7 Commits
35fb17f888...main
| Author | SHA1 | Date | |
|---|---|---|---|
| | 32fb8d9813 | | |
| | 575e2d6747 | | |
| | 869a70b621 | | |
| | 1c6415d306 | | |
| | 44af756bd3 | | |
| | 2b1c26781e | | |
| | a2bd3ea822 | | |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,3 +1,6 @@
 __pycache__/
 .pytest_cache/
 *.egg-info/
+.venv/
+.venv-ci/
+build/
12
README.md
12
README.md
@@ -9,8 +9,17 @@ Two separate Python packages live here:
 
 The repo also includes a Gitea Actions workflow at `.gitea/workflows/ci.yml` that tests and builds both packages on pushes to `main` and pull requests.
 
-## Backend setup
+## Docker backend (no image build)
 
+Run the backend directly from an official Python image without creating a Dockerfile:
+
+```bash
+docker compose up backend
+```
+
+This uses `python:3.14-slim`, installs `ffmpeg` and `openai-whisper` at container startup, mounts this repo into the container, and serves the API on `http://localhost:8000`.
+
+## Backend setup
 ```bash
 cd backend
 pip install -e .
@@ -50,3 +59,4 @@ whisper-remote ./audio.mp3 --model base --language en --output-format txt
 - backend-side cleanup of uploaded and generated files after each request
 
 By default the CLI prints the returned transcript to stdout. Use `--to-file` to save it locally.
+
@@ -17,6 +17,7 @@ CONTENT_TYPES = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
app = FastAPI(title="whisper-remote-backend")
|
app = FastAPI(title="whisper-remote-backend")
|
||||||
|
WHISPER_PROCESS_TIMEOUT_SECONDS = 300
|
||||||
|
|
||||||
|
|
||||||
def validate_output_format(output_format: str) -> str:
|
def validate_output_format(output_format: str) -> str:
|
||||||
@@ -112,12 +113,21 @@ async def transcribe(
|
|||||||
check=False,
|
check=False,
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
text=True,
|
text=True,
|
||||||
|
timeout=WHISPER_PROCESS_TIMEOUT_SECONDS,
|
||||||
)
|
)
|
||||||
except FileNotFoundError as exc:
|
except FileNotFoundError as exc:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=500,
|
status_code=500,
|
||||||
detail="The 'whisper' CLI was not found on PATH on the backend host.",
|
detail="The 'whisper' CLI was not found on PATH on the backend host.",
|
||||||
) from exc
|
) from exc
|
||||||
|
except subprocess.TimeoutExpired as exc:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=504,
|
||||||
|
detail=(
|
||||||
|
"Whisper CLI timed out after "
|
||||||
|
f"{WHISPER_PROCESS_TIMEOUT_SECONDS}s and was terminated."
|
||||||
|
),
|
||||||
|
) from exc
|
||||||
|
|
||||||
if completed.returncode != 0:
|
if completed.returncode != 0:
|
||||||
detail = completed.stderr.strip() or completed.stdout.strip() or "Whisper CLI failed."
|
detail = completed.stderr.strip() or completed.stdout.strip() or "Whisper CLI failed."
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ def test_validate_output_format_rejects_unknown() -> None:
|
|||||||
|
|
||||||
|
|
||||||
def test_transcriptions_returns_generated_artifact(monkeypatch, tmp_path: Path) -> None:
|
def test_transcriptions_returns_generated_artifact(monkeypatch, tmp_path: Path) -> None:
|
||||||
def fake_run(command: list[str], check: bool, capture_output: bool, text: bool):
|
def fake_run(command: list[str], check: bool, capture_output: bool, text: bool, timeout: int):
|
||||||
output_dir = Path(command[command.index("--output_dir") + 1])
|
output_dir = Path(command[command.index("--output_dir") + 1])
|
||||||
(output_dir / "clip.txt").write_text("hello world", encoding="utf-8")
|
(output_dir / "clip.txt").write_text("hello world", encoding="utf-8")
|
||||||
|
|
||||||
@@ -46,7 +46,7 @@ def test_transcriptions_returns_generated_artifact(monkeypatch, tmp_path: Path)
|
|||||||
|
|
||||||
|
|
||||||
def test_transcriptions_maps_subprocess_failure(monkeypatch) -> None:
|
def test_transcriptions_maps_subprocess_failure(monkeypatch) -> None:
|
||||||
def fake_run(command: list[str], check: bool, capture_output: bool, text: bool):
|
def fake_run(command: list[str], check: bool, capture_output: bool, text: bool, timeout: int):
|
||||||
class Result:
|
class Result:
|
||||||
returncode = 1
|
returncode = 1
|
||||||
stdout = ""
|
stdout = ""
|
||||||
@@ -64,3 +64,22 @@ def test_transcriptions_maps_subprocess_failure(monkeypatch) -> None:
|
|||||||
|
|
||||||
assert response.status_code == 502
|
assert response.status_code == 502
|
||||||
assert response.json()["detail"] == "bad whisper day"
|
assert response.json()["detail"] == "bad whisper day"
|
||||||
|
|
||||||
|
|
||||||
|
def test_transcriptions_maps_subprocess_timeout(monkeypatch) -> None:
|
||||||
|
def fake_run(command: list[str], check: bool, capture_output: bool, text: bool, timeout: int):
|
||||||
|
raise server.subprocess.TimeoutExpired(cmd=command, timeout=timeout)
|
||||||
|
|
||||||
|
monkeypatch.setattr(server.subprocess, "run", fake_run)
|
||||||
|
|
||||||
|
response = client.post(
|
||||||
|
"/transcriptions",
|
||||||
|
data={"model": "base", "output_format": "txt"},
|
||||||
|
files={"file": ("clip.wav", b"audio", "audio/wav")},
|
||||||
|
)
|
||||||
|
|
||||||
|
assert response.status_code == 504
|
||||||
|
assert (
|
||||||
|
response.json()["detail"]
|
||||||
|
== f"Whisper CLI timed out after {server.WHISPER_PROCESS_TIMEOUT_SECONDS}s and was terminated."
|
||||||
|
)
|
||||||
|
|||||||
@@ -59,6 +59,19 @@ def save_response(response: httpx.Response, destination: Path) -> None:
|
|||||||
destination.write_bytes(response.content)
|
destination.write_bytes(response.content)
|
||||||
|
|
||||||
|
|
||||||
|
def format_http_error(response: httpx.Response, endpoint: str) -> str:
|
||||||
|
body = response.text.strip() or "<empty response body>"
|
||||||
|
return f"HTTP {response.status_code} from {endpoint}: {body}"
|
||||||
|
|
||||||
|
|
||||||
|
def format_request_error(exc: httpx.RequestError, endpoint: str) -> str:
|
||||||
|
if isinstance(exc, httpx.TimeoutException):
|
||||||
|
return f"Request to {endpoint} timed out."
|
||||||
|
|
||||||
|
reason = str(exc).strip() or exc.__class__.__name__
|
||||||
|
return f"Request to {endpoint} failed: {reason}"
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
parser = build_parser()
|
parser = build_parser()
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
@@ -70,21 +83,24 @@ def main() -> int:
|
|||||||
server = resolve_server(args)
|
server = resolve_server(args)
|
||||||
endpoint = f"{server}/transcriptions"
|
endpoint = f"{server}/transcriptions"
|
||||||
|
|
||||||
with input_file.open("rb") as handle, httpx.Client(timeout=300.0) as client:
|
try:
|
||||||
response = client.post(
|
with input_file.open("rb") as handle, httpx.Client(timeout=300.0) as client:
|
||||||
endpoint,
|
response = client.post(
|
||||||
data={
|
endpoint,
|
||||||
"model": args.model,
|
data={
|
||||||
"language": args.language or "",
|
"model": args.model,
|
||||||
"output_format": args.output_format,
|
"language": args.language or "",
|
||||||
},
|
"output_format": args.output_format,
|
||||||
files={"file": (input_file.name, handle, "application/octet-stream")},
|
},
|
||||||
)
|
files={"file": (input_file.name, handle, "application/octet-stream")},
|
||||||
|
)
|
||||||
|
except httpx.RequestError as exc:
|
||||||
|
parser.exit(1, f"{format_request_error(exc, endpoint)}\n")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
except httpx.HTTPStatusError as exc:
|
except httpx.HTTPStatusError as exc:
|
||||||
message = exc.response.text.strip() or str(exc)
|
message = format_http_error(exc.response, endpoint)
|
||||||
parser.exit(1, f"{message}\n")
|
parser.exit(1, f"{message}\n")
|
||||||
|
|
||||||
if args.to_file:
|
if args.to_file:
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import pytest
|
|||||||
sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "src"))
|
sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "src"))
|
||||||
|
|
||||||
import main
|
import main
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_server_from_env(monkeypatch) -> None:
|
def test_resolve_server_from_env(monkeypatch) -> None:
|
||||||
@@ -29,3 +30,34 @@ def test_infer_output_path_for_directory(tmp_path: Path) -> None:
|
|||||||
def test_infer_output_path_for_explicit_file(tmp_path: Path) -> None:
|
def test_infer_output_path_for_explicit_file(tmp_path: Path) -> None:
|
||||||
destination = main.infer_output_path(tmp_path / "custom-name.txt", Path("clip.wav"), "txt")
|
destination = main.infer_output_path(tmp_path / "custom-name.txt", Path("clip.wav"), "txt")
|
||||||
assert destination == tmp_path / "custom-name.txt"
|
assert destination == tmp_path / "custom-name.txt"
|
||||||
|
|
||||||
|
|
||||||
|
def test_format_http_error_with_body() -> None:
|
||||||
|
request = httpx.Request("POST", "http://localhost:8000/transcriptions")
|
||||||
|
response = httpx.Response(500, text="Internal Server Error", request=request)
|
||||||
|
message = main.format_http_error(response, "http://localhost:8000/transcriptions")
|
||||||
|
assert message == "HTTP 500 from http://localhost:8000/transcriptions: Internal Server Error"
|
||||||
|
|
||||||
|
|
||||||
|
def test_format_http_error_with_empty_body() -> None:
|
||||||
|
request = httpx.Request("POST", "http://localhost:8000/transcriptions")
|
||||||
|
response = httpx.Response(500, text="", request=request)
|
||||||
|
message = main.format_http_error(response, "http://localhost:8000/transcriptions")
|
||||||
|
assert message == "HTTP 500 from http://localhost:8000/transcriptions: <empty response body>"
|
||||||
|
|
||||||
|
|
||||||
|
def test_format_request_error_timeout() -> None:
|
||||||
|
request = httpx.Request("POST", "http://localhost:8000/transcriptions")
|
||||||
|
exc = httpx.ReadTimeout("read timed out", request=request)
|
||||||
|
message = main.format_request_error(exc, "http://localhost:8000/transcriptions")
|
||||||
|
assert message == "Request to http://localhost:8000/transcriptions timed out."
|
||||||
|
|
||||||
|
|
||||||
|
def test_format_request_error_network_failure() -> None:
|
||||||
|
request = httpx.Request("POST", "http://localhost:8000/transcriptions")
|
||||||
|
exc = httpx.ConnectError("connection refused", request=request)
|
||||||
|
message = main.format_request_error(exc, "http://localhost:8000/transcriptions")
|
||||||
|
assert (
|
||||||
|
message
|
||||||
|
== "Request to http://localhost:8000/transcriptions failed: connection refused"
|
||||||
|
)
|
||||||
|
|||||||
22
docker-compose.yml
Normal file
22
docker-compose.yml
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
services:
|
||||||
|
backend:
|
||||||
|
image: python:3.14-slim
|
||||||
|
working_dir: /app/backend
|
||||||
|
ports:
|
||||||
|
- "8000:8000"
|
||||||
|
volumes:
|
||||||
|
- ./:/app
|
||||||
|
- whisper_cache:/root/.cache
|
||||||
|
environment:
|
||||||
|
- PYTHONUNBUFFERED=1
|
||||||
|
command: >-
|
||||||
|
sh -lc "
|
||||||
|
apt-get update
|
||||||
|
&& apt-get install -y --no-install-recommends ffmpeg
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
&& pip install --no-cache-dir -e . openai-whisper
|
||||||
|
&& uvicorn server:app --app-dir src --host 0.0.0.0 --port 8000
|
||||||
|
"
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
whisper_cache:
|
||||||
Reference in New Issue
Block a user