Add whisper remote backend and CLI

This commit is contained in:
2026-05-24 12:36:24 +02:00
parent ad10a62ae6
commit aa89590555
12 changed files with 445 additions and 0 deletions

23
backend/README.md Normal file
View File

@@ -0,0 +1,23 @@
# whisper-remote-backend
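FastAPI wrapper around the upstream `whisper` CLI from `openai/whisper`. The `whisper` executable is not installed as a dependency of this package; it must already be available on `PATH` on the backend host.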
## Run
```bash
pip install -e .
uvicorn whisper_remote_backend.server:app --host 0.0.0.0 --port 8000
```
## API
`POST /transcriptions`
Multipart form fields:
- `file`: media file to transcribe (required)
- `model`: Whisper model name (required)
- `language`: optional language code
- `output_format`: `txt`, `vtt`, `srt`, `tsv`, or `json` (required; case-insensitive)
The response body is the transcript artifact itself. The backend deletes the uploaded file and generated output after each request.

30
backend/pyproject.toml Normal file
View File

@@ -0,0 +1,30 @@
[build-system]
requires = ["setuptools>=68", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "whisper-remote-backend"
version = "0.1.0"
description = "FastAPI wrapper around the openai/whisper CLI"
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
"fastapi>=0.115.0,<1.0.0",
"python-multipart>=0.0.9,<1.0.0",
"uvicorn>=0.32.0,<1.0.0",
]
[project.scripts]
whisper-remote-server = "whisper_remote_backend.server:main"
[project.optional-dependencies]
dev = [
"httpx>=0.28.0,<1.0.0",
"pytest>=8.3.0,<9.0.0",
]
[tool.setuptools]
package-dir = {"" = "src"}
[tool.setuptools.packages.find]
where = ["src"]

View File

@@ -0,0 +1 @@
"""whisper-remote backend package."""

View File

@@ -0,0 +1,144 @@
from __future__ import annotations

import asyncio
import subprocess
from pathlib import Path
from tempfile import TemporaryDirectory
from urllib.parse import quote

from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import Response
# Response Content-Type for each transcript format the API accepts.
CONTENT_TYPES = {
    "txt": "text/plain; charset=utf-8",
    "vtt": "text/vtt; charset=utf-8",
    "srt": "application/x-subrip; charset=utf-8",
    "tsv": "text/tab-separated-values; charset=utf-8",
    "json": "application/json; charset=utf-8",
}
# Accepted `output_format` values; derived from the table above so the two
# cannot drift apart.
SUPPORTED_FORMATS = set(CONTENT_TYPES)
# Application object; the route handlers in this module register on it via
# the @app.get / @app.post decorators.
app = FastAPI(title="whisper-remote-backend")
def validate_output_format(output_format: str) -> str:
    """Normalize a requested output format and check that it is supported.

    Args:
        output_format: Format name as sent by the client. Surrounding
            whitespace is ignored and matching is case-insensitive.

    Returns:
        The stripped, lower-cased format name.

    Raises:
        HTTPException: 400 when the normalized name is not in
            ``SUPPORTED_FORMATS``. The message echoes the value exactly as
            the client sent it.
    """
    candidate = output_format.strip().lower()
    if candidate in SUPPORTED_FORMATS:
        return candidate

    allowed = ", ".join(sorted(SUPPORTED_FORMATS))
    raise HTTPException(
        status_code=400,
        detail=f"Unsupported output format '{output_format}'. Supported formats: {allowed}.",
    )
def build_whisper_command(
*,
input_path: Path,
output_dir: Path,
model: str,
language: str | None,
output_format: str,
) -> list[str]:
command = [
"whisper",
str(input_path),
"--model",
model,
"--output_format",
output_format,
"--output_dir",
str(output_dir),
]
if language:
command.extend(["--language", language])
return command
async def save_upload(upload: UploadFile, destination: Path) -> None:
    """Stream an uploaded file to disk and close the upload.

    The upload is read in 1 MiB chunks so the whole payload is never held in
    memory at once.

    Args:
        upload: Object exposing awaitable ``read(size)`` and ``close()``.
        destination: File path to create or overwrite with the upload's bytes.

    Raises:
        OSError: If ``destination`` cannot be opened or written. The upload
            is still closed in that case.
    """
    try:
        with destination.open("wb") as handle:
            while chunk := await upload.read(1024 * 1024):
                handle.write(chunk)
    finally:
        # Close on every path: previously a failed open/write skipped this
        # and left the upload's underlying file open.
        await upload.close()
def find_transcript_file(output_dir: Path, input_name: str, output_format: str) -> Path:
    """Locate the transcript file written into *output_dir*.

    The preferred match is ``<stem of input_name>.<output_format>``. If that
    file does not exist, the directory is searched for files with the
    requested extension and the match is accepted only when it is unambiguous
    (exactly one such file).

    Args:
        output_dir: Directory the transcript was written to.
        input_name: Name of the uploaded media file.
        output_format: Extension of the transcript, without the dot.

    Returns:
        Path of the transcript file.

    Raises:
        HTTPException: 500 when neither lookup yields exactly one file.
    """
    stem = Path(input_name).stem
    candidate = output_dir / f"{stem}.{output_format}"
    if not candidate.exists():
        same_extension = list(output_dir.glob(f"*.{output_format}"))
        if len(same_extension) != 1:
            raise HTTPException(
                status_code=500,
                detail="Whisper finished without producing the expected output file.",
            )
        candidate = same_extension[0]
    return candidate
@app.get("/health")
def healthcheck() -> dict[str, str]:
    """Return a fixed payload showing that the service is answering requests."""
    payload = {"status": "ok"}
    return payload
def _content_disposition(download_name: str) -> str:
    """Build an ``attachment`` Content-Disposition value for *download_name*.

    The plain ``filename="..."`` parameter carries only printable ASCII with
    quotes and backslashes removed, so a client-chosen name cannot break out
    of the quoted string or make the header unencodable. When that fallback
    differs from the real name, the exact name is also sent percent-encoded
    in a ``filename*=UTF-8''...`` parameter.
    """
    ascii_name = "".join(
        char
        for char in download_name
        if char.isascii() and char.isprintable() and char not in '"\\'
    )
    fallback = ascii_name or "transcript"
    header = f'attachment; filename="{fallback}"'
    if fallback != download_name:
        header += f"; filename*=UTF-8''{quote(download_name, safe='')}"
    return header


@app.post("/transcriptions")
async def transcribe(
    file: UploadFile = File(...),
    model: str = Form(...),
    language: str | None = Form(default=None),
    output_format: str = Form(...),
) -> Response:
    """Transcribe an uploaded media file with the ``whisper`` CLI.

    The upload and the CLI output live in temporary directories that are
    removed before the response is returned.

    Args:
        file: Media file to transcribe; must carry a filename.
        model: Model name forwarded to ``whisper --model``.
        language: Optional language forwarded to ``whisper --language``.
        output_format: One of the supported transcript formats.

    Returns:
        The transcript as the response body, with a Content-Type matching the
        format, a ``Content-Disposition`` attachment name derived from the
        upload's filename, and ``X-Whisper-*`` headers echoing the request.

    Raises:
        HTTPException: 400 for an unsupported format or an unusable filename;
            500 when the ``whisper`` executable is missing or produced no
            output file; 502 when the CLI exits with a non-zero status.
    """
    normalized_format = validate_output_format(output_format)
    if not file.filename:
        raise HTTPException(status_code=400, detail="Uploaded file must have a filename.")

    # Keep only the final path component so the upload cannot be written
    # outside the temporary directory. Names such as "/", "." or "a/.." leave
    # nothing usable and would otherwise fail later with an unhandled OSError.
    upload_name = Path(file.filename).name
    if upload_name in {"", ".", ".."}:
        raise HTTPException(status_code=400, detail="Uploaded file must have a usable filename.")

    with TemporaryDirectory(prefix="whisper-remote-upload-") as upload_root, TemporaryDirectory(
        prefix="whisper-remote-output-"
    ) as output_root:
        input_path = Path(upload_root) / upload_name
        output_dir = Path(output_root)
        await save_upload(file, input_path)
        command = build_whisper_command(
            input_path=input_path,
            output_dir=output_dir,
            model=model,
            language=language,
            output_format=normalized_format,
        )
        try:
            # subprocess.run blocks until whisper exits. Running it in a
            # worker thread keeps the event loop free to serve other requests
            # (for example /health) while a transcription is in progress.
            completed = await asyncio.to_thread(
                subprocess.run,
                command,
                check=False,
                capture_output=True,
                text=True,
            )
        except FileNotFoundError as exc:
            raise HTTPException(
                status_code=500,
                detail="The 'whisper' CLI was not found on PATH on the backend host.",
            ) from exc
        if completed.returncode != 0:
            detail = completed.stderr.strip() or completed.stdout.strip() or "Whisper CLI failed."
            raise HTTPException(status_code=502, detail=detail)
        transcript_path = find_transcript_file(output_dir, upload_name, normalized_format)
        # Read the transcript into memory before the temporary directories
        # are deleted on leaving the ``with`` block.
        content = transcript_path.read_bytes()

    download_name = f"{Path(upload_name).stem}.{normalized_format}"
    return Response(
        content=content,
        media_type=CONTENT_TYPES[normalized_format],
        headers={
            "Content-Disposition": _content_disposition(download_name),
            "X-Whisper-Output-Format": normalized_format,
            "X-Whisper-Model": model,
        },
    )
def main() -> None:
    """Start uvicorn serving this module's ``app`` on 0.0.0.0, port 8000."""
    # Imported lazily so importing this module does not load the server runtime.
    from uvicorn import run

    run("whisper_remote_backend.server:app", host="0.0.0.0", port=8000)

View File

@@ -0,0 +1,63 @@
from pathlib import Path
from fastapi.testclient import TestClient
from whisper_remote_backend import server
# In-process HTTP client bound to the backend app; shared by the endpoint tests.
client = TestClient(server.app)
def test_validate_output_format_rejects_unknown() -> None:
    """An unsupported format name must raise an error carrying status 400."""
    raised = None
    try:
        server.validate_output_format("docx")
    except Exception as exc:  # pragma: no cover - structure assertion below
        raised = exc
    if raised is None:  # pragma: no cover
        raise AssertionError("Expected HTTPException")
    assert getattr(raised, "status_code", None) == 400
def test_transcriptions_returns_generated_artifact(monkeypatch, tmp_path: Path) -> None:
    """A successful run returns the file the CLI wrote, with format metadata."""

    class SuccessfulRun:
        returncode = 0
        stdout = ""
        stderr = ""

    def fake_run(command: list[str], check: bool, capture_output: bool, text: bool):
        # Stand in for whisper: write the transcript where the real CLI
        # would, i.e. into the directory given after --output_dir.
        flag_position = command.index("--output_dir")
        target_dir = Path(command[flag_position + 1])
        (target_dir / "clip.txt").write_text("hello world", encoding="utf-8")
        return SuccessfulRun()

    monkeypatch.setattr(server.subprocess, "run", fake_run)

    form_fields = {"model": "base", "language": "en", "output_format": "txt"}
    upload = {"file": ("clip.wav", b"audio", "audio/wav")}
    response = client.post("/transcriptions", data=form_fields, files=upload)

    assert response.status_code == 200
    assert response.text == "hello world"
    assert response.headers["x-whisper-output-format"] == "txt"
def test_transcriptions_maps_subprocess_failure(monkeypatch) -> None:
    """A non-zero CLI exit status surfaces as HTTP 502 carrying the CLI's stderr."""

    class FailedRun:
        returncode = 1
        stdout = ""
        stderr = "bad whisper day"

    def fake_run(command: list[str], check: bool, capture_output: bool, text: bool):
        return FailedRun()

    monkeypatch.setattr(server.subprocess, "run", fake_run)

    form_fields = {"model": "base", "output_format": "txt"}
    upload = {"file": ("clip.wav", b"audio", "audio/wav")}
    response = client.post("/transcriptions", data=form_fields, files=upload)

    assert response.status_code == 502
    assert response.json()["detail"] == "bad whisper day"