Harden stuck-job retry accounting and retry config validation
This commit is contained in:
@@ -169,3 +169,49 @@ def test_requeue_allows_one_retry_then_fails_on_second_timeout() -> None:
|
||||
assert second[0].failed is True
|
||||
failed = session.execute(select(ReviewJob).where(ReviewJob.id == job.id)).scalar_one()
|
||||
assert failed.status == JobStatus.failed
|
||||
|
||||
|
||||
def test_recovery_persists_timeout_retry_even_if_latest_run_not_running() -> None:
|
||||
session_factory = get_session_factory()
|
||||
with session_factory() as session:
|
||||
job = enqueue_job(
|
||||
session,
|
||||
repo="acme/repo",
|
||||
pr_number=5,
|
||||
head_sha="33334444",
|
||||
trigger_comment_id=1005,
|
||||
trigger_comment_body="@codex review",
|
||||
requested_by="alice",
|
||||
command=ParsedCommand(name="review", raw="@codex review"),
|
||||
)
|
||||
claimed = claim_next_job(session)
|
||||
assert claimed is not None
|
||||
assert claimed.id == job.id
|
||||
|
||||
with session_factory() as session:
|
||||
latest_run = session.execute(select(ReviewRun).where(ReviewRun.job_id == job.id).order_by(ReviewRun.id.desc()).limit(1)).scalar_one()
|
||||
latest_run.status = RunStatus.succeeded
|
||||
latest_run.finished_at = datetime.now(timezone.utc)
|
||||
stale = session.execute(select(ReviewJob).where(ReviewJob.id == job.id)).scalar_one()
|
||||
stale.started_at = datetime.now(timezone.utc) - timedelta(seconds=601)
|
||||
session.commit()
|
||||
|
||||
with session_factory() as session:
|
||||
first = recover_stuck_jobs(session, lease_timeout_seconds=300, action="requeue", max_retries=1)
|
||||
assert len(first) == 1
|
||||
assert first[0].failed is False
|
||||
claimed_again = claim_next_job(session)
|
||||
assert claimed_again is not None
|
||||
assert claimed_again.id == job.id
|
||||
|
||||
with session_factory() as session:
|
||||
stale_again = session.execute(select(ReviewJob).where(ReviewJob.id == job.id)).scalar_one()
|
||||
stale_again.started_at = datetime.now(timezone.utc) - timedelta(seconds=601)
|
||||
session.commit()
|
||||
|
||||
with session_factory() as session:
|
||||
second = recover_stuck_jobs(session, lease_timeout_seconds=300, action="requeue", max_retries=1)
|
||||
assert len(second) == 1
|
||||
assert second[0].failed is True
|
||||
final = session.execute(select(ReviewJob).where(ReviewJob.id == job.id)).scalar_one()
|
||||
assert final.status == JobStatus.failed
|
||||
|
||||
Reference in New Issue
Block a user