Files
Ben Stull 1a0c4428af Slice 8 WIP: §12 hygiene + §10.7 + routing + rollback cleanup
- Add §12 30/90 hygiene scheduler in hygiene.py, mirroring the
  DigestScheduler shape; wires next to digest in main.py with the
  same start/stop/run_tick test seam.
- Extend bot.delete_branch to accept actor=None for system gestures,
  per §15.9 (actor_user_id=NULL, on_behalf_of=bot_login).
- Convert every branches/{branch} route in api_branches.py and
  api_prs.py to {branch:path}; move the bare GET to the bottom of
  the router so deeper GETs match before greedy-path swallow.
- Extend api_prs.py's _require_pr to accept pr_kind='meta_metadata'
  so the §9.5 metadata-pane PRs land an in-app merge.
- Graduation rollback now deletes the graduate-<slug>-<6hex> branch
  after closing the PR — §19.2 candidate that lands here.
- Email-bounce webhook gains a WEBHOOK_EMAIL_BOUNCE_SECRET seam.
- FakeGitea grows a DELETE /branches/{branch:path} handler and a
  slashed-branch read; integration tests for the hygiene vertical
  cover the 30d close, 90d delete, post-merge delete, pinned
  exemption, per-user cursor preservation, no-notification rule,
  and the graduation-rollback cleanup.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 04:03:09 -07:00

335 lines
12 KiB
Python

"""§12: the branch-hygiene scheduler.
The structural commitment Slice 8 owes. Closes the loop on §11.5's
branch lifecycle (open → closed at 30d → deleted at 90d) and on §10.7's
post-merge deletion timer for per-RFC PR branches.
The sweep rides next to `DigestScheduler` per the §19.1 brief — same
`start` / `stop` / `run_tick` shape, same hourly cadence by default,
same test seam pattern. The cadence is configurable via
`HYGIENE_TICK_SECONDS` for tests and dev.
Per §15.9 and the §19.1 brief, hygiene actions fire as "the app":
`actor_user_id = NULL` and `on_behalf_of = bot_login`. The action
kinds (`close_idle_branch`, `delete_stale_branch`,
`delete_post_merge_branch`) are intentionally outside
`notify._AUTO_WATCH_ACTIONS` and `notify._ROUTING`, so no notifications
fan out for the hygiene gestures. The branches being touched are stale
by definition; the affected population would be churn-grade noise per
§15.4.
The per-user message-cursor preservation contract per §11.5: this
module never touches `branch_chat_messages` or `branch_chat_seen`.
Chat history survives the branch's deletion in Gitea because those
tables are app-canonical, not cached.
"""
from __future__ import annotations
import asyncio
import logging
import os
from datetime import datetime, timedelta, timezone
from . import db
from .bot import Bot
from .config import Config
log = logging.getLogger(__name__)
# Window sizes per §11.5 / §12 / §10.7. The 30/90 numbers are the
# canonical spec values; they are exposed as env vars (whole days,
# parsed with int()) so the integration tests can shrink the windows
# without touching production code.
def _close_after_days() -> int:
    """Return the idle-close window, in days.

    Reads `HYGIENE_CLOSE_AFTER_DAYS` on every call and falls back to 30
    when the variable is unset. A value `int()` cannot parse raises
    `ValueError`."""
    configured = os.environ.get("HYGIENE_CLOSE_AFTER_DAYS", "30")
    return int(configured)
def _delete_after_days() -> int:
    """Return the delete window, in days.

    Reads `HYGIENE_DELETE_AFTER_DAYS` on every call and falls back to 90
    when the variable is unset. A value `int()` cannot parse raises
    `ValueError`."""
    configured = os.environ.get("HYGIENE_DELETE_AFTER_DAYS", "90")
    return int(configured)
# ---------------------------------------------------------------------------
# Scheduler shell — mirrors DigestScheduler
# ---------------------------------------------------------------------------
class HygieneScheduler:
    """Background driver that calls `run_tick()` on a fixed interval.

    Exposes the `start` / `stop` lifecycle so the operator can treat it
    like the app's other scheduled jobs (reconciler, digest, hygiene)."""

    def __init__(self, *, config: Config, bot: Bot, tick_seconds: int | None = None):
        # Created lazily by start(); None means "not running yet".
        self._task: asyncio.Task | None = None
        # Set by stop() to ask the loop to exit.
        self._stop = asyncio.Event()
        self._bot = bot
        self._config = config
        # A falsy tick_seconds (None or 0) defers to the environment,
        # which in turn defaults to one hour.
        if tick_seconds:
            self._tick = tick_seconds
        else:
            self._tick = int(os.environ.get("HYGIENE_TICK_SECONDS", "3600"))

    def start(self) -> None:
        """Spawn the background loop task; a no-op if one already exists."""
        if self._task is not None:
            return
        self._task = asyncio.create_task(self._loop())

    async def stop(self) -> None:
        """Signal the loop to exit and wait for the task, if any, to finish."""
        self._stop.set()
        running = self._task
        if running is not None:
            await running

    async def _loop(self) -> None:
        """Tick once immediately, then once per interval until stopped."""
        while True:
            await self._safe_tick()
            if self._stop.is_set():
                return
            # Sleep for one interval, waking early if stop() fires.
            try:
                await asyncio.wait_for(self._stop.wait(), timeout=self._tick)
            except asyncio.TimeoutError:
                pass
            if self._stop.is_set():
                return

    async def _safe_tick(self) -> None:
        """Run one tick, logging (not propagating) any `Exception` so a
        failed tick does not end the loop."""
        try:
            await run_tick(config=self._config, bot=self._bot)
        except Exception:
            log.exception("hygiene tick failed")
# ---------------------------------------------------------------------------
# The tick itself
# ---------------------------------------------------------------------------
async def run_tick(*, config: Config, bot: Bot, now: datetime | None = None) -> dict[str, int]:
    """One pass over the §12 + §10.7 surfaces.

    Runs four sweeps in a fixed order — post-merge delete, stale delete,
    idle close, post-merge close — and returns one counter per sweep
    (`deleted_post_merge`, `deleted_stale`, `closed_idle`,
    `closed_post_merge`) for observability and tests. The delete
    counters only count branches `_delete_branch_via_bot` reported as
    deleted.

    A second tick within the same window is a no-op for branches that
    already moved, because every sweep selects on the current
    `cached_branches.state` and the flip is monotonic
    (open → closed → deleted). A failed delete leaves the row in place,
    so it is retried on the next tick.

    `config` supplies the bot login recorded on the close audit rows and
    is forwarded to the delete helper; `bot` performs the branch
    deletion. Tests pass an explicit `now` to control the time horizon;
    production uses `datetime.now(timezone.utc)`. A timezone-aware `now`
    is normalized to UTC before the cutoffs are formatted; a naive `now`
    is used as given.
    """
    if now is None:
        now = datetime.now(timezone.utc)
    elif now.utcoffset() is not None:
        # closed_at is written with SQLite's datetime('now'), which is
        # UTC; formatting a non-UTC wall clock would skew the cutoffs by
        # the caller's offset.
        now = now.astimezone(timezone.utc)

    # Same text layout SQLite's datetime('now') produces.
    # NOTE(review): cutoffs are compared as text, which assumes the
    # cached timestamp columns use this same layout — confirm against
    # the cache writers.
    stamp = "%Y-%m-%d %H:%M:%S"
    close_cutoff = (now - timedelta(days=_close_after_days())).strftime(stamp)
    delete_cutoff = (now - timedelta(days=_delete_after_days())).strftime(stamp)

    counters = {
        "closed_idle": 0,
        "closed_post_merge": 0,
        "deleted_stale": 0,
        "deleted_post_merge": 0,
    }

    # Order matters: deletes fire BEFORE closes so a branch that crosses
    # both boundaries in the same sweep (a long-merged PR whose branch
    # is still open in the cache — the cache-bootstrap case) goes
    # straight to 'deleted' rather than spending a tick at 'closed' with
    # a fresh closed_at that would postpone the delete by another full
    # delete window. Real-time sweeps see the two windows far apart, so
    # this is only load-bearing for cache-bootstrap and clock-jump
    # cases.

    # ---- 90-day delete: §10.7 post-merge timer. Covers merged-PR
    # branches whether or not they were already flipped to 'closed'.
    # 'main' is excluded here exactly as in the stale-delete and
    # idle-close sweeps, so a merged PR whose head is 'main' can never
    # cause the default branch to be deleted. ----
    post_merge_delete = db.conn().execute(
        """
        SELECT DISTINCT b.rfc_slug, b.branch_name
        FROM cached_branches b
        JOIN cached_prs p
          ON p.rfc_slug = b.rfc_slug
         AND p.head_branch = b.branch_name
        WHERE b.state IN ('open', 'closed')
          AND b.pinned = 0
          AND b.branch_name != 'main'
          AND p.state = 'merged'
          AND COALESCE(p.merged_at, '') != ''
          AND p.merged_at <= ?
        """,
        (delete_cutoff,),
    ).fetchall()
    for r in post_merge_delete:
        ok = await _delete_branch_via_bot(
            config=config, bot=bot,
            slug=r["rfc_slug"], branch=r["branch_name"],
            action_kind="delete_post_merge_branch",
            reason="90d post-merge",
        )
        if ok:
            counters["deleted_post_merge"] += 1

    # ---- 90-day delete: idle branches that closed long enough ago.
    # Queried after the loop above, so rows it already flipped to
    # 'deleted' are not selected again. ----
    stale_rows = db.conn().execute(
        """
        SELECT b.rfc_slug, b.branch_name, b.closed_at, b.last_commit_at
        FROM cached_branches b
        WHERE b.state = 'closed'
          AND b.pinned = 0
          AND b.branch_name != 'main'
          AND COALESCE(b.closed_at, b.last_commit_at, b.created_at) <= ?
        """,
        (delete_cutoff,),
    ).fetchall()
    for r in stale_rows:
        ok = await _delete_branch_via_bot(
            config=config, bot=bot,
            slug=r["rfc_slug"], branch=r["branch_name"],
            action_kind="delete_stale_branch",
            reason="90d closed",
        )
        if ok:
            counters["deleted_stale"] += 1

    # ---- 30-day close: idle open branches ----
    #
    # §11.5: a branch with no associated PR auto-closes at 30 days from
    # last commit. The NOT EXISTS clause excludes branches that have any
    # open PR (those stay open) or any merged PR (those are handled by
    # the post-merge sweep below). Pinned branches (§12) skip the close.
    idle_rows = db.conn().execute(
        """
        SELECT b.rfc_slug, b.branch_name
        FROM cached_branches b
        WHERE b.state = 'open'
          AND b.pinned = 0
          AND b.branch_name != 'main'
          AND COALESCE(b.last_commit_at, b.created_at) <= ?
          AND NOT EXISTS (
              SELECT 1 FROM cached_prs p
              WHERE p.rfc_slug = b.rfc_slug
                AND p.head_branch = b.branch_name
                AND p.state IN ('open', 'merged')
          )
        """,
        (close_cutoff,),
    ).fetchall()
    for r in idle_rows:
        _close_branch(r["rfc_slug"], r["branch_name"], config.gitea_bot_user, reason="30d idle")
        counters["closed_idle"] += 1

    # ---- 30-day "settle": post-merge branches still flagged open ----
    #
    # §10.7: after merge, the branch enters a closed state per §12. The
    # cached_branches row may still read state='open' after the merge,
    # so at the 30-day mark the sweep formalizes the closure. Pinned
    # branches retain open state, and 'main' is excluded for the same
    # reason as in the delete sweep above.
    post_merge_close = db.conn().execute(
        """
        SELECT DISTINCT b.rfc_slug, b.branch_name
        FROM cached_branches b
        JOIN cached_prs p
          ON p.rfc_slug = b.rfc_slug
         AND p.head_branch = b.branch_name
        WHERE b.state = 'open'
          AND b.pinned = 0
          AND b.branch_name != 'main'
          AND p.state = 'merged'
          AND COALESCE(p.merged_at, '') != ''
          AND p.merged_at <= ?
        """,
        (close_cutoff,),
    ).fetchall()
    for r in post_merge_close:
        _close_branch(r["rfc_slug"], r["branch_name"], config.gitea_bot_user, reason="30d post-merge")
        counters["closed_post_merge"] += 1

    return counters
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _close_branch(slug: str, branch: str, bot_login: str, *, reason: str) -> None:
    """Mark one cached branch as closed and record the audit row.

    Only the app's own tables are touched; no Gitea call is made here.
    The audit row is written in the system-actor shape:
    `actor_user_id` NULL and `on_behalf_of` set to `bot_login`, with
    `reason` stored as JSON in `details`. The action kind is always
    `close_idle_branch`, whatever the reason."""
    # The state guard keeps the flip one-way; COALESCE preserves a
    # closed_at that was already recorded.
    flip_sql = """
        UPDATE cached_branches
        SET state = 'closed',
            closed_at = COALESCE(closed_at, datetime('now'))
        WHERE rfc_slug = ? AND branch_name = ? AND state = 'open'
        """
    db.conn().execute(flip_sql, (slug, branch))

    # NOTE(review): the audit row is inserted even when the UPDATE
    # above matched no row — callers are expected to pass open branches.
    audit_sql = """
        INSERT INTO actions
            (actor_user_id, on_behalf_of, action_kind, rfc_slug, branch_name, details)
        VALUES (NULL, ?, 'close_idle_branch', ?, ?, ?)
        """
    details = _json_details({"reason": reason})
    db.conn().execute(audit_sql, (bot_login, slug, branch, details))
async def _delete_branch_via_bot(
    *,
    config: Config,
    bot: Bot,
    slug: str,
    branch: str,
    action_kind: str,
    reason: str,
) -> bool:
    """Delete one branch through `bot.delete_branch` as the system actor
    and mark its cache row 'deleted'.

    The target repository is resolved from the RFC's cached state: a
    'super-draft' RFC maps to the meta repo (`config.gitea_org` /
    `config.meta_repo`); an 'active' RFC maps to the `owner/repo` pair
    stored in `cached_rfcs.repo`.

    Returns True once the bot call succeeded and the cache row was
    updated. Returns False — leaving the branch row untouched — when the
    slug has no `cached_rfcs` row, when the repo cannot be resolved from
    the RFC's state, or when `bot.delete_branch` raises (the exception
    is logged, not propagated)."""
    rfc_row = db.conn().execute(
        "SELECT state, repo FROM cached_rfcs WHERE slug = ?", (slug,)
    ).fetchone()
    if rfc_row is None:
        log.warning("hygiene: cannot delete %s/%s — slug missing from cache", slug, branch)
        return False

    rfc_state = rfc_row["state"]
    if rfc_state == "super-draft":
        target_owner, target_repo = config.gitea_org, config.meta_repo
    elif rfc_state == "active" and rfc_row["repo"] and "/" in rfc_row["repo"]:
        # Split on the first slash only: everything after it is the repo.
        target_owner, target_repo = rfc_row["repo"].split("/", 1)
    else:
        log.warning("hygiene: cannot resolve repo for %s state=%s", slug, rfc_row["state"])
        return False

    try:
        # First positional argument is the actor; None selects the
        # system path.
        await bot.delete_branch(
            None,
            owner=target_owner,
            repo=target_repo,
            branch=branch,
            slug=slug,
            action_kind=action_kind,
            reason=reason,
            bot_login=config.gitea_bot_user,
        )
    except Exception:
        log.exception("hygiene: bot.delete_branch failed for %s/%s", slug, branch)
        return False
    else:
        # Only reached after a successful bot call.
        db.conn().execute(
            """
            UPDATE cached_branches
            SET state = 'deleted'
            WHERE rfc_slug = ? AND branch_name = ?
            """,
            (slug, branch),
        )
        return True
def _json_details(payload: dict) -> str:
    """Serialize `payload` to the JSON text stored in `actions.details`."""
    # Imported locally so the helper does not depend on a module-level
    # json import.
    from json import dumps

    encoded = dumps(payload)
    return encoded