Files
rfc-app/backend/app/cache.py
T
Ben Stull 4565a6cb95 Slice 4: super-draft body editing per §9.5 + §9.6
The §17 routing-collapse rule lands in api_branches.py and
api_prs.py — every branches/<branch>/... and prs/<n>/... route
dispatches on the entry's state to pick the right Gitea repo, and
the body extracted from the entry's frontmatter envelope is what
the editor and the diff see. The bot grows open_metadata_pr;
cache grows refresh_meta_branches. Two §17 routes added:
start-edit-branch and metadata. The §9.4 super-draft view replaces
RFCView.jsx's Slice 2 placeholder; a metadata pane modal opens
from the breadcrumb. Branch naming uses edit-<slug>-<6hex> to
dodge the §19.2 path-routing candidate while preserving §9.5's
structural shape.

Covered by tests/test_super_draft_vertical.py (10 tests). The
full Slices 1-4 suite is 35/35 green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 15:43:21 -07:00

646 lines
25 KiB
Python

"""The §4 metadata cache and its two writers.
Per §4: Gitea is truth. The cache mirrors only what the left pane and
the read surfaces need, and it is rebuildable from Gitea at any time.
Per §4.1: two writers — the webhook handler and the periodic reconciler —
both read from Gitea and write to the cache. User actions never write
to the cache directly; they trigger Git operations through the bot
(`bot.py`), and the resulting webhook (or the next reconciler sweep)
is what updates the cache.
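This module provides:

- `refresh_meta_repo()` — reads rfcs/ on the meta repo and reconciles
  cached_rfcs against what's there. Used by both the webhook handler
  (on meta-repo merge events) and the reconciler.
- `refresh_meta_branches()` — mirrors the meta repo's super-draft edit
  branches into cached_branches.
- `refresh_meta_pulls()` — reads open and closed meta-repo PRs and
  reconciles cached_prs for pr_kind='idea' and friends. Backs the §7.3
  pending-ideas disclosure.
- `refresh_rfc_repo()` — mirrors an active RFC's per-RFC repo (RFC.md
  body, branches, PRs) into the cache.
- `Reconciler` — the periodic background sweep that runs all of the
  refreshers above.
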
Per §4.2's "single SQLite file colocated with the FastAPI process," the
cache writes happen on the same process that serves reads; lock
contention is bounded by the small mutation surface (a few hundred
rows at most for v1) and SQLite's WAL mode.
"""
from __future__ import annotations
import asyncio
import json
import logging
from . import db, entry as entry_mod
from .config import Config
from .gitea import Gitea, GiteaError
log = logging.getLogger(__name__)
async def refresh_meta_repo(config: Config, gitea: Gitea) -> None:
    """Re-read rfcs/ on the meta repo and reconcile cached_rfcs.

    Idempotent. Safe to call on every meta-repo webhook and on every
    reconciler sweep.

    Every ``*.md`` file under ``rfcs/`` on ``main`` is read, parsed with
    ``entry_mod.parse`` and upserted into ``cached_rfcs``. A file that
    cannot be read or parsed, or whose entry has no slug, is logged and
    skipped so that one bad entry does not block the rest. If the
    directory listing itself fails, the cache is left untouched.
    """
    org, repo = config.gitea_org, config.meta_repo
    try:
        files = await gitea.list_dir(org, repo, "rfcs", ref="main")
    except GiteaError as e:
        log.warning("refresh_meta_repo: cannot list rfcs/: %s", e)
        return

    seen_slugs: set[str] = set()
    for f in files:
        if f.get("type") != "file" or not (f.get("name") or "").endswith(".md"):
            continue
        path = f["path"]
        # A transient failure on one file must not abort the whole sweep;
        # the next webhook or reconciler pass will pick the entry up.
        try:
            result = await gitea.read_file(org, repo, path, ref="main")
        except GiteaError as e:
            log.warning("refresh_meta_repo: cannot read %s: %s", path, e)
            continue
        if not result:
            continue
        text, sha = result
        try:
            entry = entry_mod.parse(text)
        except Exception as parse_err:
            # Deliberately broad: any malformed entry is skipped, not fatal.
            log.warning("refresh_meta_repo: skipping %s: %s", path, parse_err)
            continue
        if not entry.slug:
            log.warning("refresh_meta_repo: skipping %s: missing slug", path)
            continue
        seen_slugs.add(entry.slug)
        _upsert_cached_rfc(entry, body_sha=sha)

    # Mark entries removed from the meta repo as withdrawn-without-trace.
    # In practice the spec keeps withdrawn entries in rfcs/ as historical
    # record (§3), so this branch fires only for entries deleted out of
    # band. We leave the row in place and only log it; note that entries
    # skipped above because of a read/parse failure are reported here too.
    existing = {row["slug"] for row in db.conn().execute("SELECT slug FROM cached_rfcs")}
    for missing in existing - seen_slugs:
        log.info("refresh_meta_repo: %s no longer in rfcs/ — leaving cache row in place", missing)
def _upsert_cached_rfc(entry: entry_mod.Entry, body_sha: str) -> None:
    """Insert or refresh the ``cached_rfcs`` row keyed by ``entry.slug``.

    All columns taken from the entry are overwritten on conflict. The
    owners, arbiters and tags lists are stored as JSON text. Both
    ``last_entry_commit_at`` and ``updated_at`` are stamped with the
    database's current time on every call.
    """
    upsert_sql = """
        INSERT INTO cached_rfcs
        (slug, title, state, rfc_id, repo, proposed_by, proposed_at,
        graduated_at, graduated_by, owners_json, arbiters_json, tags_json,
        body, body_sha, last_entry_commit_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))
        ON CONFLICT(slug) DO UPDATE SET
        title = excluded.title,
        state = excluded.state,
        rfc_id = excluded.rfc_id,
        repo = excluded.repo,
        proposed_by = excluded.proposed_by,
        proposed_at = excluded.proposed_at,
        graduated_at = excluded.graduated_at,
        graduated_by = excluded.graduated_by,
        owners_json = excluded.owners_json,
        arbiters_json = excluded.arbiters_json,
        tags_json = excluded.tags_json,
        body = excluded.body,
        body_sha = excluded.body_sha,
        last_entry_commit_at = datetime('now'),
        updated_at = datetime('now')
        """
    # Scalar columns first, then the three JSON-encoded list columns,
    # then the body and its blob sha — same order as the column list.
    scalar_values = (
        entry.slug,
        entry.title,
        entry.state,
        entry.id,
        entry.repo,
        entry.proposed_by,
        entry.proposed_at,
        entry.graduated_at,
        entry.graduated_by,
    )
    json_values = tuple(
        json.dumps(items) for items in (entry.owners, entry.arbiters, entry.tags)
    )
    bound = scalar_values + json_values + (entry.body, body_sha)
    db.conn().execute(upsert_sql, bound)
async def refresh_rfc_repo(config: Config, gitea: Gitea, slug: str) -> None:
    """Mirror an active RFC's per-RFC repo into the cache.

    Reads `RFC.md` on main into `cached_rfcs.body` (per §4 #3), lists
    branches into `cached_branches`, and lists open and closed PRs into
    `cached_prs` with `pr_kind='rfc_branch'`. Per §4.1 this runs in two
    places: a webhook arrival for events on the per-RFC repo, and the
    reconciler sweep.

    Does nothing unless `slug` has a cached row in state 'active' with a
    well-formed `owner/repo` value. Each of the three Gitea reads is
    best-effort: a `GiteaError` is logged and only that part of the
    mirror is skipped, leaving the rows already in the cache untouched.
    """
    row = db.conn().execute(
        "SELECT repo, state FROM cached_rfcs WHERE slug = ?", (slug,)
    ).fetchone()
    if not row or not row["repo"] or row["state"] != "active":
        return
    if "/" not in row["repo"]:
        log.warning("refresh_rfc_repo: %s has malformed repo %r", slug, row["repo"])
        return
    owner, repo = row["repo"].split("/", 1)

    # Body on main — populates the discuss-mode default surface per §8.2.
    try:
        result = await gitea.read_file(owner, repo, "RFC.md", ref="main")
    except GiteaError as e:
        log.warning("refresh_rfc_repo(%s): read_file failed: %s", slug, e)
        result = None
    if result is not None:
        text, sha = result
        db.conn().execute(
            """
            UPDATE cached_rfcs
            SET body = ?, body_sha = ?, last_main_commit_at = datetime('now'),
            updated_at = datetime('now')
            WHERE slug = ?
            """,
            (text, sha, slug),
        )

    # Branches — every branch the bot knows about per §11.5 / §12.
    try:
        branches = await gitea.list_branches(owner, repo)
    except GiteaError as e:
        log.warning("refresh_rfc_repo(%s): list_branches failed: %s", slug, e)
        # None (not []) so a failed listing is distinguishable from a repo
        # with no branches: otherwise every cached branch would be
        # flagged 'deleted' below on a transient Gitea error.
        branches = None
    if branches is not None:
        seen_branches: set[str] = set()
        for b in branches:
            name = b.get("name") or ""
            if not name:
                continue
            seen_branches.add(name)
            commit = b.get("commit") or {}
            head_sha = commit.get("id") or ""
            last_commit_at = commit.get("timestamp")
            db.conn().execute(
                """
                INSERT INTO cached_branches (rfc_slug, branch_name, head_sha, state, last_commit_at)
                VALUES (?, ?, ?, 'open', ?)
                ON CONFLICT(rfc_slug, branch_name) DO UPDATE SET
                head_sha = excluded.head_sha,
                state = CASE WHEN cached_branches.state = 'closed' THEN 'closed' ELSE 'open' END,
                last_commit_at = excluded.last_commit_at
                """,
                (slug, name, head_sha, last_commit_at),
            )
        # Mark previously-known branches that disappeared as deleted, keeping
        # the row per §11.5 ("branch removed from Gitea, row remains").
        existing = {
            r["branch_name"]
            for r in db.conn().execute(
                "SELECT branch_name FROM cached_branches WHERE rfc_slug = ? AND state != 'deleted'",
                (slug,),
            )
        }
        for missing in existing - seen_branches:
            db.conn().execute(
                "UPDATE cached_branches SET state = 'deleted' WHERE rfc_slug = ? AND branch_name = ?",
                (slug, missing),
            )

    # PRs on the per-RFC repo (pr_kind = 'rfc_branch'). Slice 3 owns the
    # full PR surface; we mirror metadata here so the §8.1 breadcrumb
    # dropdown's "1 PR" count is honest from Slice 2 onward.
    repo_full = f"{owner}/{repo}"
    bot_login = config.gitea_bot_user
    try:
        open_pulls = await gitea.list_pulls(owner, repo, state="open")
        closed_pulls = await gitea.list_pulls(owner, repo, state="closed")
    except GiteaError as e:
        log.warning("refresh_rfc_repo(%s): list_pulls failed: %s", slug, e)
        open_pulls, closed_pulls = [], []
    for pull in open_pulls + closed_pulls:
        # `head` (or its `ref`) may be absent or null in the payload;
        # normalise to "" the same way head.sha is read further down.
        head = pull.get("head") or {}
        head_branch = head.get("ref") or ""
        state = _state_from_pull(pull)
        gitea_opener = (pull.get("user") or {}).get("login") or ""
        opened_by = _resolve_actor(
            gitea_opener,
            bot_login,
            slug,
            pull["number"],
            pull.get("body") or "",
        )
        # §10.8: distinguish "user withdrew" from "Gitea closed for any
        # other reason." The bot's withdraw action lands in the actions
        # log; if we see it, surface state='withdrawn'.
        if state == "closed":
            withdrew = db.conn().execute(
                """
                SELECT 1 FROM actions
                WHERE action_kind = 'withdraw_branch_pr'
                AND rfc_slug = ? AND pr_number = ? LIMIT 1
                """,
                (slug, pull["number"]),
            ).fetchone()
            if withdrew:
                state = "withdrawn"
        merge_commit_sha = pull.get("merge_commit_sha")
        db.conn().execute(
            """
            INSERT INTO cached_prs
            (rfc_slug, pr_kind, repo, pr_number, title, description, state,
            opened_by, opened_at, merged_at, closed_at,
            head_branch, base_branch, head_sha, merge_commit_sha)
            VALUES (?, 'rfc_branch', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(repo, pr_number) DO UPDATE SET
            title = excluded.title,
            description = excluded.description,
            state = excluded.state,
            opened_by = excluded.opened_by,
            merged_at = excluded.merged_at,
            closed_at = excluded.closed_at,
            head_sha = excluded.head_sha,
            merge_commit_sha = COALESCE(excluded.merge_commit_sha, cached_prs.merge_commit_sha)
            """,
            (
                slug,
                repo_full,
                pull["number"],
                pull.get("title") or "",
                pull.get("body") or "",
                state,
                opened_by,
                pull.get("created_at"),
                pull.get("merged_at"),
                pull.get("closed_at"),
                head_branch,
                (pull.get("base") or {}).get("ref") or "main",
                head.get("sha"),
                merge_commit_sha,
            ),
        )
        # §10.9: an explicit `Supersedes: #N` trailer on a merged PR's
        # body bumps the predecessor's state to closed and records the
        # supersession. The cache propagates this whether the merge came
        # via webhook or reconciler. Only a predecessor still cached as
        # 'open' is touched.
        if state == "merged":
            superseded = _parse_supersedes(pull.get("body") or "")
            if superseded:
                db.conn().execute(
                    """
                    UPDATE cached_prs
                    SET state = 'closed',
                    superseded_by_pr_number = ?,
                    closed_at = COALESCE(closed_at, datetime('now'))
                    WHERE repo = ? AND pr_number = ? AND state = 'open'
                    """,
                    (pull["number"], repo_full, superseded),
                )
async def refresh_meta_branches(config: Config, gitea: Gitea) -> None:
    """Mirror meta-repo branches into `cached_branches` for super-drafts.

    Three passes over the cache, all driven by one branch listing:

    1. Every branch whose name maps (via `_slug_from_branch_name`) to a
       cached entry in state 'super-draft' is upserted as an edit branch.
    2. Every super-draft entry gets a synthetic `main` row carrying the
       meta repo's main tip sha, so the §10.1 has-commits-ahead check
       works uniformly across active and super-draft surfaces.
    3. Previously cached super-draft edit branches that were not seen in
       this listing are flagged 'deleted' (the row is kept).

    Per the §5 super-draft scoping note, super-draft edits are branches on
    the meta repo. The naming Slice 4 picked is `edit-<slug>-<6hex>` —
    structurally `edit/<slug>/<auto-name>` per §9.5, with dashes in place
    of slashes per the §19.2 path-routing candidate.

    If the branch listing fails the cache is left untouched.
    """
    upsert_edit_branch = """
        INSERT INTO cached_branches (rfc_slug, branch_name, head_sha, state, last_commit_at)
        VALUES (?, ?, ?, 'open', ?)
        ON CONFLICT(rfc_slug, branch_name) DO UPDATE SET
        head_sha = excluded.head_sha,
        state = CASE WHEN cached_branches.state = 'closed' THEN 'closed' ELSE 'open' END,
        last_commit_at = excluded.last_commit_at
        """
    upsert_main_row = """
        INSERT INTO cached_branches (rfc_slug, branch_name, head_sha, state, last_commit_at)
        VALUES (?, 'main', ?, 'open', ?)
        ON CONFLICT(rfc_slug, branch_name) DO UPDATE SET
        head_sha = excluded.head_sha,
        last_commit_at = excluded.last_commit_at
        """

    org, repo = config.gitea_org, config.meta_repo
    try:
        listing = await gitea.list_branches(org, repo)
    except GiteaError as e:
        log.warning("refresh_meta_branches: %s", e)
        return

    main_tip_sha = ""
    main_tip_ts = None
    live_edit_branches: set[tuple[str, str]] = set()
    for branch in listing:
        branch_name = branch.get("name") or ""
        commit = branch.get("commit") or {}
        tip_sha = commit.get("id") or ""
        tip_ts = commit.get("timestamp")

        if branch_name == "main":
            # Remembered for the synthetic per-slug rows written below.
            main_tip_sha, main_tip_ts = tip_sha, tip_ts
            continue

        owner_slug = _slug_from_branch_name(branch_name)
        if not owner_slug:
            continue
        cached_entry = db.conn().execute(
            "SELECT state FROM cached_rfcs WHERE slug = ?", (owner_slug,)
        ).fetchone()
        is_super_draft = bool(cached_entry) and cached_entry["state"] == "super-draft"
        if not is_super_draft:
            continue

        live_edit_branches.add((owner_slug, branch_name))
        db.conn().execute(upsert_edit_branch, (owner_slug, branch_name, tip_sha, tip_ts))

    # Synthesize a per-slug `main` row for every super-draft entry, so the
    # §10.1 has-commits-ahead check in api_prs.py works uniformly. The
    # head_sha is the meta-repo main's tip — every super-draft edit branch
    # diverges from this single point.
    if main_tip_sha:
        draft_rows = db.conn().execute(
            "SELECT slug FROM cached_rfcs WHERE state = 'super-draft'"
        ).fetchall()
        for draft in draft_rows:
            db.conn().execute(upsert_main_row, (draft["slug"], main_tip_sha, main_tip_ts))

    # Mark previously-known edit branches that disappeared as deleted per
    # §11.5 / §12. Keep the row so chat history survives the branch's
    # deletion in Gitea.
    cached_edit_rows = db.conn().execute(
        """
        SELECT b.rfc_slug, b.branch_name
        FROM cached_branches b
        JOIN cached_rfcs r ON r.slug = b.rfc_slug
        WHERE r.state = 'super-draft'
        AND b.state != 'deleted'
        AND b.branch_name != 'main'
        """
    ).fetchall()
    for cached in cached_edit_rows:
        key = (cached["rfc_slug"], cached["branch_name"])
        if key in live_edit_branches:
            continue
        db.conn().execute(
            "UPDATE cached_branches SET state = 'deleted' WHERE rfc_slug = ? AND branch_name = ?",
            key,
        )
def _slug_from_branch_name(name: str) -> str | None:
"""Mirror of `_slug_from_head_branch` for branch-only inputs (no PR
body to consult)."""
if name.startswith("edit-"):
body = name[len("edit-") :]
if "-" in body:
slug, _hex = body.rsplit("-", 1)
return slug or None
if name.startswith("edit/"):
parts = name.split("/", 2)
if len(parts) >= 2:
return parts[1]
return None
async def refresh_meta_pulls(config: Config, gitea: Gitea) -> None:
    """Reconcile meta-repo PRs (open and closed) into cached_prs.

    For Slice 1 we care about pr_kind='idea' (proposing a new entry).
    Other meta-repo PR kinds (body edits, metadata edits, claims) will
    be wired in their respective slices.

    A PR whose head branch does not map to a slug (see
    `_slug_from_head_branch`) is ignored. If either PR listing fails,
    the cache is left untouched.

    `opened_by` is the **underlying actor**, not the bot login Gitea
    reports — per §15.9's framing for notifications and per §6.5's
    On-behalf-of accountability shape. We recover the actor by joining
    against the `actions` audit log; if no row matches (cache rebuilt
    from scratch on a deployment that pre-dates the actions log, or a
    pull we did not author), we fall back to parsing the
    `On-behalf-of:` trailer from the PR body, then to the raw Gitea
    login as last resort.
    """
    org, repo = config.gitea_org, config.meta_repo
    repo_full = f"{org}/{repo}"
    try:
        open_pulls = await gitea.list_pulls(org, repo, state="open")
        closed_pulls = await gitea.list_pulls(org, repo, state="closed")
    except GiteaError as e:
        log.warning("refresh_meta_pulls: %s", e)
        return
    bot_login = config.gitea_bot_user
    for pull in open_pulls + closed_pulls:
        # `head` (or its `ref`) may be absent or null in the payload;
        # normalise to "" so the prefix checks below never see None.
        head = pull.get("head") or {}
        head_branch = head.get("ref") or ""
        slug = _slug_from_head_branch(head_branch)
        if slug is None:
            continue
        pr_kind = _kind_from_branch(head_branch)
        state = _state_from_pull(pull)
        gitea_opener = (pull.get("user") or {}).get("login") or ""
        opened_by = _resolve_actor(
            gitea_opener,
            bot_login,
            slug,
            pull["number"],
            pull.get("body") or "",
        )
        # §10.8 / Slice 4: a closed body-edit PR may have been withdrawn
        # by the contributor. Distinguish from a generic Gitea close via
        # the audit log — same shape api_prs.py uses for rfc_branch PRs.
        if state == "closed" and pr_kind == "meta_body_edit":
            withdrew = db.conn().execute(
                """
                SELECT 1 FROM actions
                WHERE action_kind = 'withdraw_branch_pr'
                AND rfc_slug = ? AND pr_number = ? LIMIT 1
                """,
                (slug, pull["number"]),
            ).fetchone()
            if withdrew:
                state = "withdrawn"
        merge_commit_sha = pull.get("merge_commit_sha")
        # On conflict the identifying columns (slug, kind, branches,
        # opened_at) are kept; only the mutable PR fields are refreshed,
        # and a known merge commit sha is never overwritten with NULL.
        db.conn().execute(
            """
            INSERT INTO cached_prs
            (rfc_slug, pr_kind, repo, pr_number, title, description, state,
            opened_by, opened_at, merged_at, closed_at,
            head_branch, base_branch, head_sha, merge_commit_sha)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(repo, pr_number) DO UPDATE SET
            title = excluded.title,
            description = excluded.description,
            state = excluded.state,
            opened_by = excluded.opened_by,
            merged_at = excluded.merged_at,
            closed_at = excluded.closed_at,
            head_sha = excluded.head_sha,
            merge_commit_sha = COALESCE(excluded.merge_commit_sha, cached_prs.merge_commit_sha)
            """,
            (
                slug,
                pr_kind,
                repo_full,
                pull["number"],
                pull.get("title") or "",
                pull.get("body") or "",
                state,
                opened_by,
                pull.get("created_at"),
                pull.get("merged_at"),
                pull.get("closed_at"),
                head_branch,
                (pull.get("base") or {}).get("ref") or "main",
                head.get("sha"),
                merge_commit_sha,
            ),
        )
_TRAILER_RE = None
def _resolve_actor(gitea_opener: str, bot_login: str, slug: str, pr_number: int, body: str) -> str:
"""Best effort: collapse the bot's authorship to the underlying actor."""
if gitea_opener and gitea_opener != bot_login:
return gitea_opener
# Prefer the audit log.
row = db.conn().execute(
"""
SELECT on_behalf_of FROM actions
WHERE action_kind IN ('propose_rfc', 'open_body_edit_pr', 'open_branch_pr', 'open_claim_pr', 'open_metadata_pr')
AND rfc_slug = ? AND pr_number = ?
ORDER BY id LIMIT 1
""",
(slug, pr_number),
).fetchone()
if row and row["on_behalf_of"]:
return row["on_behalf_of"]
# Fall back to parsing the On-behalf-of trailer.
import re as _re
global _TRAILER_RE
if _TRAILER_RE is None:
_TRAILER_RE = _re.compile(r"On-behalf-of:\s+.*?<([^>]+)>", _re.MULTILINE)
m = _TRAILER_RE.search(body)
if m:
return m.group(1)
return gitea_opener or bot_login
def _slug_from_head_branch(head_branch: str) -> str | None:
if head_branch.startswith("propose/"):
return head_branch[len("propose/") :]
if head_branch.startswith("edit/"):
parts = head_branch.split("/", 2)
if len(parts) >= 2:
return parts[1]
if head_branch.startswith("edit-"):
# §9.5 names the structural shape `edit/<slug>/<auto-name>`, but
# FastAPI's default {branch} path-segment matcher refuses slashes
# (the §19.2 routing candidate). Slice 4 picks the same dash-
# separated workaround Slice 2 used for promote-to-branch:
# `edit-<slug>-<6hex>`. The slug is the middle; the final
# dash-segment is a 6-hex suffix.
body = head_branch[len("edit-") :]
if "-" in body:
slug, _hex = body.rsplit("-", 1)
return slug or None
if head_branch.startswith("claim/"):
return head_branch[len("claim/") :]
if head_branch.startswith("metadata/"):
return head_branch[len("metadata/") :]
if head_branch.startswith("metadata-"):
# §9.5 metadata-pane PRs use the same dash-separated branch shape
# as edit branches, for the same routing reason.
body = head_branch[len("metadata-") :]
if "-" in body:
slug, _hex = body.rsplit("-", 1)
return slug or None
return None
def _kind_from_branch(head_branch: str) -> str:
if head_branch.startswith("propose/"):
return "idea"
if head_branch.startswith("edit/") or head_branch.startswith("edit-"):
return "meta_body_edit"
if head_branch.startswith("claim/"):
return "meta_claim"
if head_branch.startswith("metadata/") or head_branch.startswith("metadata-"):
return "meta_metadata"
return "idea" # fallback
_SUPERSEDES_RE = None
def _parse_supersedes(body: str) -> int | None:
"""Parse a `Supersedes: #N` trailer from a PR body per §10.9."""
import re as _re
global _SUPERSEDES_RE
if _SUPERSEDES_RE is None:
_SUPERSEDES_RE = _re.compile(r"^Supersedes:\s*#(\d+)", _re.MULTILINE)
m = _SUPERSEDES_RE.search(body or "")
return int(m.group(1)) if m else None
def _state_from_pull(pull: dict) -> str:
if pull.get("merged"):
return "merged"
if pull.get("state") == "closed":
return "closed"
return "open"
# ----- Reconciler -----
class Reconciler:
    """Per §4.1: periodic safety-net sweep.

    Runs in the background, every five minutes by default. Catches up
    on any webhook the bot missed (downtime, network failure, Gitea
    flake). If the cache is corrupted, the reconciler rebuilds from
    scratch — that's the contract.

    Lifecycle: `start()` schedules the background loop on the running
    event loop, `stop()` signals it and waits for it to finish. The
    pair may be repeated; each `start()` begins with a fresh sweep.
    """

    def __init__(self, config: Config, gitea: Gitea, interval_seconds: int = 300):
        """Store the collaborators; nothing runs until `start()`.

        `interval_seconds` is the pause between the end of one sweep and
        the start of the next.
        """
        self._config = config
        self._gitea = gitea
        self._interval = interval_seconds
        self._task: asyncio.Task | None = None
        self._stop = asyncio.Event()

    async def _loop(self) -> None:
        """Sweep once, then again after every interval until stopped."""
        # One sweep at startup, then on the interval. The startup sweep
        # is what brings a fresh cache to life on first boot.
        await self.sweep()
        while not self._stop.is_set():
            # Sleep for the interval, but wake immediately when stop()
            # sets the event; a timeout just means "time to sweep again".
            try:
                await asyncio.wait_for(self._stop.wait(), timeout=self._interval)
            except asyncio.TimeoutError:
                pass
            if self._stop.is_set():
                break
            await self.sweep()

    async def sweep(self) -> None:
        """Run one full refresh of the cache.

        Any exception is logged and swallowed so a failed sweep never
        kills the background loop; the next interval simply retries.
        """
        log.info("reconciler: starting sweep")
        try:
            await refresh_meta_repo(self._config, self._gitea)
            await refresh_meta_branches(self._config, self._gitea)
            await refresh_meta_pulls(self._config, self._gitea)
            # Per-RFC repos: refresh each active entry. Meta-repo refresh
            # must come first so newly-graduated entries land in
            # cached_rfcs before we try to reach their per-RFC repos.
            active = [
                r["slug"]
                for r in db.conn().execute(
                    "SELECT slug FROM cached_rfcs WHERE state = 'active' AND repo IS NOT NULL"
                )
            ]
            for slug in active:
                await refresh_rfc_repo(self._config, self._gitea, slug)
        except Exception:
            log.exception("reconciler: sweep failed")
        else:
            log.info("reconciler: sweep complete")

    def start(self) -> None:
        """Start the background loop; a no-op if it is already running.

        Must be called while an event loop is running, because it
        schedules a task on it.
        """
        if self._task is None:
            # Clear any stop request left over from a previous stop();
            # otherwise a restarted loop would sweep once and exit.
            self._stop.clear()
            self._task = asyncio.create_task(self._loop())

    async def stop(self) -> None:
        """Signal the loop to exit and wait for it to finish.

        An in-progress sweep is allowed to complete. Safe to call when
        the reconciler was never started.
        """
        self._stop.set()
        if self._task is not None:
            await self._task
            self._task = None