Slice 1: scaffolding + propose-to-super-draft vertical

Brings the §1 bot wrapper, the §4 cache (webhook + reconciler), the
§5 schema (six numbered migrations), Gitea OAuth + §6 user
provisioning, the §7 catalog left pane, and the propose-to-merge
vertical: propose modal opens an idea PR against the meta repo, an
owner merges from the pending-idea view, the cache picks it up via
webhook or reconciler sweep, and the catalog renders the new
super-draft.

Per §1 the bot is the only Git writer; every commit, branch
creation, and PR merge carries the §6.5 On-behalf-of: trailer and
an `actions` audit row. Per §4 the cache is never written from a
user action — it's webhook+reconciler only.

Covered by `backend/tests/test_propose_vertical.py` against an
in-process Gitea simulator.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ben Stull
2026-05-24 04:31:11 -07:00
commit 779ba6db59
42 changed files with 10385 additions and 0 deletions
+312
View File
@@ -0,0 +1,312 @@
"""The §4 metadata cache and its two writers.
Per §4: Gitea is truth. The cache mirrors only what the left pane and
the read surfaces need, and it is rebuildable from Gitea at any time.
Per §4.1: two writers — the webhook handler and the periodic reconciler —
both read from Gitea and write to the cache. User actions never write
to the cache directly; they trigger Git operations through the bot
(`bot.py`), and the resulting webhook (or the next reconciler sweep)
is what updates the cache.
This module provides:
- `refresh_meta_repo()` — reads rfcs/ on the meta repo and reconciles
cached_rfcs against what's there. Used by both the webhook handler
(on meta-repo merge events) and the reconciler.
- `refresh_meta_pulls()` — reads open and closed meta-repo PRs and
reconciles cached_prs for pr_kind='idea' and friends. Backs the §7.3
pending-ideas disclosure.
Per §4.2's "single SQLite file colocated with the FastAPI process," the
cache writes happen on the same process that serves reads; lock
contention is bounded by the small mutation surface (a few hundred
rows at most for v1) and SQLite's WAL mode.
"""
from __future__ import annotations

import asyncio
import json
import logging
import re

from . import db, entry as entry_mod
from .config import Config
from .gitea import Gitea, GiteaError
log = logging.getLogger(__name__)
async def refresh_meta_repo(config: Config, gitea: Gitea) -> None:
    """Re-read rfcs/ on the meta repo and reconcile cached_rfcs.

    Lists ``rfcs/`` at ref ``main``, parses every ``*.md`` file found there
    and upserts one ``cached_rfcs`` row per entry that has a slug. Safe to
    call on every meta-repo webhook and on every reconciler sweep.

    Failures are handled per file: a file that cannot be read or parsed, or
    that has no slug, is logged and skipped so the remaining entries are
    still reconciled. If the directory listing itself fails, the function
    logs a warning and returns without touching the cache.

    Args:
        config: Supplies ``gitea_org`` and ``meta_repo``.
        gitea: Client used for ``list_dir`` and ``read_file``.
    """
    org, repo = config.gitea_org, config.meta_repo
    try:
        files = await gitea.list_dir(org, repo, "rfcs", ref="main")
    except GiteaError as e:
        log.warning("refresh_meta_repo: cannot list rfcs/: %s", e)
        return

    seen_slugs: set[str] = set()
    for f in files:
        if f.get("type") != "file" or not f.get("name", "").endswith(".md"):
            continue
        try:
            result = await gitea.read_file(org, repo, f["path"], ref="main")
        except GiteaError as e:
            # Same policy as the listing call above: log and carry on, so a
            # single unreadable file cannot abort the whole reconcile.
            log.warning("refresh_meta_repo: cannot read %s: %s", f["path"], e)
            continue
        if not result:
            continue
        text, sha = result
        try:
            entry = entry_mod.parse(text)
        except Exception as parse_err:
            log.warning("refresh_meta_repo: skipping %s: %s", f["path"], parse_err)
            continue
        if not entry.slug:
            log.warning("refresh_meta_repo: skipping %s: missing slug", f["path"])
            continue
        seen_slugs.add(entry.slug)
        _upsert_cached_rfc(entry, body_sha=sha)

    # Entries that are cached but no longer present in rfcs/ are only
    # reported here; the cache row is deliberately left untouched. The spec
    # keeps withdrawn entries in rfcs/ as historical record (§3), so this
    # fires only for entries deleted out of band.
    existing = {row["slug"] for row in db.conn().execute("SELECT slug FROM cached_rfcs")}
    for missing in existing - seen_slugs:
        log.info("refresh_meta_repo: %s no longer in rfcs/ — leaving cache row in place", missing)
def _upsert_cached_rfc(entry: entry_mod.Entry, body_sha: str) -> None:
    """Insert or update the ``cached_rfcs`` row for one parsed entry.

    The row is keyed on ``slug``: a new slug inserts a row, an existing slug
    has every mirrored column overwritten with the values from ``entry``.

    Args:
        entry: Parsed meta-repo entry whose fields are mirrored into the row.
        body_sha: Stored in the row's ``body_sha`` column.
    """
    # NOTE(review): last_entry_commit_at and updated_at are both stamped with
    # the time of this write (datetime('now')), not with a timestamp taken
    # from Git — confirm that is what readers of these columns expect.
    statement = """
        INSERT INTO cached_rfcs
        (slug, title, state, rfc_id, repo, proposed_by, proposed_at,
        graduated_at, graduated_by, owners_json, arbiters_json, tags_json,
        body, body_sha, last_entry_commit_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))
        ON CONFLICT(slug) DO UPDATE SET
        title = excluded.title,
        state = excluded.state,
        rfc_id = excluded.rfc_id,
        repo = excluded.repo,
        proposed_by = excluded.proposed_by,
        proposed_at = excluded.proposed_at,
        graduated_at = excluded.graduated_at,
        graduated_by = excluded.graduated_by,
        owners_json = excluded.owners_json,
        arbiters_json = excluded.arbiters_json,
        tags_json = excluded.tags_json,
        body = excluded.body,
        body_sha = excluded.body_sha,
        last_entry_commit_at = datetime('now'),
        updated_at = datetime('now')
        """
    # List-valued fields are stored as JSON text in their *_json columns.
    owners_json = json.dumps(entry.owners)
    arbiters_json = json.dumps(entry.arbiters)
    tags_json = json.dumps(entry.tags)
    values = (
        entry.slug,
        entry.title,
        entry.state,
        entry.id,
        entry.repo,
        entry.proposed_by,
        entry.proposed_at,
        entry.graduated_at,
        entry.graduated_by,
        owners_json,
        arbiters_json,
        tags_json,
        entry.body,
        body_sha,
    )
    db.conn().execute(statement, values)
async def refresh_meta_pulls(config: Config, gitea: Gitea) -> None:
    """Reconcile open and closed meta-repo PRs into cached_prs.

    For Slice 1 we care about pr_kind='idea' (proposing a new entry).
    Other meta-repo PR kinds (body edits, metadata edits, claims) will
    be wired in their respective slices. Pulls whose head branch does not
    map to a slug are skipped.

    `opened_by` is the **underlying actor**, not the bot login Gitea
    reports — per §15.9's framing for notifications and per §6.5's
    On-behalf-of accountability shape. We recover the actor by joining
    against the `actions` audit log; if no row matches (cache rebuilt
    from scratch on a deployment that pre-dates the actions log, or a
    pull we did not author), we fall back to parsing the
    `On-behalf-of:` trailer from the PR body, then to the raw Gitea
    login as last resort.

    If either pull listing fails with ``GiteaError`` the function logs a
    warning and returns without writing anything.

    Args:
        config: Supplies ``gitea_org``, ``meta_repo`` and ``gitea_bot_user``.
        gitea: Client used for ``list_pulls``.
    """
    org, repo = config.gitea_org, config.meta_repo
    repo_full = f"{org}/{repo}"
    try:
        open_pulls = await gitea.list_pulls(org, repo, state="open")
        closed_pulls = await gitea.list_pulls(org, repo, state="closed")
    except GiteaError as e:
        log.warning("refresh_meta_pulls: %s", e)
        return

    bot_login = config.gitea_bot_user
    for pull in open_pulls + closed_pulls:
        # `head` / `base` may be absent or null in the payload; normalise to
        # an empty dict so the lookups below cannot raise on None.
        head = pull.get("head") or {}
        base = pull.get("base") or {}
        head_branch = head.get("ref") or ""
        slug = _slug_from_head_branch(head_branch)
        if slug is None:
            continue
        pr_kind = _kind_from_branch(head_branch)
        state = _state_from_pull(pull)
        gitea_opener = (pull.get("user") or {}).get("login") or ""
        opened_by = _resolve_actor(
            gitea_opener,
            bot_login,
            slug,
            pull["number"],
            pull.get("body") or "",
        )
        # Keyed on (repo, pr_number). On conflict only the mutable columns
        # are refreshed; slug, kind, branches and opened_at keep the values
        # from the first insert.
        db.conn().execute(
            """
            INSERT INTO cached_prs
            (rfc_slug, pr_kind, repo, pr_number, title, description, state,
            opened_by, opened_at, merged_at, closed_at,
            head_branch, base_branch, head_sha)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(repo, pr_number) DO UPDATE SET
            title = excluded.title,
            description = excluded.description,
            state = excluded.state,
            opened_by = excluded.opened_by,
            merged_at = excluded.merged_at,
            closed_at = excluded.closed_at,
            head_sha = excluded.head_sha
            """,
            (
                slug,
                pr_kind,
                repo_full,
                pull["number"],
                pull.get("title") or "",
                pull.get("body") or "",
                state,
                opened_by,
                pull.get("created_at"),
                pull.get("merged_at"),
                pull.get("closed_at"),
                head_branch,
                base.get("ref") or "main",
                head.get("sha"),
            ),
        )
# Finds an `On-behalf-of:` trailer and captures the text between the angle
# brackets that follow it. Compiled once at import time.
# NOTE(review): the pattern has no `^`/`$` anchors, so re.MULTILINE does not
# change what it matches; pattern and flag are kept as they were.
_TRAILER_RE = re.compile(r"On-behalf-of:\s+.*?<([^>]+)>", re.MULTILINE)


def _resolve_actor(gitea_opener: str, bot_login: str, slug: str, pr_number: int, body: str) -> str:
    """Best effort: collapse the bot's authorship to the underlying actor.

    Resolution order:
      1. If Gitea reports a non-empty opener other than the bot, use it.
      2. Otherwise take ``on_behalf_of`` from the earliest matching row in
         the ``actions`` audit log for this slug and PR number.
      3. Otherwise take the bracketed value of the first ``On-behalf-of:``
         trailer found in ``body``.
      4. Otherwise return ``gitea_opener``, or ``bot_login`` if that is empty.

    Args:
        gitea_opener: Login Gitea reports as the PR author (may be empty).
        bot_login: Login of the bot account.
        slug: Slug the PR refers to.
        pr_number: Number of the PR.
        body: PR description text searched for the trailer.

    Returns:
        The resolved actor identifier.
    """
    if gitea_opener and gitea_opener != bot_login:
        return gitea_opener

    # Prefer the audit log.
    row = db.conn().execute(
        """
        SELECT on_behalf_of FROM actions
        WHERE action_kind IN ('propose_rfc', 'open_body_edit_pr', 'open_claim_pr', 'open_metadata_pr')
        AND rfc_slug = ? AND pr_number = ?
        ORDER BY id LIMIT 1
        """,
        (slug, pr_number),
    ).fetchone()
    if row and row["on_behalf_of"]:
        return row["on_behalf_of"]

    # Fall back to parsing the On-behalf-of trailer.
    m = _TRAILER_RE.search(body)
    if m:
        return m.group(1)
    return gitea_opener or bot_login
def _slug_from_head_branch(head_branch: str) -> str | None:
    """Extract the slug from a bot-style head branch name.

    Recognised shapes:
      - ``propose/<slug>``, ``claim/<slug>``, ``metadata/<slug>``: the slug
        is everything after the prefix.
      - ``edit/<slug>[/<rest>]``: the slug is the first path segment after
        the prefix.

    Args:
        head_branch: Head branch name of a pull request.

    Returns:
        The slug, or ``None`` when the branch has no recognised prefix or
        the slug part is empty (e.g. ``"propose/"``), so callers that guard
        on ``is None`` never see an empty slug.
    """
    prefix, slash, remainder = head_branch.partition("/")
    if not slash or prefix not in ("propose", "edit", "claim", "metadata"):
        return None
    if prefix == "edit":
        # Edit branches may carry a further suffix after the slug.
        remainder = remainder.split("/", 1)[0]
    return remainder or None
def _kind_from_branch(head_branch: str) -> str:
    """Map a head branch name to the ``pr_kind`` stored in cached_prs.

    The kind is chosen by the branch's first path segment; any branch that
    has no recognised ``<prefix>/`` falls back to ``"idea"``.
    """
    kind_by_prefix = {
        "propose": "idea",
        "edit": "meta_body_edit",
        "claim": "meta_claim",
        "metadata": "meta_metadata",
    }
    first_segment, slash, _ = head_branch.partition("/")
    if not slash:
        # No "/" at all, so none of the "<prefix>/" forms can match.
        return "idea"
    return kind_by_prefix.get(first_segment, "idea")
def _state_from_pull(pull: dict) -> str:
    """Collapse a pull payload to ``"merged"``, ``"closed"`` or ``"open"``.

    A truthy ``merged`` field wins over ``state``; anything that is neither
    merged nor explicitly ``"closed"`` is reported as ``"open"``.
    """
    if pull.get("merged"):
        return "merged"
    return "closed" if pull.get("state") == "closed" else "open"
# ----- Reconciler -----


class Reconciler:
    """Per §4.1: periodic safety-net sweep.

    Runs in the background, every five minutes by default. Catches up
    on any webhook the bot missed (downtime, network failure, Gitea
    flake). If the cache is corrupted, the reconciler rebuilds from
    scratch — that's the contract.

    Lifecycle: ``start()`` schedules the background loop on the running
    event loop, ``stop()`` signals it and waits for it to finish. The
    reconciler can be started again after it has been stopped.
    """

    def __init__(self, config: Config, gitea: Gitea, interval_seconds: int = 300):
        """Store the collaborators and the sweep interval (in seconds)."""
        self._config = config
        self._gitea = gitea
        self._interval = interval_seconds
        # Background task running _loop(); None while not started.
        self._task: asyncio.Task | None = None
        # Set by stop() to wake the loop out of its interval wait.
        self._stop = asyncio.Event()

    async def _loop(self) -> None:
        """Sweep once immediately, then once per interval until stopped."""
        # One sweep at startup, then on the interval. The startup sweep
        # is what brings a fresh cache to life on first boot.
        await self.sweep()
        while not self._stop.is_set():
            try:
                # Sleep for the interval, but return early if stop() fires.
                await asyncio.wait_for(self._stop.wait(), timeout=self._interval)
            except asyncio.TimeoutError:
                pass
            if self._stop.is_set():
                break
            await self.sweep()

    async def sweep(self) -> None:
        """Run one reconcile pass over meta-repo entries and pulls.

        Any exception is logged and swallowed so that a failed sweep does
        not terminate the background loop.
        """
        log.info("reconciler: starting sweep")
        try:
            await refresh_meta_repo(self._config, self._gitea)
            await refresh_meta_pulls(self._config, self._gitea)
        except Exception:
            log.exception("reconciler: sweep failed")
        else:
            log.info("reconciler: sweep complete")

    def start(self) -> None:
        """Start the background loop; a no-op if it is already running.

        Must be called while an event loop is running, since it schedules
        a task with ``asyncio.create_task``.
        """
        if self._task is None:
            # A previous stop() leaves the event set. Clear it, otherwise
            # the restarted loop would run its startup sweep and exit.
            self._stop.clear()
            self._task = asyncio.create_task(self._loop())

    async def stop(self) -> None:
        """Signal the loop to stop and wait for it to finish.

        An in-progress sweep is allowed to complete before this returns.
        """
        self._stop.set()
        if self._task is not None:
            try:
                await self._task
            finally:
                # Forget the task even if awaiting it raised, so start()
                # can schedule a fresh one.
                self._task = None