3bc8fe92af
Per the §19.1 brief: the three-column shape (§8.1) opens on main
in discuss mode (§8.2), supports the §8.3 discuss-vs-contribute
flip on non-main branches, hosts §8.4's per-branch chat with AI
participation (§18's <change> protocol → §8.14 changes rows), the
§8.8 change-card panel with §8.9 accept/decline/edit-before-accept,
the §8.10 tracked-change markup + DiffView toggle, the §8.11
manual-edit flushes with the stale-change mechanic, the §8.12
range and paragraph sub-threads, the §8.13 flag affordance, and
the §8.14 discuss-mode buffer.
Backend: bot.py grew per-RFC-repo write ops (cut_branch_from_main,
commit_accepted_change with the structured original/proposed/reason
body and Change-Id + Source-Message-Id + On-behalf-of trailers,
commit_manual_flush, ensure_rfc_repo_seed). cache.py grew
refresh_rfc_repo and the webhook dispatches on repository.full_name.
providers.py and chat.py port the §18 carryovers — multi-provider
LLM abstraction and SSE-streaming chat against the §5 threads /
thread_messages / changes schema. api_branches.py mounts the §17
branches/<branch>/* and threads/<thread_id>/* routes with the §6
/ §11 permission checks inline.
Frontend: RFCView.jsx rebuilt as the §8 surface; Editor.jsx,
ChatPanel.jsx, ChangePanel.jsx, PromptBar.jsx, SelectionTooltip.jsx,
DiffView.jsx, ModelPicker.jsx, modelStyles.js lifted from the
prototype and adapted to the canonical schema.
Covered by `backend/tests/test_rfc_view_vertical.py` — eleven new
integration tests against an extended FakeGitea (PUT contents,
POST orgs/{org}/repos, seed_rfc_repo): main-view read,
promote-to-branch, accept (with and without edit-before-accept),
decline, manual flush + system message, flag creation, visibility
flip, anonymous read-but-no-contribute, stale-change refusal, and
the chat-streaming path with a fake provider injected. The 5
Slice 1 tests continue to pass alongside.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
457 lines
17 KiB
Python
457 lines
17 KiB
Python
"""The §4 metadata cache and its two writers.

Per §4: Gitea is truth. The cache mirrors only what the left pane and
the read surfaces need, and it is rebuildable from Gitea at any time.
Per §4.1: two writers — the webhook handler and the periodic reconciler —
both read from Gitea and write to the cache. User actions never write
to the cache directly; they trigger Git operations through the bot
(`bot.py`), and the resulting webhook (or the next reconciler sweep)
is what updates the cache.

This module provides:
- `refresh_meta_repo()` — reads rfcs/ on the meta repo and reconciles
  cached_rfcs against what's there. Used by both the webhook handler
  (on meta-repo merge events) and the reconciler.
- `refresh_meta_pulls()` — reads open and closed meta-repo PRs and
  reconciles cached_prs for pr_kind='idea' and friends. Backs the §7.3
  pending-ideas disclosure.
- `refresh_rfc_repo()` — mirrors an active RFC's per-RFC repo (RFC.md on
  main, branches, pull requests) into cached_rfcs, cached_branches and
  cached_prs.
- `Reconciler` — the periodic background sweep that runs the three
  refreshers above.

Per §4.2's "single SQLite file colocated with the FastAPI process," the
cache writes happen on the same process that serves reads; lock
contention is bounded by the small mutation surface (a few hundred
rows at most for v1) and SQLite's WAL mode.
"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
|
|
from . import db, entry as entry_mod
|
|
from .config import Config
|
|
from .gitea import Gitea, GiteaError
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
async def refresh_meta_repo(config: Config, gitea: Gitea) -> None:
    """Re-read rfcs/ on the meta repo and reconcile cached_rfcs.

    Idempotent. Safe to call on every meta-repo webhook and on every
    reconciler sweep.

    Every ``*.md`` file listed under ``rfcs/`` on ``main`` is read, parsed
    with ``entry_mod.parse`` and upserted into ``cached_rfcs``. A file that
    cannot be read, cannot be parsed, or carries no slug is logged and
    skipped, so a single bad entry never blocks the others. If the
    directory listing itself fails, a warning is logged and the function
    returns without writing anything.

    Cached rows whose slug is no longer present in ``rfcs/`` are only
    logged; they are neither deleted nor modified.
    """
    org, repo = config.gitea_org, config.meta_repo
    try:
        files = await gitea.list_dir(org, repo, "rfcs", ref="main")
    except GiteaError as e:
        log.warning("refresh_meta_repo: cannot list rfcs/: %s", e)
        return

    seen_slugs: set[str] = set()
    for f in files:
        # ``or ""`` also covers a listing item whose name is present but null.
        if f.get("type") != "file" or not (f.get("name") or "").endswith(".md"):
            continue
        try:
            result = await gitea.read_file(org, repo, f["path"], ref="main")
        except GiteaError as read_err:
            # Same best-effort policy as refresh_rfc_repo: a transient read
            # failure on one entry must not abort the refresh of the rest.
            log.warning("refresh_meta_repo: cannot read %s: %s", f["path"], read_err)
            continue
        if not result:
            continue
        text, sha = result
        try:
            entry = entry_mod.parse(text)
        except Exception as parse_err:
            log.warning("refresh_meta_repo: skipping %s: %s", f["path"], parse_err)
            continue
        if not entry.slug:
            log.warning("refresh_meta_repo: skipping %s: missing slug", f["path"])
            continue
        seen_slugs.add(entry.slug)
        _upsert_cached_rfc(entry, body_sha=sha)

    # Entries that vanished from the meta repo. In practice the spec keeps
    # withdrawn entries in rfcs/ as historical record (§3), so this fires
    # only for entries deleted out of band (or skipped above because they
    # were unreadable or unparseable this time). The cache row is left
    # untouched; the log line is the only trace.
    existing = {row["slug"] for row in db.conn().execute("SELECT slug FROM cached_rfcs")}
    for missing in existing - seen_slugs:
        log.info("refresh_meta_repo: %s no longer in rfcs/ — leaving cache row in place", missing)
|
|
|
|
|
|
def _upsert_cached_rfc(entry: entry_mod.Entry, body_sha: str) -> None:
    """Insert or update the ``cached_rfcs`` row for *entry*, keyed by slug.

    On a slug conflict every mirrored column is overwritten with the new
    values. ``owners``, ``arbiters`` and ``tags`` are stored JSON-encoded.
    Both ``last_entry_commit_at`` and ``updated_at`` are stamped with the
    database's ``datetime('now')`` on insert and on update.
    """
    upsert_sql = """
        INSERT INTO cached_rfcs
        (slug, title, state, rfc_id, repo, proposed_by, proposed_at,
        graduated_at, graduated_by, owners_json, arbiters_json, tags_json,
        body, body_sha, last_entry_commit_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))
        ON CONFLICT(slug) DO UPDATE SET
        title = excluded.title,
        state = excluded.state,
        rfc_id = excluded.rfc_id,
        repo = excluded.repo,
        proposed_by = excluded.proposed_by,
        proposed_at = excluded.proposed_at,
        graduated_at = excluded.graduated_at,
        graduated_by = excluded.graduated_by,
        owners_json = excluded.owners_json,
        arbiters_json = excluded.arbiters_json,
        tags_json = excluded.tags_json,
        body = excluded.body,
        body_sha = excluded.body_sha,
        last_entry_commit_at = datetime('now'),
        updated_at = datetime('now')
        """
    # Positional order must match the column list in the INSERT above.
    scalar_columns = (
        entry.slug,
        entry.title,
        entry.state,
        entry.id,
        entry.repo,
        entry.proposed_by,
        entry.proposed_at,
        entry.graduated_at,
        entry.graduated_by,
    )
    json_columns = tuple(
        json.dumps(value) for value in (entry.owners, entry.arbiters, entry.tags)
    )
    params = scalar_columns + json_columns + (entry.body, body_sha)
    db.conn().execute(upsert_sql, params)
|
|
|
|
|
|
async def refresh_rfc_repo(config: Config, gitea: Gitea, slug: str) -> None:
    """Mirror an active RFC's per-RFC repo into the cache.

    Reads `RFC.md` on main into `cached_rfcs.body` (per §4 #3), lists
    branches into `cached_branches`, and lists open and closed PRs into
    `cached_prs` with `pr_kind='rfc_branch'`. Per §4.1 this runs in two
    places: a webhook arrival for events on the per-RFC repo, and the
    reconciler sweep.

    Does nothing when *slug* has no cached row, no repo, or is not in the
    `active` state; logs and returns when the cached repo is not of the
    form `owner/name`. The three mirroring steps are independent: a
    `GiteaError` in one is logged and the remaining steps still run.
    """
    row = db.conn().execute(
        "SELECT repo, state FROM cached_rfcs WHERE slug = ?", (slug,)
    ).fetchone()
    if not row or not row["repo"] or row["state"] != "active":
        return
    if "/" not in row["repo"]:
        log.warning("refresh_rfc_repo: %s has malformed repo %r", slug, row["repo"])
        return
    owner, repo = row["repo"].split("/", 1)

    await _mirror_rfc_body(gitea, owner, repo, slug)
    await _mirror_rfc_branches(gitea, owner, repo, slug)
    await _mirror_rfc_pulls(config, gitea, owner, repo, slug)


async def _mirror_rfc_body(gitea: Gitea, owner: str, repo: str, slug: str) -> None:
    """Copy `RFC.md` on main into `cached_rfcs.body` / `body_sha` for *slug*."""
    # Body on main — populates the discuss-mode default surface per §8.2.
    try:
        result = await gitea.read_file(owner, repo, "RFC.md", ref="main")
    except GiteaError as e:
        log.warning("refresh_rfc_repo(%s): read_file failed: %s", slug, e)
        return
    if result is None:
        return
    text, sha = result
    db.conn().execute(
        """
        UPDATE cached_rfcs
        SET body = ?, body_sha = ?, last_main_commit_at = datetime('now'),
        updated_at = datetime('now')
        WHERE slug = ?
        """,
        (text, sha, slug),
    )


async def _mirror_rfc_branches(gitea: Gitea, owner: str, repo: str, slug: str) -> None:
    """Upsert every listed branch and mark vanished ones as `deleted`."""
    # Branches — every branch the bot knows about per §11.5 / §12.
    try:
        branches = await gitea.list_branches(owner, repo)
    except GiteaError as e:
        log.warning("refresh_rfc_repo(%s): list_branches failed: %s", slug, e)
        # A failed listing is "unknown", not "no branches". Reconciling
        # against an empty list would mark every cached branch as deleted,
        # so leave cached_branches untouched until a listing succeeds.
        return

    seen_branches: set[str] = set()
    for b in branches:
        name = b.get("name") or ""
        if not name:
            continue
        seen_branches.add(name)
        commit = b.get("commit") or {}
        head_sha = commit.get("id") or ""
        last_commit_at = commit.get("timestamp")
        db.conn().execute(
            """
            INSERT INTO cached_branches (rfc_slug, branch_name, head_sha, state, last_commit_at)
            VALUES (?, ?, ?, 'open', ?)
            ON CONFLICT(rfc_slug, branch_name) DO UPDATE SET
            head_sha = excluded.head_sha,
            state = CASE WHEN cached_branches.state = 'closed' THEN 'closed' ELSE 'open' END,
            last_commit_at = excluded.last_commit_at
            """,
            (slug, name, head_sha, last_commit_at),
        )

    # Mark previously-known branches that disappeared as deleted, keeping
    # the row per §11.5 ("branch removed from Gitea, row remains").
    existing = {
        r["branch_name"]
        for r in db.conn().execute(
            "SELECT branch_name FROM cached_branches WHERE rfc_slug = ? AND state != 'deleted'",
            (slug,),
        )
    }
    for missing in existing - seen_branches:
        db.conn().execute(
            "UPDATE cached_branches SET state = 'deleted' WHERE rfc_slug = ? AND branch_name = ?",
            (slug, missing),
        )


async def _mirror_rfc_pulls(
    config: Config, gitea: Gitea, owner: str, repo: str, slug: str
) -> None:
    """Upsert the repo's open and closed PRs into `cached_prs` as `rfc_branch`."""
    # PRs on the per-RFC repo (pr_kind = 'rfc_branch'). Slice 3 owns the
    # full PR surface; we mirror metadata here so the §8.1 breadcrumb
    # dropdown's "1 PR" count is honest from Slice 2 onward.
    repo_full = f"{owner}/{repo}"
    bot_login = config.gitea_bot_user
    try:
        open_pulls = await gitea.list_pulls(owner, repo, state="open")
        closed_pulls = await gitea.list_pulls(owner, repo, state="closed")
    except GiteaError as e:
        log.warning("refresh_rfc_repo(%s): list_pulls failed: %s", slug, e)
        return

    for pull in open_pulls + closed_pulls:
        # ``or {}`` rather than a ``.get`` default: the key can be present
        # with a null value, which a default would not cover.
        head = pull.get("head") or {}
        head_branch = head.get("ref") or ""
        state = _state_from_pull(pull)
        gitea_opener = (pull.get("user") or {}).get("login") or ""
        opened_by = _resolve_actor(
            gitea_opener,
            bot_login,
            slug,
            pull["number"],
            pull.get("body") or "",
        )
        db.conn().execute(
            """
            INSERT INTO cached_prs
            (rfc_slug, pr_kind, repo, pr_number, title, description, state,
            opened_by, opened_at, merged_at, closed_at,
            head_branch, base_branch, head_sha)
            VALUES (?, 'rfc_branch', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(repo, pr_number) DO UPDATE SET
            title = excluded.title,
            description = excluded.description,
            state = excluded.state,
            opened_by = excluded.opened_by,
            merged_at = excluded.merged_at,
            closed_at = excluded.closed_at,
            head_sha = excluded.head_sha
            """,
            (
                slug,
                repo_full,
                pull["number"],
                pull.get("title") or "",
                pull.get("body") or "",
                state,
                opened_by,
                pull.get("created_at"),
                pull.get("merged_at"),
                pull.get("closed_at"),
                head_branch,
                (pull.get("base") or {}).get("ref") or "main",
                head.get("sha"),
            ),
        )
|
|
|
|
|
|
async def refresh_meta_pulls(config: Config, gitea: Gitea) -> None:
    """Reconcile open and closed meta-repo PRs into cached_prs.

    For Slice 1 we care about pr_kind='idea' (proposing a new entry).
    Other meta-repo PR kinds (body edits, metadata edits, claims) will
    be wired in their respective slices.

    Pulls whose head branch does not map to a slug (see
    `_slug_from_head_branch`) are skipped. If listing pulls fails, a
    warning is logged and nothing is written.

    `opened_by` is the **underlying actor**, not the bot login Gitea
    reports — per §15.9's framing for notifications and per §6.5's
    On-behalf-of accountability shape. We recover the actor by joining
    against the `actions` audit log; if no row matches (cache rebuilt
    from scratch on a deployment that pre-dates the actions log, or a
    pull we did not author), we fall back to parsing the
    `On-behalf-of:` trailer from the PR body, then to the raw Gitea
    login as last resort.
    """
    org, repo = config.gitea_org, config.meta_repo
    repo_full = f"{org}/{repo}"
    try:
        open_pulls = await gitea.list_pulls(org, repo, state="open")
        closed_pulls = await gitea.list_pulls(org, repo, state="closed")
    except GiteaError as e:
        log.warning("refresh_meta_pulls: %s", e)
        return

    bot_login = config.gitea_bot_user

    for pull in open_pulls + closed_pulls:
        # ``or {}`` rather than a ``.get`` default: the key can be present
        # with a null value, which a default would not cover. An empty
        # branch name maps to no slug and the pull is skipped below.
        head = pull.get("head") or {}
        head_branch = head.get("ref") or ""
        slug = _slug_from_head_branch(head_branch)
        if slug is None:
            continue
        pr_kind = _kind_from_branch(head_branch)
        state = _state_from_pull(pull)
        gitea_opener = (pull.get("user") or {}).get("login") or ""
        opened_by = _resolve_actor(
            gitea_opener,
            bot_login,
            slug,
            pull["number"],
            pull.get("body") or "",
        )
        db.conn().execute(
            """
            INSERT INTO cached_prs
            (rfc_slug, pr_kind, repo, pr_number, title, description, state,
            opened_by, opened_at, merged_at, closed_at,
            head_branch, base_branch, head_sha)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(repo, pr_number) DO UPDATE SET
            title = excluded.title,
            description = excluded.description,
            state = excluded.state,
            opened_by = excluded.opened_by,
            merged_at = excluded.merged_at,
            closed_at = excluded.closed_at,
            head_sha = excluded.head_sha
            """,
            (
                slug,
                pr_kind,
                repo_full,
                pull["number"],
                pull.get("title") or "",
                pull.get("body") or "",
                state,
                opened_by,
                pull.get("created_at"),
                pull.get("merged_at"),
                pull.get("closed_at"),
                head_branch,
                (pull.get("base") or {}).get("ref") or "main",
                head.get("sha"),
            ),
        )
|
|
|
|
|
|
_TRAILER_RE = None
|
|
|
|
|
|
def _resolve_actor(gitea_opener: str, bot_login: str, slug: str, pr_number: int, body: str) -> str:
|
|
"""Best effort: collapse the bot's authorship to the underlying actor."""
|
|
if gitea_opener and gitea_opener != bot_login:
|
|
return gitea_opener
|
|
# Prefer the audit log.
|
|
row = db.conn().execute(
|
|
"""
|
|
SELECT on_behalf_of FROM actions
|
|
WHERE action_kind IN ('propose_rfc', 'open_body_edit_pr', 'open_claim_pr', 'open_metadata_pr')
|
|
AND rfc_slug = ? AND pr_number = ?
|
|
ORDER BY id LIMIT 1
|
|
""",
|
|
(slug, pr_number),
|
|
).fetchone()
|
|
if row and row["on_behalf_of"]:
|
|
return row["on_behalf_of"]
|
|
# Fall back to parsing the On-behalf-of trailer.
|
|
import re as _re
|
|
global _TRAILER_RE
|
|
if _TRAILER_RE is None:
|
|
_TRAILER_RE = _re.compile(r"On-behalf-of:\s+.*?<([^>]+)>", _re.MULTILINE)
|
|
m = _TRAILER_RE.search(body)
|
|
if m:
|
|
return m.group(1)
|
|
return gitea_opener or bot_login
|
|
|
|
|
|
def _slug_from_head_branch(head_branch: str) -> str | None:
|
|
if head_branch.startswith("propose/"):
|
|
return head_branch[len("propose/") :]
|
|
if head_branch.startswith("edit/"):
|
|
parts = head_branch.split("/", 2)
|
|
if len(parts) >= 2:
|
|
return parts[1]
|
|
if head_branch.startswith("claim/"):
|
|
return head_branch[len("claim/") :]
|
|
if head_branch.startswith("metadata/"):
|
|
return head_branch[len("metadata/") :]
|
|
return None
|
|
|
|
|
|
def _kind_from_branch(head_branch: str) -> str:
|
|
if head_branch.startswith("propose/"):
|
|
return "idea"
|
|
if head_branch.startswith("edit/"):
|
|
return "meta_body_edit"
|
|
if head_branch.startswith("claim/"):
|
|
return "meta_claim"
|
|
if head_branch.startswith("metadata/"):
|
|
return "meta_metadata"
|
|
return "idea" # fallback
|
|
|
|
|
|
def _state_from_pull(pull: dict) -> str:
|
|
if pull.get("merged"):
|
|
return "merged"
|
|
if pull.get("state") == "closed":
|
|
return "closed"
|
|
return "open"
|
|
|
|
|
|
# ----- Reconciler -----
|
|
|
|
class Reconciler:
    """Per §4.1: periodic safety-net sweep.

    Runs in the background, every five minutes by default. Catches up
    on any webhook the bot missed (downtime, network failure, Gitea
    flake). If the cache is corrupted, the reconciler rebuilds from
    scratch — that's the contract.

    Lifecycle: `start()` schedules the background loop on the running
    event loop, `stop()` signals it and waits for it to finish. The
    instance can be started again after `stop()`.
    """

    def __init__(self, config: Config, gitea: Gitea, interval_seconds: int = 300):
        self._config = config
        self._gitea = gitea
        # Seconds to wait between the end of one sweep and the next.
        self._interval = interval_seconds
        # The background loop task; None while not running.
        self._task: asyncio.Task | None = None
        # Set by stop() to wake the loop out of its inter-sweep wait.
        self._stop = asyncio.Event()

    async def _loop(self) -> None:
        """Sweep once immediately, then once per interval until stopped."""
        # One sweep at startup, then on the interval. The startup sweep
        # is what brings a fresh cache to life on first boot.
        await self.sweep()
        while not self._stop.is_set():
            try:
                # Doubles as an interruptible sleep: returns early when
                # stop() sets the event, times out otherwise.
                await asyncio.wait_for(self._stop.wait(), timeout=self._interval)
            except asyncio.TimeoutError:
                pass
            if self._stop.is_set():
                break
            await self.sweep()

    async def sweep(self) -> None:
        """Run one full reconciliation pass; never raises `Exception`.

        Refreshes the meta repo, then meta-repo pulls, then every active
        RFC that has a repo. A failure in the meta-repo steps aborts the
        sweep (and is logged); a failure on one per-RFC repo is logged and
        the remaining RFCs are still refreshed.
        """
        log.info("reconciler: starting sweep")
        try:
            await refresh_meta_repo(self._config, self._gitea)
            await refresh_meta_pulls(self._config, self._gitea)
            # Per-RFC repos: refresh each active entry. Meta-repo refresh
            # must come first so newly-graduated entries land in
            # cached_rfcs before we try to reach their per-RFC repos.
            active = [
                r["slug"]
                for r in db.conn().execute(
                    "SELECT slug FROM cached_rfcs WHERE state = 'active' AND repo IS NOT NULL"
                )
            ]
            for slug in active:
                try:
                    await refresh_rfc_repo(self._config, self._gitea, slug)
                except Exception:
                    # Isolate per-RFC failures so one broken repo cannot
                    # starve every RFC that comes after it in the list.
                    log.exception("reconciler: refresh_rfc_repo(%s) failed", slug)
        except Exception:
            log.exception("reconciler: sweep failed")
        else:
            log.info("reconciler: sweep complete")

    def start(self) -> None:
        """Start the background loop if it is not already running.

        Must be called while an event loop is running, since it uses
        `asyncio.create_task`.
        """
        if self._task is None:
            # A previous stop() leaves the event set; without clearing it a
            # restarted loop would exit right after its startup sweep.
            self._stop.clear()
            self._task = asyncio.create_task(self._loop())

    async def stop(self) -> None:
        """Signal the loop to stop and wait for it to finish.

        Safe to call when the loop was never started. An in-flight sweep
        is allowed to complete before this returns.
        """
        self._stop.set()
        if self._task is not None:
            try:
                await self._task
            finally:
                # Reset even if awaiting the task raised (e.g. it was
                # cancelled), so a later start() can create a fresh task.
                self._task = None
|