Slice 2: the §8 active-RFC view in full

Per the §19.1 brief: the three-column shape (§8.1) opens on main
in discuss mode (§8.2), supports the §8.3 discuss-vs-contribute
flip on non-main branches, hosts §8.4's per-branch chat with AI
participation (§18's <change> protocol → §8.14 changes rows), the
§8.8 change-card panel with §8.9 accept/decline/edit-before-accept,
the §8.10 tracked-change markup + DiffView toggle, the §8.11
manual-edit flushes with the stale-change mechanic, the §8.12
range and paragraph sub-threads, the §8.13 flag affordance, and
the §8.14 discuss-mode buffer.

Backend: bot.py grew per-RFC-repo write ops (cut_branch_from_main,
commit_accepted_change with the structured original/proposed/reason
body and Change-Id + Source-Message-Id + On-behalf-of trailers,
commit_manual_flush, ensure_rfc_repo_seed). cache.py grew
refresh_rfc_repo and the webhook dispatches on repository.full_name.
providers.py and chat.py port the §18 carryovers — multi-provider
LLM abstraction and SSE-streaming chat against the §5 threads /
thread_messages / changes schema. api_branches.py mounts the §17
branches/<branch>/* and threads/<thread_id>/* routes with the §6
/ §11 permission checks inline.

Frontend: RFCView.jsx rebuilt as the §8 surface; Editor.jsx,
ChatPanel.jsx, ChangePanel.jsx, PromptBar.jsx, SelectionTooltip.jsx,
DiffView.jsx, ModelPicker.jsx, modelStyles.js lifted from the
prototype and adapted to the canonical schema.

Covered by `backend/tests/test_rfc_view_vertical.py` — eleven new
integration tests against an extended FakeGitea (PUT contents,
POST orgs/{org}/repos, seed_rfc_repo): main-view read,
promote-to-branch, accept (with and without edit-before-accept),
decline, manual flush + system message, flag creation, visibility
flip, anonymous read-but-no-contribute, stale-change refusal, and
the chat-streaming path with a fake provider injected. The 5
Slice 1 tests continue to pass alongside.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ben Stull
2026-05-24 04:35:14 -07:00
parent 779ba6db59
commit 3bc8fe92af
24 changed files with 5433 additions and 151 deletions
+17 -2
View File
@@ -17,10 +17,11 @@ from typing import Any
from fastapi import APIRouter, HTTPException, Request
from pydantic import BaseModel, Field
from . import auth, db, entry as entry_mod, cache
from . import api_branches, auth, db, entry as entry_mod, cache
from .bot import Bot
from .config import Config
from .gitea import Gitea, GiteaError
from .providers import BaseProvider
class ProposeBody(BaseModel):
@@ -34,8 +35,22 @@ class DeclineBody(BaseModel):
comment: str = Field(min_length=1, max_length=4000)
def make_router(config: Config, gitea: Gitea, bot: Bot) -> APIRouter:
def make_router(
config: Config,
gitea: Gitea,
bot: Bot,
providers: dict[str, BaseProvider] | None = None,
) -> APIRouter:
# Use `is None` rather than `providers or {}` — an empty dict is
# falsy, and the test harness mutates the dict the closure holds to
# inject a fake provider; substituting a fresh `{}` here would
# silently drop those mutations.
if providers is None:
providers = {}
router = APIRouter()
# Slice 2: the §8 active-RFC view's endpoints live in api_branches.
# Mounting them on the same router keeps the §17 layout flat.
router.include_router(api_branches.make_router(config, gitea, bot, providers))
# ---------------------------------------------------------------
# Auth surface — extends the prototype's pattern but reads role
File diff suppressed because it is too large Load Diff
+213
View File
@@ -220,3 +220,216 @@ class Bot:
rfc_slug=slug,
pr_number=pr_number,
)
# ----- Per-RFC repo: branches (§8.3, §8.14) -----
async def cut_branch_from_main(
self,
actor: Actor,
*,
owner: str,
repo: str,
new_branch: str,
slug: str,
from_branch: str = "main",
) -> dict:
"""Per §8.14: 'Start Contributing' on main cuts a new branch.
Also covers the §8.3 case of a contributor wanting a fresh branch
for a piece of work. Returns the Gitea branch payload.
"""
created = await self._gitea.create_branch(owner, repo, new_branch, from_branch=from_branch)
_log(
actor,
"create_branch",
rfc_slug=slug,
branch_name=new_branch,
details={"from": from_branch, "repo": f"{owner}/{repo}"},
)
return created
# ----- Per-RFC repo: per-accepted-change commits (§8.6, §8.9) -----
async def commit_accepted_change(
self,
actor: Actor,
*,
owner: str,
repo: str,
branch: str,
file_path: str,
new_content: str,
prior_sha: str,
change_id: int,
original: str,
proposed: str,
ai_proposed: str | None,
reason: str,
source_message_id: int | None,
slug: str,
) -> str:
"""Per §8.6: one commit per accepted change.
The commit message subject is a short structural description; the
body carries `original`, `proposed`, and `reason` in named
sections. When the contributor edited the AI's proposal before
accepting (§8.9's `was_edited_before_accept`), the AI's original
wording is preserved under an `AI proposed:` section so the
timeline records both what was offered and what landed.
Trailers: `Change-Id`, `Source-Message-Id` (where applicable),
and the standard `On-behalf-of:` per §6.5.
Returns the commit SHA.
"""
subject = _subject_from_reason(reason, fallback="Accept change")
body_lines = [
"**Original:**",
original.strip(),
"",
"**Proposed:**",
proposed.strip(),
]
if ai_proposed is not None and ai_proposed.strip() != proposed.strip():
body_lines += ["", "**AI proposed (edited before accept):**", ai_proposed.strip()]
if reason and reason.strip():
body_lines += ["", "**Reason:**", reason.strip()]
body_lines += ["", f"Change-Id: {change_id}"]
if source_message_id is not None:
body_lines += [f"Source-Message-Id: {source_message_id}"]
body_lines += [_trailer(actor)]
message = subject + "\n\n" + "\n".join(body_lines).strip()
result = await self._gitea.update_file(
owner,
repo,
file_path,
content=new_content,
sha=prior_sha,
message=message,
branch=branch,
author_name=actor.display_name,
author_email=actor.email or f"{actor.gitea_login}@users.noreply",
)
sha = result.get("commit", {}).get("sha") or result.get("content", {}).get("sha") or ""
_log(
actor,
"accept_change",
rfc_slug=slug,
branch_name=branch,
bot_commit_sha=sha,
details={"change_id": change_id, "file_path": file_path},
)
return sha
# ----- Per-RFC repo: manual-edit flushes (§8.6, §8.11) -----
async def commit_manual_flush(
self,
actor: Actor,
*,
owner: str,
repo: str,
branch: str,
file_path: str,
new_content: str,
prior_sha: str,
change_id: int,
paragraph_count: int,
slug: str,
) -> str:
"""Per §8.6 / §8.11: one commit per manual-edit flush window.
Subject names the structural extent so a reviewer scanning the
log can size the change at a glance; the body carries the
change-id trailer that binds the commit to the resolved card in
the panel.
"""
plural = "" if paragraph_count == 1 else "s"
subject = f"manual edit: {paragraph_count} paragraph{plural}"
body_lines = [
f"Change-Id: {change_id}",
_trailer(actor),
]
message = subject + "\n\n" + "\n".join(body_lines)
result = await self._gitea.update_file(
owner,
repo,
file_path,
content=new_content,
sha=prior_sha,
message=message,
branch=branch,
author_name=actor.display_name,
author_email=actor.email or f"{actor.gitea_login}@users.noreply",
)
sha = result.get("commit", {}).get("sha") or result.get("content", {}).get("sha") or ""
_log(
actor,
"manual_flush",
rfc_slug=slug,
branch_name=branch,
bot_commit_sha=sha,
details={"change_id": change_id, "paragraph_count": paragraph_count},
)
return sha
# ----- Per-RFC repo: seeding (test/dev fixtures, future graduation) -----
async def ensure_rfc_repo_seed(
self,
actor: Actor,
*,
owner: str,
repo: str,
slug: str,
title: str,
body: str,
) -> None:
"""Create the per-RFC repo and seed `RFC.md` on `main` if missing.
Slice 2 surfaces against per-RFC repos that Slice 5's graduation
flow will eventually create. Until graduation exists, this is the
seam test fixtures and ad-hoc dev workflows use to bring an RFC
repo into existence — the bot stays the only Git writer and the
seed itself enters the audit log.
"""
existing = await self._gitea.get_repo(owner, repo)
if existing is None:
await self._gitea.create_org_repo(owner, repo, description=f"RFC: {title}")
# If main has a tip already, leave it alone — the seed is idempotent.
main = await self._gitea.get_branch(owner, repo, "main")
if main is not None:
return
message = "Seed RFC.md\n\n" + _trailer(actor)
await self._gitea.create_file(
owner,
repo,
"RFC.md",
content=body,
message=message,
branch="main",
author_name=actor.display_name,
author_email=actor.email or f"{actor.gitea_login}@users.noreply",
)
_log(
actor,
"seed_rfc_repo",
rfc_slug=slug,
branch_name="main",
details={"repo": f"{owner}/{repo}", "title": title},
)
def _subject_from_reason(reason: str, fallback: str) -> str:
"""One-line commit subject derived from the change's reason.
Truncated to 72 chars so the Git log scans cleanly. Exact length is
an implementation detail per §8.6.
"""
text = (reason or "").strip().split("\n")[0]
if not text:
return fallback
if len(text) > 72:
return text[:69].rstrip() + ""
return text
+144
View File
@@ -119,6 +119,139 @@ def _upsert_cached_rfc(entry: entry_mod.Entry, body_sha: str) -> None:
)
async def refresh_rfc_repo(config: Config, gitea: Gitea, slug: str) -> None:
"""Mirror an active RFC's per-RFC repo into the cache.
Reads `RFC.md` on main into `cached_rfcs.body` (per §4 #3), lists
branches into `cached_branches`, and lists open PRs into
`cached_prs` with `pr_kind='rfc_branch'`. Per §4.1 this runs in two
places: a webhook arrival for events on the per-RFC repo, and the
reconciler sweep.
"""
row = db.conn().execute(
"SELECT repo, state FROM cached_rfcs WHERE slug = ?", (slug,)
).fetchone()
if not row or not row["repo"] or row["state"] != "active":
return
if "/" not in row["repo"]:
log.warning("refresh_rfc_repo: %s has malformed repo %r", slug, row["repo"])
return
owner, repo = row["repo"].split("/", 1)
# Body on main — populates the discuss-mode default surface per §8.2.
try:
result = await gitea.read_file(owner, repo, "RFC.md", ref="main")
except GiteaError as e:
log.warning("refresh_rfc_repo(%s): read_file failed: %s", slug, e)
result = None
if result is not None:
text, sha = result
db.conn().execute(
"""
UPDATE cached_rfcs
SET body = ?, body_sha = ?, last_main_commit_at = datetime('now'),
updated_at = datetime('now')
WHERE slug = ?
""",
(text, sha, slug),
)
# Branches — every branch the bot knows about per §11.5 / §12.
try:
branches = await gitea.list_branches(owner, repo)
except GiteaError as e:
log.warning("refresh_rfc_repo(%s): list_branches failed: %s", slug, e)
branches = []
seen_branches: set[str] = set()
for b in branches:
name = b.get("name") or ""
if not name:
continue
seen_branches.add(name)
head_sha = (b.get("commit") or {}).get("id") or ""
last_commit_at = (b.get("commit") or {}).get("timestamp")
db.conn().execute(
"""
INSERT INTO cached_branches (rfc_slug, branch_name, head_sha, state, last_commit_at)
VALUES (?, ?, ?, 'open', ?)
ON CONFLICT(rfc_slug, branch_name) DO UPDATE SET
head_sha = excluded.head_sha,
state = CASE WHEN cached_branches.state = 'closed' THEN 'closed' ELSE 'open' END,
last_commit_at = excluded.last_commit_at
""",
(slug, name, head_sha, last_commit_at),
)
# Mark previously-known branches that disappeared as deleted, keeping
# the row per §11.5 ("branch removed from Gitea, row remains").
existing = {
r["branch_name"]
for r in db.conn().execute(
"SELECT branch_name FROM cached_branches WHERE rfc_slug = ? AND state != 'deleted'",
(slug,),
)
}
for missing in existing - seen_branches:
db.conn().execute(
"UPDATE cached_branches SET state = 'deleted' WHERE rfc_slug = ? AND branch_name = ?",
(slug, missing),
)
# PRs on the per-RFC repo (pr_kind = 'rfc_branch'). Slice 3 owns the
# full PR surface; we mirror metadata here so the §8.1 breadcrumb
# dropdown's "1 PR" count is honest from Slice 2 onward.
repo_full = f"{owner}/{repo}"
bot_login = config.gitea_bot_user
try:
open_pulls = await gitea.list_pulls(owner, repo, state="open")
closed_pulls = await gitea.list_pulls(owner, repo, state="closed")
except GiteaError as e:
log.warning("refresh_rfc_repo(%s): list_pulls failed: %s", slug, e)
open_pulls, closed_pulls = [], []
for pull in open_pulls + closed_pulls:
head_branch = pull.get("head", {}).get("ref", "")
state = _state_from_pull(pull)
gitea_opener = (pull.get("user") or {}).get("login") or ""
opened_by = _resolve_actor(
gitea_opener,
bot_login,
slug,
pull["number"],
pull.get("body") or "",
)
db.conn().execute(
"""
INSERT INTO cached_prs
(rfc_slug, pr_kind, repo, pr_number, title, description, state,
opened_by, opened_at, merged_at, closed_at,
head_branch, base_branch, head_sha)
VALUES (?, 'rfc_branch', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(repo, pr_number) DO UPDATE SET
title = excluded.title,
description = excluded.description,
state = excluded.state,
opened_by = excluded.opened_by,
merged_at = excluded.merged_at,
closed_at = excluded.closed_at,
head_sha = excluded.head_sha
""",
(
slug,
repo_full,
pull["number"],
pull.get("title") or "",
pull.get("body") or "",
state,
opened_by,
pull.get("created_at"),
pull.get("merged_at"),
pull.get("closed_at"),
head_branch,
(pull.get("base") or {}).get("ref") or "main",
(pull.get("head") or {}).get("sha"),
),
)
async def refresh_meta_pulls(config: Config, gitea: Gitea) -> None:
"""Reconcile open meta-repo PRs into cached_prs.
@@ -296,6 +429,17 @@ class Reconciler:
try:
await refresh_meta_repo(self._config, self._gitea)
await refresh_meta_pulls(self._config, self._gitea)
# Per-RFC repos: refresh each active entry. Meta-repo refresh
# must come first so newly-graduated entries land in
# cached_rfcs before we try to reach their per-RFC repos.
active = [
r["slug"]
for r in db.conn().execute(
"SELECT slug FROM cached_rfcs WHERE state = 'active' AND repo IS NOT NULL"
)
]
for slug in active:
await refresh_rfc_repo(self._config, self._gitea, slug)
except Exception:
log.exception("reconciler: sweep failed")
else:
+321
View File
@@ -0,0 +1,321 @@
"""SSE-streaming chat layer — §18 carryover, adapted to the §5 schema.
The prototype kept conversation state in an in-memory `RFCChat`
keyed by a session_id. Here, history is the durable list of
`thread_messages` rows on a `threads` row, scoped to one branch (or
to a sub-thread anchored to a range or paragraph within it). The
streaming response is parsed for `<change>` blocks per §18; each
`<change>` becomes a `changes` row with `state='pending'` per §8.14
the moment it is parsed, regardless of mode.
This module exposes two seams:
- `build_history` and `build_system_prompt` pure functions a caller
can use to assemble the LLM request without owning a provider.
- `stream_assistant_turn` the orchestration that creates the
assistant `thread_messages` row, runs the provider's streaming
interface, parses `<change>` blocks as they accumulate, materializes
`changes` rows on completion, and yields SSE-shaped text chunks.
Per the §1 invariant, no Git writes happen here chat is app data;
turning an accepted `<change>` into a commit is a separate gesture
that goes through `bot.py`.
"""
from __future__ import annotations
import base64
import json
import logging
import re
from dataclasses import dataclass
from typing import AsyncIterator, Iterator
from . import db
from .providers import BaseProvider
log = logging.getLogger(__name__)
# The §18 system prompt, adapted from the prototype. The prototype's
# version assumed one RFC document loaded as context; here the document
# is the branch's RFC.md at its current tip. The selection-quote shape
# (§8.12) is wired by the caller into the user message text — not the
# system prompt — so the model sees it as part of the turn.
SYSTEM_PROMPT = """You are a participant in the Wiggleverse RFC framework — a standardization process for natural-language vocabulary that humans and machines need to share. You are collaborating with a human contributor on the RFC titled "{title}".
The contributor's gestures may be questions, objections, sketches of new framings, or direct edit requests. Your role is to translate them into concrete proposed edits where possible. The transcript of this conversation is the durable evidence the definition was earned, so be specific and stay close to the text.
Format each proposed change as one <change> block:
<change>
<original>exact text to replace, copied verbatim from the document</original>
<proposed>replacement text</proposed>
<reason>why this change improves the document one or two short sentences</reason>
</change>
Rules:
- The <original> text must match the document character-for-character. Do not paraphrase, do not abbreviate.
- One <change> block per distinct edit. Multiple blocks are encouraged when the contributor's input touches several passages.
- If the contributor is asking a general question or exploring an idea not yet ready to become an edit, respond in plain prose. When in doubt, lean toward proposing an edit.
- After your <change> blocks you may add a brief conversational note. Keep it short.
---
The current document:
{body}
"""
# ---------------------------------------------------------------------------
# History / prompt assembly
# ---------------------------------------------------------------------------
def build_history(thread_id: int) -> list[dict]:
"""Pull the thread's messages in chronological order, in the
{role, content} shape every provider's `send` interface expects.
System-author rows are excluded the prompt template carries the
standing instructions; system-author messages are inline narrative
that doesn't change the model's behavior.
"""
rows = db.conn().execute(
"""
SELECT role, text FROM thread_messages
WHERE thread_id = ? AND role IN ('user', 'assistant')
ORDER BY id
""",
(thread_id,),
).fetchall()
return [{"role": r["role"], "content": r["text"]} for r in rows]
def build_system_prompt(*, title: str, body: str) -> str:
return SYSTEM_PROMPT.format(title=title, body=body)
# ---------------------------------------------------------------------------
# <change> parsing
# ---------------------------------------------------------------------------
_CHANGE_RE = re.compile(
r"<change>\s*<original>([\s\S]*?)</original>\s*<proposed>([\s\S]*?)</proposed>\s*<reason>([\s\S]*?)</reason>\s*</change>",
re.MULTILINE,
)
@dataclass(frozen=True)
class ParsedChange:
original: str
proposed: str
reason: str
def parse_changes(text: str) -> list[ParsedChange]:
"""Per §18: pull every well-formed <change> block out of an assistant
message. Mid-stream partials are simply not matched yet; the parser
runs once on completion."""
return [
ParsedChange(m.group(1).strip(), m.group(2).strip(), m.group(3).strip())
for m in _CHANGE_RE.finditer(text)
]
# ---------------------------------------------------------------------------
# Persistence — turn boundaries
# ---------------------------------------------------------------------------
def append_user_message(
*,
thread_id: int,
author_user_id: int,
text: str,
quote: str | None,
) -> int:
cur = db.conn().execute(
"""
INSERT INTO thread_messages (thread_id, role, author_user_id, text, quote)
VALUES (?, 'user', ?, ?, ?)
""",
(thread_id, author_user_id, text, quote),
)
return cur.lastrowid
def append_assistant_placeholder(*, thread_id: int, model_id: str) -> int:
cur = db.conn().execute(
"""
INSERT INTO thread_messages (thread_id, role, model_id, text)
VALUES (?, 'assistant', ?, '')
""",
(thread_id, model_id),
)
return cur.lastrowid
def finalize_assistant_message(*, message_id: int, text: str) -> None:
db.conn().execute(
"UPDATE thread_messages SET text = ? WHERE id = ?",
(text, message_id),
)
def append_system_message(*, thread_id: int, text: str) -> int:
"""Used by §10.6 (manual-edit-flush markers), §9.3 (decline-comment
record), and any other system-narrated event that needs to live
inline in chat. role='system', author_user_id=NULL."""
cur = db.conn().execute(
"""
INSERT INTO thread_messages (thread_id, role, text)
VALUES (?, 'system', ?)
""",
(thread_id, text),
)
return cur.lastrowid
def materialize_changes(
*,
rfc_slug: str,
branch_name: str,
thread_id: int,
source_message_id: int,
parsed: list[ParsedChange],
) -> list[int]:
"""Per §8.14: every <change> block becomes a `changes` row with
state='pending' immediately, regardless of mode. Returns the new
row ids in source order."""
ids: list[int] = []
for ch in parsed:
cur = db.conn().execute(
"""
INSERT INTO changes
(rfc_slug, branch_name, thread_id, source_message_id,
kind, state, original, proposed, reason)
VALUES (?, ?, ?, ?, 'ai', 'pending', ?, ?, ?)
""",
(rfc_slug, branch_name, thread_id, source_message_id, ch.original, ch.proposed, ch.reason),
)
ids.append(cur.lastrowid)
return ids
def mark_stale_overlapping(
*,
rfc_slug: str,
branch_name: str,
new_body: str,
) -> int:
"""Per §8.11: when a manual edit changes the document such that a
pending AI proposal's `original` no longer locates, set its
`stale_since`. The contributor's action stays gated on the stale
card; state stays `pending`.
Returns the number of rows marked stale on this call (idempotent
on re-entry already-stale rows aren't touched twice).
"""
rows = db.conn().execute(
"""
SELECT id, original FROM changes
WHERE rfc_slug = ? AND branch_name = ?
AND kind = 'ai' AND state = 'pending' AND stale_since IS NULL
""",
(rfc_slug, branch_name),
).fetchall()
marked = 0
for r in rows:
original = (r["original"] or "").strip()
if original and original not in new_body:
db.conn().execute(
"UPDATE changes SET stale_since = datetime('now') WHERE id = ?",
(r["id"],),
)
marked += 1
return marked
# ---------------------------------------------------------------------------
# SSE shape — base64 chunks for binary-safe transport
# ---------------------------------------------------------------------------
def sse_chunk(text: str) -> str:
encoded = base64.b64encode(text.encode("utf-8")).decode("ascii")
return f"data: {encoded}\n\n"
def sse_event(name: str, payload: dict) -> str:
return f"event: {name}\ndata: {json.dumps(payload)}\n\n"
# ---------------------------------------------------------------------------
# Orchestration
# ---------------------------------------------------------------------------
async def stream_assistant_turn(
*,
provider: BaseProvider,
system_prompt: str,
history: list[dict],
user_message: str,
thread_id: int,
rfc_slug: str,
branch_name: str,
assistant_message_id: int,
) -> AsyncIterator[str]:
"""Run the provider's streaming interface, yielding SSE-encoded
chunks. On completion, materializes `changes` rows from any
`<change>` blocks in the assembled text and emits a trailing
`changes` event listing the new change ids.
The user's message must already have been persisted by the caller
before this is invoked; the placeholder assistant row whose id is
`assistant_message_id` must exist too. This module's job is to
populate the assistant row's text and materialize the changes; the
caller wires it into a FastAPI StreamingResponse.
Provider streaming is synchronous (an `Iterator[str]`) per §18; we
drain it eagerly into chunks and yield them as async strings. This
is sufficient at single-process scale (§4.2) and the streaming
impl is what the prototype shipped re-wrapping it in a worker
thread for a future deployment shape is a one-liner if it
matters.
"""
full_text_chunks: list[str] = []
# Hand the provider the user turn appended to history.
history_for_call = list(history) + [{"role": "user", "content": user_message}]
def _drain() -> Iterator[str]:
try:
yield from provider.send_streaming(system_prompt, history_for_call)
except Exception as e:
log.exception("provider stream failed")
yield f"\n\n[Provider error: {e}]"
for chunk in _drain():
if not chunk:
continue
full_text_chunks.append(chunk)
yield sse_chunk(chunk)
full_text = "".join(full_text_chunks)
finalize_assistant_message(message_id=assistant_message_id, text=full_text)
parsed = parse_changes(full_text)
new_ids = materialize_changes(
rfc_slug=rfc_slug,
branch_name=branch_name,
thread_id=thread_id,
source_message_id=assistant_message_id,
parsed=parsed,
)
yield sse_event(
"changes",
{
"message_id": assistant_message_id,
"change_ids": new_ids,
"count": len(new_ids),
},
)
yield "data: DONE\n\n"
+13 -2
View File
@@ -14,7 +14,7 @@ from fastapi import APIRouter, FastAPI, HTTPException, Request
from fastapi.responses import RedirectResponse
from starlette.middleware.sessions import SessionMiddleware
from . import api as api_routes, auth, cache, db, webhooks
from . import api as api_routes, auth, cache, db, providers as providers_mod, webhooks
from .bot import Bot
from .config import load_config
from .gitea import Gitea
@@ -32,13 +32,24 @@ async def lifespan(app: FastAPI):
bot = Bot(gitea)
reconciler = cache.Reconciler(config, gitea)
# §18 carryover: the multi-provider LLM abstraction. Provider
# construction can fail (missing key, wrong env value) — if it does,
# the rest of the app still serves; chat endpoints surface a clear
# 503 instead of crashing the process.
try:
providers = providers_mod.load_from_config(config)
except Exception:
log.exception("provider construction failed; chat will be disabled")
providers = {}
app.state.config = config
app.state.gitea = gitea
app.state.bot = bot
app.state.reconciler = reconciler
app.state.providers = providers
app.include_router(_oauth_router(config))
app.include_router(api_routes.make_router(config, gitea, bot))
app.include_router(api_routes.make_router(config, gitea, bot, providers))
app.include_router(webhooks.make_router(config, gitea))
reconciler.start()
+195
View File
@@ -0,0 +1,195 @@
"""Multi-provider LLM abstraction — §18 carryover from the prototype.
Each provider speaks a common interface `send` and `send_streaming`
so the chat layer in `chat.py` is provider-agnostic. Enabled providers
and their API keys are configured via env per the prototype's
`ENABLED_MODELS` contract; per §16 / §19.2, per-RFC model availability
and credential delegation are deferred until the topic is settled.
"""
from __future__ import annotations
from typing import Iterator
class BaseProvider:
name: str = "base"
display_name: str = "Base"
def send(self, system: str, history: list[dict]) -> str:
raise NotImplementedError
def send_streaming(self, system: str, history: list[dict]) -> Iterator[str]:
raise NotImplementedError
# ---------------------------------------------------------------------------
# Anthropic — Claude
# ---------------------------------------------------------------------------
class AnthropicProvider(BaseProvider):
name = "claude"
def __init__(self, api_key: str, model: str = "claude-sonnet-4-6", display_name: str = "Claude"):
import anthropic
self.client = anthropic.Anthropic(api_key=api_key)
self.model = model
self.display_name = display_name
def send(self, system: str, history: list[dict]) -> str:
response = self.client.messages.create(
model=self.model,
max_tokens=4096,
system=system,
messages=history,
)
return response.content[0].text
def send_streaming(self, system: str, history: list[dict]) -> Iterator[str]:
with self.client.messages.stream(
model=self.model,
max_tokens=4096,
system=system,
messages=history,
) as stream:
for text in stream.text_stream:
yield text
# ---------------------------------------------------------------------------
# Google — Gemini
# ---------------------------------------------------------------------------
class GeminiProvider(BaseProvider):
name = "gemini"
def __init__(self, api_key: str, model: str = "gemini-1.5-pro", display_name: str = "Gemini"):
import google.generativeai as genai
genai.configure(api_key=api_key)
self._genai = genai
self.model_name = model
self.display_name = display_name
def _build_model(self, system: str):
return self._genai.GenerativeModel(model_name=self.model_name, system_instruction=system)
def _convert_history(self, history: list[dict]) -> list[dict]:
return [
{"role": "user" if msg["role"] == "user" else "model", "parts": [msg["content"]]}
for msg in history
]
def send(self, system: str, history: list[dict]) -> str:
model = self._build_model(system)
prior = self._convert_history(history[:-1])
chat = model.start_chat(history=prior)
response = chat.send_message(history[-1]["content"])
return response.text
def send_streaming(self, system: str, history: list[dict]) -> Iterator[str]:
model = self._build_model(system)
prior = self._convert_history(history[:-1])
chat = model.start_chat(history=prior)
response = chat.send_message(history[-1]["content"], stream=True)
for chunk in response:
if chunk.text:
yield chunk.text
# ---------------------------------------------------------------------------
# OpenAI-compatible — OpenAI, Copilot, or any compatible endpoint
# ---------------------------------------------------------------------------
class OpenAIProvider(BaseProvider):
name = "openai"
def __init__(self, api_key: str, model: str = "gpt-4o", base_url: str | None = None, display_name: str = "Copilot"):
from openai import OpenAI
self.client = OpenAI(api_key=api_key, base_url=base_url or "https://api.openai.com/v1")
self.model = model
self.display_name = display_name
def _messages(self, system: str, history: list[dict]) -> list[dict]:
return [{"role": "system", "content": system}] + [
{"role": msg["role"], "content": msg["content"]} for msg in history
]
def send(self, system: str, history: list[dict]) -> str:
response = self.client.chat.completions.create(
model=self.model, max_tokens=4096, messages=self._messages(system, history)
)
return response.choices[0].message.content
def send_streaming(self, system: str, history: list[dict]) -> Iterator[str]:
stream = self.client.chat.completions.create(
model=self.model, max_tokens=4096, messages=self._messages(system, history), stream=True
)
for chunk in stream:
delta = chunk.choices[0].delta.content
if delta:
yield delta
# ---------------------------------------------------------------------------
# Variants and factory — preserved from the prototype to keep the contract.
# ---------------------------------------------------------------------------
_CLAUDE_VARIANTS: dict[str, tuple[str, str]] = {
"claude": ("claude-sonnet-4-6", "Claude"),
"claude-sonnet": ("claude-sonnet-4-6", "Claude Sonnet"),
"claude-opus": ("claude-opus-4-6", "Claude Opus"),
"claude-haiku": ("claude-haiku-4-5-20251001", "Claude Haiku"),
}
_GEMINI_VARIANTS: dict[str, tuple[str, str]] = {
"gemini": ("gemini-1.5-pro", "Gemini"),
"gemini-pro": ("gemini-1.5-pro", "Gemini Pro"),
"gemini-flash": ("gemini-1.5-flash", "Gemini Flash"),
"gemini-2-flash": ("gemini-2.0-flash", "Gemini 2 Flash"),
}
def load_providers(env: dict) -> dict[str, BaseProvider]:
"""Instantiate enabled providers from env — same contract as the prototype."""
enabled = [m.strip() for m in env.get("ENABLED_MODELS", "claude").split(",") if m.strip()]
providers: dict[str, BaseProvider] = {}
anthropic_key = env.get("ANTHROPIC_API_KEY") or ""
google_key = env.get("GOOGLE_API_KEY") or ""
openai_key = env.get("OPENAI_API_KEY") or ""
for key in enabled:
prefix = key.upper().replace("-", "_")
if key in _CLAUDE_VARIANTS and anthropic_key:
default_model, default_name = _CLAUDE_VARIANTS[key]
providers[key] = AnthropicProvider(
api_key=anthropic_key,
model=env.get(f"{prefix}_MODEL", default_model),
display_name=env.get(f"{prefix}_DISPLAY_NAME", default_name),
)
elif key in _GEMINI_VARIANTS and google_key:
default_model, default_name = _GEMINI_VARIANTS[key]
providers[key] = GeminiProvider(
api_key=google_key,
model=env.get(f"{prefix}_MODEL", default_model),
display_name=env.get(f"{prefix}_DISPLAY_NAME", default_name),
)
elif key == "openai" and openai_key:
providers["openai"] = OpenAIProvider(
api_key=openai_key,
model=env.get("OPENAI_MODEL", "gpt-4o"),
base_url=env.get("OPENAI_BASE_URL"),
display_name=env.get("OPENAI_DISPLAY_NAME", "Copilot"),
)
return providers
def load_from_config(config) -> dict[str, BaseProvider]:
"""Convenience adapter so callers can pass our Config dataclass directly."""
env = {
"ENABLED_MODELS": ",".join(config.enabled_models),
"ANTHROPIC_API_KEY": config.anthropic_api_key,
"GOOGLE_API_KEY": config.google_api_key,
"OPENAI_API_KEY": config.openai_api_key,
}
return load_providers(env)
+27 -8
View File
@@ -10,11 +10,12 @@ from __future__ import annotations
import hashlib
import hmac
import json
import logging
from fastapi import APIRouter, Header, HTTPException, Request
from . import cache
from . import cache, db
from .config import Config
from .gitea import Gitea
@@ -47,14 +48,25 @@ def make_router(config: Config, gitea: Gitea) -> APIRouter:
if event not in EVENTS_OF_INTEREST:
return {"ok": True, "ignored": event}
# Slice 1 only acts on meta-repo events; per-RFC-repo events
# land in their respective slices. The handler is generous in
# what it accepts — any meta-repo change is a cue to refresh
# the whole meta-repo cache, since the cache is small and the
# refresh is idempotent.
# Identify the originating repo. For the meta repo we refresh
# the entry cache + meta-PR cache; for a per-RFC repo we refresh
# just that repo's branches/PRs/main body. The handler stays
# generous in what it accepts — refreshes are idempotent and
# small enough that overlapping events do not pile up.
try:
await cache.refresh_meta_repo(config, gitea)
await cache.refresh_meta_pulls(config, gitea)
payload = json.loads(body) if body else {}
except Exception:
payload = {}
repo_full = (payload.get("repository") or {}).get("full_name") or ""
meta_full = f"{config.gitea_org}/{config.meta_repo}"
try:
if repo_full == meta_full or not repo_full:
await cache.refresh_meta_repo(config, gitea)
await cache.refresh_meta_pulls(config, gitea)
else:
slug = _slug_for_repo(repo_full)
if slug:
await cache.refresh_rfc_repo(config, gitea, slug)
except Exception:
log.exception("webhook refresh failed")
raise HTTPException(status_code=500, detail="Refresh failed")
@@ -69,3 +81,10 @@ def _verify_signature(body: bytes, header: str, secret: str) -> bool:
return False
expected = hmac.new(secret.encode("utf-8"), body, hashlib.sha256).hexdigest()
return hmac.compare_digest(expected, header)
def _slug_for_repo(repo_full: str) -> str | None:
row = db.conn().execute(
"SELECT slug FROM cached_rfcs WHERE repo = ?", (repo_full,)
).fetchone()
return row["slug"] if row else None