Ports KanzlAI document upload + AI extraction into paliad. PDFs are stored
in Supabase Storage (bucket paliad-documents); Claude Sonnet extracts
deadlines with tool-forced structured output; the user reviews candidates
and picks which to persist as Fristen.
Backend
- internal/services/ai_service.go — Anthropic SDK wrapper. Uses native PDF
content blocks, forced tool_use for structured output, ephemeral prompt
caching on the system prompt. Sonnet 4.6.
- internal/services/storage.go — Supabase Storage REST client (upload,
download, delete). Nil when SUPABASE_SERVICE_KEY is unset.
- internal/services/dokument_service.go — upload (PDF magic-number check,
20 MB cap), list, download, extract, persist-confirmed-as-Fristen. All
visibility-checked through AkteService.GetByID.
- internal/handlers/dokumente.go — five endpoints plus /api/config/features
so the UI can hide disabled buttons.
- internal/handlers/ratelimit.go — in-memory per-user cap of 20 extractions
per UTC day (design §9.7).
- Both optional services (storage, AI) degrade to 501 with friendly German
messages when their env vars are unset.
Schema
- migration 013 adds fristen.source_document_id (FK to dokumente) and
dokumente.ai_extraction_count + ai_extracted_at for the UI badge.
Frontend
- Dokumente tab in /akten/{id}/dokumente replaces the Phase D placeholder:
drag-drop upload zone with live progress bar (XHR), document table with
download + extract actions, extraction-review modal with per-row
checkboxes, confidence chips, expandable source-quote, editable title +
due date + rule code, POST to the from-extraction endpoint.
- Upload + extract buttons hide automatically when the server reports the
feature is disabled.
- Full DE/EN i18n. CSS for the upload zone, extraction modal, and
confidence chips.
Env vars (not set here — flag to head):
- ANTHROPIC_API_KEY (enables extraction)
- SUPABASE_SERVICE_KEY (enables upload/download)
Branch: mai/ritchie/phase-h-ai-deadline
File view: 196 lines, 7.8 KiB, Go
// Package services — Phase H: AI-powered deadline extraction.
//
// AIService wraps the Anthropic SDK to extract deadlines from uploaded court
// documents (PDFs). The service is optional: if ANTHROPIC_API_KEY is unset at
// startup, NewAIService returns nil and the handlers respond with 501.
//
// Design (from docs/design-kanzlai-integration.md §8 Phase H):
//   - Claude Sonnet for cost/quality balance; PDF sent directly as a document
//     content block (no preprocessing needed).
//   - Tool-forced structured output: a single `extract_deadlines` tool with a
//     strict schema, with `ToolChoice` pinned to it so the response is always
//     parseable JSON.
//   - System prompt is cached (ephemeral, 5m TTL) so repeat extractions in
//     the same session hit the cache and pay ~1/10 the prompt-token cost.
package services

import (
	"context"
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"

	"github.com/anthropics/anthropic-sdk-go"
	"github.com/anthropics/anthropic-sdk-go/option"
)

// ErrAIDisabled is returned when an AIService method is invoked on a nil
// (disabled) service, i.e. ANTHROPIC_API_KEY was unset at startup. Handlers
// should map this to 501 Not Implemented; compare with errors.Is.
var ErrAIDisabled = errors.New("AI extraction not configured")

// AIService performs Claude-backed deadline extraction on document uploads.
|
||
// A nil pointer is a valid "disabled" state — the handlers check for it.
|
||
type AIService struct {
|
||
client anthropic.Client
|
||
model anthropic.Model
|
||
}
|
||
|
||
// NewAIService constructs the service. Returns nil if apiKey is empty so the
|
||
// caller can store the nil and let handlers return 501.
|
||
func NewAIService(apiKey string) *AIService {
|
||
if apiKey == "" {
|
||
return nil
|
||
}
|
||
client := anthropic.NewClient(option.WithAPIKey(apiKey))
|
||
return &AIService{
|
||
client: client,
|
||
// Sonnet 4.6 — cheapest capable model for this task. Sonnet handles
|
||
// legal German + English well and is materially cheaper than Opus.
|
||
model: anthropic.ModelClaudeSonnet4_6,
|
||
}
|
||
}
|
||
|
||
// ExtractedDeadline is one item returned by Claude for the user to review
// before persisting. The JSON field names match the properties of the
// extract_deadlines tool schema, so tool output decodes directly into it.
type ExtractedDeadline struct {
	Title       string  `json:"title"`        // short actionable label
	DueDate     string  `json:"due_date"`     // YYYY-MM-DD, or "" when only a duration is given
	RuleCode    string  `json:"rule_code"`    // legal rule reference, or "" if none
	Confidence  float64 `json:"confidence"`   // model-reported score; schema bounds it to 0..1
	SourceQuote string  `json:"source_quote"` // passage the deadline was taken from
}

type extractDeadlinesToolInput struct {
|
||
Deadlines []ExtractedDeadline `json:"deadlines"`
|
||
}
|
||
|
||
// extractDeadlinesTool is the tool schema Claude must call. Using forced
|
||
// tool use guarantees structured JSON output without regex parsing.
|
||
var extractDeadlinesTool = anthropic.ToolParam{
|
||
Name: "extract_deadlines",
|
||
Description: anthropic.String("Record every legal deadline, hearing date, or filing obligation identified in the document."),
|
||
InputSchema: anthropic.ToolInputSchemaParam{
|
||
Properties: map[string]any{
|
||
"deadlines": map[string]any{
|
||
"type": "array",
|
||
"description": "List of extracted deadlines, hearings, or filing obligations actionable by a patent lawyer.",
|
||
"items": map[string]any{
|
||
"type": "object",
|
||
"properties": map[string]any{
|
||
"title": map[string]any{
|
||
"type": "string",
|
||
"description": "Short actionable label (e.g. 'Statement of Defence', 'Oral hearing', 'Reply to Counterclaim').",
|
||
},
|
||
"due_date": map[string]any{
|
||
"type": "string",
|
||
"description": "Absolute due date in YYYY-MM-DD format if determinable from the document. Empty string if the document only gives a duration without a trigger date.",
|
||
},
|
||
"rule_code": map[string]any{
|
||
"type": "string",
|
||
"description": "Legal rule reference if identifiable (e.g. 'Rule 23 RoP', 'Rule 222 RoP', '§ 276 ZPO'). Empty string if none.",
|
||
},
|
||
"confidence": map[string]any{
|
||
"type": "number",
|
||
"minimum": 0,
|
||
"maximum": 1,
|
||
"description": "Confidence score 0.0 to 1.0. Only include items with confidence above 0.4.",
|
||
},
|
||
"source_quote": map[string]any{
|
||
"type": "string",
|
||
"description": "Exact quote from the document where the deadline was identified, up to ~280 characters.",
|
||
},
|
||
},
|
||
"required": []string{"title", "due_date", "rule_code", "confidence", "source_quote"},
|
||
},
|
||
},
|
||
},
|
||
Required: []string{"deadlines"},
|
||
},
|
||
}
|
||
|
||
// extractionSystemPrompt is the system prompt for deadline extraction. It is
// sent with an ephemeral cache-control marker, so the content is kept
// intentionally stable: any edit changes the cached prefix.
//
// NOTE(review): prompt caching only takes effect above a model-specific
// minimum prefix length — confirm that this prompt plus the tool schema
// actually reaches it, otherwise the cache marker is a no-op.
const extractionSystemPrompt = `You are a patent-law deadline extractor for German and UPC (Unified Patent Court) litigation.

Your task: identify every deadline, hearing date, or filing obligation in the provided document that is actionable by a patent lawyer.

For each item, return:
- A short actionable title (e.g. "Statement of Defence", "Oral hearing", "Reply to Counterclaim").
- The absolute due date in YYYY-MM-DD format when determinable. If the document only gives a duration ("within 2 months of service") without a concrete trigger date, return an empty string.
- The legal rule reference if identifiable (e.g. "Rule 23 RoP", "§ 276 ZPO"). Empty if none.
- A confidence score 0.0–1.0. Only include items with confidence above 0.4.
- The exact source quote from the document (up to ~280 characters) where the deadline appears.

Rules:
1. Only report items a patent lawyer would actually put in the Fristenkalender. Skip generic references like "the court may schedule a hearing in due course" unless a specific date or window is given.
2. Do not invent dates. If the document says "within 3 months" but no trigger date is visible, leave due_date empty.
3. Cite the original passage verbatim. Do not paraphrase the source_quote.
4. Return bilingual documents correctly (UPC documents often mix DE/EN).
5. If the document contains no deadlines, return an empty array.

Always call the extract_deadlines tool.`

// ExtractDeadlines sends the PDF bytes to Claude and returns the parsed
|
||
// deadline candidates. Caller handles user review + persistence.
|
||
//
|
||
// The pdf must be a valid PDF byte slice. A nil/empty slice is a programming
|
||
// error (handlers validate the magic number before calling this).
|
||
func (s *AIService) ExtractDeadlines(ctx context.Context, pdfData []byte) ([]ExtractedDeadline, error) {
|
||
if s == nil {
|
||
return nil, ErrAIDisabled
|
||
}
|
||
if len(pdfData) == 0 {
|
||
return nil, fmt.Errorf("empty pdf data")
|
||
}
|
||
|
||
encoded := base64.StdEncoding.EncodeToString(pdfData)
|
||
userContent := []anthropic.ContentBlockParamUnion{
|
||
{OfDocument: &anthropic.DocumentBlockParam{
|
||
Source: anthropic.DocumentBlockParamSourceUnion{
|
||
OfBase64: &anthropic.Base64PDFSourceParam{
|
||
Data: encoded,
|
||
},
|
||
},
|
||
}},
|
||
anthropic.NewTextBlock("Extract every patent-law-relevant deadline, hearing, or filing obligation from this document. Call the extract_deadlines tool."),
|
||
}
|
||
|
||
msg, err := s.client.Messages.New(ctx, anthropic.MessageNewParams{
|
||
Model: s.model,
|
||
MaxTokens: 4096,
|
||
System: []anthropic.TextBlockParam{
|
||
{
|
||
Text: extractionSystemPrompt,
|
||
CacheControl: anthropic.NewCacheControlEphemeralParam(),
|
||
},
|
||
},
|
||
Messages: []anthropic.MessageParam{
|
||
anthropic.NewUserMessage(userContent...),
|
||
},
|
||
Tools: []anthropic.ToolUnionParam{
|
||
{OfTool: &extractDeadlinesTool},
|
||
},
|
||
ToolChoice: anthropic.ToolChoiceParamOfTool("extract_deadlines"),
|
||
})
|
||
if err != nil {
|
||
return nil, fmt.Errorf("claude API call: %w", err)
|
||
}
|
||
|
||
for _, block := range msg.Content {
|
||
if block.Type == "tool_use" && block.Name == "extract_deadlines" {
|
||
var input extractDeadlinesToolInput
|
||
if err := json.Unmarshal(block.Input, &input); err != nil {
|
||
return nil, fmt.Errorf("parsing tool output: %w", err)
|
||
}
|
||
if input.Deadlines == nil {
|
||
return []ExtractedDeadline{}, nil
|
||
}
|
||
return input.Deadlines, nil
|
||
}
|
||
}
|
||
|
||
return nil, fmt.Errorf("claude response had no tool_use block")
|
||
}
|