From d86cac0b53f638afe1b3fb2629912be96be6105d Mon Sep 17 00:00:00 2001 From: mAi Date: Thu, 21 May 2026 15:23:24 +0200 Subject: [PATCH] =?UTF-8?q?feat(submissions):=20t-paliad-230=20format-only?= =?UTF-8?q?=20.dotm=E2=86=92.docx=20convert?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit m's 2026-05-21 scope reduction of the t-paliad-215 submission generator: ship a demo that hands the lawyer the firm style template as a clean .docx. No variable-merge engine, no per-submission template registry, no fallback chain — the merge slice is deferred to a future task. Replaces the previous engine (template registry + variable bag + {{placeholder}} renderer + dual project_events/documents writes) with: * services.ConvertDotmToDocx — single-function .dotm/.docm/.dotx → .docx format converter that strips word/vbaProject.bin, word/vbaData.xml, word/customizations.xml, and word/_rels/vbaProject.bin.rels, rewrites [Content_Types].xml (demotes the macro/template main type to plain docx, drops the .bin Default Extension and the macro Overrides), and rewrites word/_rels/document.xml.rels to drop the vbaProject + keyMapCustomizations relationships. Idempotent on a plain .docx. archive/zip + regexp stdlib only — no new third-party dependencies. * handlers/submissions.go — POST /api/projects/{id}/submissions/{code}/generate fetches the cached HL Patents Style .dotm (via a new fetchHLPatentsStyleBytes accessor on files.go that shares the same cache as /files/{slug}), converts, writes one paliad.system_audit_log row (event_type='submission.generated', metadata={submission_code, rule_name, filename}), and streams the .docx as an attachment. GET /api/projects/{id}/submissions still lists filing rules but has_template is unconditionally true (one universal template). * Filename per design §7: {rule.name}-{project.case_number}-{YYYY-MM-DD}.docx, with Umlauts ASCII-folded and slashes → underscores. 
Drops services/submission_templates.go, services/submission_vars.go, and the wiring in cmd/server/main.go + handlers/handlers.go that bound them together. Frontend client switched to POST. Verified the converter against the real HL Patents Style.dotm (361 KB input → 243 KB output, 46 parts in output zip): unzip -tq /tmp/hl-patents-style.converted.docx → No errors python3 -c "import zipfile, xml.etree.ElementTree as ET; \ z=zipfile.ZipFile('/tmp/hl-patents-style.converted.docx'); \ [ET.fromstring(z.read(p)) for p in z.namelist() if p.endswith('.xml')]" uv run --with python-docx python3 -c "import docx; \ d=docx.Document('/tmp/hl-patents-style.converted.docx'); \ print(len(d.paragraphs), 'paragraphs', len(d.styles), 'styles')" → 236 paragraphs, 168 styles, 1 section All assertions passed: every Override in [Content_Types].xml resolves to a real part, every internal Target in document.xml.rels resolves, zero macro-related residue, and the document body + styles + theme survive untouched. go test -run TestBootSmoke ./cmd/server/... clean (route additions register without conflict on the Go ServeMux). --- cmd/server/main.go | 17 +- frontend/src/client/submissions.ts | 2 +- internal/handlers/files.go | 40 ++ internal/handlers/handlers.go | 27 +- internal/handlers/submissions.go | 331 +++++------- internal/services/submission_render.go | 399 +++++--------- internal/services/submission_render_test.go | 543 +++++++------------ internal/services/submission_templates.go | 442 ---------------- internal/services/submission_vars.go | 559 -------------------- 9 files changed, 537 insertions(+), 1823 deletions(-) delete mode 100644 internal/services/submission_templates.go delete mode 100644 internal/services/submission_vars.go diff --git a/cmd/server/main.go b/cmd/server/main.go index e445ed3..fc9bf50 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -229,19 +229,10 @@ func main() { // Nil-safe: empty firm row falls back to the factory layout. 
svcBundle.DashboardLayout.SetFirmDefaultService(svcBundle.FirmDashboardDefault) - // t-paliad-215 Slice 1 — submission generator. Three services - // stitched together by handlers/submissions.go: registry pulls - // templates from Gitea (reuses GITEA_TOKEN env), vars builds - // the placeholder map from project + parties + rule, renderer - // merges {{placeholder}} tokens into the .docx. - svcBundle.SubmissionRegistry = services.NewTemplateRegistry(giteaToken, branding.Name) - svcBundle.SubmissionVars = services.NewSubmissionVarsService( - pool, - svcBundle.Project, - svcBundle.Party, - svcBundle.Users, - ) - svcBundle.SubmissionRenderer = services.NewSubmissionRenderer() + // t-paliad-230 — submission generator (format-only). No + // service wiring needed: handlers/submissions.go reuses the + // existing files.go HL Patents Style cache and calls + // services.ConvertDotmToDocx (stateless function). // Paliadin backend selection. // diff --git a/frontend/src/client/submissions.ts b/frontend/src/client/submissions.ts index c3b98a3..9aa561f 100644 --- a/frontend/src/client/submissions.ts +++ b/frontend/src/client/submissions.ts @@ -159,7 +159,7 @@ async function onGenerateClick(btn: HTMLButtonElement): Promise { try { const url = `/api/projects/${projectID}/submissions/${encodeURIComponent(code)}/generate`; - const resp = await fetch(url, { method: "GET" }); + const resp = await fetch(url, { method: "POST" }); if (!resp.ok) { let detail = ""; try { diff --git a/internal/handlers/files.go b/internal/handlers/files.go index fd9f8b1..62e03cb 100644 --- a/internal/handlers/files.go +++ b/internal/handlers/files.go @@ -1,6 +1,7 @@ package handlers import ( + "context" "encoding/json" "fmt" "io" @@ -117,6 +118,45 @@ func handleFileRefresh(w http.ResponseWriter, r *http.Request) { writeJSON(w, http.StatusOK, map[string]string{"ok": "true", "message": "Cache cleared"}) } +// fetchHLPatentsStyleBytes returns the cached HL Patents Style .dotm +// bytes. 
Shared accessor used by both the /files/{slug} download path +// (Word auto-update channel) and the submission generator +// (handlers/submissions.go) so a refresh through one path is visible to +// the other. First call warms the cache from Gitea synchronously; +// subsequent calls are sub-millisecond. A stale-but-present cache is +// returned immediately while a background refresh runs. +func fetchHLPatentsStyleBytes(ctx context.Context) ([]byte, error) { + entry, ok := fileRegistry[hlPatentsStyleSlug] + if !ok { + return nil, fmt.Errorf("file proxy: %s not registered", hlPatentsStyleSlug) + } + ce := getCacheEntry(hlPatentsStyleSlug) + + ce.mu.RLock() + hasData := len(ce.data) > 0 + needsCheck := time.Since(ce.lastChecked) >= checkInterval + ce.mu.RUnlock() + + if !hasData { + if err := fileFetch(ce, entry); err != nil { + return nil, err + } + } else if needsCheck { + go fileCheckAndRefresh(ce, entry) + } + + ce.mu.RLock() + defer ce.mu.RUnlock() + if len(ce.data) == 0 { + return nil, fmt.Errorf("file proxy: %s cache empty after fetch", hlPatentsStyleSlug) + } + out := make([]byte, len(ce.data)) + copy(out, ce.data) + _ = ctx // ctx reserved for future timeout pass-through; fileFetch + // uses the package httpClient timeout today. + return out, nil +} + // fileFetch downloads the file synchronously (first request). func fileFetch(ce *cacheEntry, entry fileEntry) error { sha, _ := giteaLatestSHA(entry) diff --git a/internal/handlers/handlers.go b/internal/handlers/handlers.go index 5fe9fb6..47934c3 100644 --- a/internal/handlers/handlers.go +++ b/internal/handlers/handlers.go @@ -98,15 +98,6 @@ type Services struct { Projection *services.ProjectionService Export *services.ExportService - // Submission generator (t-paliad-215) — Klageerwiderung & - // friends. Three coordinated services: registry fetches templates - // from Gitea; vars builds the placeholder map from project + - // parties + rule; renderer merges the .docx. 
Wired together in - // cmd/server/main.go; nil here when DATABASE_URL is unset. - SubmissionRegistry *services.TemplateRegistry - SubmissionVars *services.SubmissionVarsService - SubmissionRenderer *services.SubmissionRenderer - // Paliadin is wired when DATABASE_URL is set. The concrete backend // is picked in cmd/server/main.go based on PALIADIN_REMOTE_HOST // (remote → mRiver via SSH) or local tmux availability. Stays nil @@ -123,14 +114,6 @@ func Register(mux *http.ServeMux, client *auth.Client, giteaAPIToken string, svc paliadinSvc = svc.Paliadin } - // Submission generator singletons (t-paliad-215). All three or - // none — the handler short-circuits with 503 when any is nil. - if svc != nil { - submissionRegistry = svc.SubmissionRegistry - submissionVars = svc.SubmissionVars - submissionRenderer = svc.SubmissionRenderer - } - if svc != nil { dbSvc = &dbServices{ projects: svc.Project, @@ -323,11 +306,13 @@ func Register(mux *http.ServeMux, client *auth.Client, giteaAPIToken string, svc protected.HandleFunc("POST /api/projects/{id}/timeline/milestone", handleCreateProjectTimelineMilestone) protected.HandleFunc("POST /api/projects/{id}/timeline/anchor", handleProjectTimelineAnchor) protected.HandleFunc("POST /api/projects/{id}/timeline/skip", handleProjectTimelineSkip) - // t-paliad-215 Slice 1 — submission generator. /submissions lists - // the project's filing-type rules with template-availability flags; - // /submissions/{code}/generate streams the rendered .docx. + // t-paliad-230 — submission generator (format-only). /submissions + // lists the project's published filing rules; /generate fetches the + // universal HL Patents Style .dotm, strips the macro project, and + // streams a clean .docx attachment. POST because each generation + // writes an audit row. 
protected.HandleFunc("GET /api/projects/{id}/submissions", handleListProjectSubmissions) - protected.HandleFunc("GET /api/projects/{id}/submissions/{code}/generate", handleGenerateProjectSubmission) + protected.HandleFunc("POST /api/projects/{id}/submissions/{code}/generate", handleGenerateProjectSubmission) // /counterclaim creates a CCR sub-project linked via the new // paliad.projects.counterclaim_of FK (t-paliad-174 Slice 3). protected.HandleFunc("POST /api/projects/{id}/counterclaim", handleCreateProjectCounterclaim) diff --git a/internal/handlers/submissions.go b/internal/handlers/submissions.go index b890ebb..ea3e1aa 100644 --- a/internal/handlers/submissions.go +++ b/internal/handlers/submissions.go @@ -1,24 +1,32 @@ package handlers -// Submission generator HTTP layer (t-paliad-215 Slice 1). +// Submission generator HTTP layer (t-paliad-230 — format-only scope +// reduction of t-paliad-215). // // Endpoints: // // GET /api/projects/{id}/submissions -// Lists the project's proceeding-relevant submission codes -// and reports template availability for each. Powers the -// SubmissionsPanel on the project detail page. +// Lists the project's proceeding-relevant filing rules. +// has_template is unconditionally true: every project gets +// offered the universal HL Patents Style template. // -// GET /api/projects/{id}/submissions/{code}/generate -// Renders the .docx and streams it as an attachment download. -// Writes one paliad.system_audit_log row and one -// paliad.project_events row per generation. No server-side -// binary persistence (design §3, m's Q3 pick). +// POST /api/projects/{id}/submissions/{code}/generate +// Fetches the cached HL Patents Style .dotm (same proxy used +// by /files/hl-patents-style.dotm), converts it to a clean +// .docx via services.ConvertDotmToDocx, writes one +// paliad.system_audit_log row, and streams the result as an +// attachment download. 
+// +// No variable substitution, no per-submission templates, no +// project_events/documents writes. Those layers are deferred to a +// future "merge engine" slice; today's generator hands the lawyer a +// clean .docx of the firm style and lets them edit and save under +// their own filename. // // Visibility: every endpoint runs through ProjectService.GetByID -// (paliad.can_see_project gate). Unauthorised callers get 404, never -// 403 — same convention as the rest of the project surfaces (avoids -// project-existence enumeration). +// (paliad.can_see_project gate). Unauthorised callers get 404 — same +// convention as the rest of the project surfaces (no project-existence +// enumeration). import ( "context" @@ -33,29 +41,26 @@ import ( "github.com/google/uuid" - "mgit.msbls.de/m/paliad/internal/branding" + "mgit.msbls.de/m/paliad/internal/models" "mgit.msbls.de/m/paliad/internal/services" ) -// submissionRenderer + registry + vars are package-level singletons -// wired by Register() once at boot. Stateless rendering + thread-safe -// caches inside the registry mean no per-request construction. -var ( - submissionRenderer *services.SubmissionRenderer - submissionRegistry *services.TemplateRegistry - submissionVars *services.SubmissionVarsService -) - -// submissionRenderTimeout caps a single generate request. Template -// fetch (cache-miss) + rendering of a typical pleading takes well -// under a second; the timeout exists to surface "Gitea is unreachable" -// quickly rather than letting the browser spin. +// submissionRenderTimeout caps a single generate request. .dotm fetch +// is from the in-process cache (sub-millisecond) and the convert step +// is a single zip round-trip; the timeout exists so a cold cache miss +// against Gitea surfaces quickly rather than letting the browser spin. const submissionRenderTimeout = 30 * time.Second // docxMime is the .docx Content-Type per the OOXML spec. 
const docxMime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document" -// submissionListEntry is one row in the SubmissionsPanel. +// hlPatentsStyleSlug names the universal style template inside the +// fileRegistry in files.go. Both surfaces (the /files download for +// Word's auto-update channel and this generator) share the same +// cache entry so a refresh through one path is visible to the other. +const hlPatentsStyleSlug = "hl-patents-style.dotm" + +// submissionListEntry is one row in the Schriftsätze panel. type submissionListEntry struct { SubmissionCode string `json:"submission_code"` Name string `json:"name"` @@ -73,8 +78,10 @@ type submissionListResponse struct { Entries []submissionListEntry `json:"entries"` } -// handleListProjectSubmissions returns the filing-type rules for the -// project's proceeding, annotated with template availability. +// handleListProjectSubmissions returns the published filing rules for +// the project's proceeding_type. has_template is true for every row — +// Slice 1 (t-paliad-230) ships one universal template, so the only +// "no template" case is a project that has no proceeding_type bound. func handleListProjectSubmissions(w http.ResponseWriter, r *http.Request) { if !requireDB(w) { return @@ -83,9 +90,6 @@ func handleListProjectSubmissions(w http.ResponseWriter, r *http.Request) { if !ok { return } - if !requireSubmissionsWired(w) { - return - } projectID, err := uuid.Parse(r.PathValue("id")) if err != nil { writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid project id"}) @@ -123,8 +127,6 @@ func handleListProjectSubmissions(w http.ResponseWriter, r *http.Request) { continue } if rule.EventType == nil || *rule.EventType != "filing" { - // Hearings + decisions don't generate submissions. The - // "Schriftsätze" panel only lists filings. 
continue } if rule.LifecycleState != "published" { @@ -134,7 +136,7 @@ func handleListProjectSubmissions(w http.ResponseWriter, r *http.Request) { SubmissionCode: *rule.SubmissionCode, Name: rule.Name, NameEN: rule.NameEN, - HasTemplate: submissionRegistry.HasTemplate(ctx, *rule.SubmissionCode), + HasTemplate: true, } if rule.EventType != nil { entry.EventType = *rule.EventType @@ -151,9 +153,10 @@ func handleListProjectSubmissions(w http.ResponseWriter, r *http.Request) { writeJSON(w, http.StatusOK, resp) } -// handleGenerateProjectSubmission renders the .docx and streams it -// back to the browser. Audits the generation; never persists the -// rendered bytes server-side. +// handleGenerateProjectSubmission fetches the universal HL Patents +// Style .dotm, converts it to a clean .docx, writes one audit row, and +// streams the result. No variable substitution; the bytes that go down +// the wire are the firm style template with macros stripped. func handleGenerateProjectSubmission(w http.ResponseWriter, r *http.Request) { if !requireDB(w) { return @@ -162,9 +165,6 @@ func handleGenerateProjectSubmission(w http.ResponseWriter, r *http.Request) { if !ok { return } - if !requireSubmissionsWired(w) { - return - } projectID, err := uuid.Parse(r.PathValue("id")) if err != nil { writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid project id"}) @@ -179,209 +179,162 @@ func handleGenerateProjectSubmission(w http.ResponseWriter, r *http.Request) { ctx, cancel := context.WithTimeout(r.Context(), submissionRenderTimeout) defer cancel() - varsResult, err := submissionVars.Build(ctx, services.SubmissionVarsContext{ - UserID: uid, - ProjectID: projectID, - SubmissionCode: submissionCode, - }) + project, err := dbSvc.projects.GetByID(ctx, uid, projectID) if err != nil { - if errors.Is(err, services.ErrSubmissionRuleNotFound) { + writeServiceError(w, err) + return + } + + rule, err := loadPublishedRuleByCode(ctx, submissionCode) + if err != nil { + if 
errors.Is(err, errRuleNotFound) { writeJSON(w, http.StatusNotFound, map[string]string{ "error": fmt.Sprintf("no published rule for submission_code %q", submissionCode), }) return } - writeServiceError(w, err) + log.Printf("submissions: load rule %q: %v", submissionCode, err) + writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "rule lookup failed"}) return } - tmpl, err := submissionRegistry.Resolve(ctx, submissionCode) + dotm, err := fetchHLPatentsStyleBytes(ctx) if err != nil { - if errors.Is(err, services.ErrNoTemplate) { - writeJSON(w, http.StatusServiceUnavailable, map[string]string{ - "error": "no template available for this submission", - "hint": "ask an admin to upload a .docx template under templates/_base/ in mWorkRepo", - }) - return - } - log.Printf("submissions: template resolve for %s: %v", submissionCode, err) - writeJSON(w, http.StatusServiceUnavailable, map[string]string{ - "error": "template repository unreachable", + log.Printf("submissions: fetch HL Patents Style .dotm: %v", err) + writeJSON(w, http.StatusBadGateway, map[string]string{ + "error": "template upstream unreachable", }) return } - missing := services.DefaultMissingMarker(varsResult.Lang) - rendered, err := submissionRenderer.Render(tmpl.Bytes, varsResult.Placeholders, missing) + docx, err := services.ConvertDotmToDocx(dotm) if err != nil { - log.Printf("submissions: render %s for project %s: %v", submissionCode, projectID, err) + log.Printf("submissions: convert dotm for project %s code %s: %v", projectID, submissionCode, err) writeJSON(w, http.StatusInternalServerError, map[string]string{ - "error": "render failed", + "error": "convert failed", }) return } - filename := submissionFileName(varsResult, projectID) + user, err := dbSvc.users.GetByID(ctx, uid) + if err != nil { + log.Printf("submissions: load user %s: %v", uid, err) + } + lang := "de" + if user != nil && user.Lang != "" { + lang = user.Lang + } - // Audit + Verlauf writes. 
Best-effort with a background context so - // the user still receives the download even if the audit insert - // races a slow DB. + filename := submissionFileName(rule, project, lang) + + // Audit write is best-effort with a background context so the + // download still succeeds if the DB races. Audit failure here only + // affects the system_audit_log feed — never the user's response. bgCtx, cancelBG := context.WithTimeout(context.Background(), 10*time.Second) defer cancelBG() - if err := writeSubmissionAuditRow(bgCtx, varsResult, tmpl, submissionCode); err != nil { + if err := writeSubmissionAuditRow(bgCtx, user, project.ID, submissionCode, rule.Name, filename); err != nil { log.Printf("submissions: audit insert failed (project=%s code=%s): %v", projectID, submissionCode, err) } - if err := writeSubmissionProjectEvent(bgCtx, varsResult, tmpl, submissionCode); err != nil { - log.Printf("submissions: project_events insert failed (project=%s code=%s): %v", projectID, submissionCode, err) - } - if err := writeSubmissionDocumentRow(bgCtx, varsResult, tmpl, submissionCode); err != nil { - log.Printf("submissions: documents insert failed (project=%s code=%s): %v", projectID, submissionCode, err) - } w.Header().Set("Content-Type", docxMime) w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename=%q`, filename)) - w.Header().Set("Content-Length", strconv.Itoa(len(rendered))) - w.Header().Set("X-Paliad-Template-Sha", tmpl.SHA) - w.Header().Set("X-Paliad-Template-Tier", tmpl.FirmTier) - if _, err := w.Write(rendered); err != nil { + w.Header().Set("Content-Length", strconv.Itoa(len(docx))) + if _, err := w.Write(docx); err != nil { log.Printf("submissions: response write failed (project=%s code=%s): %v", projectID, submissionCode, err) } } -// requireSubmissionsWired returns false (and writes 503) when the -// generator wasn't constructed at boot. 
Happens in DATABASE_URL-less -// deployments — knowledge-platform-only stacks don't ship the -// submission engine. -func requireSubmissionsWired(w http.ResponseWriter) bool { - if submissionRenderer == nil || submissionRegistry == nil || submissionVars == nil { - writeJSON(w, http.StatusServiceUnavailable, map[string]string{ - "error": "submission generator not configured", - }) - return false +// errRuleNotFound is the sentinel for "no published rule with that +// submission_code" — distinguished from a generic DB error so the +// handler returns 404 instead of 500. +var errRuleNotFound = errors.New("submission rule not found") + +// loadPublishedRuleByCode fetches the rule the user requested. Only +// published+active rows resolve; drafts and archived rules never feed +// a real submission. +func loadPublishedRuleByCode(ctx context.Context, submissionCode string) (*models.DeadlineRule, error) { + if submissionCode == "" { + return nil, errRuleNotFound } - return true + var rule models.DeadlineRule + err := dbSvc.projects.DB().GetContext(ctx, &rule, + `SELECT id, proceeding_type_id, parent_id, submission_code, name, name_en, + description, primary_party, event_type, duration_value, duration_unit, + timing, rule_code, deadline_notes, deadline_notes_en, sequence_order, + alt_duration_value, alt_duration_unit, alt_rule_code, anchor_alt, + concept_id, legal_source, is_spawn, spawn_label, is_active, + created_at, updated_at, lifecycle_state + FROM paliad.deadline_rules + WHERE submission_code = $1 + AND lifecycle_state = 'published' + AND is_active = true + ORDER BY sequence_order + LIMIT 1`, submissionCode) + if err != nil { + if strings.Contains(err.Error(), "no rows") { + return nil, errRuleNotFound + } + return nil, err + } + return &rule, nil } -// submissionFileName builds the user-facing filename per design §7: -// -// {rule.name}-{project.case_number}-{YYYY-MM-DD}.docx -// -// Slashes and backslashes in case_number sanitise to underscores so -// the file saves 
cleanly across Windows + macOS + Linux. Missing -// case_number falls back to an 8-hex-char stable id from the project -// UUID so the file still has a deterministic handle. -func submissionFileName(vars *services.SubmissionVarsResult, projectID uuid.UUID) string { +// submissionFileName produces the user-facing download name per +// design §7: {rule.name}-{project.case_number}-{YYYY-MM-DD}.docx. +// Empty case_number drops the segment entirely (no fallback hash — +// the lawyer can rename if the project lacks an Aktenzeichen). +// Umlauts in the rule name are ASCII-folded by SanitiseSubmissionFileName +// so the file lands cleanly on legacy SMB shares. +func submissionFileName(rule *models.DeadlineRule, project *models.Project, lang string) string { day := time.Now() if loc, err := time.LoadLocation("Europe/Berlin"); err == nil { day = day.In(loc) } - ruleName := strings.TrimSpace(vars.Rule.Name) - if strings.EqualFold(vars.Lang, "en") { - ruleName = strings.TrimSpace(vars.Rule.NameEN) + ruleName := strings.TrimSpace(rule.Name) + if strings.EqualFold(lang, "en") && strings.TrimSpace(rule.NameEN) != "" { + ruleName = strings.TrimSpace(rule.NameEN) } if ruleName == "" { ruleName = "submission" } + parts := []string{services.SanitiseSubmissionFileName(ruleName)} caseNo := "" - if vars.Project != nil && vars.Project.CaseNumber != nil { - caseNo = strings.TrimSpace(*vars.Project.CaseNumber) + if project != nil && project.CaseNumber != nil { + caseNo = strings.TrimSpace(*project.CaseNumber) } - if caseNo == "" { - caseNo = projectID.String()[:8] + if caseNo != "" { + parts = append(parts, services.SanitiseSubmissionFileName(caseNo)) } - caseNo = strings.ReplaceAll(caseNo, "/", "_") - caseNo = strings.ReplaceAll(caseNo, `\`, "_") - return fmt.Sprintf("%s-%s-%s.docx", ruleName, caseNo, day.Format("2006-01-02")) + parts = append(parts, day.Format("2006-01-02")) + return strings.Join(parts, "-") + ".docx" } -// writeSubmissionAuditRow files the org-wide audit entry. 
Reuses the -// system_audit_log convention (event_type='submission.generated') -// established in t-paliad-214's mig 102. -func writeSubmissionAuditRow(ctx context.Context, vars *services.SubmissionVarsResult, tmpl *services.ResolvedTemplate, code string) error { +// writeSubmissionAuditRow files one row in paliad.system_audit_log per +// generation. event_type='submission.generated', scope='project', +// scope_root=project_id. Metadata is intentionally small per Slice 1: +// {submission_code, rule_name, filename} — enough for a reviewer to +// reconstruct which template was offered to which project without +// over-baking the audit shape. +func writeSubmissionAuditRow(ctx context.Context, user *models.User, projectID uuid.UUID, submissionCode, ruleName, filename string) error { meta := map[string]any{ - "submission_code": code, - "template_path": tmpl.Path, - "template_sha": tmpl.SHA, - "template_tier": tmpl.FirmTier, - "project_id": vars.Project.ID.String(), - "rule_id": vars.Rule.ID.String(), - "firm": branding.Name, + "submission_code": submissionCode, + "rule_name": ruleName, + "filename": filename, } body, _ := json.Marshal(meta) + var ( + actorID any + actorEmail string + ) + if user != nil { + actorID = user.ID + actorEmail = user.Email + } _, err := dbSvc.projects.DB().ExecContext(ctx, `INSERT INTO paliad.system_audit_log (event_type, actor_id, actor_email, scope, scope_root, metadata) VALUES ('submission.generated', $1, $2, 'project', $3, $4::jsonb)`, - vars.User.ID, vars.User.Email, vars.Project.ID.String(), string(body), - ) - return err -} - -// writeSubmissionProjectEvent surfaces the generation in the project -// Verlauf / SmartTimeline. event_type stays free-text (no CHECK on -// paliad.project_events.event_type per Slice 2 of SmartTimeline) so we -// don't need a migration to introduce 'submission_generated'. 
-func writeSubmissionProjectEvent(ctx context.Context, vars *services.SubmissionVarsResult, tmpl *services.ResolvedTemplate, code string) error { - ruleName := strings.TrimSpace(vars.Rule.Name) - if strings.EqualFold(vars.Lang, "en") { - ruleName = strings.TrimSpace(vars.Rule.NameEN) - } - title := fmt.Sprintf("%s generiert", ruleName) - if strings.EqualFold(vars.Lang, "en") { - title = fmt.Sprintf("%s generated", ruleName) - } - meta := map[string]any{ - "submission_code": code, - "template_path": tmpl.Path, - "template_sha": tmpl.SHA, - "template_tier": tmpl.FirmTier, - "rule_id": vars.Rule.ID.String(), - } - body, _ := json.Marshal(meta) - now := time.Now().UTC() - _, err := dbSvc.projects.DB().ExecContext(ctx, - `INSERT INTO paliad.project_events - (id, project_id, event_type, title, description, event_date, - created_by, metadata, created_at, updated_at) - VALUES ($1, $2, 'submission_generated', $3, NULL, $4, $5, $6::jsonb, $4, $4)`, - uuid.New(), vars.Project.ID, title, now, vars.User.ID, string(body), - ) - return err -} - -// writeSubmissionDocumentRow files the audit-only paliad.documents -// row. file_path stays NULL — the bytes are regenerable from inputs -// (m's Q3 pick: no server-side binary). doc_type='generated_submission' -// is the additive marker; no CHECK constraint exists on doc_type, so -// this requires no migration. 
-func writeSubmissionDocumentRow(ctx context.Context, vars *services.SubmissionVarsResult, tmpl *services.ResolvedTemplate, code string) error { - ruleName := strings.TrimSpace(vars.Rule.Name) - if strings.EqualFold(vars.Lang, "en") { - ruleName = strings.TrimSpace(vars.Rule.NameEN) - } - day := time.Now() - if loc, err := time.LoadLocation("Europe/Berlin"); err == nil { - day = day.In(loc) - } - title := fmt.Sprintf("%s (generiert %s)", ruleName, day.Format("2006-01-02")) - if strings.EqualFold(vars.Lang, "en") { - title = fmt.Sprintf("%s (generated %s)", ruleName, day.Format("2006-01-02")) - } - provenance := map[string]any{ - "submission_code": code, - "template_path": tmpl.Path, - "template_sha": tmpl.SHA, - "template_tier": tmpl.FirmTier, - "firm": branding.Name, - "rule_id": vars.Rule.ID.String(), - } - body, _ := json.Marshal(provenance) - _, err := dbSvc.projects.DB().ExecContext(ctx, - `INSERT INTO paliad.documents - (id, project_id, title, doc_type, file_path, file_size, mime_type, - ai_extracted, uploaded_by, created_at, updated_at) - VALUES ($1, $2, $3, 'generated_submission', NULL, NULL, $4, $5::jsonb, $6, now(), now())`, - uuid.New(), vars.Project.ID, title, docxMime, string(body), vars.User.ID, + actorID, actorEmail, projectID.String(), string(body), ) return err } diff --git a/internal/services/submission_render.go b/internal/services/submission_render.go index 317df0d..552db6d 100644 --- a/internal/services/submission_render.go +++ b/internal/services/submission_render.go @@ -1,27 +1,33 @@ package services -// Submission template renderer — in-house engine for the submission -// generator (t-paliad-215, design doc -// docs/design-submission-generator-2026-05-19.md §6). +// Submission .dotm → .docx converter (t-paliad-230, "format-only" scope +// reduction of the original t-paliad-215 submission generator). 
// -// Design choice — why not lukasjarosch/go-docx: -// The library's "nested placeholder" guard treats sibling placeholders -// inside the same run (e.g. "{{a}} ./. {{b}}") as nested and -// refuses to replace either. Patent submissions routinely have multiple -// placeholders per paragraph (party blocks especially), so the library -// is a non-starter without a custom fork. The in-house renderer below -// is ~150 LoC and handles both the single-run common case and the -// cross-run case (where Word may split a placeholder across runs after -// editing). +// Word .dotm (macro-enabled template), .docm (macro-enabled document), +// .dotx (template, no macros), and .docx (document, no macros) are all +// OOXML zip containers. The macro-bearing variants carry an extra set +// of parts: // -// Placeholder grammar: {{[A-Za-z][A-Za-z0-9_.]*}} with optional -// whitespace inside braces ({{ project.case_number }} ≡ -// {{project.case_number}}). +// word/vbaProject.bin — the VBA project binary +// word/_rels/vbaProject.bin.rels — auxiliary relationships +// word/vbaData.xml — VBA support data +// word/customizations.xml — keyMapCustomizations // -// Missing-value behaviour: when a placeholder has no binding in the -// PlaceholderMap, the renderer emits a marker token so the lawyer sees -// the gap in Word rather than failing the request. See §6.3 of the -// design doc. +// plus a Content-Types override for each of those, a Default extension +// declaring all .bin files as vbaProject, and a different "main" content +// type for word/document.xml itself. +// +// ConvertDotmToDocx walks the zip, drops the macro parts, rewrites +// [Content_Types].xml and word/_rels/document.xml.rels to remove every +// reference to them, and switches the main document content type to +// the plain .docx form. 
Every other part — styles, fonts, theme, +// settings, document body, header/footer/numbering, glossary, custom +// XML — passes through bit-for-bit at the original compression method +// and modification time. +// +// No variable substitution. Today's slice hands the lawyer the firm +// style template as a clean .docx so they can edit and save under +// their own filename. The merge-engine slice is deferred. import ( "archive/zip" @@ -32,110 +38,132 @@ import ( "strings" ) -// PlaceholderMap is the variable bag built by SubmissionVarsService. -// Keys are dotted paths without braces (e.g. "project.case_number"). -// Values are the substituted text — already locale-aware, pretty- -// printed, and sanitised by the caller. -type PlaceholderMap map[string]string +// The four OOXML "main" content types we may see on word/document.xml. +// Anything other than docxMainContentType gets rewritten so the output +// reads as a plain document. +const ( + dotmMainContentType = "application/vnd.ms-word.template.macroEnabledTemplate.main+xml" + docmMainContentType = "application/vnd.ms-word.document.macroEnabled.main+xml" + dotxMainContentType = "application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml" + docxMainContentType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml" +) -// MissingPlaceholderFn translates an unbound placeholder key into the -// in-document marker token. The default in DefaultMissingMarker is -// "[KEIN WERT: ]" / "[NO VALUE: ]" depending on lang. -type MissingPlaceholderFn func(key string) string - -// DefaultMissingMarker returns the standard missing-value marker for -// the given UI language. -func DefaultMissingMarker(lang string) MissingPlaceholderFn { - prefix := "KEIN WERT" - if strings.EqualFold(lang, "en") { - prefix = "NO VALUE" - } - return func(key string) string { - return "[" + prefix + ": " + key + "]" - } +// Macro-related parts dropped wholesale from the output zip. 
+var macroParts = map[string]bool{ + "word/vbaProject.bin": true, + "word/_rels/vbaProject.bin.rels": true, + "word/vbaData.xml": true, + "word/customizations.xml": true, } -// placeholderRegex matches a single placeholder. The capture group -// extracts the key name without braces or surrounding whitespace. -// -// Restricted to [A-Za-z][A-Za-z0-9_.]* so that stray "{{" sequences in -// legal prose (extremely rare in DE/EN court briefs but possible) -// don't get mistaken for placeholders. A genuine placeholder always -// starts with an ASCII letter. -var placeholderRegex = regexp.MustCompile(`\{\{\s*([A-Za-z][A-Za-z0-9_.]*)\s*\}\}`) +const ( + contentTypesPath = "[Content_Types].xml" + documentRelsPath = "word/_rels/document.xml.rels" +) -// SubmissionRenderer renders a .docx template into a .docx output by -// substituting {{placeholder}} tokens with values from a PlaceholderMap. -// Stateless; safe for concurrent use. -type SubmissionRenderer struct{} +// vbaDefaultExtensionRegex matches the `` row in [Content_Types].xml. After +// vbaProject.bin is dropped, the Default is dead weight (and Word will +// flag the file as macro-bearing if it survives). +var vbaDefaultExtensionRegex = regexp.MustCompile( + `\s*]*\bExtension\s*=\s*"bin"[^>]*\bContentType\s*=\s*"application/vnd\.ms-office\.vbaProject"[^>]*/>`, +) -// NewSubmissionRenderer constructs the renderer. -func NewSubmissionRenderer() *SubmissionRenderer { - return &SubmissionRenderer{} -} +// macroOverridePartRegex matches any element +// whose PartName is one of the dropped macro parts. The /word/ +// prefix is the OOXML convention for the absolute part path in +// [Content_Types].xml — file paths in the zip itself omit the leading +// slash. 
+var macroOverridePartRegex = regexp.MustCompile( + `\s*]*\bPartName\s*=\s*"/word/(?:vbaProject\.bin|vbaData\.xml|customizations\.xml)"[^>]*/>`, +) -// Render reads the .docx template at templateBytes, substitutes every -// placeholder from vars (or emits the missing-marker token), and writes -// the result to the returned byte slice. Unknown placeholders never -// fail the render — the lawyer sees the marker in Word and fixes it. -func (r *SubmissionRenderer) Render(templateBytes []byte, vars PlaceholderMap, missing MissingPlaceholderFn) ([]byte, error) { - if missing == nil { - missing = DefaultMissingMarker("de") - } - zr, err := zip.NewReader(bytes.NewReader(templateBytes), int64(len(templateBytes))) +// macroRelTypeRegex matches the two macro-related relationship Types +// in word/_rels/document.xml.rels: vbaProject (binds to vbaProject.bin) +// and keyMapCustomizations (binds to customizations.xml). After both +// targets are dropped, leaving the relationships in would make Word +// flag the file as corrupt. +var macroRelTypeRegex = regexp.MustCompile( + `\s*]*\bType\s*=\s*"http://schemas\.microsoft\.com/office/2006/relationships/(?:vbaProject|keyMapCustomizations)"[^>]*/>`, +) + +// ConvertDotmToDocx rewrites a .dotm (or .docm, or .dotx) zip into a +// clean .docx zip. Idempotent on a zip that is already a plain .docx. +// Returns an error if the input is not a valid zip. 
+func ConvertDotmToDocx(dotmBytes []byte) ([]byte, error) { + zr, err := zip.NewReader(bytes.NewReader(dotmBytes), int64(len(dotmBytes))) if err != nil { - return nil, fmt.Errorf("submission template: open zip: %w", err) + return nil, fmt.Errorf("dotm→docx: open zip: %w", err) } var out bytes.Buffer zw := zip.NewWriter(&out) - defer zw.Close() for _, entry := range zr.File { - body, err := readZipEntry(entry) + if macroParts[entry.Name] { + continue + } + + body, err := readZipFile(entry) if err != nil { - return nil, fmt.Errorf("submission template: read %s: %w", entry.Name, err) + return nil, fmt.Errorf("dotm→docx: read %s: %w", entry.Name, err) } - if isWordXMLEntry(entry.Name) { - body = substituteInDocumentXML(body, vars, missing) + + switch entry.Name { + case contentTypesPath: + body = rewriteContentTypes(body) + case documentRelsPath: + body = rewriteDocumentRels(body) } + w, err := zw.CreateHeader(&zip.FileHeader{ Name: entry.Name, Method: entry.Method, Modified: entry.Modified, }) if err != nil { - return nil, fmt.Errorf("submission template: write header %s: %w", entry.Name, err) + return nil, fmt.Errorf("dotm→docx: write header %s: %w", entry.Name, err) } if _, err := w.Write(body); err != nil { - return nil, fmt.Errorf("submission template: write %s: %w", entry.Name, err) + return nil, fmt.Errorf("dotm→docx: write body %s: %w", entry.Name, err) } } + if err := zw.Close(); err != nil { - return nil, fmt.Errorf("submission template: finalise zip: %w", err) + return nil, fmt.Errorf("dotm→docx: finalise zip: %w", err) } return out.Bytes(), nil } -// isWordXMLEntry returns true for the .docx parts that contain -// substitutable text. We touch document.xml plus header*.xml and -// footer*.xml (templates may put firm letterhead in a header) but -// skip styles, theme, settings, comments, footnotes — none of which -// should carry merge placeholders in a well-formed template. 
-func isWordXMLEntry(name string) bool { - switch { - case name == "word/document.xml": - return true - case strings.HasPrefix(name, "word/header") && strings.HasSuffix(name, ".xml"): - return true - case strings.HasPrefix(name, "word/footer") && strings.HasSuffix(name, ".xml"): - return true - } - return false +// rewriteContentTypes demotes any of the three non-docx "main" content +// types to plain docx, drops the bin Default-Extension entry, and +// drops every Override that targeted a dropped macro part. +// +// String-level substitution rather than encoding/xml: round-tripping +// through Go's XML marshaller would re-emit the document with +// canonical namespace declarations on every child, which Word reads +// but which makes the binary diff unnecessarily large. Direct +// substitution preserves the file's original shape. +func rewriteContentTypes(body []byte) []byte { + body = bytes.ReplaceAll(body, []byte(dotmMainContentType), []byte(docxMainContentType)) + body = bytes.ReplaceAll(body, []byte(docmMainContentType), []byte(docxMainContentType)) + body = bytes.ReplaceAll(body, []byte(dotxMainContentType), []byte(docxMainContentType)) + body = vbaDefaultExtensionRegex.ReplaceAll(body, nil) + body = macroOverridePartRegex.ReplaceAll(body, nil) + return body } -// readZipEntry slurps a zip entry's bytes. -func readZipEntry(f *zip.File) ([]byte, error) { +// rewriteDocumentRels drops the two macro-related relationships from +// word/_rels/document.xml.rels (vbaProject + keyMapCustomizations) so +// the manifest no longer points at parts the zip no longer carries. +// Every other relationship — styles, settings, numbering, theme, +// headers/footers, customXml — passes through untouched. +func rewriteDocumentRels(body []byte) []byte { + return macroRelTypeRegex.ReplaceAll(body, nil) +} + +// readZipFile slurps a zip entry's bytes. 
+func readZipFile(f *zip.File) ([]byte, error) { rc, err := f.Open() if err != nil { return nil, err @@ -144,172 +172,33 @@ func readZipEntry(f *zip.File) ([]byte, error) { return io.ReadAll(rc) } -// substituteInDocumentXML walks document XML and replaces every -// {{placeholder}} occurrence inside text nodes. Handles both -// single-run placeholders (the common case for freshly authored -// templates) and cross-run placeholders (where Word's autocorrect or -// manual editing has split a placeholder across runs). -// -// Two-pass strategy: -// -// 1. Pass 1: replace placeholders that fit entirely within one -// . This is the 99% case and preserves all run-level -// formatting (bold, italic, font runs). -// 2. Pass 2: for paragraphs that still contain orphan "{{" or "}}" -// markers after pass 1, merge the text of every inside the -// paragraph, run the replacement on the merged text, and rewrite -// the paragraph's runs as a single using -// the formatting properties of the first run. Loses intra-paragraph -// formatting on the affected paragraph — but only on paragraphs -// where Word genuinely fragmented a placeholder. -func substituteInDocumentXML(body []byte, vars PlaceholderMap, missing MissingPlaceholderFn) []byte { - replaced := substituteInTextNodes(body, vars, missing) - if !needsCrossRunMerge(replaced) { - return replaced - } - return substituteAcrossRuns(replaced, vars, missing) -} - -// wTextNodeRegex matches one contents element, capturing -// the contents. Attributes on (xml:space="preserve") are preserved -// because the entire match is rewritten. -var wTextNodeRegex = regexp.MustCompile(`]*)?>([^<]*)`) - -// substituteInTextNodes runs the placeholder replacement inside each -// text node independently. Format-preserving for single-run -// placeholders. 
-func substituteInTextNodes(body []byte, vars PlaceholderMap, missing MissingPlaceholderFn) []byte { - return wTextNodeRegex.ReplaceAllFunc(body, func(match []byte) []byte { - sub := wTextNodeRegex.FindSubmatch(match) - attrs := string(sub[1]) - contents := xmlDecode(string(sub[2])) - replaced := replacePlaceholders(contents, vars, missing) - if replaced == contents { - return match +// SanitiseSubmissionFileName cleans a string for use inside a download +// filename — strips path separators and quote characters that would +// break Content-Disposition or confuse browsers across OSes. ASCII-folds +// the small set of German umlaut letters that show up in submission +// names today (Klageerwiderung, Berufungsbegründung, …) so the file +// lands cleanly on legacy SMB shares whose layer is still cp1252. +// Other Unicode is preserved so non-DE/EN names still produce a +// recognisable file. +func SanitiseSubmissionFileName(s string) string { + s = strings.TrimSpace(s) + s = umlautFolder.Replace(s) + s = strings.Map(func(r rune) rune { + switch r { + case '/', '\\': + return '_' + case '"', '\'': + return -1 } - // xml:space="preserve" stays attached whenever the original - // content had leading/trailing whitespace; ensure it's still - // declared after replacement to avoid Word collapsing spaces. - if !strings.Contains(attrs, "xml:space") && - (strings.HasPrefix(replaced, " ") || strings.HasSuffix(replaced, " ")) { - attrs += ` xml:space="preserve"` - } - return []byte(`` + xmlEncode(replaced) + ``) - }) -} - -// needsCrossRunMerge returns true when the body still contains an -// unmatched "{{" or "}}" after pass 1 — a sign that Word fragmented -// the placeholder across runs and pass 1 couldn't touch it. -func needsCrossRunMerge(body []byte) bool { - // Cheap heuristic: count "{{" vs "}}" inside nodes. If we have - // either marker present in the text-node space, pass 2 will handle - // it. (Inside attributes or other XML, the markers don't matter.) 
- for _, m := range wTextNodeRegex.FindAllSubmatch(body, -1) { - t := string(m[2]) - if strings.Contains(t, "{{") || strings.Contains(t, "}}") { - return true - } - } - return false -} - -// wParagraphRegex matches one paragraph block. Greedy -// inner-content match is safe here because elements do not nest -// in WordprocessingML — a paragraph is the leaf container for text. -var wParagraphRegex = regexp.MustCompile(`(?s)]*>.*?`) - -// wRunPropsRegex pulls the first block from a -// paragraph so we can reuse it as the formatting of the merged run. -var wRunPropsRegex = regexp.MustCompile(`(?s).*?`) - -// wParagraphPropsRegex pulls the optional that sits -// at the top of a paragraph (alignment, spacing, etc.). Preserved. -var wParagraphPropsRegex = regexp.MustCompile(`(?s).*?`) - -// substituteAcrossRuns is pass 2: for any paragraph that still has a -// split placeholder, concatenate every text node, run replacement, and -// rewrite the paragraph as a single run using the first run's -// properties. Paragraphs without orphan markers are left untouched so -// run-level formatting survives wherever pass 1 already resolved the -// placeholders. -func substituteAcrossRuns(body []byte, vars PlaceholderMap, missing MissingPlaceholderFn) []byte { - return wParagraphRegex.ReplaceAllFunc(body, func(para []byte) []byte { - textNodes := wTextNodeRegex.FindAllSubmatch(para, -1) - if len(textNodes) == 0 { - return para - } - var merged strings.Builder - for _, m := range textNodes { - merged.WriteString(xmlDecode(string(m[2]))) - } - original := merged.String() - if !strings.Contains(original, "{{") { - // No fragmented placeholder in this paragraph; leave it - // alone so pass 1's run-level edits survive. - return para - } - replaced := replacePlaceholders(original, vars, missing) - if replaced == original { - return para - } - // Preserve paragraph properties (alignment, spacing) and the - // first run's properties (font, bold/italic). 
- pPr := wParagraphPropsRegex.Find(para) - rPr := wRunPropsRegex.Find(para) - var rebuilt bytes.Buffer - rebuilt.WriteString(``) - if pPr != nil { - rebuilt.Write(pPr) - } - rebuilt.WriteString(``) - if rPr != nil { - rebuilt.Write(rPr) - } - rebuilt.WriteString(``) - rebuilt.WriteString(xmlEncode(replaced)) - rebuilt.WriteString(``) - return rebuilt.Bytes() - }) -} - -// replacePlaceholders performs the actual substitution on a plain -// string. Unbound placeholders render the missing marker. -func replacePlaceholders(s string, vars PlaceholderMap, missing MissingPlaceholderFn) string { - return placeholderRegex.ReplaceAllStringFunc(s, func(match string) string { - sub := placeholderRegex.FindStringSubmatch(match) - if len(sub) < 2 { - return match - } - key := sub[1] - if value, ok := vars[key]; ok { - return value - } - return missing(key) - }) -} - -// xmlDecode reverses the small set of escapes used in WordprocessingML -// text content. We don't need a full XML parser — text nodes carry only -// the standard five entities, and Word never emits numeric-character -// references inside for printable content. -func xmlDecode(s string) string { - s = strings.ReplaceAll(s, "<", "<") - s = strings.ReplaceAll(s, ">", ">") - s = strings.ReplaceAll(s, """, `"`) - s = strings.ReplaceAll(s, "'", "'") - s = strings.ReplaceAll(s, "&", "&") + return r + }, s) return s } -// xmlEncode escapes a substituted value for safe insertion back into a -// WordprocessingML text node. & must be replaced first to avoid double -// encoding the entity prefixes we introduce on the other characters. -func xmlEncode(s string) string { - s = strings.ReplaceAll(s, "&", "&") - s = strings.ReplaceAll(s, "<", "<") - s = strings.ReplaceAll(s, ">", ">") - s = strings.ReplaceAll(s, `"`, """) - s = strings.ReplaceAll(s, "'", "'") - return s -} +// umlautFolder turns the four DE umlaut letters (both cases) into ASCII +// digraphs; ß → ss. 
+var umlautFolder = strings.NewReplacer( + "ä", "ae", "ö", "oe", "ü", "ue", + "Ä", "Ae", "Ö", "Oe", "Ü", "Ue", + "ß", "ss", +) diff --git a/internal/services/submission_render_test.go b/internal/services/submission_render_test.go index 284d12a..1b3cd0e 100644 --- a/internal/services/submission_render_test.go +++ b/internal/services/submission_render_test.go @@ -6,392 +6,249 @@ import ( "io" "strings" "testing" + "time" ) -// minimalDOCX builds a tiny .docx zip with one document.xml that -// contains the given body. Just enough to exercise the renderer -// without depending on Word's full OOXML scaffolding. -func minimalDOCX(t *testing.T, documentBody string) []byte { +// minimalDOTM builds a small .dotm zip whose shape mirrors the real +// HL Patents Style template: macro-enabled main content type, Default +// extension declaring .bin as vbaProject, Overrides for vbaData.xml + +// customizations.xml, document.xml.rels with vbaProject + +// keyMapCustomizations relationships, and the four macro parts on +// disk (vbaProject.bin + auxiliary rels + vbaData.xml + +// customizations.xml). +// +// In-memory so the test is self-contained (no checked-in binary). +// Word and LibreOffice would reject this minimal file as incomplete +// (no _rels/.rels root manifest); the tests work at the byte level +// and assert structural properties of the converted output. +func minimalDOTM(t *testing.T) []byte { t.Helper() var buf bytes.Buffer zw := zip.NewWriter(&buf) - w, err := zw.Create("word/document.xml") - if err != nil { - t.Fatalf("create document.xml: %v", err) - } - if _, err := io.WriteString(w, documentBody); err != nil { - t.Fatalf("write document.xml: %v", err) - } - // Drop in a stub Content-Types so the bytes look more like a real - // .docx for any downstream sanity checks; Word doesn't care about - // the content during our unit tests but the shape stays honest. 
- w2, err := zw.Create("[Content_Types].xml") - if err != nil { - t.Fatalf("create content types: %v", err) - } - if _, err := io.WriteString(w2, ``); err != nil { - t.Fatalf("write content types: %v", err) + add := func(name, body string) { + t.Helper() + w, err := zw.CreateHeader(&zip.FileHeader{ + Name: name, + Method: zip.Deflate, + Modified: time.Date(2026, 5, 21, 12, 0, 0, 0, time.UTC), + }) + if err != nil { + t.Fatalf("zip header %s: %v", name, err) + } + if _, err := io.WriteString(w, body); err != nil { + t.Fatalf("write %s: %v", name, err) + } } + + add(contentTypesPath, ``+ + ``+ + ``+ + ``+ + ``+ + ``+ + ``+ + ``+ + ``+ + ``) + + add("word/document.xml", + ``+ + ``+ + `Hello Paliad`) + + add(documentRelsPath, + ``+ + ``+ + ``+ + ``+ + ``+ + ``) + + add("word/styles.xml", ``) + add("word/vbaProject.bin", "PRETEND-VBA-BINARY-PAYLOAD") + add("word/_rels/vbaProject.bin.rels", ``) + add("word/vbaData.xml", ``) + add("word/customizations.xml", ``) + if err := zw.Close(); err != nil { t.Fatalf("close zip: %v", err) } return buf.Bytes() } -// readDocumentXML pulls word/document.xml out of a rendered .docx. 
-func readDocumentXML(t *testing.T, b []byte) string { +func unzipEntries(t *testing.T, data []byte) map[string]string { t.Helper() - zr, err := zip.NewReader(bytes.NewReader(b), int64(len(b))) + zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) if err != nil { - t.Fatalf("open rendered zip: %v", err) + t.Fatalf("open output zip: %v", err) } + out := make(map[string]string, len(zr.File)) for _, f := range zr.File { - if f.Name != "word/document.xml" { - continue - } rc, err := f.Open() if err != nil { - t.Fatalf("open document.xml: %v", err) + t.Fatalf("open %s: %v", f.Name, err) } - defer rc.Close() body, err := io.ReadAll(rc) + rc.Close() if err != nil { - t.Fatalf("read document.xml: %v", err) + t.Fatalf("read %s: %v", f.Name, err) } - return string(body) + out[f.Name] = string(body) } - t.Fatal("rendered .docx had no word/document.xml") - return "" + return out } -// TestRender_SingleRunPlaceholder covers the 99% case: a placeholder -// that sits inside a single text node. -func TestRender_SingleRunPlaceholder(t *testing.T) { - doc := `{{firm.name}}` - tmpl := minimalDOCX(t, doc) - r := NewSubmissionRenderer() - out, err := r.Render(tmpl, PlaceholderMap{"firm.name": "HLC"}, nil) +func TestConvertDotmToDocx_StripsMacroParts(t *testing.T) { + dotm := minimalDOTM(t) + out, err := ConvertDotmToDocx(dotm) if err != nil { - t.Fatalf("render: %v", err) + t.Fatalf("ConvertDotmToDocx: %v", err) } - body := readDocumentXML(t, out) - if !strings.Contains(body, ">HLC<") { - t.Errorf("expected HLC in body, got %q", body) - } - if strings.Contains(body, "{{") { - t.Errorf("unreplaced placeholder marker in body: %q", body) - } -} -// TestRender_MultiplePlaceholdersPerRun is the case go-docx fails on -// — sibling placeholders inside the same run. The in-house -// renderer must handle them. 
-func TestRender_MultiplePlaceholdersPerRun(t *testing.T) { - doc := `{{parties.claimant.name}}, vertreten durch {{parties.claimant.representative}}` - tmpl := minimalDOCX(t, doc) - r := NewSubmissionRenderer() - out, err := r.Render(tmpl, PlaceholderMap{ - "parties.claimant.name": "Acme Inc.", - "parties.claimant.representative": "Kanzlei Müller", - }, nil) - if err != nil { - t.Fatalf("render: %v", err) - } - body := readDocumentXML(t, out) - if !strings.Contains(body, "Acme Inc.") || !strings.Contains(body, "Kanzlei Müller") { - t.Errorf("expected both party values, got %q", body) - } - if strings.Contains(body, "{{") { - t.Errorf("unreplaced placeholder marker in body: %q", body) - } -} + entries := unzipEntries(t, out) -// TestRender_MissingMarker confirms unbound placeholders render the -// missing-value marker instead of failing the request. -func TestRender_MissingMarker(t *testing.T) { - doc := `{{project.case_number}}` - tmpl := minimalDOCX(t, doc) - r := NewSubmissionRenderer() - out, err := r.Render(tmpl, PlaceholderMap{}, DefaultMissingMarker("de")) - if err != nil { - t.Fatalf("render: %v", err) - } - body := readDocumentXML(t, out) - if !strings.Contains(body, "[KEIN WERT: project.case_number]") { - t.Errorf("expected KEIN WERT marker, got %q", body) - } - outEN, err := r.Render(tmpl, PlaceholderMap{}, DefaultMissingMarker("en")) - if err != nil { - t.Fatalf("render en: %v", err) - } - bodyEN := readDocumentXML(t, outEN) - if !strings.Contains(bodyEN, "[NO VALUE: project.case_number]") { - t.Errorf("expected NO VALUE marker, got %q", bodyEN) - } -} - -// TestRender_CrossRunPlaceholder simulates Word fragmenting a -// placeholder across runs (autocorrect or post-edit run-split). -// Pass 2 must catch it. 
-func TestRender_CrossRunPlaceholder(t *testing.T) { - doc := `Hello {{project.case_number}}!` - tmpl := minimalDOCX(t, doc) - r := NewSubmissionRenderer() - out, err := r.Render(tmpl, PlaceholderMap{"project.case_number": "7 O 1234/26"}, nil) - if err != nil { - t.Fatalf("render: %v", err) - } - body := readDocumentXML(t, out) - if !strings.Contains(body, "7 O 1234/26") { - t.Errorf("expected case number after cross-run merge, got %q", body) - } - if strings.Contains(body, "{{") { - t.Errorf("orphan placeholder marker remained: %q", body) - } -} - -// TestRender_XMLEscaping verifies special characters in placeholder -// values are escaped so they don't corrupt the document XML. -func TestRender_XMLEscaping(t *testing.T) { - doc := `{{user.display_name}}` - tmpl := minimalDOCX(t, doc) - r := NewSubmissionRenderer() - out, err := r.Render(tmpl, PlaceholderMap{ - "user.display_name": `Müller & Söhne "Special"`, - }, nil) - if err != nil { - t.Fatalf("render: %v", err) - } - body := readDocumentXML(t, out) - if !strings.Contains(body, "Müller & Söhne <GmbH> "Special"") { - t.Errorf("expected escaped value, got %q", body) - } -} - -// TestRender_PreservesNonWordEntries leaves the rest of the .docx -// untouched so any styles / theme / settings parts come through bit- -// for-bit. 
-func TestRender_PreservesNonWordEntries(t *testing.T) { - doc := `{{firm.name}}` - tmpl := minimalDOCX(t, doc) - r := NewSubmissionRenderer() - out, err := r.Render(tmpl, PlaceholderMap{"firm.name": "HLC"}, nil) - if err != nil { - t.Fatalf("render: %v", err) - } - zr, err := zip.NewReader(bytes.NewReader(out), int64(len(out))) - if err != nil { - t.Fatalf("open rendered: %v", err) - } - var sawTypes bool - for _, f := range zr.File { - if f.Name == "[Content_Types].xml" { - sawTypes = true + for _, name := range []string{ + "word/vbaProject.bin", + "word/_rels/vbaProject.bin.rels", + "word/vbaData.xml", + "word/customizations.xml", + } { + if _, ok := entries[name]; ok { + t.Errorf("output still contains %s", name) } } - if !sawTypes { - t.Error("rendered .docx lost [Content_Types].xml") + if doc, ok := entries["word/document.xml"]; !ok { + t.Error("output is missing word/document.xml") + } else if !strings.Contains(doc, "Hello Paliad") { + t.Errorf("document body lost during conversion: %q", doc) + } + if _, ok := entries["word/styles.xml"]; !ok { + t.Error("output lost unrelated word/styles.xml") + } + + ctypes, ok := entries[contentTypesPath] + if !ok { + t.Fatal("output is missing [Content_Types].xml") + } + if strings.Contains(ctypes, "macroEnabled") { + t.Errorf("output [Content_Types].xml still references a macro-enabled type: %q", ctypes) + } + if !strings.Contains(ctypes, docxMainContentType) { + t.Errorf("output is missing plain docx main content type: %q", ctypes) + } + if strings.Contains(ctypes, "vbaProject") { + t.Errorf("output [Content_Types].xml still references vbaProject: %q", ctypes) + } + if strings.Contains(ctypes, "vbaData") { + t.Errorf("output [Content_Types].xml still overrides vbaData: %q", ctypes) + } + if strings.Contains(ctypes, "keyMapCustomizations") { + t.Errorf("output [Content_Types].xml still overrides customizations: %q", ctypes) + } + if !strings.Contains(ctypes, "wordprocessingml.styles") { + t.Errorf("output lost unrelated 
styles Override: %q", ctypes) + } + + rels, ok := entries[documentRelsPath] + if !ok { + t.Fatal("output is missing word/_rels/document.xml.rels") + } + if strings.Contains(rels, "vbaProject") { + t.Errorf("output rels still references vbaProject: %q", rels) + } + if strings.Contains(rels, "keyMapCustomizations") { + t.Errorf("output rels still references keyMapCustomizations: %q", rels) + } + if !strings.Contains(rels, "styles.xml") { + t.Errorf("output rels lost unrelated styles relationship: %q", rels) } } -// TestPlaceholderRegex_Boundaries pins the placeholder grammar. -func TestPlaceholderRegex_Boundaries(t *testing.T) { - tests := []struct { - in string - matches []string - }{ - {"plain text", nil}, - {"{{foo}}", []string{"{{foo}}"}}, - {"{{ foo }}", []string{"{{ foo }}"}}, - {"{{foo.bar}}", []string{"{{foo.bar}}"}}, - {"{{ foo.bar_baz }}", []string{"{{ foo.bar_baz }}"}}, - {"{{1bad}}", nil}, // must start with a letter - {"{{ foo }} and {{ bar }}", []string{"{{ foo }}", "{{ bar }}"}}, +func TestConvertDotmToDocx_IdempotentOnPlainDocx(t *testing.T) { + var buf bytes.Buffer + zw := zip.NewWriter(&buf) + add := func(name, body string) { + w, err := zw.Create(name) + if err != nil { + t.Fatalf("create %s: %v", name, err) + } + if _, err := io.WriteString(w, body); err != nil { + t.Fatalf("write %s: %v", name, err) + } } - for _, tc := range tests { - t.Run(tc.in, func(t *testing.T) { - got := placeholderRegex.FindAllString(tc.in, -1) - if len(got) != len(tc.matches) { - t.Fatalf("got %d matches, want %d (in=%q)", len(got), len(tc.matches), tc.in) + add(contentTypesPath, ``+ + ``+ + ``+ + ``) + add("word/document.xml", ``) + if err := zw.Close(); err != nil { + t.Fatalf("close: %v", err) + } + + out, err := ConvertDotmToDocx(buf.Bytes()) + if err != nil { + t.Fatalf("ConvertDotmToDocx: %v", err) + } + + entries := unzipEntries(t, out) + if _, ok := entries["word/vbaProject.bin"]; ok { + t.Error("plain docx grew a vbaProject during conversion") + } + if ctypes := 
entries[contentTypesPath]; !strings.Contains(ctypes, docxMainContentType) { + t.Errorf("plain docx lost its content type: %q", ctypes) + } +} + +func TestConvertDotmToDocx_AcceptsDocmAndDotx(t *testing.T) { + for _, mainType := range []string{docmMainContentType, dotxMainContentType} { + t.Run(mainType, func(t *testing.T) { + var buf bytes.Buffer + zw := zip.NewWriter(&buf) + add := func(name, body string) { + w, _ := zw.Create(name) + _, _ = io.WriteString(w, body) } - for i := range got { - if got[i] != tc.matches[i] { - t.Errorf("match %d: got %q, want %q", i, got[i], tc.matches[i]) - } + add(contentTypesPath, ``+ + ``+ + ``+ + ``) + add("word/document.xml", ``) + zw.Close() + out, err := ConvertDotmToDocx(buf.Bytes()) + if err != nil { + t.Fatalf("ConvertDotmToDocx: %v", err) + } + ctypes := unzipEntries(t, out)[contentTypesPath] + if strings.Contains(ctypes, mainType) { + t.Errorf("non-docx main type survived conversion: %q", ctypes) + } + if !strings.Contains(ctypes, docxMainContentType) { + t.Errorf("docx main type not present: %q", ctypes) } }) } } -// TestFamilyOf covers the proceeding-family extraction used by the -// template registry's fallback chain. 
-func TestFamilyOf(t *testing.T) { - tests := map[string]string{ - "de.inf.lg.erwidg": "de.inf.lg", - "upc.inf.cfi.soc": "upc.inf.cfi", - "dpma.opp.dpma": "", // only three segments → no family - "de.inf.lg": "", - "": "", +func TestConvertDotmToDocx_RejectsNonZip(t *testing.T) { + _, err := ConvertDotmToDocx([]byte("not a zip file")) + if err == nil { + t.Fatal("expected error for non-zip input, got nil") } - for in, want := range tests { +} + +func TestSanitiseSubmissionFileName(t *testing.T) { + cases := map[string]string{ + "Klageerwiderung": "Klageerwiderung", + "Berufungsbegründung": "Berufungsbegruendung", + "Schriftsatz/Anlage": "Schriftsatz_Anlage", + `Statement of "Defence"`: "Statement of Defence", + ` Klage `: "Klage", + "Größe": "Groesse", + } + for in, want := range cases { t.Run(in, func(t *testing.T) { - got := familyOf(in) - if got != want { - t.Errorf("familyOf(%q) = %q, want %q", in, got, want) - } - }) - } -} - -// TestLegalSourcePretty covers the prefix table. -func TestLegalSourcePretty(t *testing.T) { - tests := []struct { - src, lang, want string - }{ - {"DE.ZPO.276.1", "de", "§ 276 Abs. 1 ZPO"}, - {"DE.ZPO.276.1", "en", "Section 276(1) ZPO"}, - {"DE.ZPO.253", "de", "§ 253 ZPO"}, - {"DE.ZPO.253", "en", "Section 253 ZPO"}, - {"UPC.RoP.23.1", "de", "Regel 23.1 VerfO UPC"}, - {"UPC.RoP.23.1", "en", "Rule 23.1 RoP UPC"}, - {"UPC.RoP.198", "de", "Regel 198 VerfO UPC"}, - {"DE.PatG.83", "de", "§ 83 PatG"}, - {"EPC.123", "de", "Art. 123 EPÜ"}, - {"EPC.123", "en", "Art. 123 EPC"}, - // Unknown prefix → pass-through unchanged. - {"FOO.BAR.123", "de", "FOO.BAR.123"}, - {"", "de", ""}, - } - for _, tc := range tests { - t.Run(tc.src+"/"+tc.lang, func(t *testing.T) { - got := legalSourcePretty(tc.src, tc.lang) - if got != tc.want { - t.Errorf("legalSourcePretty(%q, %q) = %q, want %q", tc.src, tc.lang, got, tc.want) - } - }) - } -} - -// TestOurSideTranslations pins the our_side enum → DE/EN prose -// mapping used by addProjectVars. 
Post t-paliad-222: seven sub-role -// values + the gender-neutral "-Seite" / "-Partei" suffix shape on -// DE. Legacy 'court' / 'both' yield "" (the column no longer accepts -// them after mig 112, but the function defensively handles stale -// in-memory values from older callers). -func TestOurSideTranslations(t *testing.T) { - cases := []struct { - in, wantDE, wantEN string - }{ - {"claimant", "Klägerseite", "Claimant"}, - {"defendant", "Beklagtenseite", "Defendant"}, - {"applicant", "Antragstellerseite", "Applicant"}, - {"appellant", "Berufungsklägerseite", "Appellant"}, - {"respondent", "Antragsgegnerseite", "Respondent"}, - {"third_party", "Drittpartei", "Third Party"}, - {"other", "sonstige Verfahrensbeteiligte", "other party"}, - {"court", "", ""}, - {"both", "", ""}, - {"", "", ""}, - {"unknown", "", ""}, - } - for _, tc := range cases { - t.Run(tc.in, func(t *testing.T) { - if got := ourSideDE(tc.in); got != tc.wantDE { - t.Errorf("ourSideDE(%q) = %q, want %q", tc.in, got, tc.wantDE) - } - if got := ourSideEN(tc.in); got != tc.wantEN { - t.Errorf("ourSideEN(%q) = %q, want %q", tc.in, got, tc.wantEN) - } - }) - } -} - -// TestTemplateRegistry_Candidates verifies the fallback-chain order -// matches the m-locked Q4 decision (firm → base/code → base/family → -// skeleton). -func TestTemplateRegistry_Candidates(t *testing.T) { - r := NewTemplateRegistry("", "HLC") - got := r.candidates("de.inf.lg.erwidg") - want := []string{ - "templates/HLC/de.inf.lg.erwidg.docx", - "templates/_base/de.inf.lg.erwidg.docx", - "templates/_base/de.inf.lg.docx", - "templates/_base/_skeleton.docx", - } - if len(got) != len(want) { - t.Fatalf("candidates = %v, want %v", got, want) - } - for i := range got { - if got[i] != want[i] { - t.Errorf("candidate[%d] = %q, want %q", i, got[i], want[i]) - } - } -} - -// TestTemplateRegistry_Candidates_NoFamily covers submission codes -// without a family suffix (only three dot-segments). 
-func TestTemplateRegistry_Candidates_NoFamily(t *testing.T) { - r := NewTemplateRegistry("", "HLC") - got := r.candidates("dpma.opp.dpma") - want := []string{ - "templates/HLC/dpma.opp.dpma.docx", - "templates/_base/dpma.opp.dpma.docx", - "templates/_base/_skeleton.docx", - } - if len(got) != len(want) { - t.Fatalf("candidates = %v, want %v", got, want) - } - for i := range got { - if got[i] != want[i] { - t.Errorf("candidate[%d] = %q, want %q", i, got[i], want[i]) - } - } -} - -// TestTemplateRegistry_Tiers labels each candidate slot. Must stay -// 1:1 with candidates(). -func TestTemplateRegistry_Tiers(t *testing.T) { - r := NewTemplateRegistry("", "HLC") - codes := []string{"de.inf.lg.erwidg", "dpma.opp.dpma"} - for _, code := range codes { - c := r.candidates(code) - ts := r.tiers(code) - if len(c) != len(ts) { - t.Fatalf("candidate/tier mismatch for %q: %d vs %d", code, len(c), len(ts)) - } - } -} - -// TestPatentNumberUPC covers the kind-code parenthesisation that UPC -// briefs use (t-paliad-215 Slice 2, design §22 Q-S2-4). -func TestPatentNumberUPC(t *testing.T) { - tests := []struct { - in, want string - }{ - // EP variants — the common case. - {"EP 1 234 567 B1", "EP 1 234 567 (B1)"}, - {"EP 4 056 049 A1", "EP 4 056 049 (A1)"}, - // DE national number with kind code. - {"DE 10 2020 123 456 A1", "DE 10 2020 123 456 (A1)"}, - // No kind code → pass-through unchanged. - {"EP 1 234 567", "EP 1 234 567"}, - // Leading + trailing whitespace trimmed. - {" EP 1 234 567 B1 ", "EP 1 234 567 (B1)"}, - // Empty input. - {"", ""}, - // Slash-separated forms (WO publication numbers) don't match - // the kind-code shape → pass through. - {"WO/2023/123456", "WO/2023/123456"}, - // Two-digit kind code (e.g. B12) doesn't match the single-digit - // pattern; pass through. This is intentional — real EP kind - // codes are single-letter + single-digit. 
- {"EP 1 234 567 B12", "EP 1 234 567 B12"}, - } - for _, tc := range tests { - t.Run(tc.in, func(t *testing.T) { - got := patentNumberUPC(tc.in) - if got != tc.want { - t.Errorf("patentNumberUPC(%q) = %q, want %q", tc.in, got, tc.want) + if got := SanitiseSubmissionFileName(in); got != want { + t.Errorf("SanitiseSubmissionFileName(%q) = %q, want %q", in, got, want) } }) } diff --git a/internal/services/submission_templates.go b/internal/services/submission_templates.go deleted file mode 100644 index b682546..0000000 --- a/internal/services/submission_templates.go +++ /dev/null @@ -1,442 +0,0 @@ -package services - -// Submission template registry — Gitea-backed .docx template loader for -// the submission generator (t-paliad-215, design doc -// docs/design-submission-generator-2026-05-19.md §5). -// -// Layout in mWorkRepo: -// -// templates/{FIRM_NAME}/{submission_code}.docx firm-specific override -// templates/_base/{submission_code}.docx cross-firm baseline -// templates/_base/{family}.docx proceeding-family fallback -// templates/_base/_skeleton.docx ultra-generic fallback -// -// Lookup is first-match-wins down the chain; this is the m-locked Q4 -// decision. Templates fetched via Gitea's raw URL endpoint, cached -// in-process with a 5-minute SHA refresh check — identical pattern to -// the HL Patents Style proxy in internal/handlers/files.go (which the -// design doc §1 verified is in production and works). -// -// Slice 1 ships one template at templates/_base/de.inf.lg.erwidg.docx -// (committed to HL/mWorkRepo at SHA 7f97b7f9, the bootstrap demo -// authored by the engine for end-to-end testing — HLC ships the -// polished version per §14 follow-up). 
- -import ( - "context" - "encoding/json" - "errors" - "fmt" - "io" - "log" - "net/http" - "net/url" - "strings" - "sync" - "time" -) - -const ( - templatesGiteaBaseURL = "https://mgit.msbls.de" - templatesGiteaRepoOwn = "HL" - templatesGiteaRepoName = "mWorkRepo" - templatesGiteaBranch = "main" - templatesCheckInterval = 5 * time.Minute - templatesSkeleton = "_skeleton" -) - -// ErrNoTemplate is returned when no template resolves anywhere in the -// fallback chain (firm/code → base/code → base/family → skeleton). -// Caller maps to 503 + a clear UI hint. -var ErrNoTemplate = errors.New("submission template: no template resolved in fallback chain") - -// ErrTemplateUpstream wraps Gitea-side failures (network, 5xx). -// Distinct from ErrNoTemplate so the handler can render different UI: -// "no template configured" vs "template repo unreachable". -var ErrTemplateUpstream = errors.New("submission template: upstream Gitea unreachable") - -// ResolvedTemplate is the result of a fallback-chain lookup: the -// template bytes plus the metadata the audit row + UI need. -type ResolvedTemplate struct { - // Path is the Gitea-relative path that resolved (e.g. - // "templates/HLC/de.inf.lg.erwidg.docx"). Persisted in the - // system_audit_log row so an admin can trace which template was - // used for a given generation. - Path string - - // SHA is the commit SHA the template was fetched at. Pinning this - // lets audit consumers reproduce the exact bytes that went into - // the lawyer's download. - SHA string - - // FirmTier reports which level of the fallback chain fired: - // "firm", "base_code", "base_family", or "skeleton". Useful for - // the variable-contract sidebar (Slice 3) and for ops monitoring - // of how often each firm is actually overriding. - FirmTier string - - // Bytes is the .docx content; only populated for callers that - // need to render (i.e. SubmissionRenderer.Render). Resolve() - // returns it populated; Probe() leaves it nil. 
- Bytes []byte -} - -// templateCacheEntry mirrors the per-file cache shape used by -// internal/handlers/files.go. Each cached entry tracks its bytes, the -// commit SHA, the last upstream check, and a checking flag so two -// concurrent refresh goroutines don't double-fetch. -type templateCacheEntry struct { - mu sync.RWMutex - data []byte - sha string - lastChecked time.Time - checking bool - missing bool // true when Gitea returned 404 — short-circuits subsequent lookups -} - -// TemplateRegistry resolves submission templates from Gitea using the -// fallback chain. Process-wide cache; single-replica deployment (per -// docs/design-submission-generator-2026-05-19.md §1) makes in-process -// caching sufficient — a future multi-replica rollout would swap this -// for a shared cache. Same trade-off the HL Patents Style proxy makes. -type TemplateRegistry struct { - cache map[string]*templateCacheEntry - cacheMu sync.Mutex - giteaToken string - httpClient *http.Client - firmName string -} - -// NewTemplateRegistry constructs the registry. firmName is read once -// at process start from internal/branding.Name so a runtime FIRM_NAME -// rebrand cuts in on the next deploy, not mid-request. -func NewTemplateRegistry(giteaToken, firmName string) *TemplateRegistry { - return &TemplateRegistry{ - cache: make(map[string]*templateCacheEntry), - giteaToken: giteaToken, - firmName: firmName, - httpClient: &http.Client{Timeout: 30 * time.Second}, - } -} - -// HasTemplate reports whether any template resolves for the given -// submission code, without fetching the bytes. Used by the -// SubmissionsPanel to decide which "Generate" buttons to enable. -// -// Cheap path: walks the same fallback chain as Resolve, but stops at -// the SHA-probe step (Gitea's contents endpoint, single round-trip per -// candidate). The probe results land in the same cache as Resolve so a -// subsequent Resolve call reuses the SHA. 
-func (r *TemplateRegistry) HasTemplate(ctx context.Context, submissionCode string) bool { - for _, candidate := range r.candidates(submissionCode) { - if r.probe(ctx, candidate) { - return true - } - } - return false -} - -// Resolve walks the fallback chain and returns the first template that -// fetches successfully, with bytes loaded. Returns ErrNoTemplate when -// no candidate (including the ultra-generic skeleton) resolves. -func (r *TemplateRegistry) Resolve(ctx context.Context, submissionCode string) (*ResolvedTemplate, error) { - candidates := r.candidates(submissionCode) - tiers := r.tiers(submissionCode) - if len(candidates) != len(tiers) { - return nil, fmt.Errorf("template registry: candidate/tier mismatch (%d vs %d)", len(candidates), len(tiers)) - } - for i, candidate := range candidates { - entry := r.cacheGet(candidate) - entry.mu.RLock() - hasData := !entry.missing && len(entry.data) > 0 - needsCheck := time.Since(entry.lastChecked) >= templatesCheckInterval - isMissing := entry.missing - entry.mu.RUnlock() - - if isMissing && !needsCheck { - continue - } - if !hasData { - if err := r.fetchInto(ctx, candidate, entry); err != nil { - if errors.Is(err, errTemplate404) { - continue - } - return nil, fmt.Errorf("%w: %v", ErrTemplateUpstream, err) - } - } else if needsCheck { - go r.refresh(context.Background(), candidate, entry) - } - - entry.mu.RLock() - out := &ResolvedTemplate{ - Path: candidate, - SHA: entry.sha, - FirmTier: tiers[i], - Bytes: append([]byte(nil), entry.data...), - } - entry.mu.RUnlock() - return out, nil - } - return nil, ErrNoTemplate -} - -// candidates returns the ordered Gitea-relative paths the registry -// walks for the given submission code. The order is the m-locked Q4 -// decision: firm → base/code → base/family → skeleton. 
-func (r *TemplateRegistry) candidates(submissionCode string) []string { - family := familyOf(submissionCode) - out := []string{ - fmt.Sprintf("templates/%s/%s.docx", r.firmName, submissionCode), - fmt.Sprintf("templates/_base/%s.docx", submissionCode), - } - if family != "" && family != submissionCode { - out = append(out, fmt.Sprintf("templates/_base/%s.docx", family)) - } - out = append(out, fmt.Sprintf("templates/_base/%s.docx", templatesSkeleton)) - return out -} - -// tiers labels each candidate with its fallback tier. Order is locked -// to candidates(); both functions evolve together. -func (r *TemplateRegistry) tiers(submissionCode string) []string { - family := familyOf(submissionCode) - out := []string{"firm", "base_code"} - if family != "" && family != submissionCode { - out = append(out, "base_family") - } - out = append(out, "skeleton") - return out -} - -// familyOf extracts the proceeding-family prefix from a submission -// code. The convention (docs/design-proceeding-code-taxonomy-2026-05-18.md) -// is jurisdiction.substantive.forum.submission, so the family is the -// first three dot-segments. -// -// de.inf.lg.erwidg → de.inf.lg -// upc.inf.cfi.soc → upc.inf.cfi -// dpma.opp.dpma → "" (only three segments — no submission suffix) -// -// Returns "" when the code doesn't carry a submission segment (no -// family-level fallback is meaningful). -func familyOf(submissionCode string) string { - parts := strings.Split(submissionCode, ".") - if len(parts) < 4 { - return "" - } - return strings.Join(parts[:3], ".") -} - -// cacheGet returns the cache entry for a Gitea path, creating an empty -// entry on first lookup. -func (r *TemplateRegistry) cacheGet(path string) *templateCacheEntry { - r.cacheMu.Lock() - defer r.cacheMu.Unlock() - entry, ok := r.cache[path] - if !ok { - entry = &templateCacheEntry{} - r.cache[path] = entry - } - return entry -} - -// errTemplate404 is an internal sentinel: candidate doesn't exist in -// Gitea, walk the chain. 
Distinguished from network/5xx errors so the -// registry doesn't wrap every fallback miss as ErrTemplateUpstream. -var errTemplate404 = errors.New("template not found in gitea") - -// fetchInto downloads a candidate and populates the cache entry. On -// 404 it marks the entry missing so subsequent lookups short-circuit -// without hitting the network. -func (r *TemplateRegistry) fetchInto(ctx context.Context, path string, entry *templateCacheEntry) error { - sha, err := r.giteaSHA(ctx, path) - if err != nil { - if errors.Is(err, errTemplate404) { - entry.mu.Lock() - entry.missing = true - entry.lastChecked = time.Now() - entry.mu.Unlock() - } - return err - } - data, err := r.giteaDownload(ctx, path) - if err != nil { - return err - } - entry.mu.Lock() - entry.data = data - entry.sha = sha - entry.lastChecked = time.Now() - entry.missing = false - entry.mu.Unlock() - return nil -} - -// refresh runs in the background after a stale-but-present cache hit. -// SHA-checks the candidate; re-downloads on change. Mirrors the same -// goroutine pattern as internal/handlers/files.go. 
-func (r *TemplateRegistry) refresh(ctx context.Context, path string, entry *templateCacheEntry) { - entry.mu.Lock() - if entry.checking { - entry.mu.Unlock() - return - } - entry.checking = true - entry.mu.Unlock() - - defer func() { - entry.mu.Lock() - entry.checking = false - entry.mu.Unlock() - }() - - latestSHA, err := r.giteaSHA(ctx, path) - if err != nil { - log.Printf("submission template: SHA check for %s failed: %v", path, err) - entry.mu.Lock() - entry.lastChecked = time.Now() - entry.mu.Unlock() - return - } - entry.mu.RLock() - unchanged := latestSHA == entry.sha && entry.sha != "" - entry.mu.RUnlock() - if unchanged { - entry.mu.Lock() - entry.lastChecked = time.Now() - entry.mu.Unlock() - return - } - data, err := r.giteaDownload(ctx, path) - if err != nil { - log.Printf("submission template: download %s failed: %v", path, err) - entry.mu.Lock() - entry.lastChecked = time.Now() - entry.mu.Unlock() - return - } - entry.mu.Lock() - entry.data = data - entry.sha = latestSHA - entry.lastChecked = time.Now() - entry.mu.Unlock() - log.Printf("submission template: updated %s (SHA: %.8s)", path, latestSHA) -} - -// probe is the cheap existence-check used by HasTemplate. Reuses the -// cache but only fetches the SHA (not the bytes), so the -// SubmissionsPanel's per-row HasTemplate calls don't pull a megabyte -// of .docx data the user might never download. 
-func (r *TemplateRegistry) probe(ctx context.Context, path string) bool { - entry := r.cacheGet(path) - entry.mu.RLock() - hasData := !entry.missing && len(entry.data) > 0 - hasSHA := !entry.missing && entry.sha != "" - isMissing := entry.missing - needsCheck := time.Since(entry.lastChecked) >= templatesCheckInterval - entry.mu.RUnlock() - if isMissing && !needsCheck { - return false - } - if hasData || hasSHA { - return true - } - sha, err := r.giteaSHA(ctx, path) - if err != nil { - if errors.Is(err, errTemplate404) { - entry.mu.Lock() - entry.missing = true - entry.lastChecked = time.Now() - entry.mu.Unlock() - } - return false - } - entry.mu.Lock() - entry.sha = sha - entry.lastChecked = time.Now() - entry.missing = false - entry.mu.Unlock() - return true -} - -// giteaSHA returns the SHA of the latest commit that touched the -// template path. Returns errTemplate404 when Gitea responds with 404 — -// the registry distinguishes "no such template" from "Gitea is down". -func (r *TemplateRegistry) giteaSHA(ctx context.Context, path string) (string, error) { - apiURL := fmt.Sprintf("%s/api/v1/repos/%s/%s/commits?path=%s&limit=1&sha=%s", - templatesGiteaBaseURL, - templatesGiteaRepoOwn, - templatesGiteaRepoName, - url.QueryEscape(path), - templatesGiteaBranch, - ) - req, err := http.NewRequestWithContext(ctx, "GET", apiURL, nil) - if err != nil { - return "", err - } - if r.giteaToken != "" { - req.Header.Set("Authorization", "token "+r.giteaToken) - } - resp, err := r.httpClient.Do(req) - if err != nil { - return "", err - } - defer resp.Body.Close() - if resp.StatusCode == http.StatusNotFound { - return "", errTemplate404 - } - if resp.StatusCode != http.StatusOK { - return "", fmt.Errorf("gitea sha lookup returned %d", resp.StatusCode) - } - var commits []struct { - SHA string `json:"sha"` - } - if err := json.NewDecoder(resp.Body).Decode(&commits); err != nil { - return "", err - } - if len(commits) == 0 { - return "", errTemplate404 - } - return 
commits[0].SHA, nil -} - -// giteaDownload fetches the raw template bytes. -func (r *TemplateRegistry) giteaDownload(ctx context.Context, path string) ([]byte, error) { - rawURL := fmt.Sprintf("%s/%s/%s/raw/branch/%s/%s", - templatesGiteaBaseURL, - templatesGiteaRepoOwn, - templatesGiteaRepoName, - templatesGiteaBranch, - path, - ) - req, err := http.NewRequestWithContext(ctx, "GET", rawURL, nil) - if err != nil { - return nil, err - } - if r.giteaToken != "" { - req.Header.Set("Authorization", "token "+r.giteaToken) - } - resp, err := r.httpClient.Do(req) - if err != nil { - return nil, err - } - defer resp.Body.Close() - if resp.StatusCode == http.StatusNotFound { - return nil, errTemplate404 - } - if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("gitea raw returned %d", resp.StatusCode) - } - return io.ReadAll(resp.Body) -} - -// ClearCache drops every cached entry. Exposed for an admin-side -// "refresh templates" affordance — paliad's existing /api/files/refresh -// has the same shape for the HL Patents Style proxy. -func (r *TemplateRegistry) ClearCache() { - r.cacheMu.Lock() - defer r.cacheMu.Unlock() - for k := range r.cache { - r.cache[k] = &templateCacheEntry{} - } -} diff --git a/internal/services/submission_vars.go b/internal/services/submission_vars.go deleted file mode 100644 index 6444e07..0000000 --- a/internal/services/submission_vars.go +++ /dev/null @@ -1,559 +0,0 @@ -package services - -// Submission variable bag — builds the PlaceholderMap that -// SubmissionRenderer fills into a template (t-paliad-215, design doc -// docs/design-submission-generator-2026-05-19.md §6.2). 
-// -// Variables span six namespaces: -// -// firm.* process-wide (branding.Name) -// user.* caller's user row -// today.* server time in Europe/Berlin, locale-aware -// project.* paliad.projects + joined proceeding type -// parties.* paliad.parties grouped by role -// rule.* paliad.deadline_rules row keyed by submission_code -// deadline.* next open paliad.deadlines row for (project, rule), if any -// -// Locale handling: every long-form date string is computed in both DE -// and EN; the renderer picks based on the user's lang preference. The -// rule pretty-printer (legalSourcePretty) also has DE/EN variants. -// -// Visibility: caller passes userID; ProjectService.GetByID enforces -// paliad.can_see_project — unauthorised callers get the standard -// ErrNotFound before any variable construction runs. - -import ( - "context" - "database/sql" - "errors" - "fmt" - "regexp" - "strings" - "time" - - "github.com/google/uuid" - "github.com/jmoiron/sqlx" - - "mgit.msbls.de/m/paliad/internal/branding" - "mgit.msbls.de/m/paliad/internal/models" -) - -// SubmissionVarsService assembles the placeholder map. -type SubmissionVarsService struct { - db *sqlx.DB - projects *ProjectService - parties *PartyService - users *UserService -} - -// NewSubmissionVarsService wires the service. -func NewSubmissionVarsService(db *sqlx.DB, projects *ProjectService, parties *PartyService, users *UserService) *SubmissionVarsService { - return &SubmissionVarsService{ - db: db, - projects: projects, - parties: parties, - users: users, - } -} - -// SubmissionVarsContext is the input bundle that produces a render. -type SubmissionVarsContext struct { - UserID uuid.UUID - ProjectID uuid.UUID - SubmissionCode string -} - -// SubmissionVarsResult bundles the placeholder map with the lookup -// values the handler needs for the audit row + file naming -// (rule.Name, project.case_number, etc.). 
-type SubmissionVarsResult struct { - Placeholders PlaceholderMap - - // Resolved entities for audit + naming. - User *models.User - Project *models.Project - Rule *models.DeadlineRule - ProceedingType *models.ProceedingType - Parties []models.Party - NextDeadline *models.Deadline - - // Lang is the user's UI language used to pick locale-aware values - // during the build. Returned so the renderer can use the matching - // missing-marker function. - Lang string -} - -// ErrSubmissionRuleNotFound is returned when no published deadline_rule -// matches the requested submission_code. Maps to 404 in the handler. -var ErrSubmissionRuleNotFound = errors.New("submission generator: no rule found for submission_code") - -// Build resolves every entity and assembles the placeholder map. -func (s *SubmissionVarsService) Build(ctx context.Context, in SubmissionVarsContext) (*SubmissionVarsResult, error) { - if s.projects == nil || s.users == nil { - return nil, fmt.Errorf("submission vars: required services not wired") - } - - user, err := s.users.GetByID(ctx, in.UserID) - if err != nil { - return nil, err - } - if user == nil { - return nil, ErrNotVisible - } - - // Visibility gate — GetByID returns ErrNotFound when the user - // can't see the project, which is exactly the 404 the handler - // wants to propagate. 
- project, err := s.projects.GetByID(ctx, in.UserID, in.ProjectID) - if err != nil { - return nil, err - } - - rule, err := s.loadPublishedRule(ctx, in.SubmissionCode) - if err != nil { - return nil, err - } - - pt, err := s.loadProceedingType(ctx, project.ProceedingTypeID) - if err != nil { - return nil, err - } - - parties, err := s.parties.ListForProject(ctx, in.UserID, in.ProjectID) - if err != nil { - return nil, err - } - - next, err := s.nextOpenDeadline(ctx, in.ProjectID, rule.ID) - if err != nil { - return nil, err - } - - lang := user.Lang - if lang == "" { - lang = "de" - } - bag := PlaceholderMap{} - addFirmVars(bag) - addTodayVars(bag, time.Now()) - addUserVars(bag, user) - addProjectVars(bag, project, pt, lang) - addPartyVars(bag, parties) - addRuleVars(bag, rule, lang) - addDeadlineVars(bag, next, project, lang) - - return &SubmissionVarsResult{ - Placeholders: bag, - User: user, - Project: project, - Rule: rule, - ProceedingType: pt, - Parties: parties, - NextDeadline: next, - Lang: lang, - }, nil -} - -// loadPublishedRule fetches the deadline_rule that owns the given -// submission_code. Restricts to lifecycle_state='published' so drafts -// never end up shaping a real submission. -func (s *SubmissionVarsService) loadPublishedRule(ctx context.Context, submissionCode string) (*models.DeadlineRule, error) { - if submissionCode == "" { - return nil, ErrSubmissionRuleNotFound - } - var rule models.DeadlineRule - err := s.db.GetContext(ctx, &rule, - `SELECT `+ruleColumns+` - FROM paliad.deadline_rules - WHERE submission_code = $1 - AND lifecycle_state = 'published' - AND is_active = true - ORDER BY sequence_order - LIMIT 1`, submissionCode) - if errors.Is(err, sql.ErrNoRows) { - return nil, ErrSubmissionRuleNotFound - } - if err != nil { - return nil, fmt.Errorf("load rule by submission_code %q: %w", submissionCode, err) - } - return &rule, nil -} - -// loadProceedingType fetches the proceeding type row for the project's -// proceeding_type_id. 
Tolerates a nil id (returns nil, nil) so projects -// without a bound proceeding still render a meaningful template — the -// {{project.proceeding.*}} placeholders just resolve to the missing -// marker. -func (s *SubmissionVarsService) loadProceedingType(ctx context.Context, id *int) (*models.ProceedingType, error) { - if id == nil { - return nil, nil - } - var pt models.ProceedingType - err := s.db.GetContext(ctx, &pt, - `SELECT `+proceedingTypeColumns+` - FROM paliad.proceeding_types - WHERE id = $1`, *id) - if errors.Is(err, sql.ErrNoRows) { - return nil, nil - } - if err != nil { - return nil, fmt.Errorf("load proceeding type %d: %w", *id, err) - } - return &pt, nil -} - -// nextOpenDeadline finds the earliest pending paliad.deadlines row on -// the given project that maps to the chosen rule. Returns (nil, nil) -// when no matching deadline exists — common when the lawyer is drafting -// the submission before the system has computed its deadline row. -func (s *SubmissionVarsService) nextOpenDeadline(ctx context.Context, projectID, ruleID uuid.UUID) (*models.Deadline, error) { - var d models.Deadline - err := s.db.GetContext(ctx, &d, - `SELECT id, project_id, title, description, due_date, original_due_date, - warning_date, source, rule_id, rule_code, status, completed_at, - caldav_uid, caldav_etag, notes, created_by, created_at, updated_at, - approval_status, pending_request_id, approved_by, approved_at - FROM paliad.deadlines - WHERE project_id = $1 - AND rule_id = $2 - AND status = 'pending' - ORDER BY due_date ASC - LIMIT 1`, projectID, ruleID) - if errors.Is(err, sql.ErrNoRows) { - return nil, nil - } - if err != nil { - return nil, fmt.Errorf("load next deadline (project=%s rule=%s): %w", projectID, ruleID, err) - } - return &d, nil -} - -// addFirmVars populates the firm.* namespace. 
-func addFirmVars(bag PlaceholderMap) { - bag["firm.name"] = branding.Name - // firm.signature_block is reserved for Phase 2; emit empty so - // templates that already reference it don't render the missing - // marker (less noisy for the lawyer). - bag["firm.signature_block"] = "" -} - -// addTodayVars populates today.* in both DE and EN long forms. ISO -// short form is the default {{today}}. -func addTodayVars(bag PlaceholderMap, now time.Time) { - loc, _ := time.LoadLocation("Europe/Berlin") - if loc != nil { - now = now.In(loc) - } - bag["today"] = now.Format("2006-01-02") - bag["today.iso"] = now.Format("2006-01-02") - bag["today.long_de"] = formatLongDateDE(now) - bag["today.long_en"] = formatLongDateEN(now) -} - -// addUserVars populates user.*. -func addUserVars(bag PlaceholderMap, u *models.User) { - bag["user.display_name"] = u.DisplayName - bag["user.email"] = u.Email - bag["user.office"] = u.Office -} - -// addProjectVars populates project.* — title / case_number / court / -// patent_number / dates / our_side / proceeding metadata. -func addProjectVars(bag PlaceholderMap, p *models.Project, pt *models.ProceedingType, lang string) { - bag["project.title"] = p.Title - bag["project.reference"] = derefString(p.Reference) - // project.code is the auto-derived (or override) dotted project - // code computed by services.BuildProjectCode. Populated upstream - // by the service projection; templates that want the explicit - // override should read project.reference instead. - bag["project.code"] = p.Code - bag["project.case_number"] = derefString(p.CaseNumber) - bag["project.court"] = derefString(p.Court) - bag["project.patent_number"] = derefString(p.PatentNumber) - // project.patent_number_upc is the UPC-brief convention — kind code - // parenthesised ("EP 1 234 567 (B1)") instead of the DE form - // ("EP 1 234 567 B1"). 
Pure-function rewrite; pass-through when no - // kind code is present so the lawyer's draft never sees a worse - // number than the source value. - bag["project.patent_number_upc"] = patentNumberUPC(derefString(p.PatentNumber)) - bag["project.filing_date"] = formatDatePtr(p.FilingDate, "2006-01-02") - bag["project.grant_date"] = formatDatePtr(p.GrantDate, "2006-01-02") - bag["project.our_side"] = derefString(p.OurSide) - bag["project.our_side_de"] = ourSideDE(derefString(p.OurSide)) - bag["project.our_side_en"] = ourSideEN(derefString(p.OurSide)) - bag["project.instance_level"] = derefString(p.InstanceLevel) - bag["project.client_number"] = derefString(p.ClientNumber) - bag["project.matter_number"] = derefString(p.MatterNumber) - if pt != nil { - bag["project.proceeding.code"] = pt.Code - if strings.EqualFold(lang, "en") { - bag["project.proceeding.name"] = pt.NameEN - } else { - bag["project.proceeding.name"] = pt.Name - } - bag["project.proceeding.name_de"] = pt.Name - bag["project.proceeding.name_en"] = pt.NameEN - } -} - -// addPartyVars populates parties.* using the first row of each role. -// Multi-claimant / multi-defendant suits use the first row in Slice 1 -// per design §13.6; expanded grouping is Phase 2. 
-func addPartyVars(bag PlaceholderMap, parties []models.Party) { - var claimant, defendant, other *models.Party - for i := range parties { - role := strings.ToLower(strings.TrimSpace(derefString(parties[i].Role))) - switch role { - case "claimant", "kläger", "klaeger": - if claimant == nil { - claimant = &parties[i] - } - case "defendant", "beklagter", "beklagte": - if defendant == nil { - defendant = &parties[i] - } - default: - if other == nil { - other = &parties[i] - } - } - } - if claimant != nil { - bag["parties.claimant.name"] = claimant.Name - bag["parties.claimant.representative"] = derefString(claimant.Representative) - } - if defendant != nil { - bag["parties.defendant.name"] = defendant.Name - bag["parties.defendant.representative"] = derefString(defendant.Representative) - } - if other != nil { - bag["parties.other.name"] = other.Name - bag["parties.other.representative"] = derefString(other.Representative) - } -} - -// addRuleVars populates rule.* — submission_code, name(_en), -// legal_source (+ pretty form), primary_party, event_type. -func addRuleVars(bag PlaceholderMap, r *models.DeadlineRule, lang string) { - bag["rule.submission_code"] = derefString(r.SubmissionCode) - if strings.EqualFold(lang, "en") { - bag["rule.name"] = r.NameEN - } else { - bag["rule.name"] = r.Name - } - bag["rule.name_de"] = r.Name - bag["rule.name_en"] = r.NameEN - bag["rule.legal_source"] = derefString(r.LegalSource) - bag["rule.legal_source_pretty"] = legalSourcePretty(derefString(r.LegalSource), lang) - bag["rule.primary_party"] = derefString(r.PrimaryParty) - bag["rule.event_type"] = derefString(r.EventType) -} - -// addDeadlineVars populates deadline.* from the next pending row. When -// no row exists the values fall through to the missing marker — the -// lawyer sees [KEIN WERT: deadline.due_date] in Word and knows to fix. 
-func addDeadlineVars(bag PlaceholderMap, d *models.Deadline, p *models.Project, lang string) { - if d == nil { - return - } - bag["deadline.due_date"] = d.DueDate.Format("2006-01-02") - bag["deadline.due_date_long_de"] = formatLongDateDE(d.DueDate) - bag["deadline.due_date_long_en"] = formatLongDateEN(d.DueDate) - if d.OriginalDueDate != nil { - bag["deadline.original_due_date"] = d.OriginalDueDate.Format("2006-01-02") - } - // computed_from carries the human-readable anchor description - // (e.g. "Klagezustellung am 14.05.2026 + 6 Wochen"). Notes is - // the closest existing field — the calculator stores anchor - // metadata there. If empty we leave the placeholder unresolved. - if d.Notes != nil && strings.TrimSpace(*d.Notes) != "" { - bag["deadline.computed_from"] = strings.TrimSpace(*d.Notes) - } - bag["deadline.title"] = d.Title - bag["deadline.source"] = d.Source - _ = p // reserved for future shape decisions where the deadline - // var depends on project context. - _ = lang -} - -// derefString returns *s or "" when s is nil. -func derefString(s *string) string { - if s == nil { - return "" - } - return *s -} - -// formatDatePtr formats a *time.Time, returning "" for nil. -func formatDatePtr(t *time.Time, layout string) string { - if t == nil { - return "" - } - return t.Format(layout) -} - -// ourSideDE returns the German legal-prose form of an our_side value. -// -// t-paliad-222: unified on the gender-neutral "-Seite" / "-Partei" -// suffix shape to match the form labels and to avoid implying the -// firm represents a single (female) natural person — a B2B patent -// practice almost always represents companies. The seven sub-roles -// map onto the post-mig-110 schema; legacy 'court' / 'both' no -// longer exist in the column. 
func ourSideDE(side string) string {
	// Unknown values keep the zero value, i.e. "".
	var label string
	switch key := strings.ToLower(side); key {
	case "claimant":
		label = "Klägerseite"
	case "defendant":
		label = "Beklagtenseite"
	case "applicant":
		label = "Antragstellerseite"
	case "appellant":
		label = "Berufungsklägerseite"
	case "respondent":
		label = "Antragsgegnerseite"
	case "third_party":
		label = "Drittpartei"
	case "other":
		label = "sonstige Verfahrensbeteiligte"
	}
	return label
}

// ourSideEN is the English counterpart of ourSideDE: it maps an
// our_side value (matched case-insensitively) to its legal-prose form
// and returns "" for anything it does not recognise.
func ourSideEN(side string) string {
	var label string
	switch key := strings.ToLower(side); key {
	case "claimant":
		label = "Claimant"
	case "defendant":
		label = "Defendant"
	case "applicant":
		label = "Applicant"
	case "appellant":
		label = "Appellant"
	case "respondent":
		label = "Respondent"
	case "third_party":
		label = "Third Party"
	case "other":
		label = "other party"
	}
	return label
}

// formatLongDateDE renders a date in the German long form
// ("19. Mai 2026"): day without leading zero, full German month name,
// year. Pure function for unit testing.
func formatLongDateDE(t time.Time) string {
	germanMonths := [...]string{
		"Januar", "Februar", "März", "April", "Mai", "Juni",
		"Juli", "August", "September", "Oktober", "November", "Dezember",
	}
	month := t.Month()
	// Defensive fallback: a month outside January..December is rendered
	// as an ISO date instead of indexing out of range.
	if month < time.January || month > time.December {
		return t.Format("2006-01-02")
	}
	return fmt.Sprintf("%d. %s %d", t.Day(), germanMonths[month-1], t.Year())
}

// formatLongDateEN renders a date in the English long form
// ("19 May 2026").
func formatLongDateEN(t time.Time) string {
	const longEN = "2 January 2006"
	return t.Format(longEN)
}

// legalSourcePretty rewrites the shorthand stored on deadline_rules
// (DE.ZPO.276.1, UPC.RoP.23.1, …) into the form a lawyer would type
// in a brief ("§ 276 Abs. 1 ZPO", "Rule 23.1 RoP UPC"). Unknown
// prefixes pass through unchanged — preferring the raw shorthand over
// an incorrect prettification.
//
// Lang controls the language of connective words (Abs / Section,
// Regel / Rule, …).
// The pretty table covers the prefixes used by the
// 254 published rules in the corpus today; new prefixes default to
// pass-through and a follow-up CL extends the table.
//
// src is trimmed before parsing; an empty src yields "". lang selects
// English connectives when it equals "en" (case-insensitive) and German
// ones otherwise. A shorthand containing an empty segment ("DE.ZPO.",
// "DE.ZPO..1") is returned as-is after trimming, because prettifying it
// would produce a citation with a hole in it ("§  ZPO").
func legalSourcePretty(src, lang string) string {
	src = strings.TrimSpace(src)
	if src == "" {
		return ""
	}
	parts := strings.Split(src, ".")

	// Malformed shorthand: pass it through rather than format a blank
	// paragraph or rule number into the citation.
	for _, segment := range parts {
		if segment == "" {
			return src
		}
	}

	en := strings.EqualFold(lang, "en")

	switch {
	case len(parts) == 4 && parts[0] == "DE" && parts[1] == "ZPO":
		if en {
			return fmt.Sprintf("Section %s(%s) ZPO", parts[2], parts[3])
		}
		return fmt.Sprintf("§ %s Abs. %s ZPO", parts[2], parts[3])
	case len(parts) == 3 && parts[0] == "DE" && parts[1] == "ZPO":
		if en {
			return fmt.Sprintf("Section %s ZPO", parts[2])
		}
		return fmt.Sprintf("§ %s ZPO", parts[2])
	case len(parts) == 4 && parts[0] == "UPC" && parts[1] == "RoP":
		if en {
			return fmt.Sprintf("Rule %s.%s RoP UPC", parts[2], parts[3])
		}
		return fmt.Sprintf("Regel %s.%s VerfO UPC", parts[2], parts[3])
	case len(parts) == 3 && parts[0] == "UPC" && parts[1] == "RoP":
		if en {
			return fmt.Sprintf("Rule %s RoP UPC", parts[2])
		}
		return fmt.Sprintf("Regel %s VerfO UPC", parts[2])
	case len(parts) >= 3 && parts[0] == "DE" && parts[1] == "PatG":
		// Only the section number is rendered; any further segments
		// are intentionally not formatted for PatG.
		if en {
			return fmt.Sprintf("Section %s PatG", parts[2])
		}
		return fmt.Sprintf("§ %s PatG", parts[2])
	case len(parts) == 2 && parts[0] == "EPC":
		if en {
			return fmt.Sprintf("Art. %s EPC", parts[1])
		}
		return fmt.Sprintf("Art. %s EPÜ", parts[1])
	}
	return src
}

// patentNumberKindCodeRegex matches a trailing kind code on a patent
// number: a whitespace-separated single uppercase letter followed by
// a single digit (B1, A1, A2, B2, B9, C1, T2, U1, …). Capturing
// groups split the base from the kind code so the formatter can
// parenthesise the kind without touching the rest of the number.
var patentNumberKindCodeRegex = regexp.MustCompile(`^(.*?)\s+([A-Z]\d)$`)

// patentNumberUPC converts a patent number from the DE convention
// ("EP 1 234 567 B1") to the UPC-brief convention
// ("EP 1 234 567 (B1)") by wrapping a trailing kind code in
// parentheses. The input is trimmed first; when no trailing kind code
// is recognised the trimmed input is returned as-is.
//
// Reformatted:
//
//	"EP 1 234 567 B1"        → "EP 1 234 567 (B1)"
//	"EP 4 056 049 A1"        → "EP 4 056 049 (A1)"
//	"DE 10 2020 123 456 A1"  → "DE 10 2020 123 456 (A1)"
//	"  EP 1 234 567 B1  "    → "EP 1 234 567 (B1)"   (trimmed)
//
// Passed through:
//
//	"EP 1 234 567"           → "EP 1 234 567"
//	"WO/2023/123456"         → "WO/2023/123456"      (no kind code shape)
//	""                       → ""
func patentNumberUPC(s string) string {
	trimmed := strings.TrimSpace(s)
	match := patentNumberKindCodeRegex.FindStringSubmatch(trimmed)
	if match == nil {
		// Covers the empty string as well: the pattern needs at least
		// whitespace plus a two-character kind code.
		return trimmed
	}
	number, kindCode := strings.TrimSpace(match[1]), match[2]
	if number == "" {
		// A bare kind code with nothing in front of it is left alone.
		return trimmed
	}
	return number + " (" + kindCode + ")"
}