From 0763b7daa23c5faea912ffbd889ab896fbd38bb8 Mon Sep 17 00:00:00 2001
From: mAi
Date: Mon, 1 Jun 2026 12:39:53 +0200
Subject: [PATCH] feat(submissions): fill firm.signature_block + fix generate-fallback junk (t-paliad-358 A-S1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two letterhead/Rubrum auto-fill fixes (Option A, no schema change):

1. firm.signature_block: was hardcoded "" ("reserved for Phase 2"), so
   every template referencing {{firm.signature_block}} rendered blank.
   Now filled from branding.Name — the firm identity line of a
   submission's signature block (the signature section seeds with
   signature_block + user.display_name). Firm-agnostic: a FIRM_NAME
   redeploy signs with the right firm.

2. Generate-fallback junk (kepler audit §1 Path 3):
   resolveSubmissionTemplate is the merge-path resolver (every caller
   feeds merge.go), but its lower tiers fetched _firm-skeleton.docx /
   _skeleton.docx — which were repurposed into anchors-only Composer
   bases (t-paliad-313 Slice B). Their bodies hold only
   {{#section:KEY}} markers, which placeholderRegex ignores, so merge.go
   emitted them verbatim as literal "{{#section:letterhead}}…" junk for
   every code without a per-code template (i.e. everything except
   de.inf.lg.erwidg).

   Fix:
   - docx.BuildFallbackSkeleton(lang): in-process, lang-aware, merge-safe
     basic Schriftsatz with a data-driven basic Rubrum (real {{key}}
     placeholders the var bag fills). Always available, no Gitea
     round-trip.
   - docx.HasMergePlaceholders guards tiers 3/4/5: a fetched skeleton is
     used only if it carries real placeholders, else we fall through to
     the embedded fallback. Today's anchors-only/placeholder-free files
     are skipped; a future merge-safe firm-skeleton (with letterhead) is
     preferred again automatically.
   - merge.go strips stray {{#section:…}}/{{/section:…}} markers
     defensively so no anchors-only carrier can ever leak Composer junk
     into a merged document.

Verified: confirmed live that deployed _firm-skeleton.docx + _skeleton.docx are anchors-only (fetch+unzip); unit tests cover BuildFallbackSkeleton rendering a real Rubrum (de+en), HasMergePlaceholders classification, marker stripping, and the signature_block fill. go build / vet ./... / test ./... + bun build clean. Out of scope (flagged for next slices): demo template's closing prints {{firm.name}} then {{firm.signature_block}} (=firm.name) → A-S2 dedups the demo wording. Restoring firm letterhead chrome to the merge fallback → A-S3. --- internal/handlers/submission_drafts.go | 76 +++-- .../handlers/submission_template_lang_test.go | 4 + internal/services/submission_vars.go | 17 +- .../services/submission_vars_firm_test.go | 27 ++ pkg/docforge/docx/fallback_skeleton.go | 305 ++++++++++++++++++ pkg/docforge/docx/fallback_skeleton_test.go | 107 ++++++ pkg/docforge/docx/merge.go | 63 ++++ 7 files changed, 570 insertions(+), 29 deletions(-) create mode 100644 internal/services/submission_vars_firm_test.go create mode 100644 pkg/docforge/docx/fallback_skeleton.go create mode 100644 pkg/docforge/docx/fallback_skeleton_test.go diff --git a/internal/handlers/submission_drafts.go b/internal/handlers/submission_drafts.go index 3edfdf6..efda9b6 100644 --- a/internal/handlers/submission_drafts.go +++ b/internal/handlers/submission_drafts.go @@ -30,6 +30,8 @@ package handlers import ( "context" + "crypto/sha256" + "encoding/hex" "encoding/json" "errors" "fmt" @@ -45,6 +47,7 @@ import ( "mgit.msbls.de/m/paliad/internal/models" "mgit.msbls.de/m/paliad/internal/services" "mgit.msbls.de/m/paliad/pkg/docforge" + "mgit.msbls.de/m/paliad/pkg/docforge/docx" ) // submissionDraftPreviewTimeout caps a single preview round-trip. 
@@ -1314,12 +1317,17 @@ const ( tplTierPerCode submissionTemplateTier = "per_code" // {firm}/{code}.docx (unsuffixed) tplTierSkeletonLang submissionTemplateTier = "skeleton_lang" // _skeleton.{lang}.docx tplTierSkeleton submissionTemplateTier = "skeleton" // _skeleton.docx + tplTierFallback submissionTemplateTier = "fallback" // embedded merge-safe basic-Rubrum skeleton tplTierLetterhead submissionTemplateTier = "letterhead" // HL Patents Style .dotm ) // resolveSubmissionTemplate returns the .docx bytes for the given -// (submission_code, language). Merges t-paliad-275 (firm-skeleton tier) -// and t-paliad-276 (language-selector + EN skeleton tier). Lookup order: +// (submission_code, language). This is the *merge-path* resolver: every +// caller feeds the result into SubmissionRenderer (merge.go), which fills +// {{key}} tokens. The result must therefore be merge-safe — it must carry +// real {{key}} placeholders. Merges t-paliad-275 (firm-skeleton tier), +// t-paliad-276 (language-selector + EN skeleton tier), t-paliad-358 A-S1 +// (merge-safe guard + embedded fallback). Lookup order: // // 1. per-firm per-(code, lang) template — most specific. e.g. // `de.inf.lg.erwidg.en.docx` for EN drafts. t-paliad-276. @@ -1328,12 +1336,22 @@ const ( // 3. universal language-matched skeleton — `_skeleton.en.docx` for EN // drafts. Skipped for DE drafts (steps 4+5 already cover DE). // 4. firm-formatted skeleton — `_firm-skeleton.docx` (t-paliad-275). -// HL paragraph + character styles + letterhead, full placeholder -// bag. DE-flavored: counts as language_fallback=true for EN drafts. -// 5. universal _skeleton.docx — plain DE skeleton, no firm styles. -// Backstop when the firm skeleton is unreachable. -// 6. universal HL Patents Style .dotm — macro-only letterhead, no -// placeholders. Last-ditch when every skeleton tier is unreachable. +// 5. universal _skeleton.docx. +// 6. 
embedded merge-safe fallback — a lang-aware basic-Rubrum skeleton +// built in-process (docx.BuildFallbackSkeleton). Always available, no +// Gitea round-trip. This is what makes one-click /generate produce a +// real merged document for ANY submission_code. +// 7. HL Patents Style .dotm — placeholder-free letterhead, the pre-358 +// last-ditch. Reached only if the in-process build (6) fails. +// +// Tiers 3/4/5 are GUARDED by docx.HasMergePlaceholders: the firm and +// universal skeletons were repurposed into anchors-only Composer bases +// (t-paliad-313 Slice B) — their bodies hold only {{#section:KEY}} markers +// the merge engine can't fill, so feeding them to merge.go produced literal +// "{{#section:…}}" junk (kepler audit §1 Path 3 / §2). The guard skips any +// fetched skeleton that lacks real placeholders, so today they fall through +// to the embedded fallback (6); should a merge-safe firm-skeleton (with +// letterhead) be restored later it is preferred again automatically. // // The returned SHA pins the audit row's template provenance. The tier // tells the editor whether the result language-matches the request so @@ -1357,25 +1375,30 @@ func resolveSubmissionTemplate(ctx context.Context, submissionCode, lang string) // 3. language-matched skeleton — only meaningful for EN drafts; DE // drafts fall through to the firm/universal DE skeletons below. if lang == "en" { - if data, sha, langMatched, err := fetchSubmissionSkeletonBytesForLang(ctx, lang); err == nil && langMatched { + if data, sha, langMatched, err := fetchSubmissionSkeletonBytesForLang(ctx, lang); err == nil && langMatched && docx.HasMergePlaceholders(data) { return data, sha, tplTierSkeletonLang, nil } } - // 4. firm-formatted skeleton (HL styles, DE prose). For DE drafts - // this is a first-class match; for EN drafts it counts as a - // language fallback (handled by languageFallback()). - if data, sha, err := fetchFirmSkeletonBytes(ctx); err == nil { + // 4. 
firm-formatted skeleton — used only if it is merge-safe (carries + // real {{key}} placeholders, not anchors-only Composer markers). + if data, sha, err := fetchFirmSkeletonBytes(ctx); err == nil && docx.HasMergePlaceholders(data) { return data, sha, tplTierSkeleton, nil - } else { - log.Printf("submission_drafts: firm-skeleton fetch failed for code=%s lang=%s, falling back to universal skeleton: %v", submissionCode, lang, err) } - // 5. universal plain DE skeleton. - if data, sha, err := fetchSubmissionSkeletonBytes(ctx); err == nil { + // 5. universal plain DE skeleton — same merge-safe guard. + if data, sha, err := fetchSubmissionSkeletonBytes(ctx); err == nil && docx.HasMergePlaceholders(data) { return data, sha, tplTierSkeleton, nil - } else { - log.Printf("submission_drafts: skeleton fetch failed for code=%s lang=%s, falling back to HL Patents Style: %v", submissionCode, lang, err) } - // 6. HL Patents Style letterhead (no placeholders, last-ditch). + // 6. embedded merge-safe fallback — lang-aware basic Rubrum, always + // available. Supersedes the placeholder-free .dotm so /generate on + // any code yields a real merged document (basic Rubrum), never the + // {{#section:…}} junk an anchors-only base produced (t-paliad-358 A-S1). + if data, err := docx.BuildFallbackSkeleton(lang); err == nil { + sum := sha256.Sum256(data) + return data, hex.EncodeToString(sum[:]), tplTierFallback, nil + } else { + log.Printf("submission_drafts: embedded fallback skeleton build failed for code=%s lang=%s, falling back to HL Patents Style: %v", submissionCode, lang, err) + } + // 7. HL Patents Style letterhead (no placeholders, last-ditch). bytes, err := fetchHLPatentsStyleBytes(ctx) if err != nil { return nil, "", "", err @@ -1386,16 +1409,19 @@ func resolveSubmissionTemplate(ctx context.Context, submissionCode, lang string) // languageFallback reports whether the resolved template tier failed // to match the requested draft language. 
For an EN draft, anything -// other than per_code_lang or skeleton_lang is a fallback (per_code is -// the legacy DE-baked template, skeleton is the DE skeleton). For a DE -// draft, only `letterhead` counts as a fallback — the DE skeleton and -// per-code template are both first-class DE outputs. t-paliad-276. +// other than per_code_lang, skeleton_lang or the lang-aware embedded +// fallback is a fallback (per_code is the legacy DE-baked template, +// skeleton is the DE skeleton). For a DE draft, only `letterhead` counts +// as a fallback — the DE skeleton, per-code template, and the embedded +// fallback are all first-class DE outputs. t-paliad-276 / t-paliad-358 A-S1. func languageFallback(lang string, tier submissionTemplateTier) bool { if tier == tplTierLetterhead { return true } if strings.EqualFold(lang, "en") { - return tier != tplTierPerCodeLang && tier != tplTierSkeletonLang + // tplTierFallback is built per-language (English labels for EN), so + // it is NOT a language fallback. 
+ return tier != tplTierPerCodeLang && tier != tplTierSkeletonLang && tier != tplTierFallback } return false } diff --git a/internal/handlers/submission_template_lang_test.go b/internal/handlers/submission_template_lang_test.go index 0817dbf..96ba122 100644 --- a/internal/handlers/submission_template_lang_test.go +++ b/internal/handlers/submission_template_lang_test.go @@ -21,6 +21,7 @@ func TestLanguageFallback(t *testing.T) { {"de_per_code", "de", tplTierPerCode, false}, {"de_skeleton_lang", "de", tplTierSkeletonLang, false}, {"de_skeleton", "de", tplTierSkeleton, false}, + {"de_fallback", "de", tplTierFallback, false}, {"de_letterhead", "de", tplTierLetterhead, true}, // EN drafts: per_code (DE-baked) and skeleton (DE-baked) both @@ -30,6 +31,9 @@ func TestLanguageFallback(t *testing.T) { {"en_per_code", "en", tplTierPerCode, true}, {"en_skeleton_lang", "en", tplTierSkeletonLang, false}, {"en_skeleton", "en", tplTierSkeleton, true}, + // The embedded fallback is built per-language (EN labels for EN), + // so it is NOT a language fallback (t-paliad-358 A-S1). + {"en_fallback", "en", tplTierFallback, false}, {"en_letterhead", "en", tplTierLetterhead, true}, } for _, c := range cases { diff --git a/internal/services/submission_vars.go b/internal/services/submission_vars.go index f4987c6..2e54aa6 100644 --- a/internal/services/submission_vars.go +++ b/internal/services/submission_vars.go @@ -318,10 +318,19 @@ func (s *SubmissionVarsService) nextOpenDeadline(ctx context.Context, projectID, // addFirmVars populates the firm.* namespace. func addFirmVars(bag PlaceholderMap) { bag["firm.name"] = branding.Name - // firm.signature_block is reserved for Phase 2; emit empty so - // templates that already reference it don't render the missing - // marker (less noisy for the lawyer). 
- bag["firm.signature_block"] = "" + // firm.signature_block is the firm identity line of a submission's + // signature block — the signature section seeds with + // {{firm.signature_block}} + {{user.display_name}} (the lawyer's name), + // so this carries the firm, not the person. It is firm-agnostic: + // derived from branding.Name so a FIRM_NAME redeploy or non-HLC + // deployment signs with the right firm (t-paliad-358 A-S1). It used to + // emit "" ("reserved for Phase 2"), which left every template that + // referenced it blank. A richer block (postal/contact address, + // professional designation such as "Rechtsanwälte/Patentanwälte") needs + // per-firm config paliad does not capture yet — deferred to the + // structured-data work (Option B); we do not guess legally-flavoured + // designations here. + bag["firm.signature_block"] = branding.Name } // addTodayVars populates today.* in both DE and EN long forms. ISO diff --git a/internal/services/submission_vars_firm_test.go b/internal/services/submission_vars_firm_test.go new file mode 100644 index 0000000..881bf9e --- /dev/null +++ b/internal/services/submission_vars_firm_test.go @@ -0,0 +1,27 @@ +package services + +// Pins the firm.* namespace (t-paliad-358 A-S1): firm.signature_block must +// be filled from branding.Name, not left empty. Before A-S1 it emitted "" +// ("reserved for Phase 2"), which made every template that referenced +// {{firm.signature_block}} render blank. 
+ +import ( + "testing" + + "mgit.msbls.de/m/paliad/internal/branding" +) + +func TestAddFirmVars_SignatureBlockFilledFromBranding(t *testing.T) { + bag := PlaceholderMap{} + addFirmVars(bag) + + if got := bag["firm.name"]; got != branding.Name { + t.Errorf("firm.name = %q, want %q", got, branding.Name) + } + if got := bag["firm.signature_block"]; got == "" { + t.Fatal("firm.signature_block is empty — the A-S1 fix should fill it from branding") + } + if got := bag["firm.signature_block"]; got != branding.Name { + t.Errorf("firm.signature_block = %q, want %q (firm identity line, firm-agnostic)", got, branding.Name) + } +} diff --git a/pkg/docforge/docx/fallback_skeleton.go b/pkg/docforge/docx/fallback_skeleton.go new file mode 100644 index 0000000..e0b2261 --- /dev/null +++ b/pkg/docforge/docx/fallback_skeleton.go @@ -0,0 +1,305 @@ +package docx + +// Merge-safe fallback skeleton (t-paliad-358 A-S1). +// +// Why this exists: resolveSubmissionTemplate is the *merge-path* template +// resolver — every caller feeds its result into SubmissionRenderer (merge.go), +// which substitutes {{key}} tokens. Its lower fallback tiers used to fetch the +// universal / firm skeletons from mWorkRepo, but those .docx files were +// repurposed into Composer *bases* (t-paliad-313 Slice B): their bodies now +// carry only {{#section:KEY}} anchor markers, which the Composer (compose.go) +// splices section content into. placeholderRegex deliberately ignores markers +// that start with '#' or '/', so when an anchors-only base reaches merge.go the +// markers pass through verbatim and the lawyer sees literal +// "{{#section:letterhead}}…" junk in Word (kepler audit §1 Path 3 / §2). +// +// Only de.inf.lg.erwidg ships a real per-code merge template today, so every +// other submission_code's one-click /generate (and the v1 draft-export +// fallback) was exposed to that junk. 
This builder gives the merge path a +// self-contained, merge-safe fallback: a clean basic Schriftsatz with a +// data-driven basic Rubrum built from real {{key}} placeholders the variable +// bag fills. No Gitea round-trip, no Composer anchors, always available. +// +// Scope (A-S1): a *basic* caption — neutral, forum-hedged designation labels. +// Parametrising heading / designations / court line per forum +// (our_side / instance_level / proceeding.code) is A-S2. Restoring the firm +// letterhead chrome to the merge path is A-S3 (firm-agnostic headers). This +// file emits no firm-specific letterhead — {{firm.name}} / {{firm.signature_block}} +// are filled from branding by the bag, keeping the skeleton firm-agnostic. + +import ( + "archive/zip" + "bytes" + "fmt" + "strings" + "time" +) + +// fallbackSkeletonTime pins every zip entry's mtime so the generated bytes are +// byte-stable across calls (cheap to cache / diff, no spurious churn). +var fallbackSkeletonTime = time.Date(2026, 6, 1, 0, 0, 0, 0, time.UTC) + +// BuildFallbackSkeleton returns a minimal, Word-compatible .docx whose body is +// a basic Schriftsatz with a data-driven Rubrum. Every dynamic value is a real +// {{key}} placeholder resolved by SubmissionVarsService, so rendering it +// through SubmissionRenderer.Render produces a merged document — never the +// {{#section:…}} junk an anchors-only Composer base would. +// +// lang selects the static label language ("en" → English labels + EN date / +// our-side aliases; anything else → German). The returned bytes are +// self-contained: no external media, no firm letterhead, no macros. 
+func BuildFallbackSkeleton(lang string) ([]byte, error) { + var buf bytes.Buffer + zw := zip.NewWriter(&buf) + + add := func(name, body string) error { + w, err := zw.CreateHeader(&zip.FileHeader{ + Name: name, + Method: zip.Deflate, + Modified: fallbackSkeletonTime, + }) + if err != nil { + return fmt.Errorf("create %s: %w", name, err) + } + if _, err := w.Write([]byte(body)); err != nil { + return fmt.Errorf("write %s: %w", name, err) + } + return nil + } + + for _, part := range []struct{ name, body string }{ + {"[Content_Types].xml", fallbackContentTypesXML}, + {"_rels/.rels", fallbackRootRelsXML}, + {"word/_rels/document.xml.rels", fallbackDocumentRelsXML}, + {"word/styles.xml", fallbackStylesXML}, + {"word/document.xml", buildFallbackDocumentXML(lang)}, + } { + if err := add(part.name, part.body); err != nil { + return nil, err + } + } + + if err := zw.Close(); err != nil { + return nil, fmt.Errorf("finalise zip: %w", err) + } + return buf.Bytes(), nil +} + +const fallbackContentTypesXML = ` + + + + + +` + +const fallbackRootRelsXML = ` + + +` + +const fallbackDocumentRelsXML = ` + + +` + +const fallbackStylesXML = ` + + + + + + + + + + + + + + + + +` + +// fallbackLabels holds the language-dependent static text for the skeleton. +// Dynamic values stay as {{key}} placeholders regardless of language. 
+type fallbackLabels struct { + editor string // "Bearbeiter:" / "Attorney:" + dateKey string // {{today.long_de}} / {{today.long_en}} + caseNo string // "Aktenzeichen:" / "Case no.:" + inTheMatter string // "In der Sache" / "In the matter" + representedBy string // "vertreten durch" / "represented by" + claimantRole string // role designation line + versus string // "gegen" / "against" + defendantRole string + others string // "Weitere Beteiligte:" / "Further parties:" + subject string // "Betreff" / "Subject" + patent string // "Streitpatent:" / "Patent in suit:" + proceeding string // "Verfahrensart:" / "Proceeding:" + ourSideKey string // {{project.our_side_de}} / {{project.our_side_en}} + bodyHint string // editorial placeholder for the actual submission text + closing string // "Schlussformel" / "Closing" +} + +func fallbackLabelsFor(lang string) fallbackLabels { + if strings.EqualFold(lang, "en") { + return fallbackLabels{ + editor: "Attorney:", + dateKey: "{{today.long_en}}", + caseNo: "Case no.:", + inTheMatter: "In the matter", + representedBy: "represented by", + claimantRole: "— Claimant / Patent proprietor / Applicant —", + versus: "against", + defendantRole: "— Defendant / Opponent / Respondent —", + others: "Further parties:", + subject: "Subject", + patent: "Patent in suit:", + proceeding: "Proceeding:", + ourSideKey: "{{project.our_side_en}}", + bodyHint: "[Body of the submission goes here. 
This is a basic skeleton — fill in according to the submission type.]", + closing: "Closing", + } + } + return fallbackLabels{ + editor: "Bearbeiter:", + dateKey: "{{today.long_de}}", + caseNo: "Aktenzeichen:", + inTheMatter: "In der Sache", + representedBy: "vertreten durch", + claimantRole: "— Klägerin / Patentinhaberin / Anmelderin —", + versus: "gegen", + defendantRole: "— Beklagte / Einsprechende / Beschwerdegegnerin —", + others: "Weitere Beteiligte:", + subject: "Betreff", + patent: "Streitpatent:", + proceeding: "Verfahrensart:", + ourSideKey: "{{project.our_side_de}}", + bodyHint: "[Hier folgt der Schriftsatztext. Diese Skelett-Vorlage trägt keine vorgefertigte Struktur — bitte gemäß Schriftsatz-Typ ergänzen.]", + closing: "Schlussformel", + } +} + +// buildFallbackDocumentXML emits the document body. Layout: firm header line → +// court + case number → basic Rubrum (claimant / vs / defendant / others) → +// subject (patent) → submission body placeholder → closing (date / author / +// firm signature block). Every placeholder occupies its own run so the +// renderer's pass-1 single-run substitution catches it. +func buildFallbackDocumentXML(lang string) string { + l := fallbackLabelsFor(lang) + + var b strings.Builder + b.WriteString(``) + b.WriteString(``) + b.WriteString(``) + + // Letterhead-ish header block (firm-agnostic — values from branding bag). + fbHeading1(&b, "{{firm.name}}") + fbPlain(&b, l.editor+" {{user.display_name}}") + fbPlain(&b, "{{user.email}} · {{user.office}}") + fbPlain(&b, l.dateKey) + + // Court + case number. + fbHeading2(&b, "{{project.court}}") + fbPlain(&b, l.caseNo+" {{project.case_number}}") + fbPlain(&b, l.proceeding+" {{project.proceeding.name}}") + + // Basic Rubrum. 
+ fbHeading2(&b, l.inTheMatter) + fbPlain(&b, "{{parties.claimant.name}}") + fbPlain(&b, l.representedBy+" {{parties.claimant.representative}}") + fbBold(&b, l.claimantRole) + fbPlain(&b, "") + fbPlain(&b, l.versus) + fbPlain(&b, "") + fbPlain(&b, "{{parties.defendant.name}}") + fbPlain(&b, l.representedBy+" {{parties.defendant.representative}}") + fbBold(&b, l.defendantRole) + fbPlain(&b, l.others+" {{parties.other.name}}") + + // Subject (patent in suit). + fbHeading2(&b, l.subject) + fbPlain(&b, l.patent+" {{project.patent_number}}") + fbPlain(&b, "{{project.title}} ("+l.ourSideKey+")") + + // Body placeholder for the actual submission text. + fbPlain(&b, "") + fbPlain(&b, l.bodyHint) + fbPlain(&b, "") + + // Closing / signature. + fbHeading2(&b, l.closing) + fbPlain(&b, l.dateKey) + fbPlain(&b, "{{user.display_name}}") + fbPlain(&b, "{{firm.signature_block}}") + + b.WriteString(``) + return b.String() +} + +func fbHeading1(b *strings.Builder, text string) { fbParagraph(b, "Heading1", text, false) } +func fbHeading2(b *strings.Builder, text string) { fbParagraph(b, "Heading2", text, false) } +func fbPlain(b *strings.Builder, text string) { fbParagraph(b, "", text, false) } +func fbBold(b *strings.Builder, text string) { fbParagraph(b, "", text, true) } + +// fbParagraph writes one paragraph with the given pStyle and optional bold runs. +// Placeholders are split into their own runs so the renderer's format-preserving +// pass-1 substitution catches each one independently. 
+func fbParagraph(b *strings.Builder, style, text string, bold bool) { + b.WriteString(``) + if style != "" { + b.WriteString(``) + } + for _, seg := range fbSplitOnPlaceholders(text) { + b.WriteString(``) + if bold { + b.WriteString(``) + } + b.WriteString(``) + b.WriteString(fbXMLEscape(seg)) + b.WriteString(``) + } + b.WriteString(``) +} + +// fbSplitOnPlaceholders splits text so each {{placeholder}} sits in its own +// segment (and therefore its own run), keeping every key inside a single run. +func fbSplitOnPlaceholders(s string) []string { + if s == "" { + return []string{""} + } + var out []string + for { + open := strings.Index(s, "{{") + if open < 0 { + out = append(out, s) + return out + } + closeIdx := strings.Index(s[open:], "}}") + if closeIdx < 0 { + out = append(out, s) + return out + } + end := open + closeIdx + 2 + if open > 0 { + out = append(out, s[:open]) + } + out = append(out, s[open:end]) + s = s[end:] + if s == "" { + return out + } + } +} + +func fbXMLEscape(s string) string { + s = strings.ReplaceAll(s, "&", "&") + s = strings.ReplaceAll(s, "<", "<") + s = strings.ReplaceAll(s, ">", ">") + s = strings.ReplaceAll(s, `"`, """) + s = strings.ReplaceAll(s, "'", "'") + return s +} diff --git a/pkg/docforge/docx/fallback_skeleton_test.go b/pkg/docforge/docx/fallback_skeleton_test.go new file mode 100644 index 0000000..a91344a --- /dev/null +++ b/pkg/docforge/docx/fallback_skeleton_test.go @@ -0,0 +1,107 @@ +package docx + +// Tests for the merge-safe fallback skeleton + the merge-path guards that +// keep anchors-only Composer bases from leaking {{#section:…}} junk into a +// merged document (t-paliad-358 A-S1). 
+ +import ( + "strings" + "testing" + + "mgit.msbls.de/m/paliad/pkg/docforge" +) + +func TestBuildFallbackSkeleton_IsMergeSafeAndRendersRubrum(t *testing.T) { + for _, lang := range []string{"de", "en"} { + t.Run(lang, func(t *testing.T) { + tpl, err := BuildFallbackSkeleton(lang) + if err != nil { + t.Fatalf("BuildFallbackSkeleton(%q): %v", lang, err) + } + if !HasMergePlaceholders(tpl) { + t.Fatalf("fallback skeleton (%s) reported no merge placeholders", lang) + } + + // The fallback must never carry Composer section anchors — it is a + // merge template, not a Composer base. + body := readMergeDocumentXML(t, tpl) + if strings.Contains(body, "{{#section:") || strings.Contains(body, "{{/section:") { + t.Fatalf("fallback skeleton (%s) leaked a section anchor: %s", lang, body) + } + + // Render it the way the merge path does and confirm the basic Rubrum + // fills from the bag (claimant + defendant + court + case number). + r := NewSubmissionRenderer() + out, err := r.Render(tpl, docforge.PlaceholderMap{ + "firm.name": "HLC", + "firm.signature_block": "HLC", + "user.display_name": "Dr. Max Mustermann", + "parties.claimant.name": "Acme Corp.", + "parties.defendant.name": "Globex GmbH", + "project.court": "Landgericht München I", + "project.case_number": "7 O 1234/26", + "project.patent_number": "EP 1 234 567 B1", + }, docforge.DefaultMissingMarker(lang)) + if err != nil { + t.Fatalf("render fallback (%s): %v", lang, err) + } + rendered := readMergeDocumentXML(t, out) + for _, want := range []string{ + "Acme Corp.", "Globex GmbH", "Landgericht München I", + "7 O 1234/26", "EP 1 234 567 B1", "HLC", + } { + if !strings.Contains(rendered, want) { + t.Errorf("rendered fallback (%s) missing %q\n%s", lang, want, rendered) + } + } + // No unresolved placeholder braces for the keys we bound. 
+ if strings.Contains(rendered, "{{parties.claimant.name}}") { + t.Errorf("rendered fallback (%s) left an unresolved bound placeholder", lang) + } + }) + } +} + +func TestHasMergePlaceholders(t *testing.T) { + mergeSafe := minimalMergeDOCX(t, ``+ + `{{firm.name}}`) + if !HasMergePlaceholders(mergeSafe) { + t.Error("expected merge-safe body to report placeholders") + } + + anchorsOnly := minimalMergeDOCX(t, ``+ + `{{#section:letterhead}}`+ + `{{/section:letterhead}}`) + if HasMergePlaceholders(anchorsOnly) { + t.Error("anchors-only Composer base must NOT report merge placeholders") + } + + noPlaceholders := minimalMergeDOCX(t, ``+ + `Letterhead only, no merge fields.`) + if HasMergePlaceholders(noPlaceholders) { + t.Error("placeholder-free body must NOT report merge placeholders") + } +} + +// TestRender_StripsStraySectionMarkers is the depth-in-defense check: if an +// anchors-only Composer base ever reaches the merge path, the output must be +// clean (markers stripped), never literal "{{#section:…}}" junk. +func TestRender_StripsStraySectionMarkers(t *testing.T) { + tmpl := minimalMergeDOCX(t, ``+ + `{{#section:letterhead}}`+ + `{{firm.name}}`+ + `{{/section:letterhead}}`) + + r := NewSubmissionRenderer() + out, err := r.Render(tmpl, docforge.PlaceholderMap{"firm.name": "HLC"}, nil) + if err != nil { + t.Fatalf("render: %v", err) + } + body := readMergeDocumentXML(t, out) + if strings.Contains(body, "{{#section:") || strings.Contains(body, "{{/section:") { + t.Errorf("section markers survived the merge: %s", body) + } + if !strings.Contains(body, "HLC") { + t.Errorf("real placeholder around the markers was not substituted: %s", body) + } +} diff --git a/pkg/docforge/docx/merge.go b/pkg/docforge/docx/merge.go index 8391ba7..8e0011b 100644 --- a/pkg/docforge/docx/merge.go +++ b/pkg/docforge/docx/merge.go @@ -79,6 +79,19 @@ func htmlPreviewWrapper(key, value string) string { // always starts with an ASCII letter. 
var placeholderRegex = regexp.MustCompile(`\{\{\s*([A-Za-z][A-Za-z0-9_.]*)\s*\}\}`) +// sectionMarkerRegex matches a Composer section anchor — +// {{#section:KEY}} (open) or {{/section:KEY}} (close). These markers are +// the Composer's (compose.go) splice points; they are NOT merge +// placeholders (placeholderRegex ignores them because they start with +// '#' / '/'). When an anchors-only Composer base is mistakenly fed to +// the merge path, the markers would otherwise survive verbatim into the +// output and show up as literal "{{#section:letterhead}}…" junk in Word +// (kepler audit §1 Path 3). substituteInDocumentXML strips them +// defensively so no merged document ever leaks a Composer anchor — the +// normal merge path uses a merge-safe template (BuildFallbackSkeleton), +// this is depth-in-defense for any stray anchors-only carrier. +var sectionMarkerRegex = regexp.MustCompile(`\{\{\s*[#/]\s*section\s*:\s*[A-Za-z0-9_.\-]+\s*\}\}`) + // SubmissionRenderer renders a .docx template into a .docx output by // substituting {{placeholder}} tokens with values from a docforge.PlaceholderMap. // Stateless; safe for concurrent use. @@ -181,6 +194,37 @@ func (r *SubmissionRenderer) RenderHTML(templateBytes []byte, vars docforge.Plac return docXMLToHTML(merged), nil } +// HasMergePlaceholders reports whether the .docx at templateBytes carries +// at least one real {{key}} merge placeholder in word/document.xml. The +// merge path (resolveSubmissionTemplate → Render) needs this to tell a +// merge-usable template apart from an anchors-only Composer base (whose +// body holds only {{#section:KEY}} markers, which placeholderRegex +// ignores) or a placeholder-free letterhead (.dotm) — both of which would +// render an empty Rubrum. Returns false on any read/zip error so the +// caller safely falls back to a known merge-safe skeleton +// (t-paliad-358 A-S1). 
+func HasMergePlaceholders(templateBytes []byte) bool { + clean, err := ConvertDotmToDocx(templateBytes) + if err != nil { + return false + } + zr, err := zip.NewReader(bytes.NewReader(clean), int64(len(clean))) + if err != nil { + return false + } + for _, entry := range zr.File { + if entry.Name != "word/document.xml" { + continue + } + body, err := readMergeZipEntry(entry) + if err != nil { + return false + } + return placeholderRegex.Match(body) + } + return false +} + // isWordXMLEntry returns true for the .docx parts that contain // substitutable text. We touch document.xml plus header*.xml and // footer*.xml (templates may put firm letterhead in a header) but @@ -227,6 +271,7 @@ func readMergeZipEntry(f *zip.File) ([]byte, error) { // the paragraph's runs as a single using // the formatting properties of the first run. func substituteInDocumentXML(body []byte, vars docforge.PlaceholderMap, missing docforge.MissingPlaceholderFn, wrap valueWrapperFn) []byte { + body = stripSectionMarkers(body) replaced := substituteInTextNodes(body, vars, missing, wrap) if !needsCrossRunMerge(replaced) { return replaced @@ -234,6 +279,24 @@ func substituteInDocumentXML(body []byte, vars docforge.PlaceholderMap, missing return substituteAcrossRuns(replaced, vars, missing, wrap) } +// stripSectionMarkers removes any Composer section anchor ({{#section:KEY}} +// / {{/section:KEY}}) from the text nodes so a stray anchors-only +// carrier rendered through the merge path produces a clean document +// instead of literal "{{#section:…}}" junk. Markers are removed token-only +// (the enclosing run/paragraph survives, just emptied of the marker), which +// is safe because the generator emits each marker in its own paragraph. 
+func stripSectionMarkers(body []byte) []byte { + return wTextNodeRegex.ReplaceAllFunc(body, func(match []byte) []byte { + sub := wTextNodeRegex.FindSubmatch(match) + contents := xmlDecode(string(sub[2])) + if !sectionMarkerRegex.MatchString(contents) { + return match + } + stripped := sectionMarkerRegex.ReplaceAllString(contents, "") + return []byte(`` + xmlEncode(stripped) + ``) + }) +} + // wTextNodeRegex matches one contents element, capturing // the contents. var wTextNodeRegex = regexp.MustCompile(`]*)?>([^<]*)`)