Pure-Go {{path.dot.notation}} placeholder engine + unit tests
(t-paliad-215, design docs/design-submission-generator-2026-05-19.md
§6). Chosen over github.com/lukasjarosch/go-docx because that library
treats sibling placeholders inside one <w:t> run as nested and
refuses to replace them — patent submissions routinely carry multiple
placeholders per paragraph (party blocks especially), so the library
is a non-starter.
Two-pass strategy preserves run-level formatting on the common path:
1. Pass 1: regex replace inside each <w:t>…</w:t> independently —
no format loss for the 99% case where placeholders are intact.
2. Pass 2: paragraph-level merge for paragraphs that still contain
orphan "{{" or "}}" markers (Word fragmented the placeholder
across runs).
Missing placeholders render [KEIN WERT: <key>] / [NO VALUE: <key>]
markers so the lawyer sees the gap in Word rather than getting a 400.
Tests cover: single-run, multi-per-run (the go-docx failure mode),
cross-run merge, missing-marker (DE+EN), XML escaping of special
chars, non-document zip entries preserved, placeholder regex
grammar.
355 lines · 11 KiB · Go
package services
|
|
|
|
import (
|
|
"archive/zip"
|
|
"bytes"
|
|
"io"
|
|
"strings"
|
|
"testing"
|
|
)
|
|
|
|
// minimalDOCX assembles an in-memory .docx-shaped zip archive for tests.
// The archive holds word/document.xml with documentBody as its content,
// followed by a stub [Content_Types].xml so the result resembles a real
// package. Any zip error aborts the calling test via t.Fatalf.
func minimalDOCX(t *testing.T, documentBody string) []byte {
	t.Helper()

	// Entries are written in this order; the format strings are the exact
	// failure messages reported when creating or writing each entry.
	entries := []struct {
		name      string
		content   string
		createFmt string
		writeFmt  string
	}{
		{
			name:      "word/document.xml",
			content:   documentBody,
			createFmt: "create document.xml: %v",
			writeFmt:  "write document.xml: %v",
		},
		{
			// Stub content types part: not needed by the renderer under
			// test, but keeps the archive shape honest.
			name:      "[Content_Types].xml",
			content:   `<?xml version="1.0"?><Types/>`,
			createFmt: "create content types: %v",
			writeFmt:  "write content types: %v",
		},
	}

	archive := new(bytes.Buffer)
	zw := zip.NewWriter(archive)
	for _, e := range entries {
		part, err := zw.Create(e.name)
		if err != nil {
			t.Fatalf(e.createFmt, err)
		}
		if _, err := io.WriteString(part, e.content); err != nil {
			t.Fatalf(e.writeFmt, err)
		}
	}
	if err := zw.Close(); err != nil {
		t.Fatalf("close zip: %v", err)
	}
	return archive.Bytes()
}
|
|
|
|
// readDocumentXML returns the content of the first word/document.xml
// entry found in the .docx archive held in b. Failing to open the
// archive, find the entry, open it, or read it aborts the calling test.
func readDocumentXML(t *testing.T, b []byte) string {
	t.Helper()

	archive, err := zip.NewReader(bytes.NewReader(b), int64(len(b)))
	if err != nil {
		t.Fatalf("open rendered zip: %v", err)
	}

	// Pick the first entry carrying the main document part's name.
	var entry *zip.File
	for _, candidate := range archive.File {
		if candidate.Name == "word/document.xml" {
			entry = candidate
			break
		}
	}
	if entry == nil {
		t.Fatal("rendered .docx had no word/document.xml")
		return ""
	}

	src, err := entry.Open()
	if err != nil {
		t.Fatalf("open document.xml: %v", err)
	}
	defer src.Close()

	content, err := io.ReadAll(src)
	if err != nil {
		t.Fatalf("read document.xml: %v", err)
	}
	return string(content)
}
|
|
|
|
// TestRender_SingleRunPlaceholder covers the 99% case: a placeholder
|
|
// that sits inside a single <w:t> text node.
|
|
func TestRender_SingleRunPlaceholder(t *testing.T) {
|
|
doc := `<w:document><w:body><w:p><w:r><w:t>{{firm.name}}</w:t></w:r></w:p></w:body></w:document>`
|
|
tmpl := minimalDOCX(t, doc)
|
|
r := NewSubmissionRenderer()
|
|
out, err := r.Render(tmpl, PlaceholderMap{"firm.name": "HLC"}, nil)
|
|
if err != nil {
|
|
t.Fatalf("render: %v", err)
|
|
}
|
|
body := readDocumentXML(t, out)
|
|
if !strings.Contains(body, ">HLC<") {
|
|
t.Errorf("expected HLC in body, got %q", body)
|
|
}
|
|
if strings.Contains(body, "{{") {
|
|
t.Errorf("unreplaced placeholder marker in body: %q", body)
|
|
}
|
|
}
|
|
|
|
// TestRender_MultiplePlaceholdersPerRun is the case go-docx fails on
|
|
// — sibling placeholders inside the same <w:t> run. The in-house
|
|
// renderer must handle them.
|
|
func TestRender_MultiplePlaceholdersPerRun(t *testing.T) {
|
|
doc := `<w:document><w:body><w:p><w:r><w:t>{{parties.claimant.name}}, vertreten durch {{parties.claimant.representative}}</w:t></w:r></w:p></w:body></w:document>`
|
|
tmpl := minimalDOCX(t, doc)
|
|
r := NewSubmissionRenderer()
|
|
out, err := r.Render(tmpl, PlaceholderMap{
|
|
"parties.claimant.name": "Acme Inc.",
|
|
"parties.claimant.representative": "Kanzlei Müller",
|
|
}, nil)
|
|
if err != nil {
|
|
t.Fatalf("render: %v", err)
|
|
}
|
|
body := readDocumentXML(t, out)
|
|
if !strings.Contains(body, "Acme Inc.") || !strings.Contains(body, "Kanzlei Müller") {
|
|
t.Errorf("expected both party values, got %q", body)
|
|
}
|
|
if strings.Contains(body, "{{") {
|
|
t.Errorf("unreplaced placeholder marker in body: %q", body)
|
|
}
|
|
}
|
|
|
|
// TestRender_MissingMarker confirms unbound placeholders render the
|
|
// missing-value marker instead of failing the request.
|
|
func TestRender_MissingMarker(t *testing.T) {
|
|
doc := `<w:document><w:body><w:p><w:r><w:t>{{project.case_number}}</w:t></w:r></w:p></w:body></w:document>`
|
|
tmpl := minimalDOCX(t, doc)
|
|
r := NewSubmissionRenderer()
|
|
out, err := r.Render(tmpl, PlaceholderMap{}, DefaultMissingMarker("de"))
|
|
if err != nil {
|
|
t.Fatalf("render: %v", err)
|
|
}
|
|
body := readDocumentXML(t, out)
|
|
if !strings.Contains(body, "[KEIN WERT: project.case_number]") {
|
|
t.Errorf("expected KEIN WERT marker, got %q", body)
|
|
}
|
|
outEN, err := r.Render(tmpl, PlaceholderMap{}, DefaultMissingMarker("en"))
|
|
if err != nil {
|
|
t.Fatalf("render en: %v", err)
|
|
}
|
|
bodyEN := readDocumentXML(t, outEN)
|
|
if !strings.Contains(bodyEN, "[NO VALUE: project.case_number]") {
|
|
t.Errorf("expected NO VALUE marker, got %q", bodyEN)
|
|
}
|
|
}
|
|
|
|
// TestRender_CrossRunPlaceholder simulates Word fragmenting a
|
|
// placeholder across runs (autocorrect or post-edit run-split).
|
|
// Pass 2 must catch it.
|
|
func TestRender_CrossRunPlaceholder(t *testing.T) {
|
|
doc := `<w:document><w:body><w:p><w:r><w:t>Hello {{</w:t></w:r><w:r><w:t>project</w:t></w:r><w:r><w:t>.case_number}}!</w:t></w:r></w:p></w:body></w:document>`
|
|
tmpl := minimalDOCX(t, doc)
|
|
r := NewSubmissionRenderer()
|
|
out, err := r.Render(tmpl, PlaceholderMap{"project.case_number": "7 O 1234/26"}, nil)
|
|
if err != nil {
|
|
t.Fatalf("render: %v", err)
|
|
}
|
|
body := readDocumentXML(t, out)
|
|
if !strings.Contains(body, "7 O 1234/26") {
|
|
t.Errorf("expected case number after cross-run merge, got %q", body)
|
|
}
|
|
if strings.Contains(body, "{{") {
|
|
t.Errorf("orphan placeholder marker remained: %q", body)
|
|
}
|
|
}
|
|
|
|
// TestRender_XMLEscaping verifies special characters in placeholder
|
|
// values are escaped so they don't corrupt the document XML.
|
|
func TestRender_XMLEscaping(t *testing.T) {
|
|
doc := `<w:document><w:body><w:p><w:r><w:t>{{user.display_name}}</w:t></w:r></w:p></w:body></w:document>`
|
|
tmpl := minimalDOCX(t, doc)
|
|
r := NewSubmissionRenderer()
|
|
out, err := r.Render(tmpl, PlaceholderMap{
|
|
"user.display_name": `Müller & Söhne <GmbH> "Special"`,
|
|
}, nil)
|
|
if err != nil {
|
|
t.Fatalf("render: %v", err)
|
|
}
|
|
body := readDocumentXML(t, out)
|
|
if !strings.Contains(body, "Müller & Söhne <GmbH> "Special"") {
|
|
t.Errorf("expected escaped value, got %q", body)
|
|
}
|
|
}
|
|
|
|
// TestRender_PreservesNonWordEntries leaves the rest of the .docx
|
|
// untouched so any styles / theme / settings parts come through bit-
|
|
// for-bit.
|
|
func TestRender_PreservesNonWordEntries(t *testing.T) {
|
|
doc := `<w:document><w:body><w:p><w:r><w:t>{{firm.name}}</w:t></w:r></w:p></w:body></w:document>`
|
|
tmpl := minimalDOCX(t, doc)
|
|
r := NewSubmissionRenderer()
|
|
out, err := r.Render(tmpl, PlaceholderMap{"firm.name": "HLC"}, nil)
|
|
if err != nil {
|
|
t.Fatalf("render: %v", err)
|
|
}
|
|
zr, err := zip.NewReader(bytes.NewReader(out), int64(len(out)))
|
|
if err != nil {
|
|
t.Fatalf("open rendered: %v", err)
|
|
}
|
|
var sawTypes bool
|
|
for _, f := range zr.File {
|
|
if f.Name == "[Content_Types].xml" {
|
|
sawTypes = true
|
|
}
|
|
}
|
|
if !sawTypes {
|
|
t.Error("rendered .docx lost [Content_Types].xml")
|
|
}
|
|
}
|
|
|
|
// TestPlaceholderRegex_Boundaries pins the placeholder grammar.
|
|
func TestPlaceholderRegex_Boundaries(t *testing.T) {
|
|
tests := []struct {
|
|
in string
|
|
matches []string
|
|
}{
|
|
{"plain text", nil},
|
|
{"{{foo}}", []string{"{{foo}}"}},
|
|
{"{{ foo }}", []string{"{{ foo }}"}},
|
|
{"{{foo.bar}}", []string{"{{foo.bar}}"}},
|
|
{"{{ foo.bar_baz }}", []string{"{{ foo.bar_baz }}"}},
|
|
{"{{1bad}}", nil}, // must start with a letter
|
|
{"{{ foo }} and {{ bar }}", []string{"{{ foo }}", "{{ bar }}"}},
|
|
}
|
|
for _, tc := range tests {
|
|
t.Run(tc.in, func(t *testing.T) {
|
|
got := placeholderRegex.FindAllString(tc.in, -1)
|
|
if len(got) != len(tc.matches) {
|
|
t.Fatalf("got %d matches, want %d (in=%q)", len(got), len(tc.matches), tc.in)
|
|
}
|
|
for i := range got {
|
|
if got[i] != tc.matches[i] {
|
|
t.Errorf("match %d: got %q, want %q", i, got[i], tc.matches[i])
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestFamilyOf covers the proceeding-family extraction used by the
|
|
// template registry's fallback chain.
|
|
func TestFamilyOf(t *testing.T) {
|
|
tests := map[string]string{
|
|
"de.inf.lg.erwidg": "de.inf.lg",
|
|
"upc.inf.cfi.soc": "upc.inf.cfi",
|
|
"dpma.opp.dpma": "", // only three segments → no family
|
|
"de.inf.lg": "",
|
|
"": "",
|
|
}
|
|
for in, want := range tests {
|
|
t.Run(in, func(t *testing.T) {
|
|
got := familyOf(in)
|
|
if got != want {
|
|
t.Errorf("familyOf(%q) = %q, want %q", in, got, want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestLegalSourcePretty covers the prefix table.
|
|
func TestLegalSourcePretty(t *testing.T) {
|
|
tests := []struct {
|
|
src, lang, want string
|
|
}{
|
|
{"DE.ZPO.276.1", "de", "§ 276 Abs. 1 ZPO"},
|
|
{"DE.ZPO.276.1", "en", "Section 276(1) ZPO"},
|
|
{"DE.ZPO.253", "de", "§ 253 ZPO"},
|
|
{"DE.ZPO.253", "en", "Section 253 ZPO"},
|
|
{"UPC.RoP.23.1", "de", "Regel 23.1 VerfO UPC"},
|
|
{"UPC.RoP.23.1", "en", "Rule 23.1 RoP UPC"},
|
|
{"UPC.RoP.198", "de", "Regel 198 VerfO UPC"},
|
|
{"DE.PatG.83", "de", "§ 83 PatG"},
|
|
{"EPC.123", "de", "Art. 123 EPÜ"},
|
|
{"EPC.123", "en", "Art. 123 EPC"},
|
|
// Unknown prefix → pass-through unchanged.
|
|
{"FOO.BAR.123", "de", "FOO.BAR.123"},
|
|
{"", "de", ""},
|
|
}
|
|
for _, tc := range tests {
|
|
t.Run(tc.src+"/"+tc.lang, func(t *testing.T) {
|
|
got := legalSourcePretty(tc.src, tc.lang)
|
|
if got != tc.want {
|
|
t.Errorf("legalSourcePretty(%q, %q) = %q, want %q", tc.src, tc.lang, got, tc.want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestOurSideTranslations pins the our_side enum → DE/EN prose
|
|
// mapping used by addProjectVars.
|
|
func TestOurSideTranslations(t *testing.T) {
|
|
cases := []struct {
|
|
in, wantDE, wantEN string
|
|
}{
|
|
{"claimant", "Klägerin", "Claimant"},
|
|
{"defendant", "Beklagte", "Defendant"},
|
|
{"court", "Gericht", "Court"},
|
|
{"both", "Klägerin und Beklagte", "Claimant and Defendant"},
|
|
{"", "", ""},
|
|
{"unknown", "", ""},
|
|
}
|
|
for _, tc := range cases {
|
|
t.Run(tc.in, func(t *testing.T) {
|
|
if got := ourSideDE(tc.in); got != tc.wantDE {
|
|
t.Errorf("ourSideDE(%q) = %q, want %q", tc.in, got, tc.wantDE)
|
|
}
|
|
if got := ourSideEN(tc.in); got != tc.wantEN {
|
|
t.Errorf("ourSideEN(%q) = %q, want %q", tc.in, got, tc.wantEN)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestTemplateRegistry_Candidates verifies the fallback-chain order
|
|
// matches the m-locked Q4 decision (firm → base/code → base/family →
|
|
// skeleton).
|
|
func TestTemplateRegistry_Candidates(t *testing.T) {
|
|
r := NewTemplateRegistry("", "HLC")
|
|
got := r.candidates("de.inf.lg.erwidg")
|
|
want := []string{
|
|
"templates/HLC/de.inf.lg.erwidg.docx",
|
|
"templates/_base/de.inf.lg.erwidg.docx",
|
|
"templates/_base/de.inf.lg.docx",
|
|
"templates/_base/_skeleton.docx",
|
|
}
|
|
if len(got) != len(want) {
|
|
t.Fatalf("candidates = %v, want %v", got, want)
|
|
}
|
|
for i := range got {
|
|
if got[i] != want[i] {
|
|
t.Errorf("candidate[%d] = %q, want %q", i, got[i], want[i])
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestTemplateRegistry_Candidates_NoFamily covers submission codes
|
|
// without a family suffix (only three dot-segments).
|
|
func TestTemplateRegistry_Candidates_NoFamily(t *testing.T) {
|
|
r := NewTemplateRegistry("", "HLC")
|
|
got := r.candidates("dpma.opp.dpma")
|
|
want := []string{
|
|
"templates/HLC/dpma.opp.dpma.docx",
|
|
"templates/_base/dpma.opp.dpma.docx",
|
|
"templates/_base/_skeleton.docx",
|
|
}
|
|
if len(got) != len(want) {
|
|
t.Fatalf("candidates = %v, want %v", got, want)
|
|
}
|
|
for i := range got {
|
|
if got[i] != want[i] {
|
|
t.Errorf("candidate[%d] = %q, want %q", i, got[i], want[i])
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestTemplateRegistry_Tiers labels each candidate slot. Must stay
|
|
// 1:1 with candidates().
|
|
func TestTemplateRegistry_Tiers(t *testing.T) {
|
|
r := NewTemplateRegistry("", "HLC")
|
|
codes := []string{"de.inf.lg.erwidg", "dpma.opp.dpma"}
|
|
for _, code := range codes {
|
|
c := r.candidates(code)
|
|
ts := r.tiers(code)
|
|
if len(c) != len(ts) {
|
|
t.Fatalf("candidate/tier mismatch for %q: %d vs %d", code, len(c), len(ts))
|
|
}
|
|
}
|
|
}
|