Files
paliad/pkg/docforge/docx/merge_test.go
mAi 78a30a7ee0 refactor(docforge): slice 1 — extract .docx engine to pkg/docforge/docx (t-paliad-349)
Relocate the in-house OOXML machinery out of internal/services into the
first docforge adapter, with zero behaviour change:

  submission_merge.go  -> pkg/docforge/docx/merge.go     (placeholder
                          substitution renderer + preview-HTML emitter)
  submission_md.go     -> pkg/docforge/docx/markdown.go  (Markdown->OOXML
                          walker incl. the b78a984 underscore-fix)
  submission_render.go -> pkg/docforge/docx/dotm.go      (.dotm->.docx)
  + their _test.go files (git-tracked renames, 84-99% identical)

internal/services keeps thin type-alias + forwarder shims
(docforge_shims.go) so every caller in services/handlers/main compiles
and behaves identically: PlaceholderMap, MissingPlaceholderFn,
SubmissionRenderer, HyperlinkAllocator (aliases); NewSubmissionRenderer,
DefaultMissingMarker, RenderMarkdownToOOXML[WithStyles], ConvertDotmToDocx,
SanitiseSubmissionFileName (forwarders). docx.XMLAttrEscape is exported so
submission_compose.go's hyperlink-rels inserts reuse the walker's escaping.

Three mis-filed pretty-printer tests (legalSourcePretty, ourSideDE/EN,
patentNumberUPC) that exercise the vars layer move back to
internal/services/submission_vars_pretty_test.go.

Placeholder grammar + PlaceholderMap stay co-located with the renderer in
docx for now; slice 3 hoists the format-neutral grammar to the docforge
root with the VariableResolver interface.

Verification: go build ./... clean, go vet clean, full module test green
(the byte-exact OOXML golden tests in merge/compose/render pass unchanged
= behaviour preserved). gofmt drift on the moved files is pre-existing
(72/169 services files already drift; no gofmt gate).

m/paliad#157
2026-05-29 14:51:59 +02:00

315 lines
12 KiB
Go

package docx
// Submission merge-engine tests — resurrected from the original
// t-paliad-215 Slice 1 (commit 8ea3509) + Slice 2 (commit 1765d5e).
// Adapted: helper names suffixed with "Merge" so they don't collide
// with the convert tests in submission_render_test.go (minimalDOTM,
// unzipEntries) that test the format-only ConvertDotmToDocx path.
import (
"archive/zip"
"bytes"
"io"
"strings"
"testing"
)
// minimalMergeDOCX assembles a two-entry zip archive in memory: a
// word/document.xml part holding documentBody verbatim, followed by a
// [Content_Types].xml part. Any zip-writer error fails the calling test
// immediately. The finished archive bytes are returned.
func minimalMergeDOCX(t *testing.T, documentBody string) []byte {
	t.Helper()

	// Use a docx-compatible content type so the convert pre-pass treats
	// the input as already-clean (no .dotm rewrites needed).
	contentTypes := `<?xml version="1.0"?><Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">` +
		`<Override PartName="/word/document.xml" ContentType="` + docxMainContentType + `"/></Types>`

	// Parts are written in this order; label is the short name used in
	// failure messages.
	parts := []struct {
		name, label, content string
	}{
		{"word/document.xml", "document.xml", documentBody},
		{"[Content_Types].xml", "content types", contentTypes},
	}

	archive := new(bytes.Buffer)
	zw := zip.NewWriter(archive)
	for _, p := range parts {
		entry, err := zw.Create(p.name)
		if err != nil {
			t.Fatalf("create %s: %v", p.label, err)
		}
		if _, err := io.WriteString(entry, p.content); err != nil {
			t.Fatalf("write %s: %v", p.label, err)
		}
	}
	if err := zw.Close(); err != nil {
		t.Fatalf("close zip: %v", err)
	}
	return archive.Bytes()
}
// readMergeDocumentXML treats b as a zip archive and returns the content
// of its first word/document.xml entry as a string. The calling test is
// failed immediately when the archive cannot be opened, when the entry
// cannot be opened or read, or when no such entry exists.
func readMergeDocumentXML(t *testing.T, b []byte) string {
	t.Helper()

	archive, err := zip.NewReader(bytes.NewReader(b), int64(len(b)))
	if err != nil {
		t.Fatalf("open rendered zip: %v", err)
	}

	// Locate the document part; only the first match is used.
	var entry *zip.File
	for _, candidate := range archive.File {
		if candidate.Name == "word/document.xml" {
			entry = candidate
			break
		}
	}
	if entry == nil {
		t.Fatal("rendered .docx had no word/document.xml")
		return ""
	}

	rc, err := entry.Open()
	if err != nil {
		t.Fatalf("open document.xml: %v", err)
	}
	defer rc.Close()

	raw, err := io.ReadAll(rc)
	if err != nil {
		t.Fatalf("read document.xml: %v", err)
	}
	return string(raw)
}
// TestRender_SingleRunPlaceholder renders a template whose single run
// contains only {{firm.name}} and checks that the bound value shows up in
// document.xml with no "{{" marker left behind.
func TestRender_SingleRunPlaceholder(t *testing.T) {
	const template = `<w:document><w:body><w:p><w:r><w:t>{{firm.name}}</w:t></w:r></w:p></w:body></w:document>`

	docxBytes := minimalMergeDOCX(t, template)
	renderer := NewSubmissionRenderer()
	values := PlaceholderMap{"firm.name": "HLC"}

	rendered, err := renderer.Render(docxBytes, values, nil)
	if err != nil {
		t.Fatalf("render: %v", err)
	}

	xml := readMergeDocumentXML(t, rendered)
	if substituted := strings.Contains(xml, ">HLC<"); !substituted {
		t.Errorf("expected HLC in body, got %q", xml)
	}
	if leftover := strings.Contains(xml, "{{"); leftover {
		t.Errorf("unreplaced placeholder marker in body: %q", xml)
	}
}
// TestRender_MultiplePlaceholdersPerRun puts two placeholders inside one
// <w:t> run and checks that both bound values appear in the rendered
// document.xml and that no "{{" marker survives.
func TestRender_MultiplePlaceholdersPerRun(t *testing.T) {
	const template = `<w:document><w:body><w:p><w:r><w:t>{{parties.claimant.name}}, vertreten durch {{parties.claimant.representative}}</w:t></w:r></w:p></w:body></w:document>`

	docxBytes := minimalMergeDOCX(t, template)
	renderer := NewSubmissionRenderer()
	values := PlaceholderMap{
		"parties.claimant.name":           "Acme Inc.",
		"parties.claimant.representative": "Kanzlei Müller",
	}

	rendered, err := renderer.Render(docxBytes, values, nil)
	if err != nil {
		t.Fatalf("render: %v", err)
	}

	xml := readMergeDocumentXML(t, rendered)
	bothPresent := strings.Contains(xml, "Acme Inc.") && strings.Contains(xml, "Kanzlei Müller")
	if !bothPresent {
		t.Errorf("expected both party values, got %q", xml)
	}
	if leftover := strings.Contains(xml, "{{"); leftover {
		t.Errorf("unreplaced placeholder marker in body: %q", xml)
	}
}
// TestRender_MissingMarker renders a template with an unbound placeholder
// through DefaultMissingMarker("de") and then DefaultMissingMarker("en"),
// expecting the "[KEIN WERT: …]" and "[NO VALUE: …]" markers respectively
// in document.xml. The same renderer and template are used for both.
func TestRender_MissingMarker(t *testing.T) {
	const template = `<w:document><w:body><w:p><w:r><w:t>{{project.case_number}}</w:t></w:r></w:p></w:body></w:document>`

	docxBytes := minimalMergeDOCX(t, template)
	renderer := NewSubmissionRenderer()

	// Cases run in order; renderLabel and markerLabel only feed the
	// failure messages.
	cases := []struct {
		lang        string
		renderLabel string
		markerLabel string
		want        string
	}{
		{"de", "render", "KEIN WERT", "[KEIN WERT: project.case_number]"},
		{"en", "render en", "NO VALUE", "[NO VALUE: project.case_number]"},
	}
	for _, c := range cases {
		rendered, err := renderer.Render(docxBytes, PlaceholderMap{}, DefaultMissingMarker(c.lang))
		if err != nil {
			t.Fatalf("%s: %v", c.renderLabel, err)
		}
		xml := readMergeDocumentXML(t, rendered)
		if !strings.Contains(xml, c.want) {
			t.Errorf("expected %s marker, got %q", c.markerLabel, xml)
		}
	}
}
// TestRender_CrossRunPlaceholder splits one placeholder across three
// consecutive <w:r> runs ("{{", "project", ".case_number}}") and checks
// that the bound value still appears in document.xml and that no "{{"
// remains.
func TestRender_CrossRunPlaceholder(t *testing.T) {
	const template = `<w:document><w:body><w:p><w:r><w:t>Hello {{</w:t></w:r><w:r><w:t>project</w:t></w:r><w:r><w:t>.case_number}}!</w:t></w:r></w:p></w:body></w:document>`

	docxBytes := minimalMergeDOCX(t, template)
	renderer := NewSubmissionRenderer()
	values := PlaceholderMap{"project.case_number": "7 O 1234/26"}

	rendered, err := renderer.Render(docxBytes, values, nil)
	if err != nil {
		t.Fatalf("render: %v", err)
	}

	xml := readMergeDocumentXML(t, rendered)
	if merged := strings.Contains(xml, "7 O 1234/26"); !merged {
		t.Errorf("expected case number after cross-run merge, got %q", xml)
	}
	if orphan := strings.Contains(xml, "{{"); orphan {
		t.Errorf("orphan placeholder marker remained: %q", xml)
	}
}
// TestRender_XMLEscaping binds a value containing &, <, > and double
// quotes and expects the rendered document.xml to carry it with those
// characters replaced by &amp;, &lt;, &gt; and &quot;.
func TestRender_XMLEscaping(t *testing.T) {
	const (
		template = `<w:document><w:body><w:p><w:r><w:t>{{user.display_name}}</w:t></w:r></w:p></w:body></w:document>`
		rawValue = `Müller & Söhne <GmbH> "Special"`
		escaped  = "Müller &amp; Söhne &lt;GmbH&gt; &quot;Special&quot;"
	)

	docxBytes := minimalMergeDOCX(t, template)
	renderer := NewSubmissionRenderer()
	values := PlaceholderMap{"user.display_name": rawValue}

	rendered, err := renderer.Render(docxBytes, values, nil)
	if err != nil {
		t.Fatalf("render: %v", err)
	}

	xml := readMergeDocumentXML(t, rendered)
	if !strings.Contains(xml, escaped) {
		t.Errorf("expected escaped value, got %q", xml)
	}
}
// TestPlaceholderRegex_Boundaries runs placeholderRegex.FindAllString over
// a table of inputs and compares the matches, in order, with the expected
// list. Each input runs as its own subtest named after the input; a
// differing match count aborts that subtest before the element-wise
// comparison.
func TestPlaceholderRegex_Boundaries(t *testing.T) {
	type boundaryCase struct {
		input string
		want  []string
	}
	cases := []boundaryCase{
		{input: "plain text", want: nil},
		{input: "{{foo}}", want: []string{"{{foo}}"}},
		{input: "{{ foo }}", want: []string{"{{ foo }}"}},
		{input: "{{foo.bar}}", want: []string{"{{foo.bar}}"}},
		{input: "{{ foo.bar_baz }}", want: []string{"{{ foo.bar_baz }}"}},
		{input: "{{1bad}}", want: nil},
		{input: "{{ foo }} and {{ bar }}", want: []string{"{{ foo }}", "{{ bar }}"}},
	}

	for _, c := range cases {
		t.Run(c.input, func(t *testing.T) {
			found := placeholderRegex.FindAllString(c.input, -1)
			if len(found) != len(c.want) {
				t.Fatalf("got %d matches, want %d (in=%q)", len(found), len(c.want), c.input)
			}
			for idx, match := range found {
				if match == c.want[idx] {
					continue
				}
				t.Errorf("match %d: got %q, want %q", idx, match, c.want[idx])
			}
		})
	}
}
// TestRenderHTML_ExtractsParagraphsAndFormatting feeds RenderHTML a
// three-paragraph document (plain text with a placeholder, a bold run, an
// italic run) and expects the preview HTML to contain:
//   - the first paragraph as <p>…</p> with the substituted placeholder
//     wrapped in <span class="draft-var" data-var="…"> so the client can
//     make it clickable (t-paliad-261);
//   - the bold run as <strong>…</strong>;
//   - the italic run as <em>…</em>.
func TestRenderHTML_ExtractsParagraphsAndFormatting(t *testing.T) {
	const template = `<w:document><w:body>` +
		`<w:p><w:r><w:t>Hello {{firm.name}}</w:t></w:r></w:p>` +
		`<w:p><w:r><w:rPr><w:b/></w:rPr><w:t>Bold line</w:t></w:r></w:p>` +
		`<w:p><w:r><w:rPr><w:i/></w:rPr><w:t>Italic line</w:t></w:r></w:p>` +
		`</w:body></w:document>`

	docxBytes := minimalMergeDOCX(t, template)
	renderer := NewSubmissionRenderer()

	preview, err := renderer.RenderHTML(docxBytes, PlaceholderMap{"firm.name": "HLC"}, nil)
	if err != nil {
		t.Fatalf("render html: %v", err)
	}

	// Every expectation is checked even if an earlier one fails.
	expectations := []struct {
		fragment string
		what     string
	}{
		{`<p>Hello <span class="draft-var" data-var="firm.name">HLC</span></p>`, "merged paragraph with draft-var span"},
		{"<strong>Bold line</strong>", "bold span"},
		{"<em>Italic line</em>", "italic span"},
	}
	for _, e := range expectations {
		if !strings.Contains(preview, e.fragment) {
			t.Errorf("expected %s, got %q", e.what, preview)
		}
	}
}
// TestRenderHTML_EscapesContent checks that a placeholder value containing
// &, <, > and double quotes is HTML-escaped in the preview output, and
// that the escaped text sits inside the draft-var span wrapper.
func TestRenderHTML_EscapesContent(t *testing.T) {
	const (
		template = `<w:document><w:body><w:p><w:r><w:t>{{user.display_name}}</w:t></w:r></w:p></w:body></w:document>`
		rawValue = `M&S <Inc> "X"`
		wrapped  = `<span class="draft-var" data-var="user.display_name">M&amp;S &lt;Inc&gt; &quot;X&quot;</span>`
	)

	docxBytes := minimalMergeDOCX(t, template)
	renderer := NewSubmissionRenderer()
	values := PlaceholderMap{"user.display_name": rawValue}

	preview, err := renderer.RenderHTML(docxBytes, values, nil)
	if err != nil {
		t.Fatalf("render html: %v", err)
	}
	if !strings.Contains(preview, wrapped) {
		t.Errorf("expected escaped value inside draft-var span, got %q", preview)
	}
}
// TestRenderHTML_WrapsMissingMarker renders a template with an unbound
// placeholder and a nil missing-marker function, and expects the
// [KEIN WERT: …] marker to be wrapped in the draft-var span, so the user
// can click the marker in the preview and jump to the field.
func TestRenderHTML_WrapsMissingMarker(t *testing.T) {
	const (
		template = `<w:document><w:body><w:p><w:r><w:t>{{project.case_number}}</w:t></w:r></w:p></w:body></w:document>`
		wrapped  = `<span class="draft-var" data-var="project.case_number">[KEIN WERT: project.case_number]</span>`
	)

	docxBytes := minimalMergeDOCX(t, template)
	renderer := NewSubmissionRenderer()

	preview, err := renderer.RenderHTML(docxBytes, PlaceholderMap{}, nil)
	if err != nil {
		t.Fatalf("render html: %v", err)
	}
	if !strings.Contains(preview, wrapped) {
		t.Errorf("expected missing marker wrapped in draft-var span, got %q", preview)
	}
}
// TestRenderHTML_WrapsOverriddenValueSameAsResolved is the regression test
// for t-paliad-274 (reported on m/paliad#106 as "When filled, the link
// disappears"). A filled-in value must get the same
// <span class="draft-var"> wrapper in the preview HTML as an unfilled
// placeholder, so the preview→sidebar click-jump keeps working after the
// user types a value. The renderer cannot tell a value typed into the
// sidebar from one resolved via project / parties / deadline lookups:
// both arrive in the same PlaceholderMap, and both must be wrapped.
func TestRenderHTML_WrapsOverriddenValueSameAsResolved(t *testing.T) {
	const template = `<w:document><w:body>` +
		`<w:p><w:r><w:t>{{project.case_number}} / {{firm.name}}</w:t></w:r></w:p>` +
		`</w:body></w:document>`

	docxBytes := minimalMergeDOCX(t, template)
	renderer := NewSubmissionRenderer()

	values := PlaceholderMap{
		// Stands in for the override typed by the lawyer.
		"project.case_number": "UPC_CFI_42/2026",
		// Stands in for the always-resolved value from the firm bag.
		"firm.name": "HLC",
	}
	preview, err := renderer.RenderHTML(docxBytes, values, nil)
	if err != nil {
		t.Fatalf("render html: %v", err)
	}

	// Both spans are checked independently; a miss on one does not skip
	// the other.
	expectations := []struct {
		span    string
		failure string
	}{
		{
			span:    `<span class="draft-var" data-var="project.case_number">UPC_CFI_42/2026</span>`,
			failure: "expected overridden value wrapped in draft-var span (click-jump must persist after fill, t-paliad-274)",
		},
		{
			span:    `<span class="draft-var" data-var="firm.name">HLC</span>`,
			failure: "expected resolved value still wrapped",
		},
	}
	for _, e := range expectations {
		if !strings.Contains(preview, e.span) {
			t.Errorf("%s, got %q", e.failure, preview)
		}
	}
}
// TestRender_DocxOutputUnchangedByPreviewWrap guards the hard rule from
// t-paliad-261: the .docx export path must NOT carry the preview-only
// draft-var sentinels or any draft-var span markup. It renders a
// one-placeholder template through Render (the .docx path) and asserts
// that document.xml holds the bare resolved value inside <w:t> and none
// of the forbidden fragments.
func TestRender_DocxOutputUnchangedByPreviewWrap(t *testing.T) {
	const template = `<w:document><w:body><w:p><w:r><w:t>{{firm.name}}</w:t></w:r></w:p></w:body></w:document>`

	docxBytes := minimalMergeDOCX(t, template)
	renderer := NewSubmissionRenderer()

	rendered, err := renderer.Render(docxBytes, PlaceholderMap{"firm.name": "HLC"}, nil)
	if err != nil {
		t.Fatalf("render docx: %v", err)
	}

	xml := readMergeDocumentXML(t, rendered)
	if !strings.Contains(xml, `<w:t>HLC</w:t>`) {
		t.Errorf("expected raw resolved value in .docx, got %q", xml)
	}

	// PUA sentinels and any span markup must NOT appear in the .docx.
	banned := []string{"draft-var", "data-var", previewVarBegin, previewVarMid, previewVarEnd}
	for _, fragment := range banned {
		if !strings.Contains(xml, fragment) {
			continue
		}
		t.Errorf("docx output unexpectedly contains %q: %q", fragment, xml)
	}
}