Merge: t-paliad-349 docforge slice 1 — extract .docx engine to pkg/docforge/docx (m/paliad#157)

This commit is contained in:
mAi
2026-05-29 15:10:33 +02:00
10 changed files with 212 additions and 79 deletions

View File

@@ -0,0 +1,65 @@
package services
// Shims bridging the submission generator to the extracted docforge .docx
// adapter (pkg/docforge/docx). Slice 1 of the docforge train
// (t-paliad-349 / m/paliad#157) relocated the Markdown→OOXML walker, the
// placeholder substitution engine, and the .dotm→.docx converter into
// pkg/docforge/docx with no behaviour change. These type aliases and
// forwarders keep every existing caller in internal/services and
// internal/handlers compiling and behaving identically — the names,
// signatures, and semantics are unchanged; only the implementation moved.
//
// Later slices retire these shims as the submission services are
// refactored to call docforge directly through the neutral model and the
// VariableResolver interface.
import "mgit.msbls.de/m/paliad/pkg/docforge/docx"
// Type aliases that keep the historical internal/services names pointing
// at the types now declared in pkg/docforge/docx. Being aliases (not new
// named types), they are interchangeable with the docx originals.
type (
	// PlaceholderMap is the variable bag (dotted-key → substituted
	// value), built by SubmissionVarsService and consumed by the
	// renderer.
	PlaceholderMap = docx.PlaceholderMap

	// MissingPlaceholderFn translates an unbound placeholder key into
	// the in-document marker token.
	MissingPlaceholderFn = docx.MissingPlaceholderFn

	// SubmissionRenderer renders a .docx template by substituting
	// {{placeholder}} tokens. Stateless; safe for concurrent use.
	SubmissionRenderer = docx.SubmissionRenderer

	// HyperlinkAllocator hands the Markdown walker a rId for each
	// external URL it encounters in [label](url) inline links.
	HyperlinkAllocator = docx.HyperlinkAllocator
)
// NewSubmissionRenderer constructs the renderer by delegating to
// docx.NewSubmissionRenderer.
func NewSubmissionRenderer() *SubmissionRenderer {
	renderer := docx.NewSubmissionRenderer()
	return renderer
}
// DefaultMissingMarker returns the standard missing-value marker for the
// given UI language ("[KEIN WERT: <key>]" / "[NO VALUE: <key>]").
// It forwards lang unchanged to docx.DefaultMissingMarker.
func DefaultMissingMarker(lang string) MissingPlaceholderFn {
	marker := docx.DefaultMissingMarker(lang)
	return marker
}
// RenderMarkdownToOOXML renders Markdown source into OOXML paragraph
// elements using a single paragraph style. Both arguments are passed
// through untouched to docx.RenderMarkdownToOOXML.
func RenderMarkdownToOOXML(md, paragraphStyle string) string {
	ooxml := docx.RenderMarkdownToOOXML(md, paragraphStyle)
	return ooxml
}
// RenderMarkdownToOOXMLWithStyles is the full rich-prose entry point
// (headings, lists, blockquote, inline hyperlinks via the allocator).
// It forwards md, stylemap and links unchanged to
// docx.RenderMarkdownToOOXMLWithStyles.
func RenderMarkdownToOOXMLWithStyles(md string, stylemap map[string]string, links HyperlinkAllocator) string {
	ooxml := docx.RenderMarkdownToOOXMLWithStyles(md, stylemap, links)
	return ooxml
}
// ConvertDotmToDocx rewrites a .dotm/.docm/.dotx zip into a clean .docx
// zip. Idempotent on a zip that is already a plain .docx. The bytes and
// the error are exactly what docx.ConvertDotmToDocx returns.
func ConvertDotmToDocx(dotmBytes []byte) ([]byte, error) {
	docxBytes, err := docx.ConvertDotmToDocx(dotmBytes)
	return docxBytes, err
}
// SanitiseSubmissionFileName cleans a string for use inside a download
// filename (strips path separators / quotes, ASCII-folds DE umlauts).
// The work is done by docx.SanitiseSubmissionFileName.
func SanitiseSubmissionFileName(s string) string {
	cleaned := docx.SanitiseSubmissionFileName(s)
	return cleaned
}
// xmlAttrEscape forwards to docx.XMLAttrEscape. It lets
// submission_compose.go build its hyperlink-rels inserts with the
// walker's exact attribute escaping without importing the docx package
// itself. Retires when the composer splice folds into pkg/docforge/docx
// (slice 2).
func xmlAttrEscape(s string) string {
	escaped := docx.XMLAttrEscape(s)
	return escaped
}

View File

@@ -0,0 +1,81 @@
package services
// Pretty-printer tests for the variable-resolution layer (legalSourcePretty,
// ourSideDE/EN, patentNumberUPC). These live with submission_vars.go;
// they were relocated out of the docx engine test suite when the
// .docx renderer moved to pkg/docforge/docx (t-paliad-349 slice 1).
import "testing"
// TestLegalSourcePretty runs legalSourcePretty over a table of
// (source key, language) pairs and compares each result with the exact
// expected citation string. The last two rows pin that an unrecognised
// key and an empty key are expected to come back unchanged.
func TestLegalSourcePretty(t *testing.T) {
	type row struct {
		src, lang, want string
	}
	table := []row{
		{"DE.ZPO.276.1", "de", "§ 276 Abs. 1 ZPO"},
		{"DE.ZPO.276.1", "en", "Section 276(1) ZPO"},
		{"DE.ZPO.253", "de", "§ 253 ZPO"},
		{"DE.ZPO.253", "en", "Section 253 ZPO"},
		{"UPC.RoP.23.1", "de", "Regel 23.1 VerfO UPC"},
		{"UPC.RoP.23.1", "en", "Rule 23.1 RoP UPC"},
		{"UPC.RoP.198", "de", "Regel 198 VerfO UPC"},
		{"DE.PatG.83", "de", "§ 83 PatG"},
		{"EPC.123", "de", "Art. 123 EPÜ"},
		{"EPC.123", "en", "Art. 123 EPC"},
		{"FOO.BAR.123", "de", "FOO.BAR.123"},
		{"", "de", ""},
	}
	for i := range table {
		c := table[i]
		// Subtest name is "<src>/<lang>" so a failure identifies its row.
		name := c.src + "/" + c.lang
		t.Run(name, func(t *testing.T) {
			if got := legalSourcePretty(c.src, c.lang); got != c.want {
				t.Errorf("legalSourcePretty(%q, %q) = %q, want %q", c.src, c.lang, got, c.want)
			}
		})
	}
}
// TestOurSideTranslations feeds each party-side key to both ourSideDE
// and ourSideEN and checks the German and English labels. The empty key
// and an unrecognised key are both expected to yield the empty string
// in either language.
func TestOurSideTranslations(t *testing.T) {
	type row struct {
		in, wantDE, wantEN string
	}
	table := []row{
		{"claimant", "Klägerin", "Claimant"},
		{"defendant", "Beklagte", "Defendant"},
		{"court", "Gericht", "Court"},
		{"both", "Klägerin und Beklagte", "Claimant and Defendant"},
		{"", "", ""},
		{"unknown", "", ""},
	}
	for i := range table {
		c := table[i]
		t.Run(c.in, func(t *testing.T) {
			// Both languages are checked independently so a single run
			// reports every mismatch for the row.
			gotDE := ourSideDE(c.in)
			if gotDE != c.wantDE {
				t.Errorf("ourSideDE(%q) = %q, want %q", c.in, gotDE, c.wantDE)
			}
			gotEN := ourSideEN(c.in)
			if gotEN != c.wantEN {
				t.Errorf("ourSideEN(%q) = %q, want %q", c.in, gotEN, c.wantEN)
			}
		})
	}
}
// TestPatentNumberUPC checks patentNumberUPC against a table of patent
// numbers. Rows ending in a letter-plus-digit kind code (B1, A1) expect
// that code wrapped in parentheses, including when the input carries
// surrounding spaces; the remaining rows (no kind code, empty string,
// slash-separated WO number, three-character suffix "B12") expect the
// input back unchanged.
func TestPatentNumberUPC(t *testing.T) {
	type row struct {
		in, want string
	}
	table := []row{
		{"EP 1 234 567 B1", "EP 1 234 567 (B1)"},
		{"EP 4 056 049 A1", "EP 4 056 049 (A1)"},
		{"DE 10 2020 123 456 A1", "DE 10 2020 123 456 (A1)"},
		{"EP 1 234 567", "EP 1 234 567"},
		{" EP 1 234 567 B1 ", "EP 1 234 567 (B1)"},
		{"", ""},
		{"WO/2023/123456", "WO/2023/123456"},
		{"EP 1 234 567 B12", "EP 1 234 567 B12"},
	}
	for i := range table {
		c := table[i]
		t.Run(c.in, func(t *testing.T) {
			if got := patentNumberUPC(c.in); got != c.want {
				t.Errorf("patentNumberUPC(%q) = %q, want %q", c.in, got, c.want)
			}
		})
	}
}

24
pkg/docforge/doc.go Normal file
View File

@@ -0,0 +1,24 @@
// Package docforge is paliad's modular document-generator engine — the
// format-neutral core that turns templates + variables into rendered
// documents, with format-specific adapters living in sub-packages.
//
// The package is being extracted from the in-tree submission generator
// (internal/services/submission_*.go) per the PRD in
// docs/plans/prd-docforge-2026-05-29.md (t-paliad-349 / m/paliad#157).
// The extraction follows the same packaging discipline as
// pkg/litigationplanner: docforge owns its types and exposes interfaces
// for the stateful inputs (variable resolution, template storage); the
// consuming application (paliad) implements those interfaces against its
// own database, and a future second consumer reaches the engine over an
// HTTP veneer rather than importing it.
//
// Slice 1 (this commit) relocates the .docx adapter — the Markdown→OOXML
// walker, the placeholder substitution engine, and the .dotm→.docx
// converter — into pkg/docforge/docx with no behaviour change. paliad's
// internal/services package keeps thin type-alias + forwarder shims so
// the submission generator and its HTTP surface compile and behave
// identically. Later slices introduce the neutral document model,
// hoist the format-neutral placeholder grammar up to this root package,
// and add the VariableResolver interface, the TemplateStore, the
// authoring surface, and the pluggable Exporter.
package docforge

28
pkg/docforge/docx/doc.go Normal file
View File

@@ -0,0 +1,28 @@
// Package docx is docforge's .docx (OOXML) adapter — the first
// format adapter in the docforge engine (t-paliad-349 / m/paliad#157).
//
// It owns the in-house OOXML machinery extracted from paliad's submission
// generator in slice 1, with no behaviour change:
//
// - merge.go — the placeholder substitution renderer
// (SubmissionRenderer.Render / RenderHTML). Two-pass {{placeholder}}
// substitution (single-run, then cross-run merge for fragmented
// placeholders), plus the preview-HTML emitter that wraps substituted
// values in clickable <span class="draft-var" data-var="…"> markup.
// - markdown.go — the Markdown→OOXML walker (RenderMarkdownToOOXML*),
// including the b78a984 fix that preserves {{…}} placeholders verbatim
// through inline-span parsing (underscores in keys survive).
// - dotm.go — ConvertDotmToDocx: strips macros from a .dotm/.docm/
// .dotx and rewrites the content-types + rels to a clean .docx,
// passing every other part through bit-for-bit.
//
// Why no third-party docx library: lukasjarosch/go-docx treats sibling
// placeholders in one run ("{{a}} ./. {{b}}") as nested and refuses to
// replace either; patent submissions routinely have several placeholders
// per paragraph, so this in-house renderer is required. See merge.go.
//
// The placeholder grammar — \{\{\s*([A-Za-z][A-Za-z0-9_.]*)\s*\}\} — and
// the PlaceholderMap type currently live here with the renderer; a later
// slice hoists the format-neutral grammar up to the docforge root once
// the neutral document model and the VariableResolver interface land.
package docx

View File

@@ -1,4 +1,4 @@
package services
package docx
// Submission .dotm → .docx converter (t-paliad-230, "format-only" scope
// reduction of the original t-paliad-215 submission generator).

View File

@@ -1,4 +1,4 @@
package services
package docx
import (
"archive/zip"

View File

@@ -1,4 +1,4 @@
package services
package docx
// Markdown → OOXML walker for Composer section content (t-paliad-313
// Slice B, design doc §9.2).
@@ -492,6 +492,14 @@ func xmlTextEscape(s string) string {
return s
}
// XMLAttrEscape is the exported form of xmlAttrEscape. The paliad-side
// composer (submission_compose.go) calls it when building hyperlink
// relationship inserts, so it applies the same attribute escaping as the
// walker without needing access to the unexported helper. Slice 2 folds
// the composer's splice into this package, after which the wrapper
// retires.
func XMLAttrEscape(s string) string {
	escaped := xmlAttrEscape(s)
	return escaped
}
// xmlAttrEscape escapes for safe insertion into an attribute value
// (e.g. `<w:pStyle w:val="…"/>`).
func xmlAttrEscape(s string) string {

View File

@@ -1,4 +1,4 @@
package services
package docx
// Unit tests for the Composer's Markdown → OOXML walker (t-paliad-313
// Slice B). Pure function; no DB dependency.

View File

@@ -1,4 +1,4 @@
package services
package docx
// Submission template renderer — in-house engine for the submission
// draft editor (t-paliad-238, design doc

View File

@@ -1,4 +1,4 @@
package services
package docx
// Submission merge-engine tests — resurrected from the original
// t-paliad-215 Slice 1 (commit 8ea3509) + Slice 2 (commit 1765d5e).
@@ -190,79 +190,6 @@ func TestPlaceholderRegex_Boundaries(t *testing.T) {
}
}
// TestLegalSourcePretty is a table-driven check of legalSourcePretty:
// each row names a legal-source key, a language code, and the exact
// string the function must return for that pair. An unrecognised key
// and an empty key are expected to be returned as-is.
func TestLegalSourcePretty(t *testing.T) {
	rows := []struct {
		src, lang, want string
	}{
		{"DE.ZPO.276.1", "de", "§ 276 Abs. 1 ZPO"},
		{"DE.ZPO.276.1", "en", "Section 276(1) ZPO"},
		{"DE.ZPO.253", "de", "§ 253 ZPO"},
		{"DE.ZPO.253", "en", "Section 253 ZPO"},
		{"UPC.RoP.23.1", "de", "Regel 23.1 VerfO UPC"},
		{"UPC.RoP.23.1", "en", "Rule 23.1 RoP UPC"},
		{"UPC.RoP.198", "de", "Regel 198 VerfO UPC"},
		{"DE.PatG.83", "de", "§ 83 PatG"},
		{"EPC.123", "de", "Art. 123 EPÜ"},
		{"EPC.123", "en", "Art. 123 EPC"},
		{"FOO.BAR.123", "de", "FOO.BAR.123"},
		{"", "de", ""},
	}
	for _, row := range rows {
		row := row // per-iteration copy for the closure below
		t.Run(row.src+"/"+row.lang, func(t *testing.T) {
			got := legalSourcePretty(row.src, row.lang)
			if got == row.want {
				return
			}
			t.Errorf("legalSourcePretty(%q, %q) = %q, want %q", row.src, row.lang, got, row.want)
		})
	}
}
// TestOurSideTranslations verifies the German (ourSideDE) and English
// (ourSideEN) labels for each party-side key. Empty and unrecognised
// keys are expected to produce an empty label in both languages.
func TestOurSideTranslations(t *testing.T) {
	rows := []struct {
		in, wantDE, wantEN string
	}{
		{"claimant", "Klägerin", "Claimant"},
		{"defendant", "Beklagte", "Defendant"},
		{"court", "Gericht", "Court"},
		{"both", "Klägerin und Beklagte", "Claimant and Defendant"},
		{"", "", ""},
		{"unknown", "", ""},
	}
	for _, row := range rows {
		row := row // per-iteration copy for the closure below
		t.Run(row.in, func(t *testing.T) {
			// German first, then English; neither failure stops the other.
			de := ourSideDE(row.in)
			if de != row.wantDE {
				t.Errorf("ourSideDE(%q) = %q, want %q", row.in, de, row.wantDE)
			}
			en := ourSideEN(row.in)
			if en != row.wantEN {
				t.Errorf("ourSideEN(%q) = %q, want %q", row.in, en, row.wantEN)
			}
		})
	}
}
// TestPatentNumberUPC is a table-driven check of patentNumberUPC. Rows
// with a trailing letter-plus-digit kind code (B1, A1) expect the code
// in parentheses, also when the input has leading/trailing spaces. The
// other rows (no kind code, empty input, "WO/2023/123456", suffix
// "B12") expect the input to be returned unchanged.
func TestPatentNumberUPC(t *testing.T) {
	rows := []struct {
		in, want string
	}{
		{"EP 1 234 567 B1", "EP 1 234 567 (B1)"},
		{"EP 4 056 049 A1", "EP 4 056 049 (A1)"},
		{"DE 10 2020 123 456 A1", "DE 10 2020 123 456 (A1)"},
		{"EP 1 234 567", "EP 1 234 567"},
		{" EP 1 234 567 B1 ", "EP 1 234 567 (B1)"},
		{"", ""},
		{"WO/2023/123456", "WO/2023/123456"},
		{"EP 1 234 567 B12", "EP 1 234 567 B12"},
	}
	for _, row := range rows {
		row := row // per-iteration copy for the closure below
		t.Run(row.in, func(t *testing.T) {
			got := patentNumberUPC(row.in)
			if got == row.want {
				return
			}
			t.Errorf("patentNumberUPC(%q) = %q, want %q", row.in, got, row.want)
		})
	}
}
// TestRenderHTML_ExtractsParagraphsAndFormatting verifies the preview
// HTML emitter walks <w:p> / <w:r> / <w:t> correctly and carries
// bold/italic through to <strong>/<em>. Substituted placeholders are