package services // Submission template renderer — in-house engine for the submission // generator (t-paliad-215, design doc // docs/design-submission-generator-2026-05-19.md §6). // // Design choice — why not lukasjarosch/go-docx: // The library's "nested placeholder" guard treats sibling placeholders // inside the same run (e.g. "{{a}} ./. {{b}}") as nested and // refuses to replace either. Patent submissions routinely have multiple // placeholders per paragraph (party blocks especially), so the library // is a non-starter without a custom fork. The in-house renderer below // is ~150 LoC and handles both the single-run common case and the // cross-run case (where Word may split a placeholder across runs after // editing). // // Placeholder grammar: {{[A-Za-z][A-Za-z0-9_.]*}} with optional // whitespace inside braces ({{ project.case_number }} ≡ // {{project.case_number}}). // // Missing-value behaviour: when a placeholder has no binding in the // PlaceholderMap, the renderer emits a marker token so the lawyer sees // the gap in Word rather than failing the request. See §6.3 of the // design doc. import ( "archive/zip" "bytes" "fmt" "io" "regexp" "strings" ) // PlaceholderMap is the variable bag built by SubmissionVarsService. // Keys are dotted paths without braces (e.g. "project.case_number"). // Values are the substituted text — already locale-aware, pretty- // printed, and sanitised by the caller. type PlaceholderMap map[string]string // MissingPlaceholderFn translates an unbound placeholder key into the // in-document marker token. The default in DefaultMissingMarker is // "[KEIN WERT: ]" / "[NO VALUE: ]" depending on lang. type MissingPlaceholderFn func(key string) string // DefaultMissingMarker returns the standard missing-value marker for // the given UI language. func DefaultMissingMarker(lang string) MissingPlaceholderFn { prefix := "KEIN WERT" if strings.EqualFold(lang, "en") { prefix = "NO VALUE" } return func(key string) string { return "[" + prefix + ": " + key + "]" } } // placeholderRegex matches a single placeholder. The capture group // extracts the key name without braces or surrounding whitespace. // // Restricted to [A-Za-z][A-Za-z0-9_.]* so that stray "{{" sequences in // legal prose (extremely rare in DE/EN court briefs but possible) // don't get mistaken for placeholders. A genuine placeholder always // starts with an ASCII letter. var placeholderRegex = regexp.MustCompile(`\{\{\s*([A-Za-z][A-Za-z0-9_.]*)\s*\}\}`) // SubmissionRenderer renders a .docx template into a .docx output by // substituting {{placeholder}} tokens with values from a PlaceholderMap. // Stateless; safe for concurrent use. type SubmissionRenderer struct{} // NewSubmissionRenderer constructs the renderer. func NewSubmissionRenderer() *SubmissionRenderer { return &SubmissionRenderer{} } // Render reads the .docx template at templateBytes, substitutes every // placeholder from vars (or emits the missing-marker token), and writes // the result to the returned byte slice. Unknown placeholders never // fail the render — the lawyer sees the marker in Word and fixes it. func (r *SubmissionRenderer) Render(templateBytes []byte, vars PlaceholderMap, missing MissingPlaceholderFn) ([]byte, error) { if missing == nil { missing = DefaultMissingMarker("de") } zr, err := zip.NewReader(bytes.NewReader(templateBytes), int64(len(templateBytes))) if err != nil { return nil, fmt.Errorf("submission template: open zip: %w", err) } var out bytes.Buffer zw := zip.NewWriter(&out) defer zw.Close() for _, entry := range zr.File { body, err := readZipEntry(entry) if err != nil { return nil, fmt.Errorf("submission template: read %s: %w", entry.Name, err) } if isWordXMLEntry(entry.Name) { body = substituteInDocumentXML(body, vars, missing) } w, err := zw.CreateHeader(&zip.FileHeader{ Name: entry.Name, Method: entry.Method, Modified: entry.Modified, }) if err != nil { return nil, fmt.Errorf("submission template: write header %s: %w", entry.Name, err) } if _, err := w.Write(body); err != nil { return nil, fmt.Errorf("submission template: write %s: %w", entry.Name, err) } } if err := zw.Close(); err != nil { return nil, fmt.Errorf("submission template: finalise zip: %w", err) } return out.Bytes(), nil } // isWordXMLEntry returns true for the .docx parts that contain // substitutable text. We touch document.xml plus header*.xml and // footer*.xml (templates may put firm letterhead in a header) but // skip styles, theme, settings, comments, footnotes — none of which // should carry merge placeholders in a well-formed template. func isWordXMLEntry(name string) bool { switch { case name == "word/document.xml": return true case strings.HasPrefix(name, "word/header") && strings.HasSuffix(name, ".xml"): return true case strings.HasPrefix(name, "word/footer") && strings.HasSuffix(name, ".xml"): return true } return false } // readZipEntry slurps a zip entry's bytes. func readZipEntry(f *zip.File) ([]byte, error) { rc, err := f.Open() if err != nil { return nil, err } defer rc.Close() return io.ReadAll(rc) } // substituteInDocumentXML walks document XML and replaces every // {{placeholder}} occurrence inside text nodes. Handles both // single-run placeholders (the common case for freshly authored // templates) and cross-run placeholders (where Word's autocorrect or // manual editing has split a placeholder across runs). // // Two-pass strategy: // // 1. Pass 1: replace placeholders that fit entirely within one // . This is the 99% case and preserves all run-level // formatting (bold, italic, font runs). // 2. Pass 2: for paragraphs that still contain orphan "{{" or "}}" // markers after pass 1, merge the text of every inside the // paragraph, run the replacement on the merged text, and rewrite // the paragraph's runs as a single using // the formatting properties of the first run. Loses intra-paragraph // formatting on the affected paragraph — but only on paragraphs // where Word genuinely fragmented a placeholder. func substituteInDocumentXML(body []byte, vars PlaceholderMap, missing MissingPlaceholderFn) []byte { replaced := substituteInTextNodes(body, vars, missing) if !needsCrossRunMerge(replaced) { return replaced } return substituteAcrossRuns(replaced, vars, missing) } // wTextNodeRegex matches one contents element, capturing // the contents. Attributes on (xml:space="preserve") are preserved // because the entire match is rewritten. var wTextNodeRegex = regexp.MustCompile(`]*)?>([^<]*)`) // substituteInTextNodes runs the placeholder replacement inside each // text node independently. Format-preserving for single-run // placeholders. func substituteInTextNodes(body []byte, vars PlaceholderMap, missing MissingPlaceholderFn) []byte { return wTextNodeRegex.ReplaceAllFunc(body, func(match []byte) []byte { sub := wTextNodeRegex.FindSubmatch(match) attrs := string(sub[1]) contents := xmlDecode(string(sub[2])) replaced := replacePlaceholders(contents, vars, missing) if replaced == contents { return match } // xml:space="preserve" stays attached whenever the original // content had leading/trailing whitespace; ensure it's still // declared after replacement to avoid Word collapsing spaces. if !strings.Contains(attrs, "xml:space") && (strings.HasPrefix(replaced, " ") || strings.HasSuffix(replaced, " ")) { attrs += ` xml:space="preserve"` } return []byte(`` + xmlEncode(replaced) + ``) }) } // needsCrossRunMerge returns true when the body still contains an // unmatched "{{" or "}}" after pass 1 — a sign that Word fragmented // the placeholder across runs and pass 1 couldn't touch it. func needsCrossRunMerge(body []byte) bool { // Cheap heuristic: count "{{" vs "}}" inside nodes. If we have // either marker present in the text-node space, pass 2 will handle // it. (Inside attributes or other XML, the markers don't matter.) for _, m := range wTextNodeRegex.FindAllSubmatch(body, -1) { t := string(m[2]) if strings.Contains(t, "{{") || strings.Contains(t, "}}") { return true } } return false } // wParagraphRegex matches one paragraph block. Greedy // inner-content match is safe here because elements do not nest // in WordprocessingML — a paragraph is the leaf container for text. var wParagraphRegex = regexp.MustCompile(`(?s)]*>.*?`) // wRunPropsRegex pulls the first block from a // paragraph so we can reuse it as the formatting of the merged run. var wRunPropsRegex = regexp.MustCompile(`(?s).*?`) // wParagraphPropsRegex pulls the optional that sits // at the top of a paragraph (alignment, spacing, etc.). Preserved. var wParagraphPropsRegex = regexp.MustCompile(`(?s).*?`) // substituteAcrossRuns is pass 2: for any paragraph that still has a // split placeholder, concatenate every text node, run replacement, and // rewrite the paragraph as a single run using the first run's // properties. Paragraphs without orphan markers are left untouched so // run-level formatting survives wherever pass 1 already resolved the // placeholders. func substituteAcrossRuns(body []byte, vars PlaceholderMap, missing MissingPlaceholderFn) []byte { return wParagraphRegex.ReplaceAllFunc(body, func(para []byte) []byte { textNodes := wTextNodeRegex.FindAllSubmatch(para, -1) if len(textNodes) == 0 { return para } var merged strings.Builder for _, m := range textNodes { merged.WriteString(xmlDecode(string(m[2]))) } original := merged.String() if !strings.Contains(original, "{{") { // No fragmented placeholder in this paragraph; leave it // alone so pass 1's run-level edits survive. return para } replaced := replacePlaceholders(original, vars, missing) if replaced == original { return para } // Preserve paragraph properties (alignment, spacing) and the // first run's properties (font, bold/italic). pPr := wParagraphPropsRegex.Find(para) rPr := wRunPropsRegex.Find(para) var rebuilt bytes.Buffer rebuilt.WriteString(``) if pPr != nil { rebuilt.Write(pPr) } rebuilt.WriteString(``) if rPr != nil { rebuilt.Write(rPr) } rebuilt.WriteString(``) rebuilt.WriteString(xmlEncode(replaced)) rebuilt.WriteString(``) return rebuilt.Bytes() }) } // replacePlaceholders performs the actual substitution on a plain // string. Unbound placeholders render the missing marker. func replacePlaceholders(s string, vars PlaceholderMap, missing MissingPlaceholderFn) string { return placeholderRegex.ReplaceAllStringFunc(s, func(match string) string { sub := placeholderRegex.FindStringSubmatch(match) if len(sub) < 2 { return match } key := sub[1] if value, ok := vars[key]; ok { return value } return missing(key) }) } // xmlDecode reverses the small set of escapes used in WordprocessingML // text content. We don't need a full XML parser — text nodes carry only // the standard five entities, and Word never emits numeric-character // references inside for printable content. func xmlDecode(s string) string { s = strings.ReplaceAll(s, "<", "<") s = strings.ReplaceAll(s, ">", ">") s = strings.ReplaceAll(s, """, `"`) s = strings.ReplaceAll(s, "'", "'") s = strings.ReplaceAll(s, "&", "&") return s } // xmlEncode escapes a substituted value for safe insertion back into a // WordprocessingML text node. & must be replaced first to avoid double // encoding the entity prefixes we introduce on the other characters. func xmlEncode(s string) string { s = strings.ReplaceAll(s, "&", "&") s = strings.ReplaceAll(s, "<", "<") s = strings.ReplaceAll(s, ">", ">") s = strings.ReplaceAll(s, `"`, """) s = strings.ReplaceAll(s, "'", "'") return s }