m's 2026-05-21 scope reduction of the t-paliad-215 submission generator:
ship a demo that hands the lawyer the firm style template as a clean
.docx. No variable-merge engine, no per-submission template registry,
no fallback chain — the merge slice is deferred to a future task.
Replaces the previous engine (template registry + variable bag +
{{placeholder}} renderer + dual project_events/documents writes) with:
* services.ConvertDotmToDocx — single-function .dotm/.docm/.dotx → .docx
format converter that strips word/vbaProject.bin, word/vbaData.xml,
word/customizations.xml, and word/_rels/vbaProject.bin.rels, rewrites
[Content_Types].xml (demotes the macro/template main type to plain
docx, drops the .bin Default Extension and the macro Overrides), and
rewrites word/_rels/document.xml.rels to drop the vbaProject +
keyMapCustomizations relationships. Idempotent on a plain .docx.
Uses only the stdlib archive/zip and regexp packages — no new third-party dependencies.
* handlers/submissions.go — POST /api/projects/{id}/submissions/{code}
/generate fetches the cached HL Patents Style .dotm (via a new
fetchHLPatentsStyleBytes accessor on files.go that shares the same
cache as /files/{slug}), converts, writes one paliad.system_audit_log
row (event_type='submission.generated', metadata={submission_code,
rule_name, filename}), and streams the .docx as an attachment. GET
/api/projects/{id}/submissions still lists filing rules but
has_template is unconditionally true (one universal template).
* Filename per design §7: {rule.name}-{project.case_number}-{YYYY-MM-DD}
.docx, with Umlauts ASCII-folded and slashes → underscores.
Drops services/submission_templates.go, services/submission_vars.go,
and the wiring in cmd/server/main.go + handlers/handlers.go that bound
them together. Frontend client switched to POST.
Verified the converter against the real HL Patents Style.dotm (361 KB
input → 243 KB output, 46 parts in output zip):
unzip -tq /tmp/hl-patents-style.converted.docx → No errors
python3 -c "import zipfile, xml.etree.ElementTree as ET; \
z=zipfile.ZipFile('/tmp/hl-patents-style.converted.docx'); \
[ET.fromstring(z.read(p)) for p in z.namelist() if p.endswith('.xml')]"
uv run --with python-docx python3 -c "import docx; \
d=docx.Document('/tmp/hl-patents-style.converted.docx'); \
print(len(d.paragraphs), 'paragraphs', len(d.styles), 'styles')"
→ 236 paragraphs, 168 styles, 1 section
All assertions passed: every Override in [Content_Types].xml resolves
to a real part, every internal Target in document.xml.rels resolves,
zero macro-related residue, and the document body + styles + theme
survive untouched.
go test -run TestBootSmoke ./cmd/server/... clean (route additions
register without conflict on the Go ServeMux).
205 lines
7.6 KiB
Go
205 lines
7.6 KiB
Go
package services
|
|
|
|
// Submission .dotm → .docx converter (t-paliad-230, "format-only" scope
|
|
// reduction of the original t-paliad-215 submission generator).
|
|
//
|
|
// Word .dotm (macro-enabled template), .docm (macro-enabled document),
|
|
// .dotx (template, no macros), and .docx (document, no macros) are all
|
|
// OOXML zip containers. The macro-bearing variants carry an extra set
|
|
// of parts:
|
|
//
|
|
// word/vbaProject.bin — the VBA project binary
|
|
// word/_rels/vbaProject.bin.rels — auxiliary relationships
|
|
// word/vbaData.xml — VBA support data
|
|
// word/customizations.xml — keyMapCustomizations
|
|
//
|
|
// plus a Content-Types override for each of those, a Default extension
|
|
// declaring all .bin files as vbaProject, and a different "main" content
|
|
// type for word/document.xml itself.
|
|
//
|
|
// ConvertDotmToDocx walks the zip, drops the macro parts, rewrites
|
|
// [Content_Types].xml and word/_rels/document.xml.rels to remove every
|
|
// reference to them, and switches the main document content type to
|
|
// the plain .docx form. Every other part — styles, fonts, theme,
|
|
// settings, document body, header/footer/numbering, glossary, custom
|
|
// XML — passes through bit-for-bit at the original compression method
|
|
// and modification time.
|
|
//
|
|
// No variable substitution. Today's slice hands the lawyer the firm
|
|
// style template as a clean .docx so they can edit and save under
|
|
// their own filename. The merge-engine slice is deferred.
|
|
|
|
import (
|
|
"archive/zip"
|
|
"bytes"
|
|
"fmt"
|
|
"io"
|
|
"regexp"
|
|
"strings"
|
|
)
|
|
|
|
// Content types that [Content_Types].xml may declare for the main
// document part, one per Word container flavour. docxMainContentType is
// the target form; rewriteContentTypes replaces each of the other three
// with it so the output reads as a plain document.
const (
	// Plain document (.docx) — the form the converter emits.
	docxMainContentType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"
	// Template without macros (.dotx).
	dotxMainContentType = "application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml"
	// Macro-enabled document (.docm).
	docmMainContentType = "application/vnd.ms-word.document.macroEnabled.main+xml"
	// Macro-enabled template (.dotm).
	dotmMainContentType = "application/vnd.ms-word.template.macroEnabledTemplate.main+xml"
)
|
|
|
|
// macroParts is the set of zip entry names that ConvertDotmToDocx skips
// entirely. Keys are zip paths, i.e. without a leading slash. A lookup
// for any other name yields false, so the map doubles as a predicate.
var macroParts = map[string]bool{
	"word/customizations.xml":        true,
	"word/vbaData.xml":               true,
	"word/vbaProject.bin":            true,
	"word/_rels/vbaProject.bin.rels": true,
}
|
|
|
|
// Zip entry names of the two manifests that ConvertDotmToDocx rewrites
// instead of copying verbatim.
const (
	// Relationships of the main document part.
	documentRelsPath = "word/_rels/document.xml.rels"
	// Package-wide content-type manifest.
	contentTypesPath = "[Content_Types].xml"
)
|
|
|
|
// vbaDefaultExtensionRegex matches the self-closing <Default> element in
// [Content_Types].xml that maps the "bin" extension to the vbaProject
// content type, together with any whitespace in front of it. After
// vbaProject.bin is dropped, the Default is dead weight (and Word will
// flag the file as macro-bearing if it survives).
//
// XML attribute order is not significant, so the pattern accepts
// Extension/ContentType in either order. A "bin" Default that declares a
// different content type is deliberately left alone.
var vbaDefaultExtensionRegex = regexp.MustCompile(
	`\s*<Default\b[^>]*(?:` +
		`\bExtension\s*=\s*"bin"[^>]*\bContentType\s*=\s*"application/vnd\.ms-office\.vbaProject"` +
		`|` +
		`\bContentType\s*=\s*"application/vnd\.ms-office\.vbaProject"[^>]*\bExtension\s*=\s*"bin"` +
		`)[^>]*/>`,
)
|
|
|
|
// macroOverridePartRegex finds self-closing <Override> elements (plus any
// whitespace in front of them) whose PartName is /word/vbaProject.bin,
// /word/vbaData.xml or /word/customizations.xml. PartName values in
// [Content_Types].xml are absolute and therefore start with a slash,
// whereas the zip entry names of the same parts do not.
var macroOverridePartRegex = regexp.MustCompile(`\s*<Override\b[^>]*\bPartName\s*=\s*"/word/(?:vbaProject\.bin|vbaData\.xml|customizations\.xml)"[^>]*/>`)
|
|
|
|
// macroRelTypeRegex finds self-closing <Relationship> elements (plus any
// whitespace in front of them) whose Type is the Microsoft 2006
// vbaProject or keyMapCustomizations relationship. Those relationships
// bind to vbaProject.bin and customizations.xml; once both targets are
// dropped, leaving the relationships in place would make Word flag the
// file as corrupt.
var macroRelTypeRegex = regexp.MustCompile(`\s*<Relationship\b[^>]*\bType\s*=\s*"http://schemas\.microsoft\.com/office/2006/relationships/(?:vbaProject|keyMapCustomizations)"[^>]*/>`)
|
|
|
|
// ConvertDotmToDocx rewrites a .dotm (or .docm, or .dotx) zip into a
|
|
// clean .docx zip. Idempotent on a zip that is already a plain .docx.
|
|
// Returns an error if the input is not a valid zip.
|
|
func ConvertDotmToDocx(dotmBytes []byte) ([]byte, error) {
|
|
zr, err := zip.NewReader(bytes.NewReader(dotmBytes), int64(len(dotmBytes)))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("dotm→docx: open zip: %w", err)
|
|
}
|
|
|
|
var out bytes.Buffer
|
|
zw := zip.NewWriter(&out)
|
|
|
|
for _, entry := range zr.File {
|
|
if macroParts[entry.Name] {
|
|
continue
|
|
}
|
|
|
|
body, err := readZipFile(entry)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("dotm→docx: read %s: %w", entry.Name, err)
|
|
}
|
|
|
|
switch entry.Name {
|
|
case contentTypesPath:
|
|
body = rewriteContentTypes(body)
|
|
case documentRelsPath:
|
|
body = rewriteDocumentRels(body)
|
|
}
|
|
|
|
w, err := zw.CreateHeader(&zip.FileHeader{
|
|
Name: entry.Name,
|
|
Method: entry.Method,
|
|
Modified: entry.Modified,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("dotm→docx: write header %s: %w", entry.Name, err)
|
|
}
|
|
if _, err := w.Write(body); err != nil {
|
|
return nil, fmt.Errorf("dotm→docx: write body %s: %w", entry.Name, err)
|
|
}
|
|
}
|
|
|
|
if err := zw.Close(); err != nil {
|
|
return nil, fmt.Errorf("dotm→docx: finalise zip: %w", err)
|
|
}
|
|
return out.Bytes(), nil
|
|
}
|
|
|
|
// rewriteContentTypes demotes any of the three non-docx "main" content
|
|
// types to plain docx, drops the bin Default-Extension entry, and
|
|
// drops every Override that targeted a dropped macro part.
|
|
//
|
|
// String-level substitution rather than encoding/xml: round-tripping
|
|
// through Go's XML marshaller would re-emit the document with
|
|
// canonical namespace declarations on every child, which Word reads
|
|
// but which makes the binary diff unnecessarily large. Direct
|
|
// substitution preserves the file's original shape.
|
|
func rewriteContentTypes(body []byte) []byte {
|
|
body = bytes.ReplaceAll(body, []byte(dotmMainContentType), []byte(docxMainContentType))
|
|
body = bytes.ReplaceAll(body, []byte(docmMainContentType), []byte(docxMainContentType))
|
|
body = bytes.ReplaceAll(body, []byte(dotxMainContentType), []byte(docxMainContentType))
|
|
body = vbaDefaultExtensionRegex.ReplaceAll(body, nil)
|
|
body = macroOverridePartRegex.ReplaceAll(body, nil)
|
|
return body
|
|
}
|
|
|
|
// rewriteDocumentRels drops the two macro-related relationships from
|
|
// word/_rels/document.xml.rels (vbaProject + keyMapCustomizations) so
|
|
// the manifest no longer points at parts the zip no longer carries.
|
|
// Every other relationship — styles, settings, numbering, theme,
|
|
// headers/footers, customXml — passes through untouched.
|
|
func rewriteDocumentRels(body []byte) []byte {
|
|
return macroRelTypeRegex.ReplaceAll(body, nil)
|
|
}
|
|
|
|
// readZipFile opens the zip entry f, reads its decompressed content to
// the end and returns it. The entry's reader is closed before returning;
// an error from opening or reading is handed back to the caller.
func readZipFile(f *zip.File) ([]byte, error) {
	entry, openErr := f.Open()
	if openErr != nil {
		return nil, openErr
	}
	defer entry.Close()

	content, readErr := io.ReadAll(entry)
	return content, readErr
}
|
|
|
|
// SanitiseSubmissionFileName cleans s for use inside a download
// filename and returns the result.
//
// The steps, in order:
//   - German umlauts and ß are folded to ASCII digraphs (see
//     umlautFolder), so names such as "Berufungsbegründung" also land
//     cleanly on legacy shares that are not Unicode-aware.
//   - '/' and '\' become '_' so the name cannot be read as a path.
//   - Double and single quotes are removed; they would otherwise end the
//     quoted filename parameter of a Content-Disposition header early.
//   - ASCII control characters (including CR, LF, TAB and DEL) are
//     removed; they have no place in a header value or a file name.
//   - Leading and trailing whitespace is trimmed last, so padding that
//     only becomes the edge of the string once quotes or control
//     characters are gone is removed as well.
//
// All other characters, including non-German Unicode letters, are
// preserved so such names still produce a recognisable file.
func SanitiseSubmissionFileName(s string) string {
	s = umlautFolder.Replace(s)
	s = strings.Map(func(r rune) rune {
		switch {
		case r == '/' || r == '\\':
			return '_'
		case r == '"' || r == '\'':
			return -1
		case r < 0x20 || r == 0x7f:
			return -1
		}
		return r
	}, s)
	return strings.TrimSpace(s)
}

// umlautFolder replaces the three German umlaut vowels, in both cases,
// with ASCII digraphs (ä → ae, Ä → Ae, …) and ß with "ss".
var umlautFolder = strings.NewReplacer(
	"ä", "ae", "ö", "oe", "ü", "ue",
	"Ä", "Ae", "Ö", "Oe", "Ü", "Ue",
	"ß", "ss",
)
|