Merge: t-paliad-349 docforge slice 6a — authoring core + TemplateStore wiring (m/paliad#157)

This commit is contained in:
mAi
2026-05-29 16:08:46 +02:00
5 changed files with 295 additions and 0 deletions

View File

@@ -174,6 +174,9 @@ func main() {
submissionComposerSvc := services.NewSubmissionComposer(submissionRenderer)
// t-paliad-315 Slice C — building-block library.
submissionBuildingBlockSvc := services.NewBuildingBlockService(pool, branding.Name)
// t-paliad-349 docforge slice 4/6 — uploaded-template store
// (Postgres bytea) backing the authoring surface.
templateStoreSvc := services.NewPgTemplateStore(pool)
// t-paliad-225 Slice A — user-authored checklist templates.
// Slice B adds checklist_shares grants + admin promotion.
checklistCatalogSvc := services.NewChecklistCatalogService(pool)
@@ -190,6 +193,7 @@ func main() {
SubmissionSection: submissionSectionSvc,
SubmissionComposer: submissionComposerSvc,
SubmissionBuildingBlock: submissionBuildingBlockSvc,
TemplateStore: templateStoreSvc,
Deadline: deadlineSvc,
Appointment: appointmentSvc,
CalDAV: caldavSvc,

View File

@@ -128,6 +128,10 @@ type Services struct {
// editor. Per Q2: paste sources only, no lineage on sections.
SubmissionBuildingBlock *services.BuildingBlockService
// t-paliad-349 docforge slice 4/6 — uploaded-template store backing
// the authoring surface.
TemplateStore *services.PgTemplateStore
// t-paliad-265 / m/paliad#96 — per-event-card optional choices on
// the Verfahrensablauf timeline.
EventChoice *services.EventChoiceService
@@ -215,6 +219,7 @@ func Register(mux *http.ServeMux, client *auth.Client, giteaAPIToken string, svc
submissionSection: svc.SubmissionSection,
submissionComposer: svc.SubmissionComposer,
submissionBuildingBlock: svc.SubmissionBuildingBlock,
templateStore: svc.TemplateStore,
eventChoice: svc.EventChoice,
scenario: svc.Scenario,
scenarioFlags: svc.ScenarioFlags,

View File

@@ -77,6 +77,9 @@ type dbServices struct {
submissionComposer *services.SubmissionComposer
submissionBuildingBlock *services.BuildingBlockService
// t-paliad-349 docforge slice 4/6 — uploaded-template store.
templateStore *services.PgTemplateStore
// t-paliad-265 — per-event-card optional choices.
eventChoice *services.EventChoiceService

View File

@@ -0,0 +1,172 @@
package docx
// Authoring support — the .docx side of the docforge authoring surface
// (t-paliad-349 slice 6). Two operations back the "upload a base .docx →
// place variable slots" flow:
//
// ImportForAuthoring — parse an uploaded .docx into a run-addressable
// preview (one <span data-run="N"> per <w:t>, in document order) plus
// the slots already present in the carrier.
// InjectSlot — replace a selected piece of text inside run N with a
// {{slot_key}} placeholder, returning the new carrier bytes. The
// placeholder is the sentinel that locates the slot (PRD §5 lean) and
// the same token the generation-time renderer substitutes.
//
// Both walk runs in the same order (paragraphs, then <w:t> within), so the
// data-run indices the preview emits address exactly the runs InjectSlot
// targets. Injection keys on the selected text
// (not a byte/UTF-16 offset) so umlauts in German prose can't desync the
// client's selection from the server's slice.
//
// v1 scope (PRD §2.1): text-level slots inside body paragraphs. A run is a
// <w:t> within a <w:p>; selections spanning runs or sitting in
// headers/footers/tables are out of scope and surface as an error the UI
// turns into "select within a single text span".
import (
"bytes"
"fmt"
"strconv"
"strings"
"mgit.msbls.de/m/paliad/pkg/docforge"
)
// AuthoringView is the parsed, run-addressable form of an uploaded
// template, ready for the authoring editor.
type AuthoringView struct {
// PreviewHTML is the body rendered as paragraphs of run spans:
// <p>…<span class="docforge-run" data-run="N">text</span>…</p>.
// The client attaches selection handling to the run spans; data-run
// is the index InjectSlot expects.
PreviewHTML string
// Slots are the {{placeholder}} tokens already present in the
// carrier (so re-opening a saved template shows its slots).
Slots []docforge.TemplateSlot
}
// ImportForAuthoring parses carrierBytes (any .docx/.dotm/...) into an
// AuthoringView. Runs the .dotm→.docx pre-pass so macro templates import
// cleanly.
func ImportForAuthoring(carrierBytes []byte) (*AuthoringView, error) {
clean, err := ConvertDotmToDocx(carrierBytes)
if err != nil {
return nil, fmt.Errorf("authoring import: convert: %w", err)
}
documentXML, _, err := splitBaseZip(clean)
if err != nil {
return nil, fmt.Errorf("authoring import: %w", err)
}
return &AuthoringView{
PreviewHTML: authoringPreviewHTML(documentXML),
Slots: detectSlots(documentXML),
}, nil
}
// authoringPreviewHTML renders the body as run-addressable HTML. One <p>
// per <w:p>; one <span class="docforge-run" data-run="N"> per <w:t>, with
// the decoded run text HTML-escaped. N is the global run index in
// document-then-paragraph order — the same order InjectSlot walks.
func authoringPreviewHTML(documentXML []byte) string {
var out bytes.Buffer
runIdx := 0
paras := wParagraphRegex.FindAll(documentXML, -1)
for _, para := range paras {
out.WriteString("<p>")
for _, m := range wTextNodeRegex.FindAllSubmatch(para, -1) {
text := xmlDecode(string(m[2]))
out.WriteString(`<span class="docforge-run" data-run="`)
out.WriteString(strconv.Itoa(runIdx))
out.WriteString(`">`)
out.WriteString(htmlEscape(text))
out.WriteString(`</span>`)
runIdx++
}
out.WriteString("</p>\n")
}
if out.Len() == 0 {
return "<p></p>"
}
return out.String()
}
// detectSlots returns the distinct {{placeholder}} tokens present in the
// document body, in first-appearance order.
func detectSlots(documentXML []byte) []docforge.TemplateSlot {
seen := map[string]bool{}
var slots []docforge.TemplateSlot
// Match against decoded text so a placeholder split by an entity is
// still found the same way the renderer would substitute it.
for _, m := range wTextNodeRegex.FindAllSubmatch(documentXML, -1) {
text := xmlDecode(string(m[2]))
for _, pm := range placeholderRegex.FindAllStringSubmatch(text, -1) {
key := pm[1]
if seen[key] {
continue
}
seen[key] = true
slots = append(slots, docforge.TemplateSlot{
Key: key,
Anchor: "{{" + key + "}}",
OrderIndex: len(slots),
})
}
}
return slots
}
// InjectSlot replaces the first occurrence of selectedText inside run
// runIndex with a {{slotKey}} placeholder and returns the new carrier
// bytes. Errors when the run is out of range or selectedText isn't found
// in that run (a render/selection desync, or a cross-run selection).
func InjectSlot(carrierBytes []byte, runIndex int, selectedText, slotKey string) ([]byte, error) {
if selectedText == "" {
return nil, fmt.Errorf("authoring inject: empty selection")
}
if !placeholderRegex.MatchString("{{" + slotKey + "}}") {
return nil, fmt.Errorf("authoring inject: invalid slot key %q", slotKey)
}
clean, err := ConvertDotmToDocx(carrierBytes)
if err != nil {
return nil, fmt.Errorf("authoring inject: convert: %w", err)
}
documentXML, parts, err := splitBaseZip(clean)
if err != nil {
return nil, fmt.Errorf("authoring inject: %w", err)
}
runIdx := 0
injected := false
newDoc := wParagraphRegex.ReplaceAllFunc(documentXML, func(para []byte) []byte {
return wTextNodeRegex.ReplaceAllFunc(para, func(tnode []byte) []byte {
idx := runIdx
runIdx++
if injected || idx != runIndex {
return tnode
}
sub := wTextNodeRegex.FindSubmatch(tnode)
attrs := string(sub[1])
content := xmlDecode(string(sub[2]))
before, after, found := strings.Cut(content, selectedText)
if !found {
return tnode // not found here — reported after the walk
}
newContent := before + "{{" + slotKey + "}}" + after
if !strings.Contains(attrs, "xml:space") &&
(strings.HasPrefix(newContent, " ") || strings.HasSuffix(newContent, " ")) {
attrs += ` xml:space="preserve"`
}
injected = true
return []byte(`<w:t` + attrs + `>` + xmlEncode(newContent) + `</w:t>`)
})
})
if !injected {
return nil, fmt.Errorf("authoring inject: selection %q not found in run %d", selectedText, runIndex)
}
repacked, err := repackBaseZip(parts, newDoc)
if err != nil {
return nil, fmt.Errorf("authoring inject: %w", err)
}
return repacked, nil
}

View File

@@ -0,0 +1,111 @@
package docx
import (
"strings"
"testing"
)
// docBody wraps a <w:body> inner string into a full document.xml.
func docBody(inner string) string {
return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>` +
`<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">` +
`<w:body>` + inner + `</w:body></w:document>`
}
func TestImportForAuthoring_PreviewIsRunAddressable(t *testing.T) {
body := docBody(
`<w:p><w:r><w:t>Az. 4c O 12/23</w:t></w:r></w:p>` +
`<w:p><w:r><w:t>Klägerin</w:t></w:r><w:r><w:t xml:space="preserve"> GmbH</w:t></w:r></w:p>`)
view, err := ImportForAuthoring(minimalMergeDOCX(t, body))
if err != nil {
t.Fatalf("ImportForAuthoring: %v", err)
}
// Three <w:t> → three run spans, indexed 0,1,2 in document order.
for i, want := range []string{`data-run="0"`, `data-run="1"`, `data-run="2"`} {
if !strings.Contains(view.PreviewHTML, want) {
t.Errorf("preview missing %s (run %d); html=%s", want, i, view.PreviewHTML)
}
}
if !strings.Contains(view.PreviewHTML, "Az. 4c O 12/23") {
t.Errorf("preview missing run text; html=%s", view.PreviewHTML)
}
// Two paragraphs.
if n := strings.Count(view.PreviewHTML, "<p>"); n != 2 {
t.Errorf("paragraph count = %d; want 2", n)
}
if len(view.Slots) != 0 {
t.Errorf("fresh doc should have no slots; got %v", view.Slots)
}
}
func TestImportForAuthoring_DetectsExistingSlots(t *testing.T) {
body := docBody(`<w:p><w:r><w:t>Az. {{project.case_number}} vor {{project.court}}</w:t></w:r></w:p>`)
view, err := ImportForAuthoring(minimalMergeDOCX(t, body))
if err != nil {
t.Fatalf("ImportForAuthoring: %v", err)
}
if len(view.Slots) != 2 {
t.Fatalf("slots = %d; want 2 (%v)", len(view.Slots), view.Slots)
}
if view.Slots[0].Key != "project.case_number" || view.Slots[0].Anchor != "{{project.case_number}}" {
t.Errorf("slot[0] = %+v; want project.case_number", view.Slots[0])
}
if view.Slots[1].Key != "project.court" {
t.Errorf("slot[1].Key = %q; want project.court", view.Slots[1].Key)
}
}
func TestInjectSlot_ReplacesSelectionWithPlaceholder(t *testing.T) {
body := docBody(`<w:p><w:r><w:t>Az. 4c O 12/23 vor dem LG</w:t></w:r></w:p>`)
out, err := InjectSlot(minimalMergeDOCX(t, body), 0, "4c O 12/23", "project.case_number")
if err != nil {
t.Fatalf("InjectSlot: %v", err)
}
doc := readMergeDocumentXML(t, out)
if !strings.Contains(doc, "Az. {{project.case_number}} vor dem LG") {
t.Errorf("injected doc wrong; got %s", doc)
}
// Round-trips: re-importing finds the new slot.
view, err := ImportForAuthoring(out)
if err != nil {
t.Fatalf("re-import: %v", err)
}
if len(view.Slots) != 1 || view.Slots[0].Key != "project.case_number" {
t.Errorf("re-imported slots = %v; want [project.case_number]", view.Slots)
}
}
func TestInjectSlot_TargetsTheNamedRun(t *testing.T) {
// "GmbH" appears in run 1 only; "Müller" (with umlaut) in run 0.
body := docBody(
`<w:p><w:r><w:t>Müller</w:t></w:r><w:r><w:t xml:space="preserve"> GmbH</w:t></w:r></w:p>`)
out, err := InjectSlot(minimalMergeDOCX(t, body), 0, "Müller", "parties.claimant.name")
if err != nil {
t.Fatalf("InjectSlot: %v", err)
}
doc := readMergeDocumentXML(t, out)
if !strings.Contains(doc, "{{parties.claimant.name}}") {
t.Errorf("umlaut selection not replaced; got %s", doc)
}
if !strings.Contains(doc, " GmbH") {
t.Errorf("run 1 should be untouched; got %s", doc)
}
}
func TestInjectSlot_ErrorsWhenSelectionNotInRun(t *testing.T) {
body := docBody(`<w:p><w:r><w:t>Hello</w:t></w:r></w:p>`)
if _, err := InjectSlot(minimalMergeDOCX(t, body), 0, "Goodbye", "firm.name"); err == nil {
t.Error("expected error when selection absent from run; got nil")
}
// Out-of-range run index.
if _, err := InjectSlot(minimalMergeDOCX(t, body), 9, "Hello", "firm.name"); err == nil {
t.Error("expected error for out-of-range run index; got nil")
}
}
func TestInjectSlot_RejectsInvalidSlotKey(t *testing.T) {
body := docBody(`<w:p><w:r><w:t>Hello</w:t></w:r></w:p>`)
if _, err := InjectSlot(minimalMergeDOCX(t, body), 0, "Hello", "9bad-key!"); err == nil {
t.Error("expected error for invalid slot key; got nil")
}
}