Files
paliad/pkg/docforge/docx/authoring.go
mAi a111a82640 feat(docforge): slice 6a — docx authoring core + TemplateStore wiring (t-paliad-349)
The verifiable backend heart of the authoring surface, before the HTTP +
frontend layers.

pkg/docforge/docx/authoring.go:
  - ImportForAuthoring(carrier) → AuthoringView{PreviewHTML, Slots}: parses
    an uploaded .docx into a run-addressable preview (one
    <span class="docforge-run" data-run="N"> per <w:t>, document order)
    plus the {{placeholder}} slots already present.
  - InjectSlot(carrier, runIndex, selectedText, slotKey) → new carrier:
    replaces the selection inside run N with a {{slot_key}} token. Keys on
    the selected TEXT (not a byte/UTF-16 offset) so umlauts can't desync the
    client selection from the server slice; preview + injection walk runs in
    the identical paragraph→<w:t> order so data-run indices line up.
  - v1 scope: text slots in body paragraphs; out-of-run / cross-run / not-
    found selections return an error the UI turns into a hint.

6 unit tests cover run-addressable preview, slot detection, injection +
round-trip re-import, umlaut/run-targeting, and the error paths (selection
absent, out-of-range run, invalid slot key) — all passing.

Wired PgTemplateStore through the stack (main.go → handlers.Services →
dbServices) so the upcoming authoring endpoints can reach it.

Verification: go build/vet clean, full module test green (13 pkgs), new
files gofmt-clean. The HTTP endpoints + frontend authoring page land next;
their live flow needs the post-merge e2e/manual loop (DB+Supabase).

m/paliad#157
2026-05-29 16:00:27 +02:00

173 lines
6.0 KiB
Go

package docx
// Authoring support — the .docx side of the docforge authoring surface
// (t-paliad-349 slice 6). Two operations back the "upload a base .docx →
// place variable slots" flow:
//
//   - ImportForAuthoring — parse an uploaded .docx into a run-addressable
//     preview (one <span data-run="N"> per <w:t>, in document order) plus
//     the slots already present in the carrier.
//   - InjectSlot — replace a selected piece of text inside run N with a
//     {{slot_key}} placeholder, returning the new carrier bytes. The
//     placeholder is the sentinel that locates the slot (PRD §5 lean) and
//     the same token the generation-time renderer substitutes.
//
// Both walk runs in the same order (paragraphs, then <w:t> within), so the
// data-run indices the preview emits address exactly the runs InjectSlot
// targets. Injection keys on the selected text
// (not a byte/UTF-16 offset) so umlauts in German prose can't desync the
// client's selection from the server's slice.
//
// v1 scope (PRD §2.1): text-level slots inside body paragraphs. A run is a
// <w:t> within a <w:p>; selections spanning runs or sitting in
// headers/footers/tables are out of scope and surface as an error the UI
// turns into "select within a single text span".
import (
"bytes"
"fmt"
"strconv"
"strings"
"mgit.msbls.de/m/paliad/pkg/docforge"
)
// AuthoringView is the parsed, run-addressable form of an uploaded
// template, ready for the authoring editor. ImportForAuthoring builds it
// from the carrier's main document XML: PreviewHTML comes from
// authoringPreviewHTML and Slots from detectSlots.
type AuthoringView struct {
// PreviewHTML is the body rendered as paragraphs of run spans:
// <p>…<span class="docforge-run" data-run="N">text</span>…</p>.
// Each span holds the HTML-escaped text of one text node, and N counts
// those nodes from 0 across all paragraphs. The client attaches
// selection handling to the run spans; data-run is the runIndex value
// InjectSlot expects. A document with no paragraphs yields "<p></p>".
PreviewHTML string
// Slots are the distinct {{placeholder}} tokens already present in the
// carrier, in first-appearance order (so re-opening a saved template
// shows its slots). Nil when the document contains none.
Slots []docforge.TemplateSlot
}
// ImportForAuthoring turns the raw bytes of an uploaded template carrier
// (.docx, .dotm, ...) into the AuthoringView the editor works with.
//
// The carrier first goes through ConvertDotmToDocx (the .dotm→.docx
// pre-pass, so macro templates import cleanly); the main document XML is
// then pulled out with splitBaseZip and rendered into the preview HTML and
// the list of slots already present.
//
// Returns a wrapped error when the conversion or the zip split fails; the
// view is nil in that case.
func ImportForAuthoring(carrierBytes []byte) (*AuthoringView, error) {
	docxBytes, convErr := ConvertDotmToDocx(carrierBytes)
	if convErr != nil {
		return nil, fmt.Errorf("authoring import: convert: %w", convErr)
	}

	// Only the main document part is needed for a read-only import; the
	// remaining zip parts are discarded.
	body, _, splitErr := splitBaseZip(docxBytes)
	if splitErr != nil {
		return nil, fmt.Errorf("authoring import: %w", splitErr)
	}

	view := new(AuthoringView)
	view.PreviewHTML = authoringPreviewHTML(body)
	view.Slots = detectSlots(body)
	return view, nil
}
// authoringPreviewHTML builds the run-addressable HTML preview of the
// document body. Every wParagraphRegex match becomes one <p>, and every
// wTextNodeRegex match inside it becomes one
// <span class="docforge-run" data-run="N"> holding the entity-decoded,
// HTML-escaped run text.
//
// N is a single counter that starts at 0 and keeps increasing across
// paragraphs (paragraphs in document order, text nodes in order within
// each paragraph) — the same walk InjectSlot performs, so the indices
// match. A document without any paragraph yields "<p></p>".
func authoringPreviewHTML(documentXML []byte) string {
	var html bytes.Buffer
	next := 0 // global run index; never reset between paragraphs

	for _, paragraph := range wParagraphRegex.FindAll(documentXML, -1) {
		html.WriteString("<p>")
		nodes := wTextNodeRegex.FindAllSubmatch(paragraph, -1)
		for i := range nodes {
			// Submatch 2 is the raw (still XML-encoded) text content.
			escaped := htmlEscape(xmlDecode(string(nodes[i][2])))
			html.WriteString(`<span class="docforge-run" data-run="`)
			html.WriteString(strconv.Itoa(next))
			html.WriteString(`">`)
			html.WriteString(escaped)
			html.WriteString(`</span>`)
			next++
		}
		html.WriteString("</p>\n")
	}

	if html.Len() > 0 {
		return html.String()
	}
	// No paragraphs matched: hand the editor an empty paragraph.
	return "<p></p>"
}
// detectSlots collects the distinct {{placeholder}} keys found in the
// document's text nodes, ordered by first appearance. Each slot's Anchor
// is the canonical "{{key}}" token and its OrderIndex is its position in
// the returned slice. The result is nil when no placeholder is present.
//
// The scan runs over every wTextNodeRegex match in documentXML (not per
// paragraph) and matches against the entity-decoded text, so a
// placeholder written with XML entities is still recognised.
func detectSlots(documentXML []byte) []docforge.TemplateSlot {
	var found []docforge.TemplateSlot
	known := make(map[string]struct{})

	for _, node := range wTextNodeRegex.FindAllSubmatch(documentXML, -1) {
		decoded := xmlDecode(string(node[2]))
		for _, hit := range placeholderRegex.FindAllStringSubmatch(decoded, -1) {
			name := hit[1]
			if _, dup := known[name]; dup {
				// Already recorded at its first appearance.
				continue
			}
			known[name] = struct{}{}
			found = append(found, docforge.TemplateSlot{
				Key:        name,
				Anchor:     "{{" + name + "}}",
				OrderIndex: len(found),
			})
		}
	}
	return found
}
// InjectSlot replaces the first occurrence of selectedText inside run
// runIndex with a {{slotKey}} placeholder and returns the new carrier
// bytes.
//
// Parameters:
//   - carrierBytes: the template carrier; it is passed through
//     ConvertDotmToDocx before being unpacked.
//   - runIndex: the data-run index emitted by the authoring preview. Runs
//     are counted from 0 across all paragraphs, text nodes in order
//     within each paragraph — the same walk the preview performs.
//   - selectedText: the decoded text to replace; matching is done on the
//     entity-decoded run content, not on byte/UTF-16 offsets.
//   - slotKey: the placeholder key. It is accepted only when "{{slotKey}}"
//     is, in its entirety, exactly one placeholder token whose captured
//     key equals slotKey.
//
// Errors when selectedText is empty, when slotKey is not a valid key, when
// conversion/unpacking/repacking fails, or when nothing was injected —
// i.e. the run index is out of range (including negative) or selectedText
// does not occur in that run (a render/selection desync, or a cross-run
// selection).
func InjectSlot(carrierBytes []byte, runIndex int, selectedText, slotKey string) ([]byte, error) {
	if selectedText == "" {
		return nil, fmt.Errorf("authoring inject: empty selection")
	}

	// placeholderRegex is used unanchored (detectSlots finds tokens inside
	// running prose with it), so a plain MatchString would only prove that
	// the candidate *contains* a placeholder: a key such as "a}} x {{b"
	// would slip through and inject two tokens plus stray text. Require the
	// match to span the whole token and the captured key to be slotKey.
	token := "{{" + slotKey + "}}"
	if m := placeholderRegex.FindStringSubmatch(token); len(m) < 2 || m[0] != token || m[1] != slotKey {
		return nil, fmt.Errorf("authoring inject: invalid slot key %q", slotKey)
	}

	clean, err := ConvertDotmToDocx(carrierBytes)
	if err != nil {
		return nil, fmt.Errorf("authoring inject: convert: %w", err)
	}
	documentXML, parts, err := splitBaseZip(clean)
	if err != nil {
		return nil, fmt.Errorf("authoring inject: %w", err)
	}

	runIdx := 0       // global run counter, shared by every paragraph
	injected := false // set once the target run has been rewritten
	newDoc := wParagraphRegex.ReplaceAllFunc(documentXML, func(para []byte) []byte {
		return wTextNodeRegex.ReplaceAllFunc(para, func(tnode []byte) []byte {
			idx := runIdx
			runIdx++
			if injected || idx != runIndex {
				return tnode
			}
			sub := wTextNodeRegex.FindSubmatch(tnode)
			attrs := string(sub[1])
			content := xmlDecode(string(sub[2]))
			before, after, found := strings.Cut(content, selectedText)
			if !found {
				return tnode // not found here — reported after the walk
			}
			newContent := before + token + after
			// Mark the run as whitespace-preserving when the rewritten text
			// starts or ends with a space and the attribute is not already
			// there.
			if !strings.Contains(attrs, "xml:space") &&
				(strings.HasPrefix(newContent, " ") || strings.HasSuffix(newContent, " ")) {
				attrs += ` xml:space="preserve"`
			}
			injected = true
			return []byte(`<w:t` + attrs + `>` + xmlEncode(newContent) + `</w:t>`)
		})
	})
	if !injected {
		return nil, fmt.Errorf("authoring inject: selection %q not found in run %d", selectedText, runIndex)
	}

	repacked, err := repackBaseZip(parts, newDoc)
	if err != nil {
		return nil, fmt.Errorf("authoring inject: %w", err)
	}
	return repacked, nil
}