This change adds the verifiable backend core of the authoring surface, ahead
of the HTTP and frontend layers.
pkg/docforge/docx/authoring.go:
- ImportForAuthoring(carrier) → AuthoringView{PreviewHTML, Slots}: parses
an uploaded .docx into a run-addressable preview (one
<span class="docforge-run" data-run="N"> per <w:t>, document order)
plus the {{placeholder}} slots already present.
- InjectSlot(carrier, runIndex, selectedText, slotKey) → new carrier:
replaces the selection inside run N with a {{slot_key}} token. Keys on
the selected TEXT (not a byte/UTF-16 offset) so umlauts can't desync the
client selection from the server slice; preview + injection walk runs in
the identical paragraph→<w:t> order so data-run indices line up.
- v1 scope: text slots in body paragraphs; out-of-run / cross-run / not-
found selections return an error the UI turns into a hint.
6 unit tests cover run-addressable preview, slot detection, injection +
round-trip re-import, umlaut/run-targeting, and the error paths (selection
absent, out-of-range run, invalid slot key) — all passing.
Wired PgTemplateStore through the stack (main.go → handlers.Services →
dbServices) so the upcoming authoring endpoints can reach it.
Verification: go build/vet clean, full module test green (13 pkgs), new
files gofmt-clean. The HTTP endpoints + frontend authoring page land next;
their live flow needs the post-merge e2e/manual loop (DB+Supabase).
m/paliad#157
173 lines
6.0 KiB
Go
173 lines
6.0 KiB
Go
package docx
|
|
|
|
// Authoring support — the .docx side of the docforge authoring surface
|
|
// (t-paliad-349 slice 6). Two operations back the "upload a base .docx →
|
|
// place variable slots" flow:
|
|
//
|
|
// ImportForAuthoring — parse an uploaded .docx into a run-addressable
|
|
// preview (one <span data-run="N"> per <w:t>, in document order) plus
|
|
// the slots already present in the carrier.
|
|
// InjectSlot — replace a selected piece of text inside run N with a
|
|
// {{slot_key}} placeholder, returning the new carrier bytes. The
|
|
// placeholder is the sentinel that locates the slot (PRD §5 lean) and
|
|
// the same token the generation-time renderer substitutes.
|
|
//
|
|
// Both walk runs in the same order (paragraphs, then <w:t> within), so the
|
|
// data-run indices the preview emits address exactly the runs InjectSlot
|
|
// targets. Injection keys on the selected text
|
|
// (not a byte/UTF-16 offset) so umlauts in German prose can't desync the
|
|
// client's selection from the server's slice.
|
|
//
|
|
// v1 scope (PRD §2.1): text-level slots inside body paragraphs. A run is a
|
|
// <w:t> within a <w:p>; selections spanning runs or sitting in
|
|
// headers/footers/tables are out of scope and surface as an error the UI
|
|
// turns into "select within a single text span".
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"mgit.msbls.de/m/paliad/pkg/docforge"
|
|
)
|
|
|
|
// AuthoringView is the parsed, run-addressable form of an uploaded
|
|
// template, ready for the authoring editor.
|
|
type AuthoringView struct {
|
|
// PreviewHTML is the body rendered as paragraphs of run spans:
|
|
// <p>…<span class="docforge-run" data-run="N">text</span>…</p>.
|
|
// The client attaches selection handling to the run spans; data-run
|
|
// is the index InjectSlot expects.
|
|
PreviewHTML string
|
|
// Slots are the {{placeholder}} tokens already present in the
|
|
// carrier (so re-opening a saved template shows its slots).
|
|
Slots []docforge.TemplateSlot
|
|
}
|
|
|
|
// ImportForAuthoring parses carrierBytes (any .docx/.dotm/...) into an
|
|
// AuthoringView. Runs the .dotm→.docx pre-pass so macro templates import
|
|
// cleanly.
|
|
func ImportForAuthoring(carrierBytes []byte) (*AuthoringView, error) {
|
|
clean, err := ConvertDotmToDocx(carrierBytes)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("authoring import: convert: %w", err)
|
|
}
|
|
documentXML, _, err := splitBaseZip(clean)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("authoring import: %w", err)
|
|
}
|
|
return &AuthoringView{
|
|
PreviewHTML: authoringPreviewHTML(documentXML),
|
|
Slots: detectSlots(documentXML),
|
|
}, nil
|
|
}
|
|
|
|
// authoringPreviewHTML renders the body as run-addressable HTML. One <p>
|
|
// per <w:p>; one <span class="docforge-run" data-run="N"> per <w:t>, with
|
|
// the decoded run text HTML-escaped. N is the global run index in
|
|
// document-then-paragraph order — the same order InjectSlot walks.
|
|
func authoringPreviewHTML(documentXML []byte) string {
|
|
var out bytes.Buffer
|
|
runIdx := 0
|
|
paras := wParagraphRegex.FindAll(documentXML, -1)
|
|
for _, para := range paras {
|
|
out.WriteString("<p>")
|
|
for _, m := range wTextNodeRegex.FindAllSubmatch(para, -1) {
|
|
text := xmlDecode(string(m[2]))
|
|
out.WriteString(`<span class="docforge-run" data-run="`)
|
|
out.WriteString(strconv.Itoa(runIdx))
|
|
out.WriteString(`">`)
|
|
out.WriteString(htmlEscape(text))
|
|
out.WriteString(`</span>`)
|
|
runIdx++
|
|
}
|
|
out.WriteString("</p>\n")
|
|
}
|
|
if out.Len() == 0 {
|
|
return "<p></p>"
|
|
}
|
|
return out.String()
|
|
}
|
|
|
|
// detectSlots returns the distinct {{placeholder}} tokens present in the
|
|
// document body, in first-appearance order.
|
|
func detectSlots(documentXML []byte) []docforge.TemplateSlot {
|
|
seen := map[string]bool{}
|
|
var slots []docforge.TemplateSlot
|
|
// Match against decoded text so a placeholder split by an entity is
|
|
// still found the same way the renderer would substitute it.
|
|
for _, m := range wTextNodeRegex.FindAllSubmatch(documentXML, -1) {
|
|
text := xmlDecode(string(m[2]))
|
|
for _, pm := range placeholderRegex.FindAllStringSubmatch(text, -1) {
|
|
key := pm[1]
|
|
if seen[key] {
|
|
continue
|
|
}
|
|
seen[key] = true
|
|
slots = append(slots, docforge.TemplateSlot{
|
|
Key: key,
|
|
Anchor: "{{" + key + "}}",
|
|
OrderIndex: len(slots),
|
|
})
|
|
}
|
|
}
|
|
return slots
|
|
}
|
|
|
|
// InjectSlot replaces the first occurrence of selectedText inside run
|
|
// runIndex with a {{slotKey}} placeholder and returns the new carrier
|
|
// bytes. Errors when the run is out of range or selectedText isn't found
|
|
// in that run (a render/selection desync, or a cross-run selection).
|
|
func InjectSlot(carrierBytes []byte, runIndex int, selectedText, slotKey string) ([]byte, error) {
|
|
if selectedText == "" {
|
|
return nil, fmt.Errorf("authoring inject: empty selection")
|
|
}
|
|
if !placeholderRegex.MatchString("{{" + slotKey + "}}") {
|
|
return nil, fmt.Errorf("authoring inject: invalid slot key %q", slotKey)
|
|
}
|
|
clean, err := ConvertDotmToDocx(carrierBytes)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("authoring inject: convert: %w", err)
|
|
}
|
|
documentXML, parts, err := splitBaseZip(clean)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("authoring inject: %w", err)
|
|
}
|
|
|
|
runIdx := 0
|
|
injected := false
|
|
newDoc := wParagraphRegex.ReplaceAllFunc(documentXML, func(para []byte) []byte {
|
|
return wTextNodeRegex.ReplaceAllFunc(para, func(tnode []byte) []byte {
|
|
idx := runIdx
|
|
runIdx++
|
|
if injected || idx != runIndex {
|
|
return tnode
|
|
}
|
|
sub := wTextNodeRegex.FindSubmatch(tnode)
|
|
attrs := string(sub[1])
|
|
content := xmlDecode(string(sub[2]))
|
|
before, after, found := strings.Cut(content, selectedText)
|
|
if !found {
|
|
return tnode // not found here — reported after the walk
|
|
}
|
|
newContent := before + "{{" + slotKey + "}}" + after
|
|
if !strings.Contains(attrs, "xml:space") &&
|
|
(strings.HasPrefix(newContent, " ") || strings.HasSuffix(newContent, " ")) {
|
|
attrs += ` xml:space="preserve"`
|
|
}
|
|
injected = true
|
|
return []byte(`<w:t` + attrs + `>` + xmlEncode(newContent) + `</w:t>`)
|
|
})
|
|
})
|
|
if !injected {
|
|
return nil, fmt.Errorf("authoring inject: selection %q not found in run %d", selectedText, runIndex)
|
|
}
|
|
|
|
repacked, err := repackBaseZip(parts, newDoc)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("authoring inject: %w", err)
|
|
}
|
|
return repacked, nil
|
|
}
|