The final slice: land the format-neutral document model with REAL consumers
and unify the Markdown parser — no duplication, byte-identical output.
Neutral model (pkg/docforge/model.go): Document / Block / InlineSpan.
BlockKind values are the stylemap keys. A hyperlink is a span with Link set
+ Children (the label's spans), preserving link boundaries so adjacent
same-URL links stay distinct — byte-exact with the pre-model walker.
Markdown importer (pkg/docforge/markdown): Import(md) → Document. The SINGLE
Markdown parser for docforge — block split, marker detection, inline
bold/italic/link tokenisation, {{placeholder}} pass-through (the b78a984
fix). Relocated out of the docx walker.
docx renderer (pkg/docforge/docx/markdown.go): now RENDERS a Document →
OOXML (RenderDocumentToOOXML); RenderMarkdownToOOXML[WithStyles] = render(
markdown.Import(md)). The shipped submission walker routes through the model,
so there is one parser, not two. The comprehensive byte-exact render tests
(RenderMarkdownToOOXML_*) all PASS unchanged = output identical.
Exporter interface (pkg/docforge/exporter.go, PRD §4 B4): Exporter{Format,
MIMEType, RenderBody(Document)} with the .docx impl (pkg/docforge/docx/
exporter.go). The seam a future PDF/HTML exporter slots into.
Tests: parser tests relocated to the markdown pkg (parseSpans/detectBlockMarker)
+ new importer Document tests + exporter conformance test.
Verification: go build/vet clean; gofmt clean; full NO-DB test suite GREEN
(authoritative — proves no regression); docforge byte-exact render oracle
PASS; composer live test renders through the rewired walker (PASS); bun build
+ bun test 274/274. The shared-DB live run fails ~85 tests across unrelated
services; these stem from a harness pq-42P08 $1-type seeding quirk and a stale
deadline_rules test — systemic/environmental failures (the no-DB run is
clean), not caused by this change.
docforge train complete: 8 slices, the engine extracted + cleaned + a working
author→generate→export loop on uploaded templates, plus the neutral model +
importer + exporter seam for future formats/consumers.
m/paliad#157
146 lines
4.4 KiB
Go
146 lines
4.4 KiB
Go
package markdown
|
|
|
|
import (
|
|
"strings"
|
|
"testing"
|
|
)
|
|
|
|
// Inline-span + block-marker tests, relocated from the docx walker when
|
|
// parsing moved here (t-paliad-349 slice 8). parseSpans is the inline
|
|
// tokeniser; detectBlockMarker classifies a line.
|
|
|
|
func TestParseSpans_PlaceholderWithUnderscoresIsLiteral(t *testing.T) {
|
|
// {{project.case_number}} must emit as a single non-italic span
|
|
// containing the full placeholder (the b78a984 fix).
|
|
spans := parseSpans("{{project.case_number}}")
|
|
if len(spans) != 1 {
|
|
t.Fatalf("expected 1 span; got %d (%+v)", len(spans), spans)
|
|
}
|
|
if spans[0].Italic || spans[0].Bold {
|
|
t.Errorf("placeholder must not be italic/bold; got %+v", spans[0])
|
|
}
|
|
if spans[0].Text != "{{project.case_number}}" {
|
|
t.Errorf("placeholder text corrupted: got %q", spans[0].Text)
|
|
}
|
|
}
|
|
|
|
func TestParseSpans_ItalicAroundPlaceholder(t *testing.T) {
|
|
spans := parseSpans("_before_ {{x.y_z}} _after_")
|
|
var saw struct {
|
|
italicBefore bool
|
|
placeholder bool
|
|
italicAfter bool
|
|
}
|
|
for _, s := range spans {
|
|
if s.Italic && s.Text == "before" {
|
|
saw.italicBefore = true
|
|
}
|
|
if !s.Italic && !s.Bold && strings.Contains(s.Text, "{{x.y_z}}") {
|
|
saw.placeholder = true
|
|
}
|
|
if s.Italic && s.Text == "after" {
|
|
saw.italicAfter = true
|
|
}
|
|
}
|
|
if !saw.italicBefore || !saw.placeholder || !saw.italicAfter {
|
|
t.Errorf("expected italic/placeholder/italic structure; got %+v", spans)
|
|
}
|
|
}
|
|
|
|
func TestParseSpans_Plain(t *testing.T) {
|
|
spans := parseSpans("hello world")
|
|
if len(spans) != 1 || spans[0].Bold || spans[0].Italic || spans[0].Text != "hello world" {
|
|
t.Errorf("expected single plain span; got %+v", spans)
|
|
}
|
|
}
|
|
|
|
func TestParseSpans_UnderscoreItalic(t *testing.T) {
|
|
spans := parseSpans("_emph_")
|
|
var italicHits int
|
|
for _, s := range spans {
|
|
if s.Italic && s.Text == "emph" {
|
|
italicHits++
|
|
}
|
|
}
|
|
if italicHits != 1 {
|
|
t.Errorf("expected one italic 'emph' span; got %+v", spans)
|
|
}
|
|
}
|
|
|
|
func TestParseSpans_UnderscoreBold(t *testing.T) {
|
|
spans := parseSpans("__strong__")
|
|
var boldHits int
|
|
for _, s := range spans {
|
|
if s.Bold && s.Text == "strong" {
|
|
boldHits++
|
|
}
|
|
}
|
|
if boldHits != 1 {
|
|
t.Errorf("expected one bold 'strong' span; got %+v", spans)
|
|
}
|
|
}
|
|
|
|
func TestDetectBlockMarker(t *testing.T) {
|
|
cases := []struct {
|
|
in string
|
|
kind string
|
|
want string
|
|
ok bool
|
|
}{
|
|
{"# A", "heading_1", "A", true},
|
|
{"## B", "heading_2", "B", true},
|
|
{"### C", "heading_3", "C", true},
|
|
{" # indented", "heading_1", "indented", true}, // up to 3 spaces tolerated
|
|
{" # too-deep", "", "", false}, // 4 spaces → not a heading
|
|
{"- bullet", "list_bullet", "bullet", true},
|
|
{"* star", "list_bullet", "star", true},
|
|
{"1. one", "list_numbered", "one", true},
|
|
{"42. forty-two", "list_numbered", "forty-two", true},
|
|
{"1) paren", "list_numbered", "paren", true},
|
|
{"1.no-space", "", "", false}, // ordinal needs trailing space
|
|
{"> quote", "blockquote", "quote", true},
|
|
{"plain", "", "", false},
|
|
{"#nospace", "", "", false}, // heading needs space after hash
|
|
}
|
|
for _, tc := range cases {
|
|
t.Run(tc.in, func(t *testing.T) {
|
|
kind, payload, ok := detectBlockMarker(tc.in)
|
|
if ok != tc.ok || kind != tc.kind || payload != tc.want {
|
|
t.Errorf("detectBlockMarker(%q) = (%q,%q,%v); want (%q,%q,%v)", tc.in, kind, payload, ok, tc.kind, tc.want, tc.ok)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestImport_Document spot-checks the neutral Document the importer
|
|
// produces — block kinds, the link-span shape, and placeholder pass-through.
|
|
func TestImport_Document(t *testing.T) {
|
|
doc := Import("# Title\n\nBody **bold** and [label](http://x).\n\n- item")
|
|
if len(doc.Blocks) != 3 {
|
|
t.Fatalf("blocks = %d; want 3 (%+v)", len(doc.Blocks), doc.Blocks)
|
|
}
|
|
if doc.Blocks[0].Kind != "heading_1" {
|
|
t.Errorf("block0 kind = %q; want heading_1", doc.Blocks[0].Kind)
|
|
}
|
|
if doc.Blocks[2].Kind != "list_bullet" {
|
|
t.Errorf("block2 kind = %q; want list_bullet", doc.Blocks[2].Kind)
|
|
}
|
|
// The body paragraph carries a link span with Link set + children.
|
|
var sawLink bool
|
|
for _, s := range doc.Blocks[1].Spans {
|
|
if s.Link == "http://x" && len(s.Children) > 0 {
|
|
sawLink = true
|
|
}
|
|
}
|
|
if !sawLink {
|
|
t.Errorf("body block missing link span; got %+v", doc.Blocks[1].Spans)
|
|
}
|
|
}
|
|
|
|
func TestImport_EmptyYieldsOneEmptyParagraph(t *testing.T) {
|
|
doc := Import("")
|
|
if len(doc.Blocks) != 1 || doc.Blocks[0].Kind != "paragraph" || len(doc.Blocks[0].Spans) != 0 {
|
|
t.Errorf("empty import = %+v; want one empty paragraph block", doc.Blocks)
|
|
}
|
|
}
|