Files
paliad/pkg/docforge/markdown/importer_test.go
mAi 8763ab013c feat(docforge): slice 8 — neutral model + Markdown importer + Exporter iface (t-paliad-349)
The final slice: land the format-neutral document model with REAL consumers
and unify the Markdown parser — no duplication, byte-identical output.

Neutral model (pkg/docforge/model.go): Document / Block / InlineSpan.
BlockKind values are the stylemap keys. A hyperlink is a span with Link set
+ Children (the label's spans), preserving link boundaries so adjacent
same-URL links stay distinct — byte-exact with the pre-model walker.

Markdown importer (pkg/docforge/markdown): Import(md) → Document. The SINGLE
Markdown parser for docforge — block split, marker detection, inline
bold/italic/link tokenisation, {{placeholder}} pass-through (the b78a984
fix). Relocated out of the docx walker.

docx renderer (pkg/docforge/docx/markdown.go): now RENDERS a Document →
OOXML (RenderDocumentToOOXML); RenderMarkdownToOOXML[WithStyles] = render(
markdown.Import(md)). The shipped submission walker routes through the model,
so there is one parser, not two. The comprehensive byte-exact render tests
(RenderMarkdownToOOXML_*) all PASS unchanged = output identical.

Exporter interface (pkg/docforge/exporter.go, PRD §4 B4): Exporter{Format,
MIMEType, RenderBody(Document)} with the .docx impl (pkg/docforge/docx/
exporter.go). The seam a future PDF/HTML exporter slots into.

Tests: parser tests relocated to the markdown pkg (parseSpans/detectBlockMarker)
+ new importer Document tests + exporter conformance test.

Verification: go build/vet clean; gofmt clean; full NO-DB test suite GREEN
(authoritative — proves no regression); docforge byte-exact render oracle
PASS; composer live test renders through the rewired walker (PASS); bun build
+ bun test 274/274. The shared-DB live run fails ~85 tests across unrelated
services from a harness pq-42P08 $1-type seeding quirk + a stale
deadline_rules test — systemic/environmental (the no-DB run is clean), not
this change.

docforge train complete: 8 slices, the engine extracted + cleaned + a working
author→generate→export loop on uploaded templates, plus the neutral model +
importer + exporter seam for future formats/consumers.

m/paliad#157
2026-05-29 18:10:16 +02:00

146 lines
4.4 KiB
Go

package markdown
import (
"strings"
"testing"
)
// Inline-span + block-marker tests, relocated from the docx walker when
// parsing moved here (t-paliad-349 slice 8). parseSpans is the inline
// tokeniser; detectBlockMarker classifies a line.
func TestParseSpans_PlaceholderWithUnderscoresIsLiteral(t *testing.T) {
// {{project.case_number}} must emit as a single non-italic span
// containing the full placeholder (the b78a984 fix).
spans := parseSpans("{{project.case_number}}")
if len(spans) != 1 {
t.Fatalf("expected 1 span; got %d (%+v)", len(spans), spans)
}
if spans[0].Italic || spans[0].Bold {
t.Errorf("placeholder must not be italic/bold; got %+v", spans[0])
}
if spans[0].Text != "{{project.case_number}}" {
t.Errorf("placeholder text corrupted: got %q", spans[0].Text)
}
}
func TestParseSpans_ItalicAroundPlaceholder(t *testing.T) {
spans := parseSpans("_before_ {{x.y_z}} _after_")
var saw struct {
italicBefore bool
placeholder bool
italicAfter bool
}
for _, s := range spans {
if s.Italic && s.Text == "before" {
saw.italicBefore = true
}
if !s.Italic && !s.Bold && strings.Contains(s.Text, "{{x.y_z}}") {
saw.placeholder = true
}
if s.Italic && s.Text == "after" {
saw.italicAfter = true
}
}
if !saw.italicBefore || !saw.placeholder || !saw.italicAfter {
t.Errorf("expected italic/placeholder/italic structure; got %+v", spans)
}
}
func TestParseSpans_Plain(t *testing.T) {
spans := parseSpans("hello world")
if len(spans) != 1 || spans[0].Bold || spans[0].Italic || spans[0].Text != "hello world" {
t.Errorf("expected single plain span; got %+v", spans)
}
}
func TestParseSpans_UnderscoreItalic(t *testing.T) {
spans := parseSpans("_emph_")
var italicHits int
for _, s := range spans {
if s.Italic && s.Text == "emph" {
italicHits++
}
}
if italicHits != 1 {
t.Errorf("expected one italic 'emph' span; got %+v", spans)
}
}
func TestParseSpans_UnderscoreBold(t *testing.T) {
spans := parseSpans("__strong__")
var boldHits int
for _, s := range spans {
if s.Bold && s.Text == "strong" {
boldHits++
}
}
if boldHits != 1 {
t.Errorf("expected one bold 'strong' span; got %+v", spans)
}
}
func TestDetectBlockMarker(t *testing.T) {
cases := []struct {
in string
kind string
want string
ok bool
}{
{"# A", "heading_1", "A", true},
{"## B", "heading_2", "B", true},
{"### C", "heading_3", "C", true},
{" # indented", "heading_1", "indented", true}, // up to 3 spaces tolerated
{" # too-deep", "", "", false}, // 4 spaces → not a heading
{"- bullet", "list_bullet", "bullet", true},
{"* star", "list_bullet", "star", true},
{"1. one", "list_numbered", "one", true},
{"42. forty-two", "list_numbered", "forty-two", true},
{"1) paren", "list_numbered", "paren", true},
{"1.no-space", "", "", false}, // ordinal needs trailing space
{"> quote", "blockquote", "quote", true},
{"plain", "", "", false},
{"#nospace", "", "", false}, // heading needs space after hash
}
for _, tc := range cases {
t.Run(tc.in, func(t *testing.T) {
kind, payload, ok := detectBlockMarker(tc.in)
if ok != tc.ok || kind != tc.kind || payload != tc.want {
t.Errorf("detectBlockMarker(%q) = (%q,%q,%v); want (%q,%q,%v)", tc.in, kind, payload, ok, tc.kind, tc.want, tc.ok)
}
})
}
}
// TestImport_Document spot-checks the neutral Document the importer
// produces — block kinds, the link-span shape, and placeholder pass-through.
func TestImport_Document(t *testing.T) {
doc := Import("# Title\n\nBody **bold** and [label](http://x).\n\n- item")
if len(doc.Blocks) != 3 {
t.Fatalf("blocks = %d; want 3 (%+v)", len(doc.Blocks), doc.Blocks)
}
if doc.Blocks[0].Kind != "heading_1" {
t.Errorf("block0 kind = %q; want heading_1", doc.Blocks[0].Kind)
}
if doc.Blocks[2].Kind != "list_bullet" {
t.Errorf("block2 kind = %q; want list_bullet", doc.Blocks[2].Kind)
}
// The body paragraph carries a link span with Link set + children.
var sawLink bool
for _, s := range doc.Blocks[1].Spans {
if s.Link == "http://x" && len(s.Children) > 0 {
sawLink = true
}
}
if !sawLink {
t.Errorf("body block missing link span; got %+v", doc.Blocks[1].Spans)
}
}
func TestImport_EmptyYieldsOneEmptyParagraph(t *testing.T) {
doc := Import("")
if len(doc.Blocks) != 1 || doc.Blocks[0].Kind != "paragraph" || len(doc.Blocks[0].Spans) != 0 {
t.Errorf("empty import = %+v; want one empty paragraph block", doc.Blocks)
}
}