// paliad/internal/services/submission_md_test.go

package services

// Unit tests for the Composer's Markdown → OOXML walker (t-paliad-313
// Slice B). Pure function; no DB dependency.

import (
	"strings"
	"testing"
)

// TestRenderMarkdownToOOXML_EmptyInput checks that an empty Markdown source
// still produces a <w:p> element that carries the requested paragraph style.
func TestRenderMarkdownToOOXML_EmptyInput(t *testing.T) {
	xml := RenderMarkdownToOOXML("", "Normal")

	if hasParagraph := strings.Contains(xml, `<w:p>`); !hasParagraph {
		t.Errorf("empty input must still emit one <w:p>; got %q", xml)
	}
	if hasStyle := strings.Contains(xml, `<w:pStyle w:val="Normal"/>`); !hasStyle {
		t.Errorf("empty input must carry the paragraph style; got %q", xml)
	}
}

// TestRenderMarkdownToOOXML_SingleParagraph verifies that one line of text
// renders with the requested paragraph style, keeps its text, and results in
// exactly one <w:p> element.
func TestRenderMarkdownToOOXML_SingleParagraph(t *testing.T) {
	rendered := RenderMarkdownToOOXML("Hello world", "HLpat-Body-B0")

	const styleTag = `<w:pStyle w:val="HLpat-Body-B0"/>`
	if !strings.Contains(rendered, styleTag) {
		t.Errorf("paragraph missing stylemap entry: %q", rendered)
	}

	const body = "Hello world"
	if !strings.Contains(rendered, body) {
		t.Errorf("paragraph text missing: %q", rendered)
	}

	// One source line must map to one paragraph opener.
	paragraphs := strings.Count(rendered, "<w:p>")
	if paragraphs != 1 {
		t.Errorf("expected 1 <w:p>; got %d", paragraphs)
	}
}

// TestRenderMarkdownToOOXML_TwoParagraphs verifies that two text lines
// separated by a single blank line render as two <w:p> elements and that both
// texts survive.
func TestRenderMarkdownToOOXML_TwoParagraphs(t *testing.T) {
	rendered := RenderMarkdownToOOXML("first\n\nsecond", "Normal")

	paragraphs := strings.Count(rendered, "<w:p>")
	if paragraphs != 2 {
		t.Errorf("expected 2 <w:p>; got %d, out=%q", paragraphs, rendered)
	}

	hasBoth := strings.Contains(rendered, "first") && strings.Contains(rendered, "second")
	if !hasBoth {
		t.Errorf("paragraph text missing: %q", rendered)
	}
}

// TestRenderMarkdownToOOXML_BoldInline verifies that a **bold** span inside
// plain text produces a bold run-properties element wrapping only that word.
func TestRenderMarkdownToOOXML_BoldInline(t *testing.T) {
	xml := RenderMarkdownToOOXML("hello **bold** world", "")

	if hasProps := strings.Contains(xml, `<w:rPr><w:b/></w:rPr>`); !hasProps {
		t.Errorf("bold rPr missing: %q", xml)
	}
	if hasPayload := strings.Contains(xml, ">bold<"); !hasPayload {
		t.Errorf("bold text payload missing: %q", xml)
	}

	// Only the middle word is emphasised in the input, so the bold marker
	// must occur exactly once; the text on either side stays plain.
	boldTags := strings.Count(xml, "<w:b/>")
	if boldTags != 1 {
		t.Errorf("expected exactly one <w:b/> tag; got %d in %q", boldTags, xml)
	}
}

// TestRenderMarkdownToOOXML_ItalicInline verifies that an *italic* span
// produces an italic run-properties element and keeps its text.
func TestRenderMarkdownToOOXML_ItalicInline(t *testing.T) {
	xml := RenderMarkdownToOOXML("see *italic* here", "")

	const italicProps = `<w:rPr><w:i/></w:rPr>`
	if !strings.Contains(xml, italicProps) {
		t.Errorf("italic rPr missing: %q", xml)
	}

	const payload = ">italic<"
	if !strings.Contains(xml, payload) {
		t.Errorf("italic text payload missing: %q", xml)
	}
}

// TestRenderMarkdownToOOXML_BoldItalicCombo covers the nested ***both*** form.
// The test only requires that the output contains a bold marker and an italic
// marker; it does not pin them to the same run.
func TestRenderMarkdownToOOXML_BoldItalicCombo(t *testing.T) {
	xml := RenderMarkdownToOOXML("***both***", "")

	hasBold := strings.Contains(xml, `<w:b/>`)
	hasItalic := strings.Contains(xml, `<w:i/>`)
	if !(hasBold && hasItalic) {
		t.Errorf("expected both <w:b/> and <w:i/>; got %q", xml)
	}
}

// TestRenderMarkdownToOOXML_PlaceholdersPassThrough verifies that a {{...}}
// placeholder appears verbatim in the rendered output, so that a later
// placeholder-substitution pass can still find it.
func TestRenderMarkdownToOOXML_PlaceholdersPassThrough(t *testing.T) {
	const placeholder = "{{parties.claimant.0.name}}"

	xml := RenderMarkdownToOOXML("Sehr geehrter "+placeholder, "Normal")
	if found := strings.Contains(xml, placeholder); !found {
		t.Errorf("placeholder corrupted: %q", xml)
	}
}

func TestRenderMarkdownToOOXML_PlaceholderUnderscoresPreserved(t *testing.T) {
	// Regression: a placeholder key containing underscores (project.case_number,
	// user.display_name, project.patent_number_upc) used to get its underscores
	// consumed by the italic/bold inline scanner — the OOXML stored
	// {{project.casenumber}} and the preview surfaced
	// [KEIN WERT: project.casenumber] instead of the real value.
	cases := []string{
		"{{project.case_number}}",
		"{{user.display_name}}",
		"{{project.patent_number_upc}}",
		"prefix {{project.case_number}} suffix",
		"two: {{a.b_c}} and {{d.e_f}}",
		"mixed: _italic_ then {{project.case_number}} then __bold__",
	}
	for _, in := range cases {
		out := RenderMarkdownToOOXML(in, "Normal")
		// Every placeholder substring in the input must appear verbatim
		// in the output (XML escaping is irrelevant for {} and _).
		for _, ph := range extractPlaceholders(in) {
			if !strings.Contains(out, ph) {
				t.Errorf("input %q: placeholder %q lost; got %q", in, ph, out)
			}
		}
	}
}

// TestParseInlineSpans_PlaceholderWithUnderscoresIsLiteral exercises the
// inline scanner directly: a lone {{project.case_number}} must come back as
// one span that is neither bold nor italic and whose text is the untouched
// placeholder.
func TestParseInlineSpans_PlaceholderWithUnderscoresIsLiteral(t *testing.T) {
	const src = "{{project.case_number}}"

	spans := parseInlineSpans(src)
	if n := len(spans); n != 1 {
		t.Fatalf("expected 1 span; got %d (%+v)", n, spans)
	}

	if styled := spans[0].Bold || spans[0].Italic; styled {
		t.Errorf("placeholder must not be italic/bold; got %+v", spans[0])
	}
	if text := spans[0].Text; text != src {
		t.Errorf("placeholder text corrupted: got %q", text)
	}
}

// TestParseInlineSpans_ItalicAroundPlaceholder verifies that underscore
// italics on either side of a placeholder are still recognised while the
// placeholder itself, underscores included, stays in an unstyled span.
func TestParseInlineSpans_ItalicAroundPlaceholder(t *testing.T) {
	spans := parseInlineSpans("_before_ {{x.y_z}} _after_")

	var italicBefore, placeholder, italicAfter bool
	for _, sp := range spans {
		// The three conditions are mutually exclusive (italic vs. not
		// italic, "before" vs. "after"), so a switch is sufficient.
		switch {
		case sp.Italic && sp.Text == "before":
			italicBefore = true
		case sp.Italic && sp.Text == "after":
			italicAfter = true
		case !sp.Italic && !sp.Bold && strings.Contains(sp.Text, "{{x.y_z}}"):
			placeholder = true
		}
	}

	if !(italicBefore && placeholder && italicAfter) {
		t.Errorf("expected italic/placeholder/italic structure; got %+v", spans)
	}
}

// extractPlaceholders returns every "{{...}}" occurrence found in s, in order
// of appearance and including the surrounding braces. Scanning is
// left-to-right and non-overlapping: each match ends at the first "}}" after
// its opening "{{". An opener with no closer ends the scan. The result is nil
// when nothing matches. Test-only helper.
func extractPlaceholders(s string) []string {
	const (
		opener = "{{"
		closer = "}}"
	)

	var found []string
	pos := 0
	for {
		open := strings.Index(s[pos:], opener)
		if open < 0 {
			break
		}
		bodyStart := pos + open + len(opener)

		end := strings.Index(s[bodyStart:], closer)
		if end < 0 {
			break
		}
		next := bodyStart + end + len(closer)

		found = append(found, s[pos+open:next])
		pos = next
	}
	return found
}

// TestRenderMarkdownToOOXML_XMLEscape verifies that the XML-significant
// characters &, < and > in the source text are emitted as entities and that
// no raw ampersand survives.
func TestRenderMarkdownToOOXML_XMLEscape(t *testing.T) {
	xml := RenderMarkdownToOOXML("a & b < c > d", "")

	if rawAmp := strings.Contains(xml, " & "); rawAmp {
		t.Errorf("unescaped & survived: %q", xml)
	}

	// All three entities are required; a single missing one fails the test.
	allEscaped := true
	for _, entity := range []string{"&amp;", "&lt;", "&gt;"} {
		if !strings.Contains(xml, entity) {
			allEscaped = false
			break
		}
	}
	if !allEscaped {
		t.Errorf("expected escaped entities; got %q", xml)
	}
}

// TestRenderMarkdownToOOXML_BlankLinesPreserveSpacing verifies that two blank
// lines between paragraphs yield an extra empty paragraph, so the author's
// deliberate vertical whitespace is kept.
func TestRenderMarkdownToOOXML_BlankLinesPreserveSpacing(t *testing.T) {
	rendered := RenderMarkdownToOOXML("first\n\n\nsecond", "Normal")

	paragraphs := strings.Count(rendered, "<w:p>")
	if paragraphs != 3 {
		t.Errorf("expected 3 <w:p> (first + blank + second); got %d in %q", paragraphs, rendered)
	}
}

// TestRenderMarkdownToOOXML_CRLFNormalisation verifies that Windows line
// endings are treated like plain newlines: a CRLF-separated blank line splits
// the input into two paragraphs.
func TestRenderMarkdownToOOXML_CRLFNormalisation(t *testing.T) {
	rendered := RenderMarkdownToOOXML("first\r\n\r\nsecond", "")

	paragraphs := strings.Count(rendered, "<w:p>")
	if paragraphs != 2 {
		t.Errorf("CRLF input should produce 2 paragraphs; got %d in %q", paragraphs, rendered)
	}
}

// TestParseInlineSpans_Plain verifies that text without any inline markers
// comes back as one unstyled span holding the whole string.
func TestParseInlineSpans_Plain(t *testing.T) {
	spans := parseInlineSpans("hello world")

	// Short-circuit on the length check so spans[0] is never read on an
	// empty result.
	isSinglePlain := len(spans) == 1 &&
		!spans[0].Bold &&
		!spans[0].Italic &&
		spans[0].Text == "hello world"
	if !isSinglePlain {
		t.Errorf("expected single plain span; got %+v", spans)
	}
}

// TestParseInlineSpans_UnderscoreItalic verifies that _emph_ yields exactly
// one italic span whose text is "emph".
func TestParseInlineSpans_UnderscoreItalic(t *testing.T) {
	spans := parseInlineSpans("_emph_")

	matches := 0
	for _, sp := range spans {
		if !sp.Italic || sp.Text != "emph" {
			continue
		}
		matches++
	}

	if matches != 1 {
		t.Errorf("expected one italic 'emph' span; got %+v", spans)
	}
}

// TestParseInlineSpans_UnderscoreBold verifies that __strong__ yields exactly
// one bold span whose text is "strong".
func TestParseInlineSpans_UnderscoreBold(t *testing.T) {
	spans := parseInlineSpans("__strong__")

	matches := 0
	for _, sp := range spans {
		if !sp.Bold || sp.Text != "strong" {
			continue
		}
		matches++
	}

	if matches != 1 {
		t.Errorf("expected one bold 'strong' span; got %+v", spans)
	}
}

// ─────────────────────────────────────────────────────────────────────
// Slice D — rich-prose constructs
// ─────────────────────────────────────────────────────────────────────

// slicedStylemap returns a fresh block-kind → paragraph-style-name map used as
// the stylemap fixture by the Slice D tests. Each call builds a new map, so
// callers may mutate the result freely.
func slicedStylemap() map[string]string {
	entries := [...][2]string{
		{"paragraph", "Body"},
		{"heading_1", "H1"},
		{"heading_2", "H2"},
		{"heading_3", "H3"},
		{"list_bullet", "ListBullet"},
		{"list_numbered", "ListNumber"},
		{"blockquote", "Quote"},
	}

	styles := make(map[string]string, len(entries))
	for _, e := range entries {
		styles[e[0]] = e[1]
	}
	return styles
}

func TestRenderMarkdownToOOXML_Heading1(t *testing.T) {
	out := RenderMarkdownToOOXMLWithStyles("# A heading", slicedStylemap(), nil)
	if !strings.Contains(out, `<w:pStyle w:val="H1"/>`) {
		t.Errorf("heading_1 missing H1 style: %q", out)
	}
	if !strings.Contains(out, "A heading") {
		t.Errorf("heading text missing: %q", out)
	}
}

func TestRenderMarkdownToOOXML_Heading2And3(t *testing.T) {
	out := RenderMarkdownToOOXMLWithStyles("## H2 line\n### H3 line", slicedStylemap(), nil)
	if !strings.Contains(out, `<w:pStyle w:val="H2"/>`) || !strings.Contains(out, "H2 line") {
		t.Errorf("h2 not rendered: %q", out)
	}
	if !strings.Contains(out, `<w:pStyle w:val="H3"/>`) || !strings.Contains(out, "H3 line") {
		t.Errorf("h3 not rendered: %q", out)
	}
}

func TestRenderMarkdownToOOXML_BulletList(t *testing.T) {
	out := RenderMarkdownToOOXMLWithStyles("- first\n- second\n* third", slicedStylemap(), nil)
	if !strings.Contains(out, `<w:pStyle w:val="ListBullet"/>`) {
		t.Errorf("bullet stylemap not applied: %q", out)
	}
	if strings.Count(out, "• ") != 3 {
		t.Errorf("expected 3 bullet prefixes; got %d in %q", strings.Count(out, "• "), out)
	}
}

func TestRenderMarkdownToOOXML_NumberedList(t *testing.T) {
	out := RenderMarkdownToOOXMLWithStyles("1. first\n2. second\n3. third", slicedStylemap(), nil)
	if !strings.Contains(out, `<w:pStyle w:val="ListNumber"/>`) {
		t.Errorf("numbered stylemap not applied: %q", out)
	}
	for _, want := range []string{"1. ", "2. ", "3. "} {
		if !strings.Contains(out, want) {
			t.Errorf("missing ordinal prefix %q in %q", want, out)
		}
	}
}

func TestRenderMarkdownToOOXML_NumberedListResetsOnNonList(t *testing.T) {
	// "1. A\n2. B\nplain\n1. C" → 1. A, 2. B, plain para, 1. C
	out := RenderMarkdownToOOXMLWithStyles("1. A\n2. B\nplain\n1. C", slicedStylemap(), nil)
	// The plain "plain" line breaks the list, so the next numbered
	// item restarts at 1.
	idxA := strings.Index(out, "1. ")
	if idxA < 0 {
		t.Fatalf("first 1. missing: %q", out)
	}
	idxB := strings.Index(out, "2. ")
	if idxB < 0 || idxB <= idxA {
		t.Fatalf("2. not after 1.: idxA=%d idxB=%d", idxA, idxB)
	}
	rest := out[idxB+1:]
	idxC := strings.Index(rest, "1. ")
	if idxC < 0 {
		t.Errorf("numbered counter didn't reset on non-list block: %q", out)
	}
}

func TestRenderMarkdownToOOXML_Blockquote(t *testing.T) {
	out := RenderMarkdownToOOXMLWithStyles("> the quoted text", slicedStylemap(), nil)
	if !strings.Contains(out, `<w:pStyle w:val="Quote"/>`) {
		t.Errorf("blockquote stylemap not applied: %q", out)
	}
	if !strings.Contains(out, "the quoted text") {
		t.Errorf("blockquote text missing: %q", out)
	}
}

// TestRenderMarkdownToOOXML_Hyperlink verifies the [label](url) path when a
// relationship allocator is supplied: the allocator is called with the URL,
// its return value becomes the r:id of a <w:hyperlink> element, and the label
// is rendered with the "Hyperlink" character style.
func TestRenderMarkdownToOOXML_Hyperlink(t *testing.T) {
	// Recording allocator: remembers every URL it was asked about and
	// derives a predictable relationship id from it.
	seen := make(map[string]string)
	recordingAlloc := func(target string) string {
		id := "rIdComposer" + target
		seen[target] = id
		return id
	}

	xml := RenderMarkdownToOOXMLWithStyles("See [Bundesgerichtshof](https://bgh.bund.de) for details.", slicedStylemap(), recordingAlloc)

	if _, called := seen["https://bgh.bund.de"]; !called {
		t.Errorf("allocator never called for URL: %q", xml)
	}
	if hasTag := strings.Contains(xml, `<w:hyperlink r:id="rIdComposerhttps://bgh.bund.de">`); !hasTag {
		t.Errorf("hyperlink tag missing or wrong rid: %q", xml)
	}
	if hasLabel := strings.Contains(xml, "Bundesgerichtshof"); !hasLabel {
		t.Errorf("link label missing: %q", xml)
	}
	if hasStyle := strings.Contains(xml, `<w:rStyle w:val="Hyperlink"/>`); !hasStyle {
		t.Errorf("hyperlink character style missing: %q", xml)
	}
}

func TestRenderMarkdownToOOXML_HyperlinkNilAllocatorFallsBackToPlain(t *testing.T) {
	out := RenderMarkdownToOOXMLWithStyles("See [BGH](https://bgh.bund.de) here.", slicedStylemap(), nil)
	// Without an allocator, the label still renders as plain text.
	if !strings.Contains(out, "BGH") {
		t.Errorf("label dropped: %q", out)
	}
	if strings.Contains(out, "<w:hyperlink") {
		t.Errorf("hyperlink emitted without allocator: %q", out)
	}
}

// TestDetectBlockMarker is a table-driven test for detectBlockMarker. Each
// case gives an input line and the expected (kind, payload, ok) triple; for
// lines that are not block markers the expectation is ("", "", false). Every
// case runs as a subtest named after its input line.
func TestDetectBlockMarker(t *testing.T) {
	cases := []struct {
		in   string
		kind string
		want string
		ok   bool
	}{
		{"# A", "heading_1", "A", true},
		{"## B", "heading_2", "B", true},
		{"### C", "heading_3", "C", true},
		// Up to 3 leading spaces are tolerated.
		{"  # indented", "heading_1", "indented", true},
		// 4 leading spaces → not a heading.
		{"    # too-deep", "", "", false},
		{"- bullet", "list_bullet", "bullet", true},
		{"* star", "list_bullet", "star", true},
		{"1. one", "list_numbered", "one", true},
		{"42. forty-two", "list_numbered", "forty-two", true},
		{"1) paren", "list_numbered", "paren", true},
		// An ordinal needs a trailing space.
		{"1.no-space", "", "", false},
		{"> quote", "blockquote", "quote", true},
		{"plain", "", "", false},
		// A heading needs a space after the hash.
		{"#nospace", "", "", false},
	}
	for _, tc := range cases {
		t.Run(tc.in, func(t *testing.T) {
			kind, payload, ok := detectBlockMarker(tc.in)
			if ok != tc.ok || kind != tc.kind || payload != tc.want {
				t.Errorf("detectBlockMarker(%q) = (%q,%q,%v); want (%q,%q,%v)", tc.in, kind, payload, ok, tc.kind, tc.want, tc.ok)
			}
		})
	}
}