package services // Pure-function tests for the ExportService writer plumbing. // // Live DB behaviour (the actual personal-scope query running against // Postgres) is covered by the integration test in // export_service_live_test.go (skipped without TEST_DATABASE_URL). // // What's pinned here: // // - formatCellValue value coercion (bool / time / []byte JSON / string / nil) // - piiColumnDenyRegex catches the canonical credential-shaped names // - buildCSV emits UTF-8 BOM + RFC 4180 quoting + survives umlauts // - buildJSON has the expected top-level shape // - metaToKeyValueRows keeps a stable key order (deterministic xlsx) // - ExportFilename + slugifyFilename produce safe filenames import ( "archive/zip" "bytes" "encoding/json" "regexp" "strings" "testing" "time" "github.com/google/uuid" ) func TestFormatCellValue_Booleans(t *testing.T) { if got := formatCellValue(true); got != "TRUE" { t.Fatalf("true → %q, want TRUE", got) } if got := formatCellValue(false); got != "FALSE" { t.Fatalf("false → %q, want FALSE", got) } } func TestFormatCellValue_NilEmpty(t *testing.T) { if got := formatCellValue(nil); got != "" { t.Fatalf("nil → %q, want empty string", got) } } func TestFormatCellValue_Time_RFC3339UTC(t *testing.T) { ts := time.Date(2026, 5, 19, 14, 23, 45, 0, time.UTC) got := formatCellValue(ts) if got != "2026-05-19T14:23:45Z" { t.Fatalf("timestamp → %q, want RFC 3339 UTC", got) } } func TestFormatCellValue_Time_DateOnly_MidnightUTC(t *testing.T) { // A DATE column comes back as time.Time at midnight UTC. ts := time.Date(2026, 5, 19, 0, 0, 0, 0, time.UTC) got := formatCellValue(ts) if got != "2026-05-19" { t.Fatalf("date → %q, want ISO YYYY-MM-DD", got) } } func TestFormatCellValue_Time_ZeroValue(t *testing.T) { got := formatCellValue(time.Time{}) if got != "" { t.Fatalf("zero time → %q, want empty", got) } } func TestFormatCellValue_JSONBytes_CompactedOneLine(t *testing.T) { // jsonb columns come back as []byte holding pretty JSON. The writer // must compact it onto one line so cells don't wrap. pretty := []byte("{\n \"a\": 1,\n \"b\": [\n 2,\n 3\n ]\n}") got := formatCellValue(pretty) if strings.ContainsRune(got, '\n') { t.Fatalf("compacted JSON has newline: %q", got) } // Must still be valid JSON. var m map[string]any if err := json.Unmarshal([]byte(got), &m); err != nil { t.Fatalf("compacted JSON is no longer valid: %v (input=%q)", err, got) } } func TestFormatCellValue_PlainBytes_AsString(t *testing.T) { // Postgres returns text/uuid columns as []byte. Non-JSON-shaped // payload must be returned verbatim (preserves umlauts). got := formatCellValue([]byte("Müller & Söhne")) if got != "Müller & Söhne" { t.Fatalf("bytes → %q, want UTF-8 string preserved", got) } } func TestFormatCellValue_String(t *testing.T) { if got := formatCellValue("Hügelmäßig"); got != "Hügelmäßig" { t.Fatalf("string → %q, want passthrough", got) } } func TestFormatCellValue_Numbers(t *testing.T) { cases := []struct { in any want string }{ {int(42), "42"}, {int64(-7), "-7"}, {uint32(99), "99"}, {float64(3.14), "3.14"}, } for _, c := range cases { if got := formatCellValue(c.in); got != c.want { t.Errorf("%v → %q, want %q", c.in, got, c.want) } } } func TestPIIColumnDenyRegex_MatchesKnownSecrets(t *testing.T) { must := []string{ "password", "password_encrypted", "PASSWORD_HASH", "api_key", "apiKey", "api-key", "private_key", "some_secret", "jwt_token", "access_token", } for _, name := range must { if !piiColumnDenyRegex.MatchString(name) { t.Errorf("deny regex should match %q but did not", name) } } } func TestPIIColumnDenyRegex_DoesNotMatchInnocuousNames(t *testing.T) { // Sanity: common business columns must NOT trip the deny regex. innocuous := []string{ "id", "title", "created_at", "event_type", "project_id", "email", "display_name", "office", "profession", } for _, name := range innocuous { if piiColumnDenyRegex.MatchString(name) { t.Errorf("deny regex should NOT match %q but did", name) } } } func TestBuildCSV_BOM_AndUmlauts(t *testing.T) { cols := []string{"id", "title"} rows := [][]string{ {"1", "Mündliche Verhandlung"}, {"2", "Süßmäßig"}, } got, err := buildCSV(cols, rows) if err != nil { t.Fatalf("buildCSV: %v", err) } // BOM if len(got) < 3 || got[0] != 0xEF || got[1] != 0xBB || got[2] != 0xBF { t.Fatalf("missing UTF-8 BOM: % x", got[:3]) } // Body is valid UTF-8 with umlauts preserved body := string(got[3:]) if !strings.Contains(body, "Mündliche Verhandlung") { t.Errorf("umlaut text missing from CSV body: %q", body) } if !strings.Contains(body, "Süßmäßig") { t.Errorf("ß / umlaut text missing from CSV body: %q", body) } // Header row first lines := strings.SplitN(body, "\n", 3) if !strings.HasPrefix(lines[0], "id,title") { t.Errorf("first line should be CSV header, got %q", lines[0]) } } func TestBuildCSV_QuotingForCommaAndQuote(t *testing.T) { cols := []string{"id", "label"} rows := [][]string{ {"1", `Müller, Schulze "Krause" & Co`}, } got, err := buildCSV(cols, rows) if err != nil { t.Fatalf("buildCSV: %v", err) } body := string(got[3:]) // RFC 4180: comma + double-quote in field → wrap in quotes, escape " if !strings.Contains(body, `"Müller, Schulze ""Krause"" & Co"`) { t.Errorf("RFC 4180 quoting wrong: %q", body) } } func TestBuildJSON_TopLevelShape(t *testing.T) { tables := map[string][]map[string]string{ "projects": {{"id": "u1", "title": "Acme"}}, } meta := ExportMeta{ SchemaVersion: 1, FirmName: "HLC", Scope: ExportScopePersonal, GeneratedAt: time.Date(2026, 5, 19, 0, 0, 0, 0, time.UTC), RowCounts: map[string]int{"projects": 1}, } got, err := buildJSON(tables, meta) if err != nil { t.Fatalf("buildJSON: %v", err) } var payload map[string]any if err := json.Unmarshal(got, &payload); err != nil { t.Fatalf("buildJSON not valid JSON: %v", err) } if _, ok := payload["meta"]; !ok { t.Errorf("payload missing meta key") } if _, ok := payload["tables"]; !ok { t.Errorf("payload missing tables key") } if !bytes.Contains(got, []byte(`"Acme"`)) { t.Errorf("payload missing project title: %s", string(got)) } } func TestMetaToKeyValueRows_StableOrder(t *testing.T) { m := ExportMeta{ SchemaVersion: 1, FirmName: "HLC", Scope: ExportScopePersonal, GeneratedAt: time.Date(2026, 5, 19, 14, 23, 0, 0, time.UTC), GeneratedByID: uuid.MustParse("00000000-0000-0000-0000-000000000001"), GeneratedByEml: "m@hlc.de", GeneratedByLbl: "m", RowCounts: map[string]int{"projects": 11, "deadlines": 26, "appointments": 5}, Warnings: []string{"sheet=foo column=token dropped"}, } rows1 := metaToKeyValueRows(m) rows2 := metaToKeyValueRows(m) if len(rows1) != len(rows2) { t.Fatalf("row count differs between runs") } for i := range rows1 { if rows1[i] != rows2[i] { t.Fatalf("row %d differs between runs: %v vs %v", i, rows1[i], rows2[i]) } } // row_count rows must be sorted (deadlines < projects < appointments? no: alpha) // → row_count.appointments < row_count.deadlines < row_count.projects wantOrder := []string{"row_count.appointments", "row_count.deadlines", "row_count.projects"} gotKeys := []string{} for _, r := range rows1 { if strings.HasPrefix(r[0], "row_count.") { gotKeys = append(gotKeys, r[0]) } } for i, k := range wantOrder { if i >= len(gotKeys) || gotKeys[i] != k { t.Errorf("row_count order wrong at %d: got %v, want %v", i, gotKeys, wantOrder) break } } } func TestExportFilename_PerScope(t *testing.T) { ts := time.Date(2026, 5, 19, 14, 23, 0, 0, time.UTC) // Project-scope filenames carry an 8-hex disambiguator (last UUID // block); personal + org omit it. rootID := uuid.MustParse("61e3fb9e-29fb-44aa-867e-a89469e2cacb") cases := []struct { scope, label string id uuid.UUID want string }{ {ExportScopePersonal, "", uuid.Nil, "paliad-export-personal-2026-05-19T1423Z.zip"}, {ExportScopeOrg, "", uuid.Nil, "paliad-export-org-2026-05-19T1423Z.zip"}, {ExportScopeProject, "Siemens AG", rootID, "paliad-export-project-Siemens-AG-a89469e2cacb-2026-05-19T1423Z.zip"}, {ExportScopeProject, "Hügel & Söhne", rootID, "paliad-export-project-H-gel-S-hne-a89469e2cacb-2026-05-19T1423Z.zip"}, // Nil UUID falls back to the slug-only variant — same as Slice 1's // pre-disambiguator filename. Useful for unit tests of label-only // behaviour. {ExportScopeProject, "Siemens AG", uuid.Nil, "paliad-export-project-Siemens-AG-2026-05-19T1423Z.zip"}, } for _, c := range cases { got := ExportFilename(c.scope, c.label, c.id, ts) if got != c.want { t.Errorf("ExportFilename(%q, %q, %q) → %q, want %q", c.scope, c.label, c.id, got, c.want) } } } func TestExportFilename_ShortUUIDDisambiguator(t *testing.T) { // Two projects with identical titles must produce different filenames // when the UUID suffix is present — that's the whole point of Q5's // disambiguator. ts := time.Date(2026, 5, 19, 14, 23, 0, 0, time.UTC) idA := uuid.MustParse("11111111-1111-1111-1111-aaaaaaaaaaaa") idB := uuid.MustParse("22222222-2222-2222-2222-bbbbbbbbbbbb") a := ExportFilename(ExportScopeProject, "Standard NDA", idA, ts) b := ExportFilename(ExportScopeProject, "Standard NDA", idB, ts) if a == b { t.Fatalf("same-title same-ts filenames collide: %q", a) } if !strings.Contains(a, "aaaaaaaaaaaa") { t.Errorf("filename missing UUID-A suffix: %q", a) } if !strings.Contains(b, "bbbbbbbbbbbb") { t.Errorf("filename missing UUID-B suffix: %q", b) } } func TestSlugifyFilename_StripsUnsafe(t *testing.T) { cases := []struct{ in, want string }{ {"Siemens AG", "Siemens-AG"}, {"Müller & Söhne", "M-ller-S-hne"}, {" /etc/passwd ", "etc-passwd"}, {"", ""}, {"this-is-already-fine", "this-is-already-fine"}, } for _, c := range cases { got := slugifyFilename(c.in) if got != c.want { t.Errorf("slugifyFilename(%q) → %q, want %q", c.in, got, c.want) } } } // TestZipDeterminism verifies that two bundle assemblies of the same // sheet data + same meta produce byte-identical output. This is the core // guarantee m signed off on (Q6=yes deterministic). // // We can't go through writeBundle here (it needs a DB), so we exercise // the deterministic path at the layer where it matters: the outer zip's // file order + each entry's deterministic content + fixed Modified time. func TestZipDeterminism_TwoRunsSameBytes(t *testing.T) { meta := ExportMeta{ SchemaVersion: 1, FirmName: "HLC", Scope: ExportScopePersonal, GeneratedAt: time.Date(2026, 5, 19, 14, 23, 0, 0, time.UTC), RowCounts: map[string]int{"projects": 1, "deadlines": 0}, } sheets := []collectedSheet{ {name: "projects", columns: []string{"id", "title"}, rows: [][]string{{"u1", "Acme"}}}, {name: "deadlines", columns: []string{"id", "due_date"}, rows: nil}, } first := assembleBundleForTest(t, sheets, meta) second := assembleBundleForTest(t, sheets, meta) if !bytes.Equal(first, second) { t.Fatalf("two assemblies of same data produced different bytes (%d vs %d)", len(first), len(second)) } // Sanity: the bundle is a valid zip and contains the expected files. zr, err := zip.NewReader(bytes.NewReader(first), int64(len(first))) if err != nil { t.Fatalf("bundle is not a valid zip: %v", err) } wantFiles := []string{"README.txt", "__meta.json", "csv/deadlines.csv", "csv/projects.csv", "paliad-export.json", "paliad-export.xlsx"} gotFiles := []string{} for _, f := range zr.File { gotFiles = append(gotFiles, f.Name) } for _, want := range wantFiles { found := false for _, got := range gotFiles { if got == want { found = true break } } if !found { t.Errorf("missing %q in bundle (got %v)", want, gotFiles) } } } // assembleBundleForTest mirrors writeBundle's assembly step without // hitting the DB. Exposed as a test helper here to keep production code // strictly DB-coupled while still pinning the deterministic-zip contract. func assembleBundleForTest(t *testing.T, sheets []collectedSheet, meta ExportMeta) []byte { t.Helper() xlsxBytes, err := buildXLSX(sheets, meta) if err != nil { t.Fatalf("buildXLSX: %v", err) } tables := map[string][]map[string]string{} for _, sh := range sheets { rs := make([]map[string]string, 0, len(sh.rows)) for _, r := range sh.rows { obj := map[string]string{} for i, c := range sh.columns { if i < len(r) { obj[c] = r[i] } } rs = append(rs, obj) } tables[sh.name] = rs } jsonBytes, err := buildJSON(tables, meta) if err != nil { t.Fatalf("buildJSON: %v", err) } csvBlobs := map[string][]byte{} for _, sh := range sheets { b, err := buildCSV(sh.columns, sh.rows) if err != nil { t.Fatalf("buildCSV %q: %v", sh.name, err) } csvBlobs[sh.name] = b } metaJSON, err := json.MarshalIndent(meta, "", " ") if err != nil { t.Fatalf("meta marshal: %v", err) } readme := buildREADME(meta) // Mirror writeBundle's zip-assembly: sort entries, fixed mod time. type ent struct { name string body []byte } entries := []ent{ {"README.txt", []byte(readme)}, {"__meta.json", metaJSON}, {"paliad-export.json", jsonBytes}, {"paliad-export.xlsx", xlsxBytes}, } // CSV names sorted. for _, sh := range sheets { entries = append(entries, ent{"csv/" + sh.name + ".csv", csvBlobs[sh.name]}) } // Outer sort to mirror writeBundle. for i := 1; i < len(entries); i++ { for j := i; j > 0 && entries[j-1].name > entries[j].name; j-- { entries[j-1], entries[j] = entries[j], entries[j-1] } } var buf bytes.Buffer zw := zip.NewWriter(&buf) // Mirror writeBundle's mtime convention so the helper produces // realistic bytes — and so the TestBundle_ZipEntryMTime regression // test actually exercises the right code path. mod := meta.GeneratedAt.UTC() if mod.IsZero() { mod = time.Now().UTC() } for _, e := range entries { hdr := &zip.FileHeader{Name: e.name, Method: zip.Deflate, Modified: mod} fw, err := zw.CreateHeader(hdr) if err != nil { t.Fatalf("zip create %q: %v", e.name, err) } if _, err := fw.Write(e.body); err != nil { t.Fatalf("zip write %q: %v", e.name, err) } } if err := zw.Close(); err != nil { t.Fatalf("zip close: %v", err) } return buf.Bytes() } // TestExportScopeConstants ensures the scope discriminator strings are // the stable contract — the audit row, __meta sheet, and external // importers depend on them not drifting. func TestExportScopeConstants(t *testing.T) { if ExportScopePersonal != "personal" { t.Errorf("ExportScopePersonal drifted: %q", ExportScopePersonal) } if ExportScopeProject != "project" { t.Errorf("ExportScopeProject drifted: %q", ExportScopeProject) } if ExportScopeOrg != "org" { t.Errorf("ExportScopeOrg drifted: %q", ExportScopeOrg) } } // TestPIIRegex_IsExported makes sure the deny regex stays a compiled // regexp (catches accidental nil if someone refactors). func TestPIIRegex_IsExported(t *testing.T) { if piiColumnDenyRegex == nil { t.Fatal("piiColumnDenyRegex is nil") } if _, ok := any(piiColumnDenyRegex).(*regexp.Regexp); !ok { t.Fatal("piiColumnDenyRegex is not *regexp.Regexp") } }