Files
paliad/internal/services/export_service_test.go
mAi e598759a34 fix(export): xlsx docProps + pane XML — Excel "repairs required" + wrong Modified date
m hit two bugs opening the Slice 1 export in Excel / Windows:

1. **Excel showed a "Repairs required" prompt** on open. Root cause:
   the SetPanes call passed only `{Freeze: true, YSplit: 1}` — the
   obvious-but-wrong shape. The resulting <pane> XML missed the
   `topLeftCell` and `activePane` attributes that Excel requires for
   a frozen-row pane (excelize's parser is permissive on re-read but
   Excel is strict). Fix: complete the Panes struct (TopLeftCell="A2",
   ActivePane="bottomLeft", Selection on bottomLeft) and surface
   SetPanes errors instead of `_ =`-ignoring them.

2. **Windows Explorer / Excel's File→Info showed Modified=2006-09-16
   ("xuri")** — excelize's hardcoded first-commit defaults. Root cause:
   buildXLSX never called SetDocProps so the canned defaults leaked.
   Fix: SetDocProps({Created, Modified} = meta.GeneratedAt;
   Creator = "Paliad (<firm>)"; Title/Description scoped per export).

3. **Bonus**: the outer-zip entry mtimes were stamped 2000-01-01 (the
   deterministic constant) so extracted files showed a Y2K Modified
   date in Explorer. Now stamped meta.GeneratedAt, which preserves
   determinism within an export (same row state + same GeneratedAt →
   same bytes, the actual m's-Q6 contract).

Also: set the active sheet to __meta (index 0) after sheet creation so
a future code path that adds/removes sheets can't leave an out-of-range
active-sheet index that would trip a separate "repairs required" path.

Regression tests in dump_export_test.go pin all three fixes by re-opening
the generated xlsx via excelize.OpenReader and asserting:
- docProps Created/Modified == meta.GeneratedAt (RFC 3339 UTC)
- docProps Creator contains "Paliad"
- xlsx bytes never contain "2006-09-16T00:00:00Z" or "<dc:creator>xuri</dc:creator>"
- sheet2/sheet3 raw XML carries topLeftCell + activePane + state=frozen
- outer-zip entries' Modified is within ±2s of GeneratedAt
- developer hatch: DUMP_EXPORT=1 writes /tmp/paliad-export-debug.{zip,xlsx}
  for opening in real Excel.
2026-05-19 13:04:38 +02:00

467 lines
14 KiB
Go

package services
// Pure-function tests for the ExportService writer plumbing.
//
// Live DB behaviour (the actual personal-scope query running against
// Postgres) is covered by the integration test in
// export_service_live_test.go (skipped without TEST_DATABASE_URL).
//
// What's pinned here:
//
// - formatCellValue value coercion (bool / time / []byte JSON / string / nil)
// - piiColumnDenyRegex catches the canonical credential-shaped names
// - buildCSV emits UTF-8 BOM + RFC 4180 quoting + survives umlauts
// - buildJSON has the expected top-level shape
// - metaToKeyValueRows keeps a stable key order (deterministic xlsx)
// - ExportFilename + slugifyFilename produce safe filenames
import (
"archive/zip"
"bytes"
"encoding/json"
"regexp"
"strings"
"testing"
"time"
"github.com/google/uuid"
)
// TestFormatCellValue_Booleans pins the upper-case spelling expected from
// formatCellValue for the two bool values: true → "TRUE", false → "FALSE".
func TestFormatCellValue_Booleans(t *testing.T) {
	rendered := formatCellValue(true)
	if rendered != "TRUE" {
		t.Fatalf("true → %q, want TRUE", rendered)
	}

	rendered = formatCellValue(false)
	if rendered != "FALSE" {
		t.Fatalf("false → %q, want FALSE", rendered)
	}
}
// TestFormatCellValue_NilEmpty checks that a nil value is rendered as the
// empty string.
func TestFormatCellValue_NilEmpty(t *testing.T) {
	rendered := formatCellValue(nil)
	if rendered == "" {
		return
	}
	t.Fatalf("nil → %q, want empty string", rendered)
}
// TestFormatCellValue_Time_RFC3339UTC checks that a UTC timestamp with a
// non-midnight time of day is rendered in RFC 3339 form with a "Z" suffix.
func TestFormatCellValue_Time_RFC3339UTC(t *testing.T) {
	const want = "2026-05-19T14:23:45Z"

	stamp := time.Date(2026, time.May, 19, 14, 23, 45, 0, time.UTC)
	if rendered := formatCellValue(stamp); rendered != want {
		t.Fatalf("timestamp → %q, want RFC 3339 UTC", rendered)
	}
}
// TestFormatCellValue_Time_DateOnly_MidnightUTC checks that a time.Time at
// exactly midnight UTC is rendered as a bare ISO date (YYYY-MM-DD). Per the
// original note, this is how a DATE column arrives from the database.
func TestFormatCellValue_Time_DateOnly_MidnightUTC(t *testing.T) {
	const want = "2026-05-19"

	midnight := time.Date(2026, time.May, 19, 0, 0, 0, 0, time.UTC)
	if rendered := formatCellValue(midnight); rendered != want {
		t.Fatalf("date → %q, want ISO YYYY-MM-DD", rendered)
	}
}
// TestFormatCellValue_Time_ZeroValue checks that the zero time.Time is
// rendered as the empty string.
func TestFormatCellValue_Time_ZeroValue(t *testing.T) {
	var zero time.Time
	if rendered := formatCellValue(zero); rendered != "" {
		t.Fatalf("zero time → %q, want empty", rendered)
	}
}
// TestFormatCellValue_JSONBytes_CompactedOneLine feeds multi-line JSON as
// []byte (the shape the original note attributes to jsonb columns) and
// checks two things about the result: it contains no newline, and it still
// parses as a JSON object.
func TestFormatCellValue_JSONBytes_CompactedOneLine(t *testing.T) {
	input := []byte("{\n \"a\": 1,\n \"b\": [\n 2,\n 3\n ]\n}")
	compacted := formatCellValue(input)

	// A single line is required so spreadsheet cells do not wrap.
	if strings.Contains(compacted, "\n") {
		t.Fatalf("compacted JSON has newline: %q", compacted)
	}

	// Compaction must not have damaged the document.
	var decoded map[string]any
	err := json.Unmarshal([]byte(compacted), &decoded)
	if err != nil {
		t.Fatalf("compacted JSON is no longer valid: %v (input=%q)", err, compacted)
	}
}
// TestFormatCellValue_PlainBytes_AsString checks that a []byte payload that
// is not JSON-shaped comes back verbatim as a string, umlauts included. The
// original note says text/uuid columns arrive from Postgres in this form.
func TestFormatCellValue_PlainBytes_AsString(t *testing.T) {
	const want = "Müller & Söhne"

	rendered := formatCellValue([]byte(want))
	if rendered != want {
		t.Fatalf("bytes → %q, want UTF-8 string preserved", rendered)
	}
}
// TestFormatCellValue_String checks that a plain string containing umlauts
// and ß passes through formatCellValue unchanged.
func TestFormatCellValue_String(t *testing.T) {
	const input = "Hügelmäßig"

	rendered := formatCellValue(input)
	if rendered != input {
		t.Fatalf("string → %q, want passthrough", rendered)
	}
}
// TestFormatCellValue_Numbers checks the decimal rendering of a signed int,
// a negative int64, a uint32 and a float64. Every mismatch is reported;
// the loop does not stop at the first failure.
func TestFormatCellValue_Numbers(t *testing.T) {
	type numberCase struct {
		in   any
		want string
	}
	table := []numberCase{
		{in: int(42), want: "42"},
		{in: int64(-7), want: "-7"},
		{in: uint32(99), want: "99"},
		{in: float64(3.14), want: "3.14"},
	}

	for i := range table {
		tc := table[i]
		got := formatCellValue(tc.in)
		if got == tc.want {
			continue
		}
		t.Errorf("%v → %q, want %q", tc.in, got, tc.want)
	}
}
// TestPIIColumnDenyRegex_MatchesKnownSecrets checks that piiColumnDenyRegex
// matches a list of credential-shaped column names, covering snake_case,
// camelCase, kebab-case and upper-case spellings.
func TestPIIColumnDenyRegex_MatchesKnownSecrets(t *testing.T) {
	secretNames := [...]string{
		"password",
		"password_encrypted",
		"PASSWORD_HASH",
		"api_key",
		"apiKey",
		"api-key",
		"private_key",
		"some_secret",
		"jwt_token",
		"access_token",
	}

	for _, column := range secretNames {
		if piiColumnDenyRegex.MatchString(column) {
			continue
		}
		t.Errorf("deny regex should match %q but did not", column)
	}
}
// TestPIIColumnDenyRegex_DoesNotMatchInnocuousNames is the false-positive
// guard for piiColumnDenyRegex: ordinary business column names must not be
// matched by it.
func TestPIIColumnDenyRegex_DoesNotMatchInnocuousNames(t *testing.T) {
	harmlessNames := [...]string{
		"id",
		"title",
		"created_at",
		"event_type",
		"project_id",
		"email",
		"display_name",
		"office",
		"profession",
	}

	for _, column := range harmlessNames {
		if !piiColumnDenyRegex.MatchString(column) {
			continue
		}
		t.Errorf("deny regex should NOT match %q but did", column)
	}
}
// TestBuildCSV_BOM_AndUmlauts checks three properties of buildCSV output:
// it starts with the UTF-8 byte-order mark (EF BB BF), the umlaut / ß text
// from the input rows appears unchanged in the body, and the first line is
// the column header.
func TestBuildCSV_BOM_AndUmlauts(t *testing.T) {
	cols := []string{"id", "title"}
	rows := [][]string{
		{"1", "Mündliche Verhandlung"},
		{"2", "Süßmäßig"},
	}
	got, err := buildCSV(cols, rows)
	if err != nil {
		t.Fatalf("buildCSV: %v", err)
	}

	// BOM. A prefix check plus a clamped diagnostic slice means output
	// shorter than three bytes is reported as an ordinary test failure
	// instead of risking an out-of-range slice while formatting the message.
	bom := []byte{0xEF, 0xBB, 0xBF}
	if !bytes.HasPrefix(got, bom) {
		head := got
		if len(head) > len(bom) {
			head = head[:len(bom)]
		}
		t.Fatalf("missing UTF-8 BOM: % x", head)
	}

	// Body is valid UTF-8 with umlauts preserved.
	body := string(got[len(bom):])
	if !strings.Contains(body, "Mündliche Verhandlung") {
		t.Errorf("umlaut text missing from CSV body: %q", body)
	}
	if !strings.Contains(body, "Süßmäßig") {
		t.Errorf("ß / umlaut text missing from CSV body: %q", body)
	}

	// Header row first. SplitN always returns at least one element, so
	// indexing lines[0] is safe even for an empty body.
	lines := strings.SplitN(body, "\n", 3)
	if !strings.HasPrefix(lines[0], "id,title") {
		t.Errorf("first line should be CSV header, got %q", lines[0])
	}
}
// TestBuildCSV_QuotingForCommaAndQuote checks RFC 4180 quoting: a field
// containing both a comma and double quotes must be wrapped in double
// quotes, with each embedded quote doubled.
func TestBuildCSV_QuotingForCommaAndQuote(t *testing.T) {
	cols := []string{"id", "label"}
	rows := [][]string{
		{"1", `Müller, Schulze "Krause" & Co`},
	}
	got, err := buildCSV(cols, rows)
	if err != nil {
		t.Fatalf("buildCSV: %v", err)
	}

	// Drop the UTF-8 BOM if present. TrimPrefix leaves short or BOM-less
	// output untouched, so malformed output fails the assertion below
	// rather than panicking on an unconditional got[3:].
	body := string(bytes.TrimPrefix(got, []byte{0xEF, 0xBB, 0xBF}))

	// RFC 4180: comma + double-quote in field → wrap in quotes, escape "
	if !strings.Contains(body, `"Müller, Schulze ""Krause"" & Co"`) {
		t.Errorf("RFC 4180 quoting wrong: %q", body)
	}
}
// TestBuildJSON_TopLevelShape checks that buildJSON emits a valid JSON
// object with top-level "meta" and "tables" keys, and that a cell value
// from the input tables ("Acme") appears in the serialized bytes.
func TestBuildJSON_TopLevelShape(t *testing.T) {
	meta := ExportMeta{
		SchemaVersion: 1,
		FirmName:      "HLC",
		Scope:         ExportScopePersonal,
		GeneratedAt:   time.Date(2026, 5, 19, 0, 0, 0, 0, time.UTC),
		RowCounts:     map[string]int{"projects": 1},
	}
	tables := map[string][]map[string]string{
		"projects": {{"id": "u1", "title": "Acme"}},
	}

	raw, err := buildJSON(tables, meta)
	if err != nil {
		t.Fatalf("buildJSON: %v", err)
	}

	var decoded map[string]any
	if err := json.Unmarshal(raw, &decoded); err != nil {
		t.Fatalf("buildJSON not valid JSON: %v", err)
	}

	if _, present := decoded["meta"]; !present {
		t.Errorf("payload missing meta key")
	}
	if _, present := decoded["tables"]; !present {
		t.Errorf("payload missing tables key")
	}

	// Checked on the raw bytes so the test does not depend on how the
	// rows are nested under "tables".
	if !strings.Contains(string(raw), `"Acme"`) {
		t.Errorf("payload missing project title: %s", string(raw))
	}
}
// TestMetaToKeyValueRows_StableOrder checks that metaToKeyValueRows is
// deterministic (two calls on the same ExportMeta yield identical rows in
// identical order) and that the "row_count.*" rows start with the table
// names in alphabetical order, not in map-iteration order.
func TestMetaToKeyValueRows_StableOrder(t *testing.T) {
	m := ExportMeta{
		SchemaVersion:  1,
		FirmName:       "HLC",
		Scope:          ExportScopePersonal,
		GeneratedAt:    time.Date(2026, 5, 19, 14, 23, 0, 0, time.UTC),
		GeneratedByID:  uuid.MustParse("00000000-0000-0000-0000-000000000001"),
		GeneratedByEml: "m@hlc.de",
		GeneratedByLbl: "m",
		RowCounts:      map[string]int{"projects": 11, "deadlines": 26, "appointments": 5},
		Warnings:       []string{"sheet=foo column=token dropped"},
	}

	first, second := metaToKeyValueRows(m), metaToKeyValueRows(m)
	if len(first) != len(second) {
		t.Fatalf("row count differs between runs")
	}
	for i, row := range first {
		if row != second[i] {
			t.Fatalf("row %d differs between runs: %v vs %v", i, row, second[i])
		}
	}

	// The row_count rows must come out alphabetically by table name:
	// appointments < deadlines < projects.
	wantOrder := []string{"row_count.appointments", "row_count.deadlines", "row_count.projects"}
	var gotKeys []string
	for _, row := range first {
		if key := row[0]; strings.HasPrefix(key, "row_count.") {
			gotKeys = append(gotKeys, key)
		}
	}

	// Report only the first position that disagrees; the message already
	// prints both full sequences.
	for i, want := range wantOrder {
		if i < len(gotKeys) && gotKeys[i] == want {
			continue
		}
		t.Errorf("row_count order wrong at %d: got %v, want %v", i, gotKeys, wantOrder)
		break
	}
}
// TestExportFilename_PerScope pins the zip filename produced by
// ExportFilename for each scope at a fixed timestamp. The project-scope
// cases also show the label being slugified into the name, with non-ASCII
// and punctuation runs becoming "-".
func TestExportFilename_PerScope(t *testing.T) {
	type filenameCase struct {
		scope string
		label string
		want  string
	}
	table := []filenameCase{
		{scope: ExportScopePersonal, label: "", want: "paliad-export-personal-2026-05-19T1423Z.zip"},
		{scope: ExportScopeOrg, label: "", want: "paliad-export-org-2026-05-19T1423Z.zip"},
		{scope: ExportScopeProject, label: "Siemens AG", want: "paliad-export-project-Siemens-AG-2026-05-19T1423Z.zip"},
		{scope: ExportScopeProject, label: "Hügel & Söhne", want: "paliad-export-project-H-gel-S-hne-2026-05-19T1423Z.zip"},
	}

	generatedAt := time.Date(2026, 5, 19, 14, 23, 0, 0, time.UTC)
	for i := range table {
		tc := table[i]
		got := ExportFilename(tc.scope, tc.label, generatedAt)
		if got == tc.want {
			continue
		}
		t.Errorf("ExportFilename(%q, %q) → %q, want %q", tc.scope, tc.label, got, tc.want)
	}
}
// TestSlugifyFilename_StripsUnsafe pins slugifyFilename on representative
// inputs: spaces, non-ASCII letters and ampersands, a path-like string
// with surrounding whitespace, the empty string, and an already-safe slug.
func TestSlugifyFilename_StripsUnsafe(t *testing.T) {
	type slugCase struct {
		in   string
		want string
	}
	table := []slugCase{
		{in: "Siemens AG", want: "Siemens-AG"},
		{in: "Müller & Söhne", want: "M-ller-S-hne"},
		{in: " /etc/passwd ", want: "etc-passwd"},
		{in: "", want: ""},
		{in: "this-is-already-fine", want: "this-is-already-fine"},
	}

	for i := range table {
		tc := table[i]
		got := slugifyFilename(tc.in)
		if got == tc.want {
			continue
		}
		t.Errorf("slugifyFilename(%q) → %q, want %q", tc.in, got, tc.want)
	}
}
// TestZipDeterminism_TwoRunsSameBytes verifies that assembling the bundle
// twice from the same sheet data and the same meta yields byte-identical
// output. This is the core guarantee m signed off on (Q6=yes
// deterministic).
//
// writeBundle itself needs a DB, so the test goes through
// assembleBundleForTest, which reproduces the pieces that determine the
// bytes: sorted entry order, each entry's content, and an entry Modified
// time taken from meta.GeneratedAt.
func TestZipDeterminism_TwoRunsSameBytes(t *testing.T) {
	meta := ExportMeta{
		SchemaVersion: 1,
		FirmName:      "HLC",
		Scope:         ExportScopePersonal,
		GeneratedAt:   time.Date(2026, 5, 19, 14, 23, 0, 0, time.UTC),
		RowCounts:     map[string]int{"projects": 1, "deadlines": 0},
	}
	sheets := []collectedSheet{
		{name: "projects", columns: []string{"id", "title"}, rows: [][]string{{"u1", "Acme"}}},
		{name: "deadlines", columns: []string{"id", "due_date"}, rows: nil},
	}

	first := assembleBundleForTest(t, sheets, meta)
	second := assembleBundleForTest(t, sheets, meta)
	if !bytes.Equal(first, second) {
		t.Fatalf("two assemblies of same data produced different bytes (%d vs %d)", len(first), len(second))
	}

	// Sanity: the bundle is a valid zip and contains the expected files.
	archive, err := zip.NewReader(bytes.NewReader(first), int64(len(first)))
	if err != nil {
		t.Fatalf("bundle is not a valid zip: %v", err)
	}

	// Keep the names both as an ordered list (for the failure message)
	// and as a set (for the membership check).
	names := make([]string, 0, len(archive.File))
	present := make(map[string]bool, len(archive.File))
	for _, entry := range archive.File {
		names = append(names, entry.Name)
		present[entry.Name] = true
	}

	wantFiles := []string{"README.txt", "__meta.json", "csv/deadlines.csv", "csv/projects.csv", "paliad-export.json", "paliad-export.xlsx"}
	for _, want := range wantFiles {
		if present[want] {
			continue
		}
		t.Errorf("missing %q in bundle (got %v)", want, names)
	}
}
// assembleBundleForTest builds the export zip in memory from already
// collected sheets, mirroring writeBundle's assembly step without touching
// the DB. It lives in the test file so production code can stay DB-coupled
// while the deterministic-zip contract is still pinned.
//
// The archive holds README.txt, __meta.json, paliad-export.json,
// paliad-export.xlsx and one csv/<sheet>.csv per sheet, written in
// ascending name order. Any build or zip error fails the test immediately
// via t.Fatalf. The return value is the complete zip as bytes.
func assembleBundleForTest(t *testing.T, sheets []collectedSheet, meta ExportMeta) []byte {
	t.Helper()

	xlsxBytes, err := buildXLSX(sheets, meta)
	if err != nil {
		t.Fatalf("buildXLSX: %v", err)
	}

	// Re-shape every sheet into a list of column→value objects for
	// buildJSON. Cells beyond the end of a short row are simply omitted.
	tables := make(map[string][]map[string]string)
	for _, sheet := range sheets {
		objects := make([]map[string]string, 0, len(sheet.rows))
		for _, row := range sheet.rows {
			obj := make(map[string]string)
			for idx := 0; idx < len(sheet.columns) && idx < len(row); idx++ {
				obj[sheet.columns[idx]] = row[idx]
			}
			objects = append(objects, obj)
		}
		tables[sheet.name] = objects
	}
	jsonBytes, err := buildJSON(tables, meta)
	if err != nil {
		t.Fatalf("buildJSON: %v", err)
	}

	csvBlobs := make(map[string][]byte)
	for _, sheet := range sheets {
		blob, err := buildCSV(sheet.columns, sheet.rows)
		if err != nil {
			t.Fatalf("buildCSV %q: %v", sheet.name, err)
		}
		csvBlobs[sheet.name] = blob
	}

	metaJSON, err := json.MarshalIndent(meta, "", " ")
	if err != nil {
		t.Fatalf("meta marshal: %v", err)
	}
	readme := buildREADME(meta)

	// Mirror writeBundle's zip assembly: one entry per file, sorted by
	// name, all sharing a single modification time.
	type bundleEntry struct {
		name string
		body []byte
	}
	entries := make([]bundleEntry, 0, 4+len(sheets))
	entries = append(entries,
		bundleEntry{name: "README.txt", body: []byte(readme)},
		bundleEntry{name: "__meta.json", body: metaJSON},
		bundleEntry{name: "paliad-export.json", body: jsonBytes},
		bundleEntry{name: "paliad-export.xlsx", body: xlsxBytes},
	)
	// CSV entries are appended in sheet order; the sort below puts them
	// in their final place.
	for _, sheet := range sheets {
		entries = append(entries, bundleEntry{name: "csv/" + sheet.name + ".csv", body: csvBlobs[sheet.name]})
	}

	// Stable insertion sort by entry name, to mirror writeBundle.
	for i := 1; i < len(entries); i++ {
		current := entries[i]
		j := i
		for j > 0 && entries[j-1].name > current.name {
			entries[j] = entries[j-1]
			j--
		}
		entries[j] = current
	}

	var out bytes.Buffer
	zipWriter := zip.NewWriter(&out)

	// Mirror writeBundle's mtime convention so the helper produces
	// realistic bytes — and so the TestBundle_ZipEntryMTime regression
	// test actually exercises the right code path. A zero GeneratedAt
	// falls back to the current time, which is not reproducible.
	stamp := meta.GeneratedAt.UTC()
	if stamp.IsZero() {
		stamp = time.Now().UTC()
	}

	for _, entry := range entries {
		fileWriter, err := zipWriter.CreateHeader(&zip.FileHeader{
			Name:     entry.name,
			Method:   zip.Deflate,
			Modified: stamp,
		})
		if err != nil {
			t.Fatalf("zip create %q: %v", entry.name, err)
		}
		if _, err := fileWriter.Write(entry.body); err != nil {
			t.Fatalf("zip write %q: %v", entry.name, err)
		}
	}
	if err := zipWriter.Close(); err != nil {
		t.Fatalf("zip close: %v", err)
	}
	return out.Bytes()
}
// TestExportScopeConstants pins the literal values of the three scope
// discriminators. Per the original note, the audit row, the __meta sheet
// and external importers depend on these strings, so a change here is a
// contract change and must fail loudly.
func TestExportScopeConstants(t *testing.T) {
	if got := ExportScopePersonal; got != "personal" {
		t.Errorf("ExportScopePersonal drifted: %q", got)
	}
	if got := ExportScopeProject; got != "project" {
		t.Errorf("ExportScopeProject drifted: %q", got)
	}
	if got := ExportScopeOrg; got != "org" {
		t.Errorf("ExportScopeOrg drifted: %q", got)
	}
}
// TestPIIRegex_IsExported guards against a refactor that leaves
// piiColumnDenyRegex nil or changes its type away from *regexp.Regexp.
// Despite the test's name, the variable it checks is package-private.
func TestPIIRegex_IsExported(t *testing.T) {
	if piiColumnDenyRegex == nil {
		t.Fatal("piiColumnDenyRegex is nil")
	}

	switch any(piiColumnDenyRegex).(type) {
	case *regexp.Regexp:
		// Expected dynamic type; nothing more to check.
	default:
		t.Fatal("piiColumnDenyRegex is not *regexp.Regexp")
	}
}