fix(export): xlsx docProps + pane XML — Excel "repairs required" + wrong Modified date

m hit two bugs opening the Slice 1 export in Excel / Windows:

1. **Excel showed a "Repairs required" prompt** on open. Root cause:
   the SetPanes call passed only `{Freeze: true, YSplit: 1}` — the
   obvious-but-wrong shape. The resulting <pane> XML missed the
   `topLeftCell` and `activePane` attributes that Excel requires for
   a frozen-row pane (excelize's parser is permissive on re-read but
   Excel is strict). Fix: complete the Panes struct (TopLeftCell="A2",
   ActivePane="bottomLeft", Selection on bottomLeft) and surface
   SetPanes errors instead of `_ =`-ignoring them.

2. **Windows Explorer / Excel's File→Info showed Modified=2006-09-16
   and Author "xuri"** — excelize's hardcoded first-commit defaults.
   Root cause: buildXLSX never called SetDocProps, so the canned
   defaults leaked. Fix: SetDocProps({Created, Modified} =
   meta.GeneratedAt; Creator = "Paliad (<firm>)"; Title/Description
   scoped per export).

3. **Bonus**: the outer-zip entry mtimes were stamped 2000-01-01 (the
   deterministic constant), so extracted files showed a Y2K Modified
   date in Explorer. They are now stamped with meta.GeneratedAt, which
   preserves determinism within an export (same row state + same
   GeneratedAt → same bytes, which is what m's Q6 contract requires).

Also: set the active sheet to __meta (index 0) after sheet creation so
a future code path that adds/removes sheets can't leave an out-of-range
active-sheet index that would trip a separate "repairs required" path.

Regression tests in dump_export_test.go pin all three fixes by re-opening
the generated xlsx via excelize.OpenReader and asserting:
- docProps Created/Modified == meta.GeneratedAt (RFC 3339 UTC)
- docProps Creator contains "Paliad"
- xlsx bytes never contain "2006-09-16T00:00:00Z" or "<dc:creator>xuri</dc:creator>"
- sheet2/sheet3 raw XML carries topLeftCell + activePane + state=frozen
- outer-zip entries' Modified is within ±2s of GeneratedAt
- developer hatch: DUMP_EXPORT=1 writes /tmp/paliad-export-debug.{zip,xlsx}
  for opening in real Excel.
This commit is contained in:
mAi
2026-05-19 13:04:38 +02:00
parent 86d20ed6d4
commit f9ff7b93e8
3 changed files with 342 additions and 19 deletions

View File

@@ -0,0 +1,266 @@
package services
// Regression tests for the xlsx-generator pitfalls reported by m on
// 2026-05-19:
//
// 1. Excel showed a "Repairs required" prompt on opening the .xlsx.
// Root cause: SetPanes call passed only Freeze + YSplit; the
// resulting <pane> XML missed topLeftCell + activePane, which
// Excel rejects. Fix in buildXLSX: complete the Panes struct
// (TopLeftCell="A2", ActivePane="bottomLeft", Selection on
// bottomLeft).
//
// 2. Windows Explorer / Excel's File→Info showed Modified=2006-09-16
// ("xuri" — excelize's first-commit defaults). Root cause:
// SetDocProps was never called, so the canned default leaked
// through. Fix in buildXLSX: SetDocProps({Created, Modified} =
// meta.GeneratedAt; Creator = "Paliad (<firm>)").
//
// The tests are always-on (no env var gate) so a future writer
// regression shows up loudly in `go test`. Developer-convenience hatch
// at the bottom: set DUMP_EXPORT=1 to additionally write the bundle +
// xlsx to /tmp for opening in real Excel.
import (
"archive/zip"
"bytes"
"io"
"os"
"strings"
"testing"
"time"
"github.com/google/uuid"
"github.com/xuri/excelize/v2"
)
// fixturePersonalExport produces the shared test fixture: a small
// personal-scope export with one populated sheet ("projects") and one
// empty sheet ("deadlines").
//
// It returns the bundle from assembleBundleForTest, the workbook bytes
// from buildXLSX, and the ExportMeta both were generated from. The test
// is aborted via t.Fatalf if buildXLSX reports an error.
func fixturePersonalExport(t *testing.T) (bundle []byte, xlsxBytes []byte, meta ExportMeta) {
	t.Helper()

	// One data row containing a non-ASCII value, plus a header-only sheet.
	projects := collectedSheet{
		name:    "projects",
		columns: []string{"id", "title", "umlauts"},
		rows:    [][]string{{"u1", "Acme", "Müller"}},
	}
	deadlines := collectedSheet{
		name:    "deadlines",
		columns: []string{"id", "due_date"},
	}
	fixtureSheets := []collectedSheet{projects, deadlines}

	// Fixed timestamp so assertions can compare against an exact value.
	generatedAt := time.Date(2026, 5, 19, 14, 23, 0, 0, time.UTC)
	meta = ExportMeta{
		SchemaVersion:  1,
		FirmName:       "HLC",
		Scope:          ExportScopePersonal,
		GeneratedAt:    generatedAt,
		GeneratedByID:  uuid.MustParse("00000000-0000-0000-0000-000000000001"),
		GeneratedByEml: "m@hlc.de",
		GeneratedByLbl: "m",
		RowCounts:      map[string]int{"projects": 1, "deadlines": 0},
	}

	bundle = assembleBundleForTest(t, fixtureSheets, meta)

	workbook, buildErr := buildXLSX(fixtureSheets, meta)
	if buildErr != nil {
		t.Fatalf("buildXLSX: %v", buildErr)
	}
	return bundle, workbook, meta
}
// TestXLSX_DocProps_NotExcelizeDefault pins fix #2.
//
// The generated workbook is re-opened with excelize and its document
// properties are checked: Created and Modified must both equal
// meta.GeneratedAt rendered as RFC 3339 in UTC (previously they carried
// the "2006-09-16T00:00:00Z" default), and Creator must be a non-empty,
// non-"xuri" value that mentions "Paliad".
func TestXLSX_DocProps_NotExcelizeDefault(t *testing.T) {
	_, raw, meta := fixturePersonalExport(t)

	wb, err := excelize.OpenReader(bytes.NewReader(raw))
	if err != nil {
		t.Fatalf("excelize.OpenReader: %v", err)
	}
	defer wb.Close()

	docProps, err := wb.GetDocProps()
	if err != nil {
		t.Fatalf("GetDocProps: %v", err)
	}

	// Both timestamps are expected to be the export's generation time.
	expected := meta.GeneratedAt.UTC().Format(time.RFC3339)
	if got := docProps.Created; got != expected {
		t.Errorf("Created = %q, want %q (excelize-default leak)", got, expected)
	}
	if got := docProps.Modified; got != expected {
		t.Errorf("Modified = %q, want %q (excelize-default leak)", got, expected)
	}

	// Creator: reject the library default and the empty string first,
	// then require the product name.
	switch docProps.Creator {
	case "", "xuri":
		t.Errorf("Creator = %q, want non-empty non-xuri (e.g. \"Paliad (HLC)\")", docProps.Creator)
	}
	if mentionsProduct := strings.Contains(docProps.Creator, "Paliad"); !mentionsProduct {
		t.Errorf("Creator = %q, expected to contain \"Paliad\"", docProps.Creator)
	}
}
// TestXLSX_DocProps_TracksGeneratedAt pins that docProps stays bound to
// meta.GeneratedAt across different timestamps — belt-and-braces vs
// the fixed-fixture timestamp in the previous test.
func TestXLSX_DocProps_TracksGeneratedAt(t *testing.T) {
for _, ts := range []time.Time{
time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC),
time.Date(2027, 12, 31, 23, 59, 59, 0, time.UTC),
time.Now().UTC().Truncate(time.Second),
} {
meta := ExportMeta{
SchemaVersion: 1,
FirmName: "HLC",
Scope: ExportScopePersonal,
GeneratedAt: ts,
RowCounts: map[string]int{"projects": 0},
}
xlsxBytes, err := buildXLSX([]collectedSheet{
{name: "projects", columns: []string{"id"}, rows: nil},
}, meta)
if err != nil {
t.Fatalf("buildXLSX: %v", err)
}
fl, err := excelize.OpenReader(bytes.NewReader(xlsxBytes))
if err != nil {
t.Fatalf("OpenReader: %v", err)
}
props, err := fl.GetDocProps()
_ = fl.Close()
if err != nil {
t.Fatalf("GetDocProps: %v", err)
}
want := ts.Format(time.RFC3339)
if props.Modified != want {
t.Errorf("Modified = %q, want %q", props.Modified, want)
}
}
}
// TestXLSX_PaneXML_IsCompleteAndValid pins fix #1.
//
// excelize accepts the half-broken <pane state="frozen" ySplit="1"/>
// shape on re-read (its parser is permissive), but Excel rejects it
// with "Repairs required". To detect the regression without spinning
// up Office, the raw worksheet XML is read out of the in-memory xlsx
// zip and checked for topLeftCell="A2", activePane="bottomLeft" and
// state="frozen".
//
// Read and close errors on a worksheet part abort the test instead of
// being dropped, so a truncated or corrupt part is reported as such
// rather than as a missing attribute.
func TestXLSX_PaneXML_IsCompleteAndValid(t *testing.T) {
	_, xlsxBytes, _ := fixturePersonalExport(t)
	zr, err := zip.NewReader(bytes.NewReader(xlsxBytes), int64(len(xlsxBytes)))
	if err != nil {
		t.Fatalf("xlsx is not a valid zip: %v", err)
	}

	// Index the archive once. The first entry with a given name wins.
	parts := make(map[string]*zip.File, len(zr.File))
	for _, f := range zr.File {
		if _, seen := parts[f.Name]; !seen {
			parts[f.Name] = f
		}
	}

	// sheet1 = __meta (no pane). sheet2 = projects, sheet3 = deadlines —
	// both have the frozen header.
	for _, target := range []string{"xl/worksheets/sheet2.xml", "xl/worksheets/sheet3.xml"} {
		part, ok := parts[target]
		if !ok {
			t.Fatalf("missing %s in xlsx zip", target)
		}
		rc, err := part.Open()
		if err != nil {
			t.Fatalf("open %s: %v", target, err)
		}
		body, readErr := io.ReadAll(rc)
		closeErr := rc.Close()
		if readErr != nil {
			t.Fatalf("read %s: %v", target, readErr)
		}
		if closeErr != nil {
			t.Fatalf("close %s: %v", target, closeErr)
		}

		s := string(body)
		if !strings.Contains(s, `topLeftCell="A2"`) {
			t.Errorf("%s pane missing topLeftCell — Excel will prompt 'repairs required'.\nXML: %s",
				target, s)
		}
		if !strings.Contains(s, `activePane="bottomLeft"`) {
			t.Errorf("%s pane missing activePane — Excel will prompt 'repairs required'.\nXML: %s",
				target, s)
		}
		if !strings.Contains(s, `state="frozen"`) {
			t.Errorf("%s pane missing state=frozen.\nXML: %s", target, s)
		}
	}
}
// TestXLSX_NoExcelizeBuildDefaults guards against any future regression
// where a code path writes the .xlsx without first overriding excelize's
// canned defaults (Created/Modified "2006-09-16T00:00:00Z", creator
// "xuri").
//
// An .xlsx is a zip container whose parts are normally stored
// deflate-compressed, so searching the container bytes for plaintext
// XML cannot find anything. The test therefore opens the container and
// scans every part's decompressed content.
func TestXLSX_NoExcelizeBuildDefaults(t *testing.T) {
	_, xlsxBytes, _ := fixturePersonalExport(t)

	zr, err := zip.NewReader(bytes.NewReader(xlsxBytes), int64(len(xlsxBytes)))
	if err != nil {
		t.Fatalf("xlsx is not a valid zip: %v", err)
	}
	if len(zr.File) == 0 {
		t.Fatal("xlsx zip contains no parts; nothing to scan")
	}

	defaultStamp := []byte("2006-09-16T00:00:00Z")
	defaultCreator := []byte(`<dc:creator>xuri</dc:creator>`)

	for _, part := range zr.File {
		rc, err := part.Open()
		if err != nil {
			t.Fatalf("open %s: %v", part.Name, err)
		}
		body, readErr := io.ReadAll(rc)
		closeErr := rc.Close()
		if readErr != nil {
			t.Fatalf("read %s: %v", part.Name, readErr)
		}
		if closeErr != nil {
			t.Fatalf("close %s: %v", part.Name, closeErr)
		}

		if bytes.Contains(body, defaultStamp) {
			t.Errorf("%s: xlsx leaks excelize default Created/Modified=2006-09-16 — SetDocProps not called?", part.Name)
		}
		if bytes.Contains(body, defaultCreator) {
			t.Errorf("%s: xlsx leaks excelize default Creator=xuri — SetDocProps not called?", part.Name)
		}
	}
}
// TestXLSX_OpensCleanly is the catch-all: round-trip the file through
// excelize and confirm the sheet list (names and order) and the
// contents of the "projects" sheet (header row plus the single data
// row, including the non-ASCII value).
func TestXLSX_OpensCleanly(t *testing.T) {
	_, xlsxBytes, _ := fixturePersonalExport(t)
	fl, err := excelize.OpenReader(bytes.NewReader(xlsxBytes))
	if err != nil {
		t.Fatalf("OpenReader: %v", err)
	}
	defer fl.Close()

	wantSheets := []string{"__meta", "projects", "deadlines"}
	got := fl.GetSheetList()
	if len(got) != len(wantSheets) {
		t.Fatalf("sheet list length = %d, want %d (%v vs %v)", len(got), len(wantSheets), got, wantSheets)
	}
	for i, want := range wantSheets {
		if got[i] != want {
			t.Errorf("sheet[%d] = %q, want %q", i, got[i], want)
		}
	}

	rows, err := fl.GetRows("projects")
	if err != nil {
		t.Fatalf("GetRows(projects): %v", err)
	}
	if len(rows) != 2 {
		t.Fatalf("projects rows = %d, want 2 (header + 1)", len(rows))
	}
	// Check row widths before indexing so a short row is reported as a
	// test failure with the offending data instead of an index panic.
	if len(rows[0]) < 1 || len(rows[1]) < 3 {
		t.Fatalf("projects rows = %v, want header=[id title umlauts] row=[u1 Acme Müller]", rows)
	}
	if rows[0][0] != "id" || rows[1][0] != "u1" || rows[1][2] != "Müller" {
		t.Errorf("projects rows = %v, want header=[id title umlauts] row=[u1 Acme Müller]", rows)
	}
}
// TestBundle_ZipEntryMTime_TracksGeneratedAt pins the outer-zip mtime
// fix. Pre-fix every entry was stamped 2000-01-01 (the deterministic
// constant) so Windows showed extracted files with a stale Modified
// column. Now they carry meta.GeneratedAt.
//
// The bundle under test comes from the fixture helper, and the test
// fails if it contains no entries, since the per-entry loop would
// otherwise pass without checking anything.
func TestBundle_ZipEntryMTime_TracksGeneratedAt(t *testing.T) {
	bundle, _, meta := fixturePersonalExport(t)
	zr, err := zip.NewReader(bytes.NewReader(bundle), int64(len(bundle)))
	if err != nil {
		t.Fatalf("bundle not a valid zip: %v", err)
	}
	if len(zr.File) == 0 {
		t.Fatal("bundle zip contains no entries; nothing to check")
	}

	want := meta.GeneratedAt.UTC()
	for _, f := range zr.File {
		got := f.Modified.UTC()
		// Zip stores mtime at 2-second resolution; allow ≤2s drift.
		diff := got.Sub(want)
		if diff < -2*time.Second || diff > 2*time.Second {
			t.Errorf("zip entry %q Modified = %v, want ~%v", f.Name, got, want)
		}
		// Specifically catch the old 2000-01-01 stamp.
		if got.Year() == 2000 && got.Month() == 1 && got.Day() == 1 {
			t.Errorf("zip entry %q stamped 2000-01-01 — old deterministic-constant regression", f.Name)
		}
	}
}
// TestDumpExport is an opt-in helper for manual inspection. It is
// skipped unless the DUMP_EXPORT environment variable is non-empty; when
// enabled it writes the fixture bundle and workbook to
// /tmp/paliad-export-debug.zip and /tmp/paliad-export-debug.xlsx so they
// can be opened in real Excel.
func TestDumpExport(t *testing.T) {
	if enabled := os.Getenv("DUMP_EXPORT") != ""; !enabled {
		t.Skip("set DUMP_EXPORT=1 to dump artifacts to /tmp/paliad-export-debug.{zip,xlsx}")
	}

	zipped, workbook, _ := fixturePersonalExport(t)

	// Written in this order: bundle first, then the workbook.
	artifacts := []struct {
		path    string
		content []byte
	}{
		{"/tmp/paliad-export-debug.zip", zipped},
		{"/tmp/paliad-export-debug.xlsx", workbook},
	}
	for _, a := range artifacts {
		if writeErr := os.WriteFile(a.path, a.content, 0o644); writeErr != nil {
			t.Fatal(writeErr)
		}
	}

	t.Logf("wrote /tmp/paliad-export-debug.zip (%d bytes) + .xlsx (%d bytes)", len(zipped), len(workbook))
}

View File

@@ -274,15 +274,24 @@ func (s *ExportService) writeBundle(ctx context.Context, w io.Writer, sheets []s
sort.Slice(entries, func(i, j int) bool { return entries[i].name < entries[j].name })
zw := zip.NewWriter(w)
// Force a fixed Modified time on every entry so the zip header bytes
// don't drift between runs. archive/zip otherwise stamps Modified
// with time.Now() which would defeat the deterministic guarantee.
fixedMod := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC)
// Stamp every zip entry's Modified with the export's GeneratedAt so
// the extracted files carry a meaningful timestamp in Windows
// Explorer / Finder (instead of "01.01.2000" or the build time).
// This is still deterministic-within-an-export: two calls with the
// same ExportMeta produce identical bytes (m's Q6 contract is
// "same row state at same generation time → identical bytes",
// modulo __meta.generated_at — and now the file mtimes too).
mod := meta.GeneratedAt.UTC()
if mod.IsZero() {
// Defensive: a zero time would cause archive/zip to write 1980-01-01
// (the DOS epoch) which would re-surface the original bug.
mod = time.Now().UTC()
}
for _, e := range entries {
hdr := &zip.FileHeader{
Name: e.name,
Method: zip.Deflate,
Modified: fixedMod,
Modified: mod,
}
fw, err := zw.CreateHeader(hdr)
if err != nil {
@@ -414,10 +423,43 @@ func formatCellValue(v any) string {
// buildXLSX assembles the workbook from the collected sheets + meta. Uses
// excelize's row-by-row writer; at personal/project scale the dataset
// fits comfortably in memory. Returns the xlsx-file bytes.
//
// Two non-obvious things this function gets right (because past versions
// got them wrong and Excel complained):
//
// 1. excelize's default core.xml carries Created=Modified="2006-09-16T00:00:00Z"
// (xuri's first commit date) until SetDocProps is called. We overwrite
// both with meta.GeneratedAt so Excel's File→Info shows the real time
// and Windows Explorer shows a sensible Modified column.
//
// 2. A frozen header row needs a complete <pane> definition or Excel
// pops the "Repairs required" prompt on open. excelize's Panes struct
// requires Freeze + YSplit + TopLeftCell + ActivePane; passing just
// Freeze + YSplit (the obvious-but-wrong form) emits invalid XML that
// excelize itself accepts on re-read but Excel rejects.
func buildXLSX(sheets []collectedSheet, meta ExportMeta) ([]byte, error) {
f := excelize.NewFile()
defer f.Close()
// Replace the hardcoded "Author: xuri / Created: 2006-09-16" defaults
// with real per-export metadata. Modified == Created on first write
// (no editing has happened by the time the user downloads).
tsISO := meta.GeneratedAt.UTC().Format(time.RFC3339)
creator := "Paliad"
if meta.FirmName != "" {
creator = "Paliad (" + meta.FirmName + ")"
}
if err := f.SetDocProps(&excelize.DocProperties{
Created: tsISO,
Modified: tsISO,
Creator: creator,
LastModifiedBy: creator,
Title: fmt.Sprintf("Paliad export (%s)", meta.Scope),
Description: fmt.Sprintf("Paliad data export, scope=%s, generated_by=%s", meta.Scope, meta.GeneratedByEml),
}); err != nil {
return nil, fmt.Errorf("excelize SetDocProps: %w", err)
}
// excelize creates a default "Sheet1" we want to rename to __meta.
const metaName = "__meta"
first := f.GetSheetName(0)
@@ -453,10 +495,6 @@ func buildXLSX(sheets []collectedSheet, meta ExportMeta) ([]byte, error) {
if _, err := f.NewSheet(sheetName); err != nil {
return nil, err
}
// Stream rows via the row-by-row API (NewStreamWriter is faster
// but it forbids re-opening sheets and silently truncates writes
// past the streamer's offset — at our scale the simple API is
// safer and the perf cost is negligible).
// Header row
for ci, col := range sh.columns {
cell, _ := excelize.CoordinatesToCellName(ci+1, 1)
@@ -472,21 +510,34 @@ func buildXLSX(sheets []collectedSheet, meta ExportMeta) ([]byte, error) {
}
}
}
// Freeze the header row.
_ = f.SetPanes(sheetName, &excelize.Panes{
Freeze: true,
YSplit: 1,
})
// Freeze the header row. The complete <pane> shape Excel insists
// on for a Y-only freeze: TopLeftCell="A2" (cell below the frozen
// row), ActivePane="bottomLeft", Selection on bottomLeft. The
// obvious-but-incomplete form {Freeze: true, YSplit: 1} produces
// invalid pane XML that triggers Excel's repair prompt on open.
if err := f.SetPanes(sheetName, &excelize.Panes{
Freeze: true,
YSplit: 1,
TopLeftCell: "A2",
ActivePane: "bottomLeft",
Selection: []excelize.Selection{
{SQRef: "A2", ActiveCell: "A2", Pane: "bottomLeft"},
},
}); err != nil {
return nil, fmt.Errorf("excelize SetPanes(%q): %w", sheetName, err)
}
}
// Set the active sheet to the __meta sheet (index 0). Without this,
// excelize's default active-sheet index can point at a sheet that no
// longer exists at that ordinal — also a "repair required" trigger.
f.SetActiveSheet(0)
// Write to buffer.
var buf strings.Builder
// excelize writes to an io.Writer via WriteTo
bw := &byteBuf{}
if _, err := f.WriteTo(bw); err != nil {
return nil, err
}
_ = buf // silence unused (kept for clarity that we considered a strings.Builder)
return bw.Bytes(), nil
}

View File

@@ -416,9 +416,15 @@ func assembleBundleForTest(t *testing.T, sheets []collectedSheet, meta ExportMet
var buf bytes.Buffer
zw := zip.NewWriter(&buf)
fixedMod := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC)
// Mirror writeBundle's mtime convention so the helper produces
// realistic bytes — and so the TestBundle_ZipEntryMTime regression
// test actually exercises the right code path.
mod := meta.GeneratedAt.UTC()
if mod.IsZero() {
mod = time.Now().UTC()
}
for _, e := range entries {
hdr := &zip.FileHeader{Name: e.name, Method: zip.Deflate, Modified: fixedMod}
hdr := &zip.FileHeader{Name: e.name, Method: zip.Deflate, Modified: mod}
fw, err := zw.CreateHeader(hdr)
if err != nil {
t.Fatalf("zip create %q: %v", e.name, err)