package services // ExportService streams a paliad data-export bundle to an io.Writer. // // One .zip per export, containing: // // - paliad-export.xlsx canonical workbook, one sheet per entity // - paliad-export.json Excel-independent re-ingest twin // - csv/.csv per-sheet flat tables (RFC 4180 + UTF-8 BOM) // - README.txt human-readable explainer // - __meta.json standalone meta (same as the __meta sheet) // // Three scopes (per docs/design-paliad-data-export-2026-05-19.md): // // - personal — caller's RLS-visible projection + personal sidecars // - project — one project + its ltree subtree (slice 2, not in this file yet) // - org — full schema dump (slice 3, async path) // // Slice 1 ships personal only; the writer abstraction is scope-aware so // slices 2 + 3 layer on without rewriting the core. // // Determinism: sheets emitted in a fixed canonical order; rows ordered by // id ASC (or another stable tuple where no id exists); JSON object keys // sorted alphabetically; the outer zip writes its file list in sorted // order. Same row-state → identical bytes. The only non-deterministic // field is __meta.generated_at, externalised to the filename. // // PII posture: // // - Column names matching (?i)secret|token|password|api[_-]?key|private[_-]?key // are dropped at column-discovery time and recorded in __meta.warnings. // - Specific column overrides (user_caldav_config.password_encrypted, // invitations.token where it exists, etc.) live in the sheet definitions // as explicit column-filter lists. // - paliadin_turns is OFF in org scope and ON in personal scope (it's // literally the caller's own data). Org-scope exclusion is structural // (sheet absent from the registry), not just column-level. import ( "archive/zip" "context" "crypto/rand" "database/sql" "encoding/hex" "encoding/json" "encoding/csv" "fmt" "io" "regexp" "sort" "strings" "time" "github.com/google/uuid" "github.com/jmoiron/sqlx" "github.com/xuri/excelize/v2" ) // Export scope discriminators. 
// Stable strings — exposed in the audit row
// and the __meta sheet.
const (
	ExportScopePersonal = "personal"
	ExportScopeProject  = "project"
	ExportScopeOrg      = "org"
)

// ExportSchemaVersion is bumped whenever the on-disk shape changes in a
// way that requires importers to adapt. v1 is the slice-1/2/3 baseline.
const ExportSchemaVersion = 1

// piiColumnDenyRegex is the PII column-name deny regex. Any column whose
// name matches (case-insensitively, anywhere in the name) is dropped
// during column discovery and recorded in __meta.warnings. The list of
// known column names (e.g. user_caldav_config.password_encrypted) is
// deliberately covered by the regex too — explicit + regex belt-and-braces.
var piiColumnDenyRegex = regexp.MustCompile(`(?i)secret|token|password|api[_-]?key|private[_-]?key`)

// ExportService writes a scoped export bundle. Stateless except for the
// DB handle + firm-name display string.
type ExportService struct {
	db       *sqlx.DB
	firmName string
}

// NewExportService wires the service. firmName is read once at process
// start from internal/branding.Name and embedded in every export's __meta.
func NewExportService(db *sqlx.DB, firmName string) *ExportService {
	return &ExportService{db: db, firmName: firmName}
}

// ExportMeta is the bundle metadata. Stored on the __meta sheet, in
// __meta.json, and as part of the audit row.
type ExportMeta struct {
	SchemaVersion int        `json:"schema_version"`
	FirmName      string     `json:"firm_name"`
	Scope         string     `json:"scope"`
	ScopeRootID   *uuid.UUID `json:"scope_root_id,omitempty"`
	// ScopeRootLabel is the project title (project scope only). Empty
	// for personal + org scope.
	ScopeRootLabel string `json:"scope_root_label,omitempty"`
	// ScopeRootPath is the ltree path of the root project (project scope
	// only). Preserved in the audit row so closed-out projects retain a
	// usable ancestry pointer (Q6 lock-in).
	ScopeRootPath string `json:"scope_root_path,omitempty"`
	// DirectOnly is true when ?direct_only=1 was passed (project scope
	// only) — narrows the export to the root project, no descendants.
	DirectOnly     bool      `json:"direct_only,omitempty"`
	GeneratedAt    time.Time `json:"generated_at"`
	GeneratedByID  uuid.UUID `json:"generated_by_user_id"`
	GeneratedByEml string    `json:"generated_by_user_email"`
	GeneratedByLbl string    `json:"generated_by_user_label"`
	// RowCounts maps sheet name to the number of exported rows.
	RowCounts map[string]int `json:"row_counts"`
	// Warnings lists non-fatal notices (e.g. columns dropped by the PII
	// deny-list); omitted from JSON when empty.
	Warnings      []string `json:"warnings,omitempty"`
	PaliadVersion string   `json:"paliad_version,omitempty"`
	Notes         string   `json:"notes,omitempty"`
}

// ExportSpec is the per-run inputs.
type ExportSpec struct {
	Scope     string
	ScopeRoot *uuid.UUID // project_id when Scope==ExportScopeProject; nil otherwise
	// ScopeRootLabel + ScopeRootPath are populated by the project-export
	// handler (resolved from the root project row) so the audit + __meta
	// carry stable labels even if the project is later renamed.
	ScopeRootLabel string
	ScopeRootPath  string
	// DirectOnly narrows the export to the root project only (project
	// scope, ?direct_only=1).
	DirectOnly bool
	ActorID    uuid.UUID
	ActorEmail string
	ActorLabel string // display_name for the audit + meta
	// GeneratedAt is the export timestamp; the Write* entry points fill
	// in time.Now().UTC() when it is left zero.
	GeneratedAt time.Time
}

// sheetQuery is one entity sheet's SQL recipe. Sheets emit in the order
// they appear in the registry, which is fixed (alphabetical inside each
// scope-prefix group). args are sqlx-positional.
type sheetQuery struct {
	// SheetName lands in the workbook sheet, the JSON top-level key, and
	// the CSV filename stem. snake_case, ≤31 chars (Excel's hard limit).
	SheetName string
	// SQL runs as-is; should select rows in a deterministic order (ORDER
	// BY id ASC or a comparable stable tuple).
	SQL string
	// Args are sqlx-positional, bound 1:1 against the SQL's $1, $2, ….
	Args []any
	// DropColumns is an explicit list of column names to drop from the
	// result regardless of the regex deny-list. Used for jsonb columns
	// that contain credentials, or paliadin response bodies in org scope.
	DropColumns []string
}

// WritePersonal streams the caller's personal-scope bundle into w. Returns
// the meta (incl.
row_counts) for audit-row patching. // // Order of operations: // // 1. Build the sheet-query registry for the caller's visible set. // 2. Execute each query, materialise rows + columns + types. // 3. Run column-discovery + PII filter, collect warnings. // 4. Write the xlsx (excelize streaming writer), JSON, and CSVs into a // memory buffer (small at personal-scope sizes — ≪ 10MB is normal). // 5. Bundle into the outer zip in deterministic file-list order. // // The handler is responsible for the audit-row INSERT before calling + // the UPDATE after the call returns. We do not write the audit row here // because the handler also needs to decide what to do on failure (the // audit row gets a separate event_type='data_export_failed' UPDATE in // that case). func (s *ExportService) WritePersonal(ctx context.Context, w io.Writer, spec ExportSpec) (ExportMeta, error) { if spec.Scope == "" { spec.Scope = ExportScopePersonal } if spec.GeneratedAt.IsZero() { spec.GeneratedAt = time.Now().UTC() } meta := ExportMeta{ SchemaVersion: ExportSchemaVersion, FirmName: s.firmName, Scope: spec.Scope, GeneratedAt: spec.GeneratedAt, GeneratedByID: spec.ActorID, GeneratedByEml: spec.ActorEmail, GeneratedByLbl: spec.ActorLabel, RowCounts: map[string]int{}, } sheets := personalSheetQueries(spec.ActorID) if err := s.writeBundle(ctx, s.db, w, sheets, &meta); err != nil { return meta, err } return meta, nil } // WriteProject streams the project-subtree bundle for the project named // in spec.ScopeRoot into w. Returns the meta (incl. row_counts) for the // audit-row patch. // // Behavior contract (per Slice 2 design §2): // // - Every entity sheet is filtered to the subtree (project + descendants // via ltree path). When spec.DirectOnly is true, narrows to the root // project only (no descendants). 
// - approval_policies carries all 3 sources (project rows + ancestor // rows + partner-unit-default rows) tagged with a `source` column — // m's Q4 lock-in lets recipients reconstruct the effective gate. // - users_referenced restricts the user disclosure to FK-referenced // users only (avoids dumping the full firm roster into a per-matter // handover). // - Cross-subtree FKs (projects.counterclaim_of pointing outside the // subtree) are kept but warned about in __meta.warnings — m's Q3 // lock-in preserves the no-lock-in promise. // // Permission gate (§4) lives on the handler, NOT here — the service // trusts the caller has already authorised. Wiring is in handlers/export.go. func (s *ExportService) WriteProject(ctx context.Context, w io.Writer, spec ExportSpec) (ExportMeta, error) { if spec.Scope == "" { spec.Scope = ExportScopeProject } if spec.GeneratedAt.IsZero() { spec.GeneratedAt = time.Now().UTC() } if spec.ScopeRoot == nil { return ExportMeta{}, fmt.Errorf("WriteProject: ScopeRoot is required") } meta := ExportMeta{ SchemaVersion: ExportSchemaVersion, FirmName: s.firmName, Scope: spec.Scope, ScopeRootID: spec.ScopeRoot, ScopeRootLabel: spec.ScopeRootLabel, ScopeRootPath: spec.ScopeRootPath, DirectOnly: spec.DirectOnly, GeneratedAt: spec.GeneratedAt, GeneratedByID: spec.ActorID, GeneratedByEml: spec.ActorEmail, GeneratedByLbl: spec.ActorLabel, RowCounts: map[string]int{}, } sheets := projectSheetQueries(*spec.ScopeRoot, spec.DirectOnly) if err := s.writeBundle(ctx, s.db, w, sheets, &meta); err != nil { return meta, err } // Cross-subtree FK detection (Q3 lock-in: keep FK + warn). After the // bundle is built we run one lightweight scan to surface // counterclaim_of references that escape the subtree. The result // gets appended to meta.Warnings so it lands in __meta + the audit // row + the README's warning list. 
if warns, err := s.detectCrossSubtreeFKs(ctx, *spec.ScopeRoot, spec.DirectOnly); err == nil && len(warns) > 0 { meta.Warnings = append(meta.Warnings, warns...) sort.Strings(meta.Warnings) } return meta, nil } // WriteOrg streams the full org-scope backup bundle into w. Bypasses // paliad.can_see_project — admin-only, gated at the handler layer (the // service trusts the caller has been authorised). // // Wraps the entire read pass in a REPEATABLE READ READ ONLY transaction // so every sheet sees the same snapshot. Without this a backup that runs // while users are editing can land internally inconsistent rows (e.g. a // deadlines.project_id pointing at a project the projects sheet just // missed). Design §3.3. // // The handler is responsible for the audit-row INSERT / PATCH (the // org-scope backup uses BackupRunner.Run, not WriteAuditRow, because the // event_type is 'backup_created' not 'data_export'). func (s *ExportService) WriteOrg(ctx context.Context, w io.Writer, spec ExportSpec) (ExportMeta, error) { if spec.Scope == "" { spec.Scope = ExportScopeOrg } if spec.GeneratedAt.IsZero() { spec.GeneratedAt = time.Now().UTC() } meta := ExportMeta{ SchemaVersion: ExportSchemaVersion, FirmName: s.firmName, Scope: spec.Scope, GeneratedAt: spec.GeneratedAt, GeneratedByID: spec.ActorID, GeneratedByEml: spec.ActorEmail, GeneratedByLbl: spec.ActorLabel, RowCounts: map[string]int{}, } tx, err := s.db.BeginTxx(ctx, &sql.TxOptions{ Isolation: sql.LevelRepeatableRead, ReadOnly: true, }) if err != nil { return meta, fmt.Errorf("backup snapshot tx: %w", err) } // Always rollback — the tx is read-only by construction, the rollback // is just bookkeeping that releases the snapshot. 
defer func() { _ = tx.Rollback() }() sheets := orgSheetQueries() if err := s.writeBundle(ctx, tx, w, sheets, &meta); err != nil { return meta, err } return meta, nil } // detectCrossSubtreeFKs scans subtree-resident projects for FKs that // point outside the subtree (today: only projects.counterclaim_of). One // warning row per outbound reference. Best-effort: a query error here // degrades silently (the export still ships) since the warning is // informational, not load-bearing. func (s *ExportService) detectCrossSubtreeFKs(ctx context.Context, rootID uuid.UUID, directOnly bool) ([]string, error) { subtreeSQL := projectSubtreeProjectIDsSQL(directOnly) q := ` SELECT p.id, p.title, p.counterclaim_of FROM paliad.projects p WHERE p.id IN ` + subtreeSQL + ` AND p.counterclaim_of IS NOT NULL AND p.counterclaim_of NOT IN ` + subtreeSQL + ` ORDER BY p.id` type row struct { ID uuid.UUID `db:"id"` Title string `db:"title"` CounterclaimOf uuid.UUID `db:"counterclaim_of"` } var rows []row if err := s.db.SelectContext(ctx, &rows, q, rootID); err != nil { return nil, err } out := make([]string, 0, len(rows)) for _, r := range rows { out = append(out, fmt.Sprintf( "cross-subtree FK: project %q (%s).counterclaim_of → %s (not in this export)", r.Title, r.ID, r.CounterclaimOf, )) } return out, nil } // collectedSheet holds one sheet's data after column-discovery + row // materialisation. Used to hand data from writeBundle to buildXLSX + // buildJSON + buildCSV. type collectedSheet struct { name string columns []string rows [][]string // pre-stringified for cell writes } // writeBundle is the scope-agnostic core. Runs each query, writes one // xlsx sheet + one JSON branch + one CSV per sheet, packs everything into // the outer zip in sorted file-list order so two runs of the same row // state produce byte-identical bundles. 
// // queryer is the executor for sheet queries — typically s.db, but // WriteOrg passes a REPEATABLE READ *sqlx.Tx so the org dump sees a // consistent snapshot across all sheets (design §3.3). func (s *ExportService) writeBundle(ctx context.Context, queryer sqlx.QueryerContext, w io.Writer, sheets []sheetQuery, meta *ExportMeta) error { collectedSheets := make([]collectedSheet, 0, len(sheets)) jsonTables := make(map[string][]map[string]string, len(sheets)) warnings := []string{} for _, sq := range sheets { cols, rowMatrix, dropped, err := s.runSheetQuery(ctx, queryer, sq) if err != nil { return fmt.Errorf("export sheet %q: %w", sq.SheetName, err) } for _, c := range dropped { warnings = append(warnings, fmt.Sprintf("sheet=%s column=%s dropped (PII deny-list)", sq.SheetName, c)) } collectedSheets = append(collectedSheets, collectedSheet{ name: sq.SheetName, columns: cols, rows: rowMatrix, }) // JSON twin: one object per row, keyed by column name. We accept // the value-as-string convention so JSON shape matches CSV shape // 1:1 — anyone re-ingesting can re-parse with the same rules. 
jsonRows := make([]map[string]string, 0, len(rowMatrix)) for _, r := range rowMatrix { obj := make(map[string]string, len(cols)) for i, c := range cols { if i < len(r) { obj[c] = r[i] } } jsonRows = append(jsonRows, obj) } jsonTables[sq.SheetName] = jsonRows meta.RowCounts[sq.SheetName] = len(rowMatrix) } sort.Strings(warnings) meta.Warnings = warnings // --- build the xlsx in a memory buffer --- xlsxBytes, err := buildXLSX(collectedSheets, *meta) if err != nil { return fmt.Errorf("export build xlsx: %w", err) } // --- build the JSON twin --- jsonBytes, err := buildJSON(jsonTables, *meta) if err != nil { return fmt.Errorf("export build json: %w", err) } // --- build per-sheet CSVs (in-memory map, written in sorted order) --- csvBlobs := map[string][]byte{} for _, c := range collectedSheets { b, err := buildCSV(c.columns, c.rows) if err != nil { return fmt.Errorf("export build csv %q: %w", c.name, err) } csvBlobs[c.name] = b } // --- build __meta.json + README.txt --- metaJSON, err := json.MarshalIndent(*meta, "", " ") if err != nil { return fmt.Errorf("export marshal meta: %w", err) } readme := buildREADME(*meta) // --- assemble outer zip in deterministic file order --- type zipEntry struct { name string body []byte } entries := []zipEntry{ {"README.txt", []byte(readme)}, {"__meta.json", metaJSON}, {"paliad-export.json", jsonBytes}, {"paliad-export.xlsx", xlsxBytes}, } csvNames := make([]string, 0, len(csvBlobs)) for name := range csvBlobs { csvNames = append(csvNames, name) } sort.Strings(csvNames) for _, name := range csvNames { entries = append(entries, zipEntry{"csv/" + name + ".csv", csvBlobs[name]}) } sort.Slice(entries, func(i, j int) bool { return entries[i].name < entries[j].name }) zw := zip.NewWriter(w) // Stamp every zip entry's Modified with the export's GeneratedAt so // the extracted files carry a meaningful timestamp in Windows // Explorer / Finder (instead of "01.01.2000" or the build time). 
// This is still deterministic-within-an-export: two calls with the // same ExportMeta produce identical bytes (m's Q6 contract is // "same row state at same generation time → identical bytes", // modulo __meta.generated_at — and now the file mtimes too). mod := meta.GeneratedAt.UTC() if mod.IsZero() { // Defensive: a zero time would cause archive/zip to write 1980-01-01 // (the DOS epoch) which would re-surface the original bug. mod = time.Now().UTC() } for _, e := range entries { hdr := &zip.FileHeader{ Name: e.name, Method: zip.Deflate, Modified: mod, } fw, err := zw.CreateHeader(hdr) if err != nil { return fmt.Errorf("export zip header %q: %w", e.name, err) } if _, err := fw.Write(e.body); err != nil { return fmt.Errorf("export zip write %q: %w", e.name, err) } } if err := zw.Close(); err != nil { return fmt.Errorf("export zip close: %w", err) } return nil } // runSheetQuery executes one sheetQuery against the given queryer and // returns the kept columns, row matrix (pre-stringified per the design's // value-as-string convention), and the list of columns that were dropped // by the PII filter. queryer is typically s.db, but WriteOrg passes a // REPEATABLE READ *sqlx.Tx (see writeBundle docs). func (s *ExportService) runSheetQuery(ctx context.Context, queryer sqlx.QueryerContext, sq sheetQuery) (cols []string, rows [][]string, dropped []string, err error) { rs, err := queryer.QueryxContext(ctx, sq.SQL, sq.Args...) if err != nil { return nil, nil, nil, fmt.Errorf("query: %w", err) } defer rs.Close() rawCols, err := rs.Columns() if err != nil { return nil, nil, nil, fmt.Errorf("columns: %w", err) } // Filter columns through the PII deny-list + the per-sheet drop set. 
keepIdx := make([]int, 0, len(rawCols)) keepCols := make([]string, 0, len(rawCols)) drops := map[string]bool{} for _, c := range sq.DropColumns { drops[c] = true } for i, c := range rawCols { if drops[c] || piiColumnDenyRegex.MatchString(c) { dropped = append(dropped, c) continue } keepIdx = append(keepIdx, i) keepCols = append(keepCols, c) } for rs.Next() { // Read raw values; Postgres returns text/numeric/etc as []byte, // uuids as []byte, jsonb as []byte. The map-row helper picks the // right Go type per column via reflection. rawRow := make([]any, len(rawCols)) ptrs := make([]any, len(rawCols)) for i := range rawRow { ptrs[i] = &rawRow[i] } if err := rs.Scan(ptrs...); err != nil { return nil, nil, nil, fmt.Errorf("scan: %w", err) } out := make([]string, len(keepIdx)) for j, srcIdx := range keepIdx { out[j] = formatCellValue(rawRow[srcIdx]) } rows = append(rows, out) } if err := rs.Err(); err != nil { return nil, nil, nil, fmt.Errorf("rows: %w", err) } return keepCols, rows, dropped, nil } // formatCellValue renders a Postgres-driver value as the canonical export // string. Conventions per design §3.1: // // - timestamptz → RFC3339 UTC ("2026-05-19T14:23:00Z") // - date → ISO 8601 ("2026-05-19") // - booleans → "TRUE" / "FALSE" // - []byte that is valid JSON → compact JSON string (jsonb columns) // - []byte that looks like UUID/text → string // - nil → "" (the empty cell) // - arrays → semicolon-joined (Postgres returns text[] as "{a,b}" via lib/pq) // // Returning strings (vs typed Excel values) is intentional — see design // §3.1 (Q4 = ISO strings only). func formatCellValue(v any) string { if v == nil { return "" } switch x := v.(type) { case bool: if x { return "TRUE" } return "FALSE" case time.Time: // Try date-only when the value is exactly midnight UTC (Postgres // returns DATE columns as time.Time with H/M/S/N all zero). 
if x.Hour() == 0 && x.Minute() == 0 && x.Second() == 0 && x.Nanosecond() == 0 && (x.Location() == time.UTC || x.Location() == time.Local) { // Heuristic: if year < 2 it's likely the zero value if x.Year() < 2 { return "" } return x.UTC().Format("2006-01-02") } return x.UTC().Format(time.RFC3339) case []byte: // jsonb columns come back as []byte holding valid JSON. Pass them // through verbatim (one-liner) so PowerQuery's Json.Document can // re-parse. Non-JSON []byte is treated as a UTF-8 string. s := string(x) trim := strings.TrimSpace(s) if strings.HasPrefix(trim, "{") || strings.HasPrefix(trim, "[") { // Compactify so the cell has no embedded newlines. var raw json.RawMessage = []byte(trim) if b, err := json.Marshal(raw); err == nil { return string(b) } return trim } return s case string: return x case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64: return fmt.Sprintf("%v", x) default: return fmt.Sprintf("%v", x) } } // buildXLSX assembles the workbook from the collected sheets + meta. Uses // excelize's row-by-row writer; at personal/project scale the dataset // fits comfortably in memory. Returns the xlsx-file bytes. // // Two non-obvious things this function gets right (because past versions // got them wrong and Excel complained): // // 1. excelize's default core.xml carries Created=Modified="2006-09-16T00:00:00Z" // (xuri's first commit date) until SetDocProps is called. We overwrite // both with meta.GeneratedAt so Excel's File→Info shows the real time // and Windows Explorer shows a sensible Modified column. // // 2. A frozen header row needs a complete definition or Excel // pops the "Repairs required" prompt on open. excelize's Panes struct // requires Freeze + YSplit + TopLeftCell + ActivePane; passing just // Freeze + YSplit (the obvious-but-wrong form) emits invalid XML that // excelize itself accepts on re-read but Excel rejects. 
func buildXLSX(sheets []collectedSheet, meta ExportMeta) ([]byte, error) { f := excelize.NewFile() defer f.Close() // Replace the hardcoded "Author: xuri / Created: 2006-09-16" defaults // with real per-export metadata. Modified == Created on first write // (no editing has happened by the time the user downloads). tsISO := meta.GeneratedAt.UTC().Format(time.RFC3339) creator := "Paliad" if meta.FirmName != "" { creator = "Paliad (" + meta.FirmName + ")" } if err := f.SetDocProps(&excelize.DocProperties{ Created: tsISO, Modified: tsISO, Creator: creator, LastModifiedBy: creator, Title: fmt.Sprintf("Paliad export (%s)", meta.Scope), Description: fmt.Sprintf("Paliad data export, scope=%s, generated_by=%s", meta.Scope, meta.GeneratedByEml), }); err != nil { return nil, fmt.Errorf("excelize SetDocProps: %w", err) } // excelize creates a default "Sheet1" we want to rename to __meta. const metaName = "__meta" first := f.GetSheetName(0) if first != metaName { if err := f.SetSheetName(first, metaName); err != nil { return nil, err } } // Write meta as key/value rows. metaRows := metaToKeyValueRows(meta) for i, kv := range metaRows { cellA, _ := excelize.CoordinatesToCellName(1, i+1) cellB, _ := excelize.CoordinatesToCellName(2, i+1) if err := f.SetCellValue(metaName, cellA, kv[0]); err != nil { return nil, err } if err := f.SetCellValue(metaName, cellB, kv[1]); err != nil { return nil, err } } // One sheet per entity, columns in column-discovery order (= SELECT // order = stable across runs because the SQL is fixed). for _, sh := range sheets { // Excel sheet name limit is 31 chars; truncate defensively (none // of our names hit it today, but the personal-scope users_referenced // sheet is right at the edge). 
sheetName := sh.name if len(sheetName) > 31 { sheetName = sheetName[:31] } if _, err := f.NewSheet(sheetName); err != nil { return nil, err } // Header row for ci, col := range sh.columns { cell, _ := excelize.CoordinatesToCellName(ci+1, 1) if err := f.SetCellValue(sheetName, cell, col); err != nil { return nil, err } } for ri, row := range sh.rows { for ci, val := range row { cell, _ := excelize.CoordinatesToCellName(ci+1, ri+2) if err := f.SetCellValue(sheetName, cell, val); err != nil { return nil, err } } } // Freeze the header row. The complete shape Excel insists // on for a Y-only freeze: TopLeftCell="A2" (cell below the frozen // row), ActivePane="bottomLeft", Selection on bottomLeft. The // obvious-but-incomplete form {Freeze: true, YSplit: 1} produces // invalid pane XML that triggers Excel's repair prompt on open. if err := f.SetPanes(sheetName, &excelize.Panes{ Freeze: true, YSplit: 1, TopLeftCell: "A2", ActivePane: "bottomLeft", Selection: []excelize.Selection{ {SQRef: "A2", ActiveCell: "A2", Pane: "bottomLeft"}, }, }); err != nil { return nil, fmt.Errorf("excelize SetPanes(%q): %w", sheetName, err) } } // Set the active sheet to the __meta sheet (index 0). Without this, // excelize's default active-sheet index can point at a sheet that no // longer exists at that ordinal — also a "repair required" trigger. f.SetActiveSheet(0) // Write to buffer. bw := &byteBuf{} if _, err := f.WriteTo(bw); err != nil { return nil, err } return bw.Bytes(), nil } // byteBuf is a tiny io.Writer that accumulates into a byte slice. We don't // use bytes.Buffer because we need WriteTo to round-trip the result and // bytes.Buffer's interface is wider than we need. type byteBuf struct{ b []byte } func (b *byteBuf) Write(p []byte) (int, error) { b.b = append(b.b, p...) return len(p), nil } func (b *byteBuf) Bytes() []byte { return b.b } // metaToKeyValueRows flattens the meta into stable (key, value) tuples // in a fixed key order for the __meta sheet. 
func metaToKeyValueRows(m ExportMeta) [][2]string { rows := [][2]string{ {"schema_version", fmt.Sprintf("%d", m.SchemaVersion)}, {"firm_name", m.FirmName}, {"scope", m.Scope}, } if m.ScopeRootID != nil { rows = append(rows, [2]string{"scope_root_id", m.ScopeRootID.String()}) } else { rows = append(rows, [2]string{"scope_root_id", ""}) } // Project-scope-only rows (Slice 2 §2.4). Surface as empty rows for // other scopes so the __meta layout stays stable + Excel users can // see "this field exists but doesn't apply here". rows = append(rows, [2]string{"scope_root_label", m.ScopeRootLabel}, [2]string{"scope_root_path", m.ScopeRootPath}, ) if m.Scope == ExportScopeProject { if m.DirectOnly { rows = append(rows, [2]string{"direct_only", "TRUE"}) } else { rows = append(rows, [2]string{"direct_only", "FALSE"}) } } rows = append(rows, [2]string{"generated_at", m.GeneratedAt.UTC().Format(time.RFC3339)}, [2]string{"generated_by_user_id", m.GeneratedByID.String()}, [2]string{"generated_by_user_email", m.GeneratedByEml}, [2]string{"generated_by_user_label", m.GeneratedByLbl}, [2]string{"paliad_version", m.PaliadVersion}, [2]string{"notes", m.Notes}, ) // Row counts as one row per sheet (sorted). names := make([]string, 0, len(m.RowCounts)) for k := range m.RowCounts { names = append(names, k) } sort.Strings(names) for _, n := range names { rows = append(rows, [2]string{"row_count." + n, fmt.Sprintf("%d", m.RowCounts[n])}) } for _, w := range m.Warnings { rows = append(rows, [2]string{"warning", w}) } return rows } // buildJSON produces the JSON twin. Top-level shape: // // { // "meta": { ... }, // "tables": { "": [ {"": "", ...}, ... ] } // } // // Keys in every map are alphabetically sorted (encoding/json does this by // default for map[string]X, which is what we use everywhere). 
func buildJSON(tables map[string][]map[string]string, meta ExportMeta) ([]byte, error) { payload := map[string]any{ "meta": meta, "tables": tables, } return json.MarshalIndent(payload, "", " ") } // buildCSV emits a UTF-8-BOM-prefixed CSV with RFC 4180 quoting. The BOM // makes Excel-DE open the file with the correct encoding instead of // guessing windows-1252 and corrupting umlauts. func buildCSV(cols []string, rows [][]string) ([]byte, error) { var buf byteBuf // UTF-8 BOM buf.Write([]byte{0xEF, 0xBB, 0xBF}) w := csv.NewWriter(&buf) if err := w.Write(cols); err != nil { return nil, err } for _, r := range rows { if err := w.Write(r); err != nil { return nil, err } } w.Flush() if err := w.Error(); err != nil { return nil, err } return buf.Bytes(), nil } // buildREADME produces a short human-readable explainer embedded as the // first file in the bundle. Bilingual (DE primary, EN secondary). func buildREADME(m ExportMeta) string { var b strings.Builder fmt.Fprintf(&b, "Paliad Datenexport (%s)\n", m.FirmName) fmt.Fprintf(&b, "============================\n\n") fmt.Fprintf(&b, "Erstellt am : %s\n", m.GeneratedAt.UTC().Format(time.RFC3339)) fmt.Fprintf(&b, "Erstellt von : %s <%s>\n", m.GeneratedByLbl, m.GeneratedByEml) fmt.Fprintf(&b, "Umfang : %s\n", m.Scope) if m.Scope == ExportScopeProject { if m.ScopeRootLabel != "" { fmt.Fprintf(&b, "Projekt : %s\n", m.ScopeRootLabel) } if m.ScopeRootID != nil { fmt.Fprintf(&b, "Projekt-ID : %s\n", m.ScopeRootID.String()) } if m.DirectOnly { fmt.Fprintf(&b, "Hinweis : nur das Root-Projekt (?direct_only=1), keine Unter-Projekte.\n") } else { fmt.Fprintf(&b, "Hinweis : Root-Projekt + alle Unter-Projekte.\n") } } fmt.Fprintf(&b, "Schema-Version: %d\n", m.SchemaVersion) fmt.Fprintf(&b, "\n") fmt.Fprintf(&b, "Inhalt\n------\n") fmt.Fprintf(&b, "- paliad-export.xlsx — kanonische Excel-Mappe (eine Tabelle pro Entität)\n") fmt.Fprintf(&b, "- paliad-export.json — maschinenlesbare Kopie der gleichen Daten\n") fmt.Fprintf(&b, "- csv/.csv — 
Tabellen einzeln als CSV (UTF-8 mit BOM)\n") fmt.Fprintf(&b, "- __meta.json — Metadaten dieses Exports (auch im __meta-Sheet)\n") fmt.Fprintf(&b, "\n") fmt.Fprintf(&b, "Zeilen pro Tabelle:\n") names := make([]string, 0, len(m.RowCounts)) for k := range m.RowCounts { names = append(names, k) } sort.Strings(names) for _, n := range names { fmt.Fprintf(&b, " %-32s %d\n", n, m.RowCounts[n]) } fmt.Fprintf(&b, "\n") fmt.Fprintf(&b, "Hinweise\n--------\n") fmt.Fprintf(&b, "Diese Datei enthält möglicherweise vertrauliche Mandantsdaten.\n") fmt.Fprintf(&b, "Sie wurde erzeugt am %s durch %s aus Paliad (%s).\n", m.GeneratedAt.UTC().Format(time.RFC3339), m.GeneratedByEml, m.FirmName) fmt.Fprintf(&b, "Die Weitergabe an Dritte erfolgt in eigener Verantwortung des Empfängers.\n") fmt.Fprintf(&b, "\n") fmt.Fprintf(&b, "Passwörter, CalDAV-Zugangsdaten, Einladungstoken und andere Geheimnisse\n") fmt.Fprintf(&b, "werden NIE exportiert (Spalten-Filter und allgemeine Deny-Regel).\n") fmt.Fprintf(&b, "\n") fmt.Fprintf(&b, "--- English ---\n\n") fmt.Fprintf(&b, "This Paliad export bundle contains structured data of the scope above.\n") fmt.Fprintf(&b, "Open paliad-export.xlsx in Excel/LibreOffice, or parse paliad-export.json\n") fmt.Fprintf(&b, "with any JSON-capable tool. CSVs are RFC 4180 with a UTF-8 BOM.\n") fmt.Fprintf(&b, "\n") fmt.Fprintf(&b, "Dates are ISO 8601 strings; timestamps are RFC 3339 UTC. Booleans are\n") fmt.Fprintf(&b, "the literal strings TRUE/FALSE. JSON-typed columns are stored as compact\n") fmt.Fprintf(&b, "one-line JSON in each cell.\n") fmt.Fprintf(&b, "\n") fmt.Fprintf(&b, "This bundle is byte-deterministic: two exports of the same row state\n") fmt.Fprintf(&b, "produce identical zip bytes (modulo the generated_at field stored on\n") fmt.Fprintf(&b, "the __meta sheet and in __meta.json).\n") return b.String() } // ExportFilename returns the canonical filename for a download. 
Slugify is // minimal — only the project-scope variant has a free-text component to // sanitise. // // Project-scope filenames include an 8-hex-char disambiguator derived from // the root project's UUID (Slice 2 §3 Q5). Two projects with identical // titles (common: "Standard NDA" per client) would otherwise produce // filename collisions when archived together; 4-billion-class disambiguation // is cheap insurance. // // rootID is consumed only for ExportScopeProject; pass uuid.Nil for the // other scopes. func ExportFilename(scope string, scopeLabel string, rootID uuid.UUID, generatedAt time.Time) string { ts := generatedAt.UTC().Format("2006-01-02T1504Z") switch scope { case ExportScopePersonal: return fmt.Sprintf("paliad-export-personal-%s.zip", ts) case ExportScopeOrg: return fmt.Sprintf("paliad-export-org-%s.zip", ts) case ExportScopeProject: slug := slugifyFilename(scopeLabel) if slug == "" { slug = randomSlug() } short := shortUUIDSuffix(rootID) if short == "" { return fmt.Sprintf("paliad-export-project-%s-%s.zip", slug, ts) } return fmt.Sprintf("paliad-export-project-%s-%s-%s.zip", slug, short, ts) default: return fmt.Sprintf("paliad-export-%s.zip", ts) } } // shortUUIDSuffix returns the last 8 hex chars of the UUID's canonical // representation (the trailing block after the final dash). Empty string // for uuid.Nil so callers can fall back to the slug-only variant. 
func shortUUIDSuffix(id uuid.UUID) string { if id == uuid.Nil { return "" } s := id.String() if i := strings.LastIndex(s, "-"); i != -1 && i+1 < len(s) { return s[i+1:] } return "" } var filenameSafeRegex = regexp.MustCompile(`[^A-Za-z0-9-]+`) func slugifyFilename(s string) string { s = strings.TrimSpace(s) s = filenameSafeRegex.ReplaceAllString(s, "-") s = strings.Trim(s, "-") if len(s) > 40 { s = s[:40] } return s } func randomSlug() string { var b [4]byte _, _ = rand.Read(b[:]) return hex.EncodeToString(b[:]) } // --------------------------------------------------------------------------- // Personal-scope sheet registry. // --------------------------------------------------------------------------- // // Per design §2.3, "personal scope" is the RLS-visible projection plus // caller-personal sidecars. Every visible-projects query goes through // visibilityPredicatePositional so the gate is the same as runtime list // endpoints. The ?-positional binding takes the caller's user_id at $1. // // Ordering: every SELECT uses `ORDER BY id` (or the natural stable // sort-tuple for tables without an id PK) to keep two-runs-same-state // byte-deterministic. func personalSheetQueries(actorID uuid.UUID) []sheetQuery { uid := actorID visiblePProj := visibilityPredicatePositional("p", 1) // The visible-projects CTE is used by all entity sheets that scope by // project_id. Building it inline keeps each sheet's SQL self-contained // for readability + lets the query planner choose its own join order. 
visibleProjectsSubquery := `(SELECT p.id FROM paliad.projects p WHERE ` + visiblePProj + `)` return []sheetQuery{ // --- entity sheets (subtree-aware via visibility predicate) --- { SheetName: "projects", SQL: `SELECT * FROM paliad.projects p WHERE ` + visiblePProj + ` ORDER BY p.id`, Args: []any{uid}, }, { SheetName: "project_teams", SQL: `SELECT * FROM paliad.project_teams WHERE user_id = $1 OR project_id IN ` + visibleProjectsSubquery + ` ORDER BY project_id, user_id`, Args: []any{uid}, }, { SheetName: "deadlines", SQL: `SELECT * FROM paliad.deadlines WHERE project_id IN ` + visibleProjectsSubquery + ` ORDER BY id`, Args: []any{uid}, }, { SheetName: "appointments", SQL: `SELECT * FROM paliad.appointments WHERE project_id IN ` + visibleProjectsSubquery + ` ORDER BY id`, Args: []any{uid}, }, { SheetName: "parties", SQL: `SELECT * FROM paliad.parties WHERE project_id IN ` + visibleProjectsSubquery + ` ORDER BY id`, Args: []any{uid}, }, { SheetName: "notes", SQL: `SELECT * FROM paliad.notes WHERE COALESCE(project_id, (SELECT d.project_id FROM paliad.deadlines d WHERE d.id = notes.deadline_id), (SELECT a.project_id FROM paliad.appointments a WHERE a.id = notes.appointment_id), (SELECT pe.project_id FROM paliad.project_events pe WHERE pe.id = notes.project_event_id) ) IN ` + visibleProjectsSubquery + ` ORDER BY id`, Args: []any{uid}, }, { SheetName: "documents", SQL: `SELECT id, project_id, title, doc_type, file_path, file_size, mime_type, uploaded_by, created_at, updated_at FROM paliad.documents WHERE project_id IN ` + visibleProjectsSubquery + ` ORDER BY id`, Args: []any{uid}, // ai_extracted jsonb is the only column omitted from the // personal projection because it can carry verbose AI prompts. 
}, { SheetName: "project_events", SQL: `SELECT * FROM paliad.project_events WHERE project_id IN ` + visibleProjectsSubquery + ` ORDER BY id`, Args: []any{uid}, }, { SheetName: "approval_requests", SQL: `SELECT * FROM paliad.approval_requests WHERE requested_by = $1 OR decided_by = $1 OR project_id IN ` + visibleProjectsSubquery + ` ORDER BY id`, Args: []any{uid}, }, { SheetName: "checklist_instances", SQL: `SELECT * FROM paliad.checklist_instances WHERE project_id IN ` + visibleProjectsSubquery + ` ORDER BY id`, Args: []any{uid}, }, // --- personal sidecars (my_*) --- { SheetName: "me", SQL: `SELECT id, email, display_name, office, profession, job_title, practice_group, lang, reminder_morning_time, reminder_evening_time, reminder_timezone, reminder_warning_offset_days, escalation_contact_id, email_preferences, additional_offices, global_role, forum_pref, created_at, updated_at FROM paliad.users WHERE id = $1`, Args: []any{uid}, }, { SheetName: "my_caldav_config", SQL: `SELECT user_id, url, username, calendar_path, enabled, last_sync_at, last_sync_error, created_at, updated_at FROM paliad.user_caldav_config WHERE user_id = $1`, Args: []any{uid}, DropColumns: []string{"password_encrypted"}, // belt-and-braces; the SELECT above already omits it }, { SheetName: "my_views", SQL: `SELECT * FROM paliad.user_views WHERE user_id = $1 ORDER BY id`, Args: []any{uid}, }, { SheetName: "my_pinned_projects", SQL: `SELECT * FROM paliad.user_pinned_projects WHERE user_id = $1 ORDER BY project_id`, Args: []any{uid}, }, { SheetName: "my_card_layouts", SQL: `SELECT * FROM paliad.user_card_layouts WHERE user_id = $1 ORDER BY id`, Args: []any{uid}, }, { SheetName: "my_paliadin_turns", SQL: `SELECT * FROM paliad.paliadin_turns WHERE user_id = $1 ORDER BY started_at`, Args: []any{uid}, }, // --- restricted users-referenced sheet --- // Surfaces only id/email/display_name/office/profession for users // who appear as FKs anywhere in the export — avoids dumping all 47 // users on a 
personal-scope handoff. { SheetName: "users_referenced", SQL: `SELECT id, email, display_name, office, profession FROM paliad.users u WHERE u.id IN ( SELECT created_by FROM paliad.projects WHERE id IN ` + visibleProjectsSubquery + ` UNION SELECT created_by FROM paliad.deadlines WHERE project_id IN ` + visibleProjectsSubquery + ` UNION SELECT created_by FROM paliad.appointments WHERE project_id IN ` + visibleProjectsSubquery + ` UNION SELECT created_by FROM paliad.project_events WHERE project_id IN ` + visibleProjectsSubquery + ` UNION SELECT user_id FROM paliad.project_teams WHERE project_id IN ` + visibleProjectsSubquery + ` UNION SELECT created_by FROM paliad.notes WHERE COALESCE(project_id, (SELECT d.project_id FROM paliad.deadlines d WHERE d.id = notes.deadline_id), (SELECT a.project_id FROM paliad.appointments a WHERE a.id = notes.appointment_id), (SELECT pe.project_id FROM paliad.project_events pe WHERE pe.id = notes.project_event_id) ) IN ` + visibleProjectsSubquery + ` UNION SELECT $1::uuid ) ORDER BY id`, Args: []any{uid}, }, // --- reference data (read-only, prefixed ref__) --- // Same set as project scope; included so the workbook is // interpretable standalone without paliad context. 
{ SheetName: "ref__proceeding_types", SQL: `SELECT * FROM paliad.proceeding_types ORDER BY id`, }, { SheetName: "ref__event_types", SQL: `SELECT * FROM paliad.event_types ORDER BY id`, }, { SheetName: "ref__event_categories", SQL: `SELECT * FROM paliad.event_categories ORDER BY id`, }, { SheetName: "ref__deadline_rules", SQL: `SELECT * FROM paliad.deadline_rules ORDER BY id`, }, { SheetName: "ref__deadline_concepts", SQL: `SELECT * FROM paliad.deadline_concepts ORDER BY id`, }, { SheetName: "ref__courts", SQL: `SELECT * FROM paliad.courts ORDER BY id`, }, { SheetName: "ref__countries", SQL: `SELECT * FROM paliad.countries ORDER BY code`, }, { SheetName: "ref__holidays", SQL: `SELECT * FROM paliad.holidays ORDER BY date, country`, }, } } // --------------------------------------------------------------------------- // Audit row helpers (used by the handler; here to keep all export-related // SQL in one file). // --------------------------------------------------------------------------- // WriteAuditRow inserts a system_audit_log row before the export runs and // returns the new row id. The handler PATCHes the row with file_size_bytes // + final row_counts on success or marks it failed on error. // // For project-scope exports the metadata jsonb carries the ltree path // (Q6 lock-in) so the audit row remains interpretable after a project // deletion: scope_root → just the UUID; metadata.root_path → the // ancestry. Same goes for root_label + direct_only so dashboards don't // need to round-trip back to paliad.projects on render. 
func (s *ExportService) WriteAuditRow(ctx context.Context, spec ExportSpec) (uuid.UUID, error) { meta := map[string]any{ "requested_at": spec.GeneratedAt.UTC().Format(time.RFC3339), } if spec.Scope == ExportScopeProject { if spec.ScopeRootLabel != "" { meta["root_label"] = spec.ScopeRootLabel } if spec.ScopeRootPath != "" { meta["root_path"] = spec.ScopeRootPath } meta["direct_only"] = spec.DirectOnly } mb, _ := json.Marshal(meta) var id uuid.UUID err := s.db.QueryRowContext(ctx, `INSERT INTO paliad.system_audit_log (event_type, actor_id, actor_email, scope, scope_root, metadata) VALUES ('data_export', $1, $2, $3, $4, $5::jsonb) RETURNING id`, spec.ActorID, spec.ActorEmail, spec.Scope, spec.ScopeRoot, string(mb), ).Scan(&id) if err != nil { return uuid.Nil, fmt.Errorf("audit insert: %w", err) } return id, nil } // PatchAuditRowSuccess updates the audit row with final row counts and the // generated artifact size. func (s *ExportService) PatchAuditRowSuccess(ctx context.Context, id uuid.UUID, meta ExportMeta, fileSizeBytes int64) error { payload := map[string]any{ "row_counts": meta.RowCounts, "file_size_bytes": fileSizeBytes, "warnings": meta.Warnings, "completed_at": time.Now().UTC().Format(time.RFC3339), } mb, _ := json.Marshal(payload) _, err := s.db.ExecContext(ctx, `UPDATE paliad.system_audit_log SET metadata = metadata || $2::jsonb, updated_at = now() WHERE id = $1`, id, string(mb), ) if err != nil { return fmt.Errorf("audit patch success: %w", err) } return nil } // PatchAuditRowFailure marks the audit row as a failed export and stores // the error string. Uses a separate event_type so dashboards can count // failures distinctly. 
func (s *ExportService) PatchAuditRowFailure(ctx context.Context, id uuid.UUID, errStr string) {
	payload := map[string]any{
		"error":     errStr,
		"failed_at": time.Now().UTC().Format(time.RFC3339),
	}
	// Marshal error ignored: the payload is a map of two plain strings.
	mb, _ := json.Marshal(payload)
	// Best-effort — never propagate audit-write errors back to the caller
	// because the original export error is the real one to bubble.
	//
	// NOTE(review): the UPDATE runs on the caller's ctx. If the export
	// failed because that ctx was cancelled, this write fails silently as
	// well and the row keeps event_type 'data_export' — confirm the handler
	// passes a context that is still live.
	_, _ = s.db.ExecContext(ctx,
		`UPDATE paliad.system_audit_log SET event_type = 'data_export_failed', metadata = metadata || $2::jsonb, updated_at = now() WHERE id = $1`,
		id, string(mb),
	)
}

// ---------------------------------------------------------------------------
// Project-scope sheet registry (Slice 2).
// ---------------------------------------------------------------------------
//
// Subtree-aware queries via paliad.projects.path (ltree as text). The
// subtree is selected in one of two ways:
//
//	p.path = r.path OR p.path LIKE r.path || '.%'   -- root + descendants
//	(SELECT $1::uuid AS id)                         -- root only (direct_only=true)
//
// The descendant pattern includes the '.' label separator, so a project
// whose path merely shares a textual prefix with the root's path is not
// matched.
//
// We use the path-prefix-LIKE form instead of ltree `<@` because the
// schema stores path as text (the underlying ltree is materialised in
// the projects.path column). The LIKE pattern is anchored at the start.
// NOTE(review): whether an index on path can serve this LIKE depends on
// the index definition — confirm against the schema.
//
// Ordering: every SELECT uses ORDER BY id (or another stable tuple) so
// byte-determinism holds across runs.

// projectSubtreeProjectIDsSQL returns a SQL subquery expression that
// resolves to "the set of project ids in the subtree of $1". Use as the
// right-hand side of `IN`. The $1 placeholder must bind the root
// project's UUID.
//
// When directOnly is true, narrows to the root project itself only.
func projectSubtreeProjectIDsSQL(directOnly bool) string {
	if directOnly {
		// Tighter: just the root, no descendants. Still framed as a
		// subquery so the outer SQL can be uniformly composed. Note this
		// form does not touch paliad.projects, so it yields $1 whether or
		// not such a project row exists.
		return `(SELECT $1::uuid AS id)`
	}
	// Subtree = root + descendants. r is the root row keyed by $1; the root
	// itself is matched by path equality and every descendant by the
	// "<root path>.%" prefix pattern. If no project has id $1 the join
	// produces no rows and the subtree is empty.
	return `( SELECT p.id FROM paliad.projects p JOIN paliad.projects r ON r.id = $1::uuid WHERE p.path = r.path OR p.path LIKE r.path || '.%' )`
}

// projectSheetQueries returns the sheet registry for a project-scope
// export. rootID is bound to $1 in every subtree-scoped query (the ref__*
// sheets bind no arguments); directOnly narrows the subtree to just the
// root project.
//
// Sheet inclusion follows design §2.2. Same shape as personalSheetQueries
// but with subtree filtering instead of RLS-visibility and a tighter
// users-disclosure profile.
func projectSheetQueries(rootID uuid.UUID, directOnly bool) []sheetQuery {
	subtree := projectSubtreeProjectIDsSQL(directOnly)
	queries := []sheetQuery{
		// --- entity sheets (subtree-scoped) ---
		{
			SheetName: "projects",
			SQL:       `SELECT * FROM paliad.projects WHERE id IN ` + subtree + ` ORDER BY id`,
			Args:      []any{rootID},
		},
		{
			SheetName: "project_teams",
			SQL:       `SELECT * FROM paliad.project_teams WHERE project_id IN ` + subtree + ` ORDER BY project_id, user_id`,
			Args:      []any{rootID},
		},
		{
			SheetName: "project_partner_units",
			SQL:       `SELECT * FROM paliad.project_partner_units WHERE project_id IN ` + subtree + ` ORDER BY project_id, partner_unit_id`,
			Args:      []any{rootID},
		},
		{
			SheetName: "deadlines",
			SQL:       `SELECT * FROM paliad.deadlines WHERE project_id IN ` + subtree + ` ORDER BY id`,
			Args:      []any{rootID},
		},
		{
			SheetName: "appointments",
			SQL:       `SELECT * FROM paliad.appointments WHERE project_id IN ` + subtree + ` ORDER BY id`,
			Args:      []any{rootID},
		},
		{
			SheetName: "parties",
			SQL:       `SELECT * FROM paliad.parties WHERE project_id IN ` + subtree + ` ORDER BY id`,
			Args:      []any{rootID},
		},
		{
			// A note's project is its own project_id, else the project of
			// the deadline / appointment / project event it hangs off.
			SheetName: "notes",
			SQL:       `SELECT * FROM paliad.notes WHERE COALESCE(project_id, (SELECT d.project_id FROM paliad.deadlines d WHERE d.id = notes.deadline_id), (SELECT a.project_id FROM paliad.appointments a WHERE a.id = notes.appointment_id), (SELECT pe.project_id FROM paliad.project_events pe WHERE pe.id = notes.project_event_id) ) IN ` + subtree + ` ORDER BY id`,
			Args:      []any{rootID},
		},
		{
			// Explicit column list, same projection as the personal-scope
			// documents sheet.
			SheetName: "documents",
			SQL:       `SELECT id, project_id, title, doc_type, file_path, file_size, mime_type, uploaded_by, created_at, updated_at FROM paliad.documents WHERE project_id IN ` + subtree + ` ORDER BY id`,
			Args:      []any{rootID},
		},
		{
			SheetName: "project_events",
			SQL:       `SELECT * FROM paliad.project_events WHERE project_id IN ` + subtree + ` ORDER BY id`,
			Args:      []any{rootID},
		},
		{
			SheetName: "approval_requests",
			SQL:       `SELECT * FROM paliad.approval_requests WHERE project_id IN ` + subtree + ` ORDER BY id`,
			Args:      []any{rootID},
		},
		// Approval policies — m's Q4 lock: ship all three sources with
		// `source` attribution column so an importer can reconstruct
		// "what gate applies" without re-running paliad's resolver.
		//
		//   Source 1: project rows for any project in the subtree.
		//   Source 2: project rows for ancestors of the root (so a
		//             descendant export still sees the gate inherited
		//             from above the subtree).
		//   Source 3: partner-unit-default rows for units attached to
		//             any subtree project.
		//
		// One UNION ALL query, with a `source` column tagged per branch.
		// We hand-pick the columns to keep the shape stable across the
		// three sources (approval_policies.project_id is nullable when
		// the row is a partner-unit-default, etc.). Because the branches
		// are combined with UNION ALL, a policy row that satisfies more
		// than one branch appears once per matching source.
		{
			SheetName: "approval_policies",
			SQL:       ` SELECT 'project'::text AS source, id, project_id, partner_unit_id, entity_type, lifecycle_event, required_role, requires_approval, min_role, created_by, created_at, updated_at FROM paliad.approval_policies WHERE project_id IN ` + subtree + ` UNION ALL SELECT 'ancestor'::text AS source, ap.id, ap.project_id, ap.partner_unit_id, ap.entity_type, ap.lifecycle_event, ap.required_role, ap.requires_approval, ap.min_role, ap.created_by, ap.created_at, ap.updated_at FROM paliad.approval_policies ap JOIN paliad.projects r ON r.id = $1::uuid WHERE ap.project_id IS NOT NULL AND ap.project_id <> $1::uuid AND ap.project_id IN ( SELECT pa.id FROM paliad.projects pa WHERE r.path LIKE pa.path || '.%' ) UNION ALL SELECT 'partner_unit_default'::text AS source, ap.id, ap.project_id, ap.partner_unit_id, ap.entity_type, ap.lifecycle_event, ap.required_role, ap.requires_approval, ap.min_role, ap.created_by, ap.created_at, ap.updated_at FROM paliad.approval_policies ap WHERE ap.partner_unit_id IS NOT NULL AND ap.partner_unit_id IN ( SELECT ppu.partner_unit_id FROM paliad.project_partner_units ppu WHERE ppu.project_id IN ` + subtree + ` ) ORDER BY source, id`,
			Args:      []any{rootID},
		},
		{
			SheetName: "checklist_instances",
			SQL:       `SELECT * FROM paliad.checklist_instances WHERE project_id IN ` + subtree + ` ORDER BY id`,
			Args:      []any{rootID},
		},

		// --- attached partner-unit subset ---
		// Only units attached to any subtree project (avoids dumping
		// the full org chart into a per-matter handover).
		{
			SheetName: "partner_units",
			SQL:       `SELECT * FROM paliad.partner_units pu WHERE pu.id IN ( SELECT ppu.partner_unit_id FROM paliad.project_partner_units ppu WHERE ppu.project_id IN ` + subtree + ` ) ORDER BY pu.id`,
			Args:      []any{rootID},
		},
		{
			SheetName: "partner_unit_members",
			SQL:       `SELECT * FROM paliad.partner_unit_members pum WHERE pum.partner_unit_id IN ( SELECT ppu.partner_unit_id FROM paliad.project_partner_units ppu WHERE ppu.project_id IN ` + subtree + ` ) ORDER BY partner_unit_id, user_id`,
			Args:      []any{rootID},
		},

		// --- restricted users sheet ---
		// Limit user disclosure to those referenced by some FK in the
		// export. Keeps a per-matter handover from leaking the full
		// firm roster (47 users → typically 3-5 per matter).
		//
		// Sources: creators of projects / deadlines / appointments /
		// project events / notes, team members, approval requesters and
		// deciders, document uploaders, and members of attached partner
		// units.
		{
			SheetName: "users_referenced",
			SQL:       `SELECT id, email, display_name, office, profession FROM paliad.users u WHERE u.id IN ( SELECT created_by FROM paliad.projects WHERE id IN ` + subtree + ` UNION SELECT created_by FROM paliad.deadlines WHERE project_id IN ` + subtree + ` UNION SELECT created_by FROM paliad.appointments WHERE project_id IN ` + subtree + ` UNION SELECT created_by FROM paliad.project_events WHERE project_id IN ` + subtree + ` UNION SELECT user_id FROM paliad.project_teams WHERE project_id IN ` + subtree + ` UNION SELECT requested_by FROM paliad.approval_requests WHERE project_id IN ` + subtree + ` UNION SELECT decided_by FROM paliad.approval_requests WHERE project_id IN ` + subtree + ` AND decided_by IS NOT NULL UNION SELECT created_by FROM paliad.notes WHERE COALESCE(project_id, (SELECT d.project_id FROM paliad.deadlines d WHERE d.id = notes.deadline_id), (SELECT a.project_id FROM paliad.appointments a WHERE a.id = notes.appointment_id), (SELECT pe.project_id FROM paliad.project_events pe WHERE pe.id = notes.project_event_id) ) IN ` + subtree + ` UNION SELECT uploaded_by FROM paliad.documents WHERE project_id IN ` + subtree + ` AND uploaded_by IS NOT NULL UNION SELECT user_id FROM paliad.partner_unit_members pum WHERE pum.partner_unit_id IN ( SELECT ppu.partner_unit_id FROM paliad.project_partner_units ppu WHERE ppu.project_id IN ` + subtree + ` ) ) ORDER BY id`,
			Args:      []any{rootID},
		},

		// --- system_audit_log subset (the export's own audit trail) ---
		// Includes prior export events scoped to this subtree's
		// projects — lets a recipient see "who has previously
		// exported this matter". Rows are selected purely by scope_root,
		// with no filter on event_type.
		{
			SheetName: "system_audit_log_subset",
			SQL:       `SELECT * FROM paliad.system_audit_log WHERE scope_root IN ` + subtree + ` ORDER BY created_at, id`,
			Args:      []any{rootID},
		},

		// --- reference data (same set as personal scope) ---
		{SheetName: "ref__proceeding_types", SQL: `SELECT * FROM paliad.proceeding_types ORDER BY id`},
		{SheetName: "ref__event_types", SQL: `SELECT * FROM paliad.event_types ORDER BY id`},
		{SheetName: "ref__event_categories", SQL: `SELECT * FROM paliad.event_categories ORDER BY id`},
		{SheetName: "ref__deadline_rules", SQL: `SELECT * FROM paliad.deadline_rules ORDER BY id`},
		{SheetName: "ref__deadline_concepts", SQL: `SELECT * FROM paliad.deadline_concepts ORDER BY id`},
		{SheetName: "ref__courts", SQL: `SELECT * FROM paliad.courts ORDER BY id`},
		{SheetName: "ref__countries", SQL: `SELECT * FROM paliad.countries ORDER BY code`},
		{SheetName: "ref__holidays", SQL: `SELECT * FROM paliad.holidays ORDER BY date, country`},
	}
	return queries
}

// ---------------------------------------------------------------------------
// Org-scope sheet registry (Slice 3 / Backup Mode — t-paliad-246).
// ---------------------------------------------------------------------------
//
// Full-schema dump. Bypasses paliad.can_see_project — admin-only,
// gated at the handler layer (BackupRunner trusts the caller).
//
// Sheet ordering: entity sheets first (alphabetical), then ref__*
// reference sheets (alphabetical). The xlsx writer iterates the slice
// in order; downstream consumers get the same order across runs.
//
// Hard exclusions (per design §5.2 / m's Q3 decision):
//
//   - paliadin_turns
//   - paliadin_aichat_conversation
//
// AI conversation history is the most-sensitive personal data paliad
// carries; m's prior Q5 decision in t-paliad-214 made the exclusion
// structural. The two tables are absent from the registry — not just
// column-level redacted — so a future schema addition cannot
// accidentally re-include them.
//
// Also excluded unconditionally (operational / shadow):
//
//   - *_pre_NNN shadow tables (CREATE TABLE … AS SELECT backups
//     written by destructive migrations)
//   - paliad_schema_migrations (operational)
//   - auth.* (Supabase Auth schema — not ours)
//
// The PII column deny-regex (piiColumnDenyRegex) catches
// secret|token|password|api_key|private_key on every sheet as a
// belt-and-braces filter. user_caldav_config.password_encrypted is
// explicitly named in DropColumns too.
//
// orgSheetQueries takes no arguments and none of its queries bind
// parameters: every sheet is an unfiltered, ordered SELECT over one table.
func orgSheetQueries() []sheetQuery {
	return []sheetQuery{
		// --- entity sheets (alphabetical) ---
		{SheetName: "appointment_caldav_targets", SQL: `SELECT * FROM paliad.appointment_caldav_targets ORDER BY appointment_id, calendar_binding_id`},
		{SheetName: "appointments", SQL: `SELECT * FROM paliad.appointments ORDER BY id`},
		{SheetName: "approval_policies", SQL: `SELECT * FROM paliad.approval_policies ORDER BY id`},
		{SheetName: "approval_requests", SQL: `SELECT * FROM paliad.approval_requests ORDER BY id`},
		// backups is self-reflexive — including it makes "what backups
		// have we taken" recoverable from any prior backup. Tiny table.
		{SheetName: "backups", SQL: `SELECT * FROM paliad.backups ORDER BY started_at, id`},
		{SheetName: "caldav_sync_log", SQL: `SELECT * FROM paliad.caldav_sync_log ORDER BY occurred_at, id`},
		{SheetName: "checklist_instances", SQL: `SELECT * FROM paliad.checklist_instances ORDER BY id`},
		{SheetName: "checklist_shares", SQL: `SELECT * FROM paliad.checklist_shares ORDER BY id`},
		{SheetName: "checklists", SQL: `SELECT * FROM paliad.checklists ORDER BY id`},
		{SheetName: "deadline_rule_audit", SQL: `SELECT * FROM paliad.deadline_rule_audit ORDER BY changed_at, id`},
		{SheetName: "deadlines", SQL: `SELECT * FROM paliad.deadlines ORDER BY id`},
		// documents: ai_extracted jsonb dropped (verbose AI prompts;
		// matches the personal/project precedent). Binaries are not in
		// the export — only metadata.
		{
			SheetName: "documents",
			SQL:       `SELECT id, project_id, title, doc_type, file_path, file_size, mime_type, uploaded_by, created_at, updated_at FROM paliad.documents ORDER BY id`,
		},
		{SheetName: "email_broadcasts", SQL: `SELECT * FROM paliad.email_broadcasts ORDER BY id`},
		{SheetName: "email_template_versions", SQL: `SELECT * FROM paliad.email_template_versions ORDER BY id`},
		{SheetName: "email_templates", SQL: `SELECT * FROM paliad.email_templates ORDER BY id`},
		{SheetName: "firm_dashboard_default", SQL: `SELECT * FROM paliad.firm_dashboard_default ORDER BY id`},
		// invitations is a SELECT *; any token-like column is left to the
		// piiColumnDenyRegex filter described in the header comment.
		{SheetName: "invitations", SQL: `SELECT * FROM paliad.invitations ORDER BY sent_at, id`},
		{SheetName: "notes", SQL: `SELECT * FROM paliad.notes ORDER BY id`},
		{SheetName: "parties", SQL: `SELECT * FROM paliad.parties ORDER BY id`},
		{SheetName: "partner_unit_events", SQL: `SELECT * FROM paliad.partner_unit_events ORDER BY id`},
		{SheetName: "partner_unit_members", SQL: `SELECT * FROM paliad.partner_unit_members ORDER BY partner_unit_id, user_id`},
		{SheetName: "partner_units", SQL: `SELECT * FROM paliad.partner_units ORDER BY id`},
		{SheetName: "policy_audit_log", SQL: `SELECT * FROM paliad.policy_audit_log ORDER BY changed_at, id`},
		{SheetName: "project_events", SQL: `SELECT * FROM paliad.project_events ORDER BY id`},
		{SheetName: "project_partner_units", SQL: `SELECT * FROM paliad.project_partner_units ORDER BY project_id, partner_unit_id`},
		{SheetName: "project_teams", SQL: `SELECT * FROM paliad.project_teams ORDER BY project_id, user_id`},
		{SheetName: "projects", SQL: `SELECT * FROM paliad.projects ORDER BY id`},
		{SheetName: "reminder_log", SQL: `SELECT * FROM paliad.reminder_log ORDER BY sent_at, id`},
		{SheetName: "submission_drafts", SQL: `SELECT * FROM paliad.submission_drafts ORDER BY id`},
		{SheetName: "system_audit_log", SQL: `SELECT * FROM paliad.system_audit_log ORDER BY created_at, id`},
		{
			SheetName:   "user_caldav_config",
			SQL:         `SELECT * FROM paliad.user_caldav_config ORDER BY user_id`,
			DropColumns: []string{"password_encrypted"}, // belt-and-braces; piiColumnDenyRegex also catches it
		},
		{SheetName: "user_calendar_bindings", SQL: `SELECT * FROM paliad.user_calendar_bindings ORDER BY user_id, calendar_path`},
		{SheetName: "user_card_layouts", SQL: `SELECT * FROM paliad.user_card_layouts ORDER BY id`},
		{SheetName: "user_dashboard_layouts", SQL: `SELECT * FROM paliad.user_dashboard_layouts ORDER BY user_id`},
		{SheetName: "user_pinned_projects", SQL: `SELECT * FROM paliad.user_pinned_projects ORDER BY user_id, project_id`},
		{SheetName: "user_views", SQL: `SELECT * FROM paliad.user_views ORDER BY id`},
		{SheetName: "users", SQL: `SELECT * FROM paliad.users ORDER BY id`},

		// --- reference data (alphabetical, prefixed ref__) ---
		{SheetName: "ref__countries", SQL: `SELECT * FROM paliad.countries ORDER BY code`},
		{SheetName: "ref__courts", SQL: `SELECT * FROM paliad.courts ORDER BY id`},
		{SheetName: "ref__deadline_concept_event_types", SQL: `SELECT * FROM paliad.deadline_concept_event_types ORDER BY concept_id, event_type_id`},
		{SheetName: "ref__deadline_concepts", SQL: `SELECT * FROM paliad.deadline_concepts ORDER BY id`},
		{SheetName: "ref__deadline_event_types", SQL: `SELECT * FROM paliad.deadline_event_types ORDER BY rule_id, event_type_id`},
		{SheetName: "ref__deadline_rules", SQL: `SELECT * FROM paliad.deadline_rules ORDER BY id`},
		{SheetName: "ref__event_categories", SQL: `SELECT * FROM paliad.event_categories ORDER BY id`},
		{SheetName: "ref__event_category_concepts", SQL: `SELECT * FROM paliad.event_category_concepts ORDER BY category_id, concept_id`},
		{SheetName: "ref__event_types", SQL: `SELECT * FROM paliad.event_types ORDER BY id`},
		{SheetName: "ref__holidays", SQL: `SELECT * FROM paliad.holidays ORDER BY date, country`},
		{SheetName: "ref__proceeding_types", SQL: `SELECT * FROM paliad.proceeding_types ORDER BY id`},
		{SheetName: "ref__trigger_events", SQL: `SELECT * FROM paliad.trigger_events ORDER BY id`},
	}
}