Files
paliad/internal/services/export_service.go
mAi 99c9d89daa feat(backups): t-paliad-246 — Backup Mode Slice A (on-demand admin org export)
m/paliad#77 Slice A. Folds the unbuilt t-paliad-214 Slice 3 (org async
export) into a new "Backup Mode" surface gated by adminGate.

m's calls (all 4 material picks per design §2):
- Storage: local disk PALIAD_EXPORT_DIR (LocalDiskStore only)
- Format: .zip bundle (xlsx + JSON + CSV + README) — no-lock-in preserved
- paliadin_turns + paliadin_aichat_conversation: EXCLUDE structurally
- Scheduler (Slice B): nightly 03:00 UTC, env-tunable

Wiring:
- mig 123 adds paliad.backups catalog table (kind/status/storage_uri/
  size/row_counts/warnings/error/deleted_at + admin-only RLS).
- ExportService.WriteOrg + orgSheetQueries enumerate 37 entity sheets
  + 12 ref sheets; REPEATABLE READ READ ONLY tx wraps the dump for
  snapshot consistency (design §3.3).
- writeBundle + runSheetQuery refactored to take a sqlx.QueryerContext
  so both *sqlx.DB (personal/project paths, unchanged) and *sqlx.Tx
  (org snapshot path) work.
- BackupRunner orchestrates: catalog INSERT → audit INSERT
  (event_type='backup_created') → WriteOrg → ArtifactStore.Put → patch
  catalog + audit on success/failure.
- ArtifactStore interface + LocalDiskStore impl (defense-in-depth key
  validation + URI-outside-dir guard).
- Sentinel actor for scheduled runs: actor_email='system@paliad',
  actor_id=NULL — no phantom user in paliad.users.
- Admin handlers POST /api/admin/backups/run + GET list/get/download
  behind adminGate(users, …); /admin/backups page + sidebar entry +
  bilingual i18n keys.
- BackupRunner only wired when PALIAD_EXPORT_DIR is set; routes return
  503 otherwise (same shape as requireDB).

Tests: 8 pure-function tests cover registry shape (no dups, paliadin
absent both as sheet name and SQL substring, ref__* sheets unscoped,
every sheet has ORDER BY) and LocalDiskStore (round-trip, bad-key
rejection, URI-traversal rejection, mkdir on construction).

go build ./... + go test ./internal/... clean. bun run build clean.

Slice B (BackupScheduler + retention cleanup) and Slice C (UI polish)
are separate follow-ups per head's instruction.
2026-05-25 15:28:37 +02:00

1633 lines
62 KiB
Go

package services
// ExportService streams a paliad data-export bundle to an io.Writer.
//
// One .zip per export, containing:
//
// - paliad-export.xlsx canonical workbook, one sheet per entity
// - paliad-export.json Excel-independent re-ingest twin
// - csv/<sheet>.csv per-sheet flat tables (RFC 4180 + UTF-8 BOM)
// - README.txt human-readable explainer
// - __meta.json standalone meta (same as the __meta sheet)
//
// Three scopes (per docs/design-paliad-data-export-2026-05-19.md):
//
// - personal — caller's RLS-visible projection + personal sidecars
// - project — one project + its ltree subtree (slice 2, see WriteProject)
// - org — full schema dump (slice 3, see WriteOrg)
//
// All three scopes are implemented in this file. They share the
// scope-agnostic writeBundle core; each entry point supplies its own
// sheet-query registry.
//
// Determinism: sheets emitted in a fixed canonical order; rows ordered by
// id ASC (or another stable tuple where no id exists); JSON object keys
// sorted alphabetically; the outer zip writes its file list in sorted
// order. Same row-state → identical bytes. The only non-deterministic
// field is __meta.generated_at, externalised to the filename.
//
// PII posture:
//
// - Column names matching (?i)secret|token|password|api[_-]?key|private[_-]?key
// are dropped at column-discovery time and recorded in __meta.warnings.
// - Specific column overrides (user_caldav_config.password_encrypted,
// invitations.token where it exists, etc.) live in the sheet definitions
// as explicit column-filter lists.
// - paliadin_turns is OFF in org scope and ON in personal scope (it's
// literally the caller's own data). Org-scope exclusion is structural
// (sheet absent from the registry), not just column-level.
import (
"archive/zip"
"context"
"crypto/rand"
"database/sql"
"encoding/hex"
"encoding/json"
"encoding/csv"
"fmt"
"io"
"regexp"
"sort"
"strings"
"time"
"github.com/google/uuid"
"github.com/jmoiron/sqlx"
"github.com/xuri/excelize/v2"
)
// Export scope discriminators. Stable strings — exposed in the audit row
// and the __meta sheet. Each Write* entry point falls back to its own
// constant when ExportSpec.Scope is left empty.
const (
// ExportScopePersonal is the default scope of WritePersonal.
ExportScopePersonal = "personal"
// ExportScopeProject is the default scope of WriteProject.
ExportScopeProject = "project"
// ExportScopeOrg is the default scope of WriteOrg.
ExportScopeOrg = "org"
)
// ExportSchemaVersion is bumped whenever the on-disk shape changes in a
// way that requires importers to adapt. v1 is the slice-1/2/3 baseline.
// The Write* entry points copy it into ExportMeta.SchemaVersion.
const ExportSchemaVersion = 1
// piiColumnDenyRegex is the PII column-name deny regex. Any column whose
// name matches is dropped during column discovery (see runSheetQuery) and
// recorded in __meta.warnings.
//
// The pattern is case-insensitive and unanchored, so it matches anywhere
// inside a column name: "password_encrypted", "access_token", "api_key",
// "apikey" and "private-key" are all caught. Explicitly listed columns
// (sheetQuery.DropColumns) are a second, independent filter.
var piiColumnDenyRegex = regexp.MustCompile(`(?i)secret|token|password|api[_-]?key|private[_-]?key`)
// ExportService writes a scoped export bundle. Stateless except for the
// DB handle + firm-name display string.
type ExportService struct {
// db runs the sheet queries directly (personal/project scope) and opens
// the snapshot transaction used by WriteOrg.
db *sqlx.DB
// firmName is copied verbatim into every ExportMeta.FirmName.
firmName string
}
// NewExportService returns an ExportService bound to db. firmName is kept
// as-is and embedded in the __meta of every export the service produces.
func NewExportService(db *sqlx.DB, firmName string) *ExportService {
	svc := new(ExportService)
	svc.db = db
	svc.firmName = firmName
	return svc
}
// ExportMeta is the bundle metadata. Stored on the __meta sheet, in
// __meta.json, and as part of the audit row.
//
// The JSON tags below define the __meta.json layout; the key/value layout
// of the __meta sheet is produced separately by metaToKeyValueRows.
type ExportMeta struct {
// SchemaVersion is set from ExportSchemaVersion by the Write* entry points.
SchemaVersion int `json:"schema_version"`
// FirmName is the service's firm-name display string.
FirmName string `json:"firm_name"`
// Scope is one of the ExportScope* discriminators.
Scope string `json:"scope"`
// ScopeRootID is the root project id; only WriteProject sets it.
ScopeRootID *uuid.UUID `json:"scope_root_id,omitempty"`
// ScopeRootLabel is the project title (project scope only). Empty
// for personal + org scope.
ScopeRootLabel string `json:"scope_root_label,omitempty"`
// ScopeRootPath is the ltree path of the root project (project scope
// only). Preserved in the audit row so closed-out projects retain a
// usable ancestry pointer (Q6 lock-in).
ScopeRootPath string `json:"scope_root_path,omitempty"`
// DirectOnly is true when ?direct_only=1 was passed (project scope
// only) — narrows the export to the root project, no descendants.
DirectOnly bool `json:"direct_only,omitempty"`
// GeneratedAt is the export timestamp; writeBundle also stamps it on
// every zip entry and buildXLSX on the workbook properties.
GeneratedAt time.Time `json:"generated_at"`
// GeneratedByID / GeneratedByEml / GeneratedByLbl identify the actor and
// are copied from ExportSpec.ActorID / ActorEmail / ActorLabel.
GeneratedByID uuid.UUID `json:"generated_by_user_id"`
GeneratedByEml string `json:"generated_by_user_email"`
GeneratedByLbl string `json:"generated_by_user_label"`
// RowCounts maps sheet name → number of exported rows (filled by
// writeBundle).
RowCounts map[string]int `json:"row_counts"`
// Warnings is the sorted list of dropped-column notices from writeBundle;
// WriteProject may append cross-subtree FK notices to the returned copy.
Warnings []string `json:"warnings,omitempty"`
// PaliadVersion and Notes are not populated by any code visible in this
// part of the file — NOTE(review): confirm where (or whether) they are set.
PaliadVersion string `json:"paliad_version,omitempty"`
Notes string `json:"notes,omitempty"`
}
// ExportSpec is the per-run inputs. It is passed by value, so the
// defaulting done by the Write* entry points never leaks back to the caller.
type ExportSpec struct {
// Scope is one of the ExportScope* discriminators; when empty, each
// Write* entry point substitutes its own scope.
Scope string
ScopeRoot *uuid.UUID // project_id when Scope==ExportScopeProject; nil otherwise
// ScopeRootLabel + ScopeRootPath are populated by the project-export
// handler (resolved from the root project row) so the audit + __meta
// carry stable labels even if the project is later renamed.
ScopeRootLabel string
ScopeRootPath string
// DirectOnly narrows the export to the root project only (project
// scope, ?direct_only=1).
DirectOnly bool
// ActorID / ActorEmail identify who triggered the export; they end up in
// ExportMeta.GeneratedByID / GeneratedByEml.
ActorID uuid.UUID
ActorEmail string
ActorLabel string // display_name for the audit + meta
// GeneratedAt is the export timestamp; a zero value is replaced with
// time.Now().UTC() by the Write* entry points.
GeneratedAt time.Time
}
// sheetQuery is one entity sheet's SQL recipe. Sheets emit in the order
// they appear in the registry, which is fixed (alphabetical inside each
// scope-prefix group). args are sqlx-positional.
//
// runSheetQuery executes the recipe; writeBundle turns the result into one
// workbook sheet, one JSON table and one CSV file.
type sheetQuery struct {
// SheetName lands in the workbook sheet, the JSON top-level key, and
// the CSV filename stem. snake_case, ≤31 chars (Excel's hard limit).
SheetName string
// SQL runs as-is; should select rows in a deterministic order (ORDER
// BY id ASC or a comparable stable tuple).
SQL string
// Args are sqlx-positional, bound 1:1 against the SQL's $1, $2, ….
Args []any
// DropColumns is an explicit list of column names to drop from the
// result regardless of the regex deny-list. Used for jsonb columns
// that contain credentials, or paliadin response bodies in org scope.
// Matching is by exact column name as reported by the result set.
DropColumns []string
}
// WritePersonal writes the personal-scope bundle for spec.ActorID into w
// and returns the resulting meta (including row_counts) so the caller can
// patch its audit row.
//
// Defaults: an empty spec.Scope becomes ExportScopePersonal and a zero
// spec.GeneratedAt becomes the current UTC time.
//
// What happens, in order:
//
//  1. personalSheetQueries builds the sheet registry for the actor.
//  2. writeBundle runs every query against s.db, applies the PII column
//     filter, builds the xlsx / JSON / CSV / README / __meta.json payloads
//     in memory and writes them to w as one zip.
//
// No audit row is written here; that bookkeeping (before the call and
// after it returns, including the failure case) belongs to the caller.
// On error the partially filled meta is still returned alongside it.
func (s *ExportService) WritePersonal(ctx context.Context, w io.Writer, spec ExportSpec) (ExportMeta, error) {
	scope := spec.Scope
	if scope == "" {
		scope = ExportScopePersonal
	}
	stamp := spec.GeneratedAt
	if stamp.IsZero() {
		stamp = time.Now().UTC()
	}

	meta := ExportMeta{
		SchemaVersion:  ExportSchemaVersion,
		FirmName:       s.firmName,
		Scope:          scope,
		GeneratedAt:    stamp,
		GeneratedByID:  spec.ActorID,
		GeneratedByEml: spec.ActorEmail,
		GeneratedByLbl: spec.ActorLabel,
		RowCounts:      map[string]int{},
	}

	registry := personalSheetQueries(spec.ActorID)
	err := s.writeBundle(ctx, s.db, w, registry, &meta)
	return meta, err
}
// WriteProject writes the project-scope bundle rooted at spec.ScopeRoot
// into w and returns the meta (including row_counts) for the audit-row
// patch. spec.ScopeRoot is mandatory; a nil root is rejected before any
// query runs.
//
// Behavior contract (per Slice 2 design §2):
//
//   - Every entity sheet is filtered to the subtree (project + descendants
//     via ltree path). With spec.DirectOnly the export narrows to the root
//     project only.
//   - approval_policies carries all 3 sources (project rows + ancestor
//     rows + partner-unit-default rows) tagged with a `source` column.
//   - users_referenced restricts user disclosure to FK-referenced users.
//   - Cross-subtree FKs (projects.counterclaim_of pointing outside the
//     subtree) are kept and reported as warnings.
//
// The sheet-level rules above are implemented by projectSheetQueries, not
// in this function.
//
// Permission checks live on the handler; this method trusts its caller.
func (s *ExportService) WriteProject(ctx context.Context, w io.Writer, spec ExportSpec) (ExportMeta, error) {
	if spec.ScopeRoot == nil {
		return ExportMeta{}, fmt.Errorf("WriteProject: ScopeRoot is required")
	}
	root := *spec.ScopeRoot

	scope := spec.Scope
	if scope == "" {
		scope = ExportScopeProject
	}
	stamp := spec.GeneratedAt
	if stamp.IsZero() {
		stamp = time.Now().UTC()
	}

	meta := ExportMeta{
		SchemaVersion:  ExportSchemaVersion,
		FirmName:       s.firmName,
		Scope:          scope,
		ScopeRootID:    spec.ScopeRoot,
		ScopeRootLabel: spec.ScopeRootLabel,
		ScopeRootPath:  spec.ScopeRootPath,
		DirectOnly:     spec.DirectOnly,
		GeneratedAt:    stamp,
		GeneratedByID:  spec.ActorID,
		GeneratedByEml: spec.ActorEmail,
		GeneratedByLbl: spec.ActorLabel,
		RowCounts:      map[string]int{},
	}

	registry := projectSheetQueries(root, spec.DirectOnly)
	if err := s.writeBundle(ctx, s.db, w, registry, &meta); err != nil {
		return meta, err
	}

	// Cross-subtree FK detection (Q3 lock-in: keep FK + warn). Best-effort:
	// a failing scan is ignored on purpose so the export still ships.
	//
	// The bundle has already been written to w at this point, so these
	// warnings reach only the returned meta (and whatever the caller
	// persists from it) — not the __meta sheet / __meta.json inside the zip.
	// NOTE(review): confirm that is the intended place for them.
	outbound, err := s.detectCrossSubtreeFKs(ctx, root, spec.DirectOnly)
	if err != nil || len(outbound) == 0 {
		return meta, nil
	}
	meta.Warnings = append(meta.Warnings, outbound...)
	sort.Strings(meta.Warnings)
	return meta, nil
}
// WriteOrg writes the full org-scope backup bundle into w. It is meant
// for admins only; authorisation is the handler's job and is not
// re-checked here.
//
// All sheet queries run inside one REPEATABLE READ, READ ONLY transaction
// so every sheet is read from the same snapshot. Without it, a backup
// taken while users are editing could contain rows that reference other
// rows missing from the same bundle (design §3.3).
//
// Defaults: an empty spec.Scope becomes ExportScopeOrg and a zero
// spec.GeneratedAt becomes the current UTC time. Audit bookkeeping is
// left to the caller. On error the partially filled meta is returned
// together with the error.
func (s *ExportService) WriteOrg(ctx context.Context, w io.Writer, spec ExportSpec) (ExportMeta, error) {
	scope, stamp := spec.Scope, spec.GeneratedAt
	if scope == "" {
		scope = ExportScopeOrg
	}
	if stamp.IsZero() {
		stamp = time.Now().UTC()
	}

	meta := ExportMeta{
		SchemaVersion:  ExportSchemaVersion,
		FirmName:       s.firmName,
		Scope:          scope,
		GeneratedAt:    stamp,
		GeneratedByID:  spec.ActorID,
		GeneratedByEml: spec.ActorEmail,
		GeneratedByLbl: spec.ActorLabel,
		RowCounts:      map[string]int{},
	}

	snapshotOpts := &sql.TxOptions{
		Isolation: sql.LevelRepeatableRead,
		ReadOnly:  true,
	}
	tx, err := s.db.BeginTxx(ctx, snapshotOpts)
	if err != nil {
		return meta, fmt.Errorf("backup snapshot tx: %w", err)
	}
	// Nothing is ever written through tx, so there is nothing to commit;
	// rolling back simply releases the snapshot. The error is irrelevant.
	defer func() { _ = tx.Rollback() }()

	if err := s.writeBundle(ctx, tx, w, orgSheetQueries(), &meta); err != nil {
		return meta, err
	}
	return meta, nil
}
// detectCrossSubtreeFKs looks for projects inside the exported subtree
// whose counterclaim_of points at a project outside of it, and returns one
// human-readable warning per such reference (ordered by project id).
//
// The subtree is described by the SQL fragment from
// projectSubtreeProjectIDsSQL(directOnly); rootID is the single bound
// argument. A query failure is returned to the caller, which treats this
// scan as informational.
func (s *ExportService) detectCrossSubtreeFKs(ctx context.Context, rootID uuid.UUID, directOnly bool) ([]string, error) {
	inSubtree := projectSubtreeProjectIDsSQL(directOnly)
	query := `
SELECT p.id, p.title, p.counterclaim_of
FROM paliad.projects p
WHERE p.id IN ` + inSubtree + `
AND p.counterclaim_of IS NOT NULL
AND p.counterclaim_of NOT IN ` + inSubtree + `
ORDER BY p.id`

	type outboundRef struct {
		ID             uuid.UUID `db:"id"`
		Title          string    `db:"title"`
		CounterclaimOf uuid.UUID `db:"counterclaim_of"`
	}
	var refs []outboundRef
	if err := s.db.SelectContext(ctx, &refs, query, rootID); err != nil {
		return nil, err
	}

	warnings := make([]string, len(refs))
	for i := range refs {
		ref := &refs[i]
		warnings[i] = fmt.Sprintf(
			"cross-subtree FK: project %q (%s).counterclaim_of → %s (not in this export)",
			ref.Title, ref.ID, ref.CounterclaimOf,
		)
	}
	return warnings, nil
}
// collectedSheet holds one sheet's data after column-discovery + row
// materialisation. Used to hand data from writeBundle to buildXLSX +
// buildJSON + buildCSV.
type collectedSheet struct {
// name is the sheetQuery.SheetName the data came from.
name string
// columns are the kept column names (after the PII / drop filter), in
// result-set order.
columns []string
// rows holds one entry per result row; each entry is aligned with columns.
rows [][]string // pre-stringified for cell writes
}
// writeBundle is the scope-agnostic core shared by all Write* entry
// points. It runs every sheet query through queryer, then writes one zip
// to w containing README.txt, __meta.json, paliad-export.json,
// paliad-export.xlsx and csv/<sheet>.csv for each sheet.
//
// queryer is usually s.db; WriteOrg passes a REPEATABLE READ *sqlx.Tx so
// all sheets are read from one snapshot (design §3.3).
//
// Side effects on meta: RowCounts gets one entry per sheet and Warnings is
// replaced with the sorted list of dropped-column notices.
//
// Every payload is built fully in memory before the zip is written.
// Entries are sorted by name and all carry meta.GeneratedAt as their
// modification time, so the archive layout does not depend on map order.
func (s *ExportService) writeBundle(ctx context.Context, queryer sqlx.QueryerContext, w io.Writer, sheets []sheetQuery, meta *ExportMeta) error {
	var (
		collected = make([]collectedSheet, 0, len(sheets))
		tables    = make(map[string][]map[string]string, len(sheets))
		notices   = []string{}
	)

	for _, sheet := range sheets {
		columns, matrix, removed, err := s.runSheetQuery(ctx, queryer, sheet)
		if err != nil {
			return fmt.Errorf("export sheet %q: %w", sheet.SheetName, err)
		}
		for _, col := range removed {
			notices = append(notices, fmt.Sprintf("sheet=%s column=%s dropped (PII deny-list)", sheet.SheetName, col))
		}
		collected = append(collected, collectedSheet{name: sheet.SheetName, columns: columns, rows: matrix})

		// JSON twin: one string-valued object per row, keyed by column
		// name, so the JSON shape mirrors the CSV shape 1:1.
		objects := make([]map[string]string, 0, len(matrix))
		for _, cells := range matrix {
			obj := make(map[string]string, len(columns))
			for idx, col := range columns {
				if idx >= len(cells) {
					continue
				}
				obj[col] = cells[idx]
			}
			objects = append(objects, obj)
		}
		tables[sheet.SheetName] = objects
		meta.RowCounts[sheet.SheetName] = len(matrix)
	}
	sort.Strings(notices)
	meta.Warnings = notices

	// Workbook and JSON twin, both held in memory.
	xlsxBytes, err := buildXLSX(collected, *meta)
	if err != nil {
		return fmt.Errorf("export build xlsx: %w", err)
	}
	jsonBytes, err := buildJSON(tables, *meta)
	if err != nil {
		return fmt.Errorf("export build json: %w", err)
	}

	// One CSV per sheet, keyed by sheet name.
	csvBySheet := map[string][]byte{}
	for _, sh := range collected {
		blob, err := buildCSV(sh.columns, sh.rows)
		if err != nil {
			return fmt.Errorf("export build csv %q: %w", sh.name, err)
		}
		csvBySheet[sh.name] = blob
	}

	metaJSON, err := json.MarshalIndent(*meta, "", " ")
	if err != nil {
		return fmt.Errorf("export marshal meta: %w", err)
	}

	// Collect every file, then order by name; the sort below is what makes
	// the archive layout independent of map iteration order.
	type zipEntry struct {
		name string
		body []byte
	}
	entries := make([]zipEntry, 0, 4+len(csvBySheet))
	entries = append(entries,
		zipEntry{"README.txt", []byte(buildREADME(*meta))},
		zipEntry{"__meta.json", metaJSON},
		zipEntry{"paliad-export.json", jsonBytes},
		zipEntry{"paliad-export.xlsx", xlsxBytes},
	)
	for sheetName, blob := range csvBySheet {
		entries = append(entries, zipEntry{"csv/" + sheetName + ".csv", blob})
	}
	sort.Slice(entries, func(a, b int) bool { return entries[a].name < entries[b].name })

	// Every entry carries the export's GeneratedAt as its modification
	// time so extracted files show a meaningful timestamp. A zero time
	// would make archive/zip fall back to the DOS epoch, hence the guard.
	stamp := meta.GeneratedAt.UTC()
	if stamp.IsZero() {
		stamp = time.Now().UTC()
	}

	archive := zip.NewWriter(w)
	for _, entry := range entries {
		dst, err := archive.CreateHeader(&zip.FileHeader{
			Name:     entry.name,
			Method:   zip.Deflate,
			Modified: stamp,
		})
		if err != nil {
			return fmt.Errorf("export zip header %q: %w", entry.name, err)
		}
		if _, err := dst.Write(entry.body); err != nil {
			return fmt.Errorf("export zip write %q: %w", entry.name, err)
		}
	}
	if err := archive.Close(); err != nil {
		return fmt.Errorf("export zip close: %w", err)
	}
	return nil
}
// runSheetQuery executes sq against queryer and returns:
//
//   - cols: the column names that survived filtering, in result-set order
//   - rows: one []string per result row, aligned with cols, each cell
//     rendered by formatCellValue
//   - dropped: the column names removed because they are listed in
//     sq.DropColumns or match piiColumnDenyRegex
//
// queryer is usually s.db; WriteOrg passes a REPEATABLE READ *sqlx.Tx.
// Any query, column, scan or iteration error aborts the sheet and is
// returned wrapped with a short stage prefix.
func (s *ExportService) runSheetQuery(ctx context.Context, queryer sqlx.QueryerContext, sq sheetQuery) (cols []string, rows [][]string, dropped []string, err error) {
	result, err := queryer.QueryxContext(ctx, sq.SQL, sq.Args...)
	if err != nil {
		return nil, nil, nil, fmt.Errorf("query: %w", err)
	}
	defer result.Close()

	names, err := result.Columns()
	if err != nil {
		return nil, nil, nil, fmt.Errorf("columns: %w", err)
	}

	// Decide per column whether it is exported: the explicit per-sheet
	// list and the name-based deny regex are both consulted.
	explicit := make(map[string]struct{}, len(sq.DropColumns))
	for _, name := range sq.DropColumns {
		explicit[name] = struct{}{}
	}
	kept := make([]int, 0, len(names))
	cols = make([]string, 0, len(names))
	for idx, name := range names {
		_, listed := explicit[name]
		if listed || piiColumnDenyRegex.MatchString(name) {
			dropped = append(dropped, name)
			continue
		}
		kept = append(kept, idx)
		cols = append(cols, name)
	}

	for result.Next() {
		// Scan every column (dropped ones included) into untyped slots;
		// only the kept positions are stringified afterwards.
		values := make([]any, len(names))
		targets := make([]any, len(names))
		for i := range values {
			targets[i] = &values[i]
		}
		if err := result.Scan(targets...); err != nil {
			return nil, nil, nil, fmt.Errorf("scan: %w", err)
		}
		line := make([]string, len(kept))
		for pos, src := range kept {
			line[pos] = formatCellValue(values[src])
		}
		rows = append(rows, line)
	}
	if err := result.Err(); err != nil {
		return nil, nil, nil, fmt.Errorf("rows: %w", err)
	}
	return cols, rows, dropped, nil
}
// formatCellValue renders a database-driver value as the canonical export
// string. Every cell is a string by design (design §3.1, Q4 = ISO strings
// only). Conventions:
//
//   - nil → "" (the empty cell)
//   - bool → "TRUE" / "FALSE"
//   - time.Time at exactly midnight whose location is time.UTC or
//     time.Local → date only, "2006-01-02" (this is how DATE columns
//     arrive); a year < 2 is treated as the zero value and rendered ""
//   - any other time.Time → RFC3339 in UTC ("2026-05-19T14:23:00Z")
//   - []byte holding valid JSON that starts with '{' or '[' → the same
//     JSON compacted onto one line, otherwise byte-for-byte (no HTML
//     escaping of <, >, &)
//   - any other []byte → the bytes as a string, unchanged; this includes
//     Postgres array text such as "{a,b}", which is not valid JSON
//   - string → unchanged
//   - everything else (ints, floats, …) → fmt's %v rendering
//
// The midnight heuristic cannot tell a DATE from a timestamp that happens
// to fall exactly on midnight; such a timestamp is rendered as a date.
func formatCellValue(v any) string {
	switch x := v.(type) {
	case nil:
		return ""
	case bool:
		if x {
			return "TRUE"
		}
		return "FALSE"
	case time.Time:
		atMidnight := x.Hour() == 0 && x.Minute() == 0 && x.Second() == 0 && x.Nanosecond() == 0
		if atMidnight && (x.Location() == time.UTC || x.Location() == time.Local) {
			if x.Year() < 2 {
				return ""
			}
			// Format the value's own calendar date. Converting a
			// local-midnight value to UTC first would shift it to the
			// previous day in every zone east of UTC.
			return x.Format("2006-01-02")
		}
		return x.UTC().Format(time.RFC3339)
	case []byte:
		s := string(x)
		trimmed := strings.TrimSpace(s)
		if !strings.HasPrefix(trimmed, "{") && !strings.HasPrefix(trimmed, "[") {
			return s
		}
		// Looks like JSON: compact it so the cell has no embedded
		// newlines. HTML escaping is switched off so characters such as
		// '&' or '<' inside JSON strings stay readable and the value
		// round-trips verbatim.
		var sb strings.Builder
		enc := json.NewEncoder(&sb)
		enc.SetEscapeHTML(false)
		if err := enc.Encode(json.RawMessage(trimmed)); err != nil {
			// Not JSON after all (e.g. "{a,b}" array text or prose that
			// starts with a bracket): hand the text back untouched.
			return s
		}
		// Encode always appends one trailing newline; drop it.
		return strings.TrimSuffix(sb.String(), "\n")
	case string:
		return x
	default:
		return fmt.Sprintf("%v", x)
	}
}
// buildXLSX assembles the workbook from the collected sheets + meta and
// returns the xlsx file bytes. The whole workbook is built in memory, one
// cell at a time.
//
// Layout: the first sheet is "__meta" (key/value rows from
// metaToKeyValueRows), followed by one sheet per collectedSheet with a
// header row and a frozen first row.
//
// Two details are deliberate (per the notes left by earlier fixes):
//
//  1. The document properties (created/modified/creator) are set
//     explicitly from meta so the file does not carry excelize's default
//     values.
//  2. The frozen header row is described with a complete pane definition
//     (Freeze, YSplit, TopLeftCell, ActivePane and a Selection); the
//     shorter Freeze+YSplit form reportedly makes Excel offer a repair on
//     open.
func buildXLSX(sheets []collectedSheet, meta ExportMeta) ([]byte, error) {
	book := excelize.NewFile()
	defer book.Close()

	stamp := meta.GeneratedAt.UTC().Format(time.RFC3339)
	author := "Paliad"
	if meta.FirmName != "" {
		author = "Paliad (" + meta.FirmName + ")"
	}
	props := &excelize.DocProperties{
		Created:        stamp,
		Modified:       stamp,
		Creator:        author,
		LastModifiedBy: author,
		Title:          fmt.Sprintf("Paliad export (%s)", meta.Scope),
		Description:    fmt.Sprintf("Paliad data export, scope=%s, generated_by=%s", meta.Scope, meta.GeneratedByEml),
	}
	if err := book.SetDocProps(props); err != nil {
		return nil, fmt.Errorf("excelize SetDocProps: %w", err)
	}

	// put writes one string cell at the 1-based (column, row) position.
	put := func(sheet string, column, row int, value string) error {
		ref, _ := excelize.CoordinatesToCellName(column, row)
		return book.SetCellValue(sheet, ref, value)
	}

	// The workbook starts with one default sheet; it becomes __meta.
	const metaName = "__meta"
	if current := book.GetSheetName(0); current != metaName {
		if err := book.SetSheetName(current, metaName); err != nil {
			return nil, err
		}
	}
	for idx, pair := range metaToKeyValueRows(meta) {
		if err := put(metaName, 1, idx+1, pair[0]); err != nil {
			return nil, err
		}
		if err := put(metaName, 2, idx+1, pair[1]); err != nil {
			return nil, err
		}
	}

	// Entity sheets, in the order given; columns keep their SELECT order.
	for _, sheet := range sheets {
		// Excel caps sheet names at 31 characters; cut defensively.
		title := sheet.name
		if len(title) > 31 {
			title = title[:31]
		}
		if _, err := book.NewSheet(title); err != nil {
			return nil, err
		}

		for c, header := range sheet.columns {
			if err := put(title, c+1, 1, header); err != nil {
				return nil, err
			}
		}
		for r, cells := range sheet.rows {
			for c, value := range cells {
				if err := put(title, c+1, r+2, value); err != nil {
					return nil, err
				}
			}
		}

		// Freeze the header row with the full pane description.
		frozenHeader := &excelize.Panes{
			Freeze:      true,
			YSplit:      1,
			TopLeftCell: "A2",
			ActivePane:  "bottomLeft",
			Selection: []excelize.Selection{
				{SQRef: "A2", ActiveCell: "A2", Pane: "bottomLeft"},
			},
		}
		if err := book.SetPanes(title, frozenHeader); err != nil {
			return nil, fmt.Errorf("excelize SetPanes(%q): %w", title, err)
		}
	}

	// Make __meta (index 0) the active sheet explicitly.
	book.SetActiveSheet(0)

	out := &byteBuf{}
	if _, err := book.WriteTo(out); err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}
// byteBuf is a minimal append-only io.Writer backed by a byte slice. It
// offers exactly what the builders in this file need: Write to accumulate
// and Bytes to read the result back.
type byteBuf struct{ b []byte }

// Write appends chunk to the buffer. It never fails and always reports
// the full length of chunk as written.
func (buf *byteBuf) Write(chunk []byte) (int, error) {
	written := len(chunk)
	buf.b = append(buf.b, chunk...)
	return written, nil
}

// Bytes returns the accumulated bytes (the internal slice, not a copy).
func (buf *byteBuf) Bytes() []byte {
	return buf.b
}
// metaToKeyValueRows flattens m into (key, value) pairs for the __meta
// sheet. The key order is fixed:
//
//	schema_version, firm_name, scope, scope_root_id, scope_root_label,
//	scope_root_path, [direct_only — project scope only], generated_at,
//	generated_by_user_id, generated_by_user_email, generated_by_user_label,
//	paliad_version, notes, row_count.<sheet> (sorted by sheet), warning…
//
// The scope_root_* rows are always present (empty when they do not
// apply) so the sheet layout stays stable across scopes.
func metaToKeyValueRows(m ExportMeta) [][2]string {
	rows := make([][2]string, 0, 12+len(m.RowCounts)+len(m.Warnings))
	add := func(key, value string) {
		rows = append(rows, [2]string{key, value})
	}

	add("schema_version", fmt.Sprintf("%d", m.SchemaVersion))
	add("firm_name", m.FirmName)
	add("scope", m.Scope)

	rootID := ""
	if m.ScopeRootID != nil {
		rootID = m.ScopeRootID.String()
	}
	add("scope_root_id", rootID)
	add("scope_root_label", m.ScopeRootLabel)
	add("scope_root_path", m.ScopeRootPath)

	if m.Scope == ExportScopeProject {
		flag := "FALSE"
		if m.DirectOnly {
			flag = "TRUE"
		}
		add("direct_only", flag)
	}

	add("generated_at", m.GeneratedAt.UTC().Format(time.RFC3339))
	add("generated_by_user_id", m.GeneratedByID.String())
	add("generated_by_user_email", m.GeneratedByEml)
	add("generated_by_user_label", m.GeneratedByLbl)
	add("paliad_version", m.PaliadVersion)
	add("notes", m.Notes)

	// One row per sheet, sorted by sheet name for a stable layout.
	sheetNames := make([]string, 0, len(m.RowCounts))
	for sheet := range m.RowCounts {
		sheetNames = append(sheetNames, sheet)
	}
	sort.Strings(sheetNames)
	for _, sheet := range sheetNames {
		add("row_count."+sheet, fmt.Sprintf("%d", m.RowCounts[sheet]))
	}

	// Warnings keep the order they have in m.
	for _, warning := range m.Warnings {
		add("warning", warning)
	}
	return rows
}
// buildJSON produces the JSON twin. Top-level shape:
//
//	{
//	  "meta":   { ... },
//	  "tables": { "<sheet>": [ {"<col>": "<val>", ...}, ... ] }
//	}
//
// Table names and column names are map keys, which encoding/json emits
// in sorted order; the fields of "meta" follow ExportMeta's declaration
// order.
func buildJSON(tables map[string][]map[string]string, meta ExportMeta) ([]byte, error) {
	document := struct {
		Meta   ExportMeta                     `json:"meta"`
		Tables map[string][]map[string]string `json:"tables"`
	}{
		Meta:   meta,
		Tables: tables,
	}
	return json.MarshalIndent(document, "", " ")
}
// buildCSV renders one sheet as CSV: a UTF-8 byte-order mark, then the
// header record (cols), then one record per row, quoted by encoding/csv
// with its default settings. The BOM is there so Excel (notably
// German-locale installs) picks UTF-8 instead of guessing a legacy code
// page and corrupting umlauts.
func buildCSV(cols []string, rows [][]string) ([]byte, error) {
	utf8BOM := []byte{0xEF, 0xBB, 0xBF}

	var out byteBuf
	out.Write(utf8BOM)

	enc := csv.NewWriter(&out)
	if err := enc.Write(cols); err != nil {
		return nil, err
	}
	for i := range rows {
		if err := enc.Write(rows[i]); err != nil {
			return nil, err
		}
	}
	// Flush reports nothing itself; Error surfaces any failure from it.
	enc.Flush()
	if err := enc.Error(); err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}
// buildREADME renders README.txt, the human-readable explainer shipped in
// the bundle. German comes first, followed by a shorter English section.
// Contents: who generated the export and when, the scope (plus project
// details for project scope), the file inventory, the per-sheet row
// counts sorted by sheet name, and the confidentiality / secrets notes.
func buildREADME(m ExportMeta) string {
	generated := m.GeneratedAt.UTC().Format(time.RFC3339)

	var out strings.Builder

	// --- header ---
	fmt.Fprintf(&out, "Paliad Datenexport (%s)\n", m.FirmName)
	out.WriteString("============================\n\n")
	fmt.Fprintf(&out, "Erstellt am : %s\n", generated)
	fmt.Fprintf(&out, "Erstellt von : %s <%s>\n", m.GeneratedByLbl, m.GeneratedByEml)
	fmt.Fprintf(&out, "Umfang : %s\n", m.Scope)
	if m.Scope == ExportScopeProject {
		if m.ScopeRootLabel != "" {
			fmt.Fprintf(&out, "Projekt : %s\n", m.ScopeRootLabel)
		}
		if m.ScopeRootID != nil {
			fmt.Fprintf(&out, "Projekt-ID : %s\n", m.ScopeRootID.String())
		}
		switch {
		case m.DirectOnly:
			out.WriteString("Hinweis : nur das Root-Projekt (?direct_only=1), keine Unter-Projekte.\n")
		default:
			out.WriteString("Hinweis : Root-Projekt + alle Unter-Projekte.\n")
		}
	}
	fmt.Fprintf(&out, "Schema-Version: %d\n", m.SchemaVersion)
	out.WriteString("\n")

	// --- file inventory ---
	out.WriteString("Inhalt\n------\n" +
		"- paliad-export.xlsx — kanonische Excel-Mappe (eine Tabelle pro Entität)\n" +
		"- paliad-export.json — maschinenlesbare Kopie der gleichen Daten\n" +
		"- csv/<sheet>.csv — Tabellen einzeln als CSV (UTF-8 mit BOM)\n" +
		"- __meta.json — Metadaten dieses Exports (auch im __meta-Sheet)\n" +
		"\n")

	// --- row counts, sorted by sheet name ---
	out.WriteString("Zeilen pro Tabelle:\n")
	sheetNames := make([]string, 0, len(m.RowCounts))
	for sheet := range m.RowCounts {
		sheetNames = append(sheetNames, sheet)
	}
	sort.Strings(sheetNames)
	for _, sheet := range sheetNames {
		fmt.Fprintf(&out, " %-32s %d\n", sheet, m.RowCounts[sheet])
	}
	out.WriteString("\n")

	// --- notes (German) ---
	out.WriteString("Hinweise\n--------\n")
	out.WriteString("Diese Datei enthält möglicherweise vertrauliche Mandantsdaten.\n")
	fmt.Fprintf(&out, "Sie wurde erzeugt am %s durch %s aus Paliad (%s).\n", generated, m.GeneratedByEml, m.FirmName)
	out.WriteString("Die Weitergabe an Dritte erfolgt in eigener Verantwortung des Empfängers.\n" +
		"\n" +
		"Passwörter, CalDAV-Zugangsdaten, Einladungstoken und andere Geheimnisse\n" +
		"werden NIE exportiert (Spalten-Filter und allgemeine Deny-Regel).\n" +
		"\n")

	// --- English section ---
	out.WriteString("--- English ---\n\n" +
		"This Paliad export bundle contains structured data of the scope above.\n" +
		"Open paliad-export.xlsx in Excel/LibreOffice, or parse paliad-export.json\n" +
		"with any JSON-capable tool. CSVs are RFC 4180 with a UTF-8 BOM.\n" +
		"\n" +
		"Dates are ISO 8601 strings; timestamps are RFC 3339 UTC. Booleans are\n" +
		"the literal strings TRUE/FALSE. JSON-typed columns are stored as compact\n" +
		"one-line JSON in each cell.\n" +
		"\n" +
		"This bundle is byte-deterministic: two exports of the same row state\n" +
		"produce identical zip bytes (modulo the generated_at field stored on\n" +
		"the __meta sheet and in __meta.json).\n")

	return out.String()
}
// ExportFilename builds the canonical download filename for an export
// bundle. Every variant ends in the UTC generation time at minute
// resolution (layout "2006-01-02T1504Z") followed by ".zip":
//
//   - ExportScopePersonal: paliad-export-personal-<ts>.zip
//   - ExportScopeOrg:      paliad-export-org-<ts>.zip
//   - ExportScopeProject:  paliad-export-project-<slug>[-<suffix>]-<ts>.zip
//   - any other scope:     paliad-export-<ts>.zip
//
// Only the project variant has a free-text component. scopeLabel is
// sanitised with slugifyFilename; when nothing survives sanitising, a
// random slug is substituted so the name never contains an empty segment.
//
// The optional <suffix> is a short disambiguator derived from the root
// project's UUID via shortUUIDSuffix (Slice 2 §3 Q5). Two projects with
// identical titles (common: "Standard NDA" per client) would otherwise
// collide when archived together. The suffix is left out when
// shortUUIDSuffix yields "".
//
// rootID is consumed only for ExportScopeProject; pass uuid.Nil for the
// other scopes.
func ExportFilename(scope string, scopeLabel string, rootID uuid.UUID, generatedAt time.Time) string {
	stamp := generatedAt.UTC().Format("2006-01-02T1504Z")

	// Fixed-name scopes first; they need nothing but the timestamp.
	if scope == ExportScopePersonal {
		return fmt.Sprintf("paliad-export-personal-%s.zip", stamp)
	}
	if scope == ExportScopeOrg {
		return fmt.Sprintf("paliad-export-org-%s.zip", stamp)
	}
	if scope != ExportScopeProject {
		return fmt.Sprintf("paliad-export-%s.zip", stamp)
	}

	// Project scope: slug (never empty) plus optional UUID-derived suffix.
	label := slugifyFilename(scopeLabel)
	if label == "" {
		label = randomSlug()
	}
	if suffix := shortUUIDSuffix(rootID); suffix != "" {
		return fmt.Sprintf("paliad-export-project-%s-%s-%s.zip", label, suffix, stamp)
	}
	return fmt.Sprintf("paliad-export-project-%s-%s.zip", label, stamp)
}
// shortUUIDSuffix returns the last 8 hex characters of the UUID's canonical
// string form, or "" for uuid.Nil so callers can fall back to the
// slug-only filename variant.
//
// The canonical form is grouped 8-4-4-4-12, so the block after the final
// dash is 12 hex characters long. Only its last 8 are returned, which keeps
// the suffix at the documented 32-bit ("4-billion-class") width instead of
// leaking the whole trailing block into filenames.
func shortUUIDSuffix(id uuid.UUID) string {
	const suffixLen = 8
	if id == uuid.Nil {
		return ""
	}
	canonical := id.String()
	// Defensive: never slice past the start of an unexpectedly short string.
	if len(canonical) < suffixLen {
		return ""
	}
	return canonical[len(canonical)-suffixLen:]
}
// filenameSafeRegex matches every run of characters that is not an ASCII
// letter, an ASCII digit or a hyphen.
var filenameSafeRegex = regexp.MustCompile(`[^A-Za-z0-9-]+`)

// slugifyFilename reduces free text to a filename-safe slug: surrounding
// whitespace is dropped, each run of unsafe characters becomes a single
// "-", leading/trailing hyphens are removed, and the result is capped at
// 40 bytes. It returns "" when nothing safe remains.
//
// The cap is applied before a second trim so that a cut landing directly
// after a separator cannot leave a dangling "-" at the end of the slug
// (which would show up as "--" once the caller appends the next segment).
func slugifyFilename(s string) string {
	const maxSlugLen = 40

	s = strings.TrimSpace(s)
	s = filenameSafeRegex.ReplaceAllString(s, "-")
	s = strings.Trim(s, "-")
	if len(s) > maxSlugLen {
		// Byte slicing is safe here: only ASCII survives the regex.
		s = strings.Trim(s[:maxSlugLen], "-")
	}
	return s
}
// randomSlug returns 8 lowercase hex characters encoding 4 bytes read from
// rand. It is the stand-in used when a label slugifies to the empty string.
// The error from rand.Read is deliberately discarded; the buffer starts out
// zeroed, so a failed read still yields a well-formed slug.
func randomSlug() string {
	raw := make([]byte, 4)
	_, _ = rand.Read(raw)
	return hex.EncodeToString(raw)
}
// ---------------------------------------------------------------------------
// Personal-scope sheet registry.
// ---------------------------------------------------------------------------
//
// personalSheetQueries returns the ordered sheet registry for a
// personal-scope export. actorID is the single bind argument ($1) of every
// scoped query; the ref__* reference sheets carry no arguments.
//
// Per design §2.3, "personal scope" is the RLS-visible projection plus
// caller-personal sidecars. Every visible-projects query goes through
// visibilityPredicatePositional so the gate is the same as runtime list
// endpoints. The ?-positional binding takes the caller's user_id at $1.
//
// Ordering: every multi-row SELECT uses `ORDER BY id` (or the natural
// stable sort-tuple for tables without an id PK) to keep
// two-runs-same-state byte-deterministic. The two sheets without an
// ORDER BY ("me", "my_caldav_config") filter on a single user id.
func personalSheetQueries(actorID uuid.UUID) []sheetQuery {
uid := actorID
// Presumably a boolean SQL expression over alias "p" that references $1;
// the helper is defined elsewhere — confirm against its implementation.
visiblePProj := visibilityPredicatePositional("p", 1)
// The visible-projects subquery is used by all entity sheets that scope
// by project_id. Building it inline (rather than as a shared CTE) keeps
// each sheet's SQL self-contained for readability + lets the query
// planner choose its own join order. Every splice refers to the same $1.
visibleProjectsSubquery := `(SELECT p.id FROM paliad.projects p WHERE ` + visiblePProj + `)`
return []sheetQuery{
// --- entity sheets (subtree-aware via visibility predicate) ---
{
SheetName: "projects",
SQL: `SELECT * FROM paliad.projects p
WHERE ` + visiblePProj + `
ORDER BY p.id`,
Args: []any{uid},
},
// Own memberships are included even when the project itself does not
// pass the visibility predicate.
{
SheetName: "project_teams",
SQL: `SELECT * FROM paliad.project_teams
WHERE user_id = $1
OR project_id IN ` + visibleProjectsSubquery + `
ORDER BY project_id, user_id`,
Args: []any{uid},
},
{
SheetName: "deadlines",
SQL: `SELECT * FROM paliad.deadlines
WHERE project_id IN ` + visibleProjectsSubquery + `
ORDER BY id`,
Args: []any{uid},
},
{
SheetName: "appointments",
SQL: `SELECT * FROM paliad.appointments
WHERE project_id IN ` + visibleProjectsSubquery + `
ORDER BY id`,
Args: []any{uid},
},
{
SheetName: "parties",
SQL: `SELECT * FROM paliad.parties
WHERE project_id IN ` + visibleProjectsSubquery + `
ORDER BY id`,
Args: []any{uid},
},
// A note may hang off a project, a deadline, an appointment or a
// project event; COALESCE resolves whichever parent is set to its
// owning project before applying the visibility filter.
{
SheetName: "notes",
SQL: `SELECT * FROM paliad.notes
WHERE COALESCE(project_id,
(SELECT d.project_id FROM paliad.deadlines d WHERE d.id = notes.deadline_id),
(SELECT a.project_id FROM paliad.appointments a WHERE a.id = notes.appointment_id),
(SELECT pe.project_id FROM paliad.project_events pe WHERE pe.id = notes.project_event_id)
) IN ` + visibleProjectsSubquery + `
ORDER BY id`,
Args: []any{uid},
},
// documents uses an explicit column list (metadata only) instead of
// SELECT *.
{
SheetName: "documents",
SQL: `SELECT id, project_id, title, doc_type, file_path, file_size, mime_type, uploaded_by, created_at, updated_at
FROM paliad.documents
WHERE project_id IN ` + visibleProjectsSubquery + `
ORDER BY id`,
Args: []any{uid},
// ai_extracted jsonb is the only column omitted from the
// personal projection because it can carry verbose AI prompts.
},
{
SheetName: "project_events",
SQL: `SELECT * FROM paliad.project_events
WHERE project_id IN ` + visibleProjectsSubquery + `
ORDER BY id`,
Args: []any{uid},
},
// Requests the caller raised or decided are included regardless of
// project visibility, in addition to every request on a visible project.
{
SheetName: "approval_requests",
SQL: `SELECT * FROM paliad.approval_requests
WHERE requested_by = $1
OR decided_by = $1
OR project_id IN ` + visibleProjectsSubquery + `
ORDER BY id`,
Args: []any{uid},
},
{
SheetName: "checklist_instances",
SQL: `SELECT * FROM paliad.checklist_instances
WHERE project_id IN ` + visibleProjectsSubquery + `
ORDER BY id`,
Args: []any{uid},
},
// --- personal sidecars (my_*) ---
// "me" is an explicit column allow-list over paliad.users for the
// caller's own row.
{
SheetName: "me",
SQL: `SELECT id, email, display_name, office, profession, job_title,
practice_group, lang, reminder_morning_time, reminder_evening_time,
reminder_timezone, reminder_warning_offset_days, escalation_contact_id,
email_preferences, additional_offices, global_role, forum_pref,
created_at, updated_at
FROM paliad.users
WHERE id = $1`,
Args: []any{uid},
},
{
SheetName: "my_caldav_config",
SQL: `SELECT user_id, url, username, calendar_path, enabled,
last_sync_at, last_sync_error, created_at, updated_at
FROM paliad.user_caldav_config
WHERE user_id = $1`,
Args: []any{uid},
DropColumns: []string{"password_encrypted"}, // belt-and-braces; the SELECT above already omits it
},
{
SheetName: "my_views",
SQL: `SELECT * FROM paliad.user_views
WHERE user_id = $1
ORDER BY id`,
Args: []any{uid},
},
{
SheetName: "my_pinned_projects",
SQL: `SELECT * FROM paliad.user_pinned_projects
WHERE user_id = $1
ORDER BY project_id`,
Args: []any{uid},
},
{
SheetName: "my_card_layouts",
SQL: `SELECT * FROM paliad.user_card_layouts
WHERE user_id = $1
ORDER BY id`,
Args: []any{uid},
},
// NOTE(review): sorted by started_at alone — rows sharing a timestamp
// have no guaranteed relative order, which would break the
// byte-determinism goal; consider a tie-breaker column. Also note the
// org-scope registry in this file excludes paliadin tables
// structurally — confirm the caller's own turns are meant to be
// exported here.
{
SheetName: "my_paliadin_turns",
SQL: `SELECT * FROM paliad.paliadin_turns
WHERE user_id = $1
ORDER BY started_at`,
Args: []any{uid},
},
// --- restricted users-referenced sheet ---
// Surfaces only id/email/display_name/office/profession for users
// who appear as FKs in the export — avoids dumping all 47
// users on a personal-scope handoff. The final UNION branch always
// adds the caller.
//
// NOTE(review): the FK sources below do not cover
// approval_requests.requested_by / decided_by or
// documents.uploaded_by, although both sheets are exported above and
// the project-scope registry does include them — confirm whether the
// narrower disclosure here is intentional.
{
SheetName: "users_referenced",
SQL: `SELECT id, email, display_name, office, profession
FROM paliad.users u
WHERE u.id IN (
SELECT created_by FROM paliad.projects WHERE id IN ` + visibleProjectsSubquery + `
UNION SELECT created_by FROM paliad.deadlines WHERE project_id IN ` + visibleProjectsSubquery + `
UNION SELECT created_by FROM paliad.appointments WHERE project_id IN ` + visibleProjectsSubquery + `
UNION SELECT created_by FROM paliad.project_events WHERE project_id IN ` + visibleProjectsSubquery + `
UNION SELECT user_id FROM paliad.project_teams WHERE project_id IN ` + visibleProjectsSubquery + `
UNION SELECT created_by FROM paliad.notes WHERE COALESCE(project_id,
(SELECT d.project_id FROM paliad.deadlines d WHERE d.id = notes.deadline_id),
(SELECT a.project_id FROM paliad.appointments a WHERE a.id = notes.appointment_id),
(SELECT pe.project_id FROM paliad.project_events pe WHERE pe.id = notes.project_event_id)
) IN ` + visibleProjectsSubquery + `
UNION SELECT $1::uuid
)
ORDER BY id`,
Args: []any{uid},
},
// --- reference data (read-only, prefixed ref__) ---
// Same set as project scope; included so the workbook is
// interpretable standalone without paliad context. These queries are
// unscoped and take no bind arguments.
{
SheetName: "ref__proceeding_types",
SQL: `SELECT * FROM paliad.proceeding_types ORDER BY id`,
},
{
SheetName: "ref__event_types",
SQL: `SELECT * FROM paliad.event_types ORDER BY id`,
},
{
SheetName: "ref__event_categories",
SQL: `SELECT * FROM paliad.event_categories ORDER BY id`,
},
{
SheetName: "ref__deadline_rules",
SQL: `SELECT * FROM paliad.deadline_rules ORDER BY id`,
},
{
SheetName: "ref__deadline_concepts",
SQL: `SELECT * FROM paliad.deadline_concepts ORDER BY id`,
},
{
SheetName: "ref__courts",
SQL: `SELECT * FROM paliad.courts ORDER BY id`,
},
{
SheetName: "ref__countries",
SQL: `SELECT * FROM paliad.countries ORDER BY code`,
},
{
SheetName: "ref__holidays",
SQL: `SELECT * FROM paliad.holidays ORDER BY date, country`,
},
}
}
// ---------------------------------------------------------------------------
// Audit row helpers (used by the handler; here to keep all export-related
// SQL in one file).
// ---------------------------------------------------------------------------
// WriteAuditRow inserts a system_audit_log row (event_type 'data_export')
// before the export runs and returns the new row id. The handler PATCHes
// the row with file_size_bytes + final row_counts on success or marks it
// failed on error.
//
// The metadata jsonb always carries requested_at (spec.GeneratedAt as
// RFC 3339 UTC). For project-scope exports it additionally carries the
// ltree path (Q6 lock-in) so the audit row remains interpretable after a
// project deletion: scope_root → just the UUID; metadata.root_path → the
// ancestry. Same goes for root_label + direct_only so dashboards don't
// need to round-trip back to paliad.projects on render. Empty
// root_label / root_path values are left out rather than stored as "".
//
// Returns uuid.Nil and a wrapped error when the metadata cannot be
// encoded or the INSERT fails.
func (s *ExportService) WriteAuditRow(ctx context.Context, spec ExportSpec) (uuid.UUID, error) {
	meta := map[string]any{
		"requested_at": spec.GeneratedAt.UTC().Format(time.RFC3339),
	}
	if spec.Scope == ExportScopeProject {
		if spec.ScopeRootLabel != "" {
			meta["root_label"] = spec.ScopeRootLabel
		}
		if spec.ScopeRootPath != "" {
			meta["root_path"] = spec.ScopeRootPath
		}
		meta["direct_only"] = spec.DirectOnly
	}

	// Surface an encoding failure here instead of handing an empty string
	// to the ::jsonb cast and getting an unrelated-looking database error.
	mb, err := json.Marshal(meta)
	if err != nil {
		return uuid.Nil, fmt.Errorf("audit insert: marshal metadata: %w", err)
	}

	var id uuid.UUID
	err = s.db.QueryRowContext(ctx,
		`INSERT INTO paliad.system_audit_log
(event_type, actor_id, actor_email, scope, scope_root, metadata)
VALUES ('data_export', $1, $2, $3, $4, $5::jsonb)
RETURNING id`,
		spec.ActorID, spec.ActorEmail, spec.Scope, spec.ScopeRoot, string(mb),
	).Scan(&id)
	if err != nil {
		return uuid.Nil, fmt.Errorf("audit insert: %w", err)
	}
	return id, nil
}
// PatchAuditRowSuccess updates the audit row identified by id with the
// final row counts, the generated artifact size, any warnings and a
// completed_at timestamp (RFC 3339 UTC, taken at call time).
//
// The payload is merged into the existing metadata with the jsonb `||`
// operator, so keys written at insert time are kept unless the payload
// carries the same key. updated_at is bumped to now().
//
// Returns a wrapped error when the payload cannot be encoded or the
// UPDATE fails.
func (s *ExportService) PatchAuditRowSuccess(ctx context.Context, id uuid.UUID, meta ExportMeta, fileSizeBytes int64) error {
	payload := map[string]any{
		"row_counts":      meta.RowCounts,
		"file_size_bytes": fileSizeBytes,
		"warnings":        meta.Warnings,
		"completed_at":    time.Now().UTC().Format(time.RFC3339),
	}

	// Report an encoding failure explicitly; an empty string would
	// otherwise reach the ::jsonb cast and fail with a misleading error.
	mb, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("audit patch success: marshal metadata: %w", err)
	}

	if _, err := s.db.ExecContext(ctx,
		`UPDATE paliad.system_audit_log
SET metadata = metadata || $2::jsonb,
updated_at = now()
WHERE id = $1`,
		id, string(mb),
	); err != nil {
		return fmt.Errorf("audit patch success: %w", err)
	}
	return nil
}
// PatchAuditRowFailure marks the audit row identified by id as a failed
// export: event_type becomes 'data_export_failed' (a separate event_type so
// dashboards can count failures distinctly) and the error string plus a
// failed_at timestamp (RFC 3339 UTC) are merged into metadata.
//
// The write is best-effort and returns nothing: audit-write errors are
// never propagated because the original export error is the real one to
// bubble.
//
// If ctx is already cancelled or past its deadline — a common reason for
// the export to have failed in the first place — the UPDATE runs on a
// short-lived detached context instead, so the failure still gets
// recorded rather than being rejected immediately.
func (s *ExportService) PatchAuditRowFailure(ctx context.Context, id uuid.UUID, errStr string) {
	const detachedTimeout = 5 * time.Second

	payload := map[string]any{
		"error":     errStr,
		"failed_at": time.Now().UTC().Format(time.RFC3339),
	}
	// A map of two plain strings always encodes, so the error is
	// deliberately discarded.
	mb, _ := json.Marshal(payload)

	if ctx.Err() != nil {
		detached, cancel := context.WithTimeout(context.Background(), detachedTimeout)
		defer cancel()
		ctx = detached
	}

	// Best-effort — see the function comment for why the result is ignored.
	_, _ = s.db.ExecContext(ctx,
		`UPDATE paliad.system_audit_log
SET event_type = 'data_export_failed',
metadata = metadata || $2::jsonb,
updated_at = now()
WHERE id = $1`,
		id, string(mb),
	)
}
// ---------------------------------------------------------------------------
// Project-scope sheet registry (Slice 2).
// ---------------------------------------------------------------------------
//
// Subtree-aware queries via paliad.projects.path (ltree as text). The
// subtree predicate works on the materialised path column:
//
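// p.path = root.path OR p.path LIKE root.path || '.%' -- self + descendants
// $1::uuid (the root id itself, no path lookup) -- self only (direct_only=true)
//
// The '.' in front of '%' keeps the match on label boundaries, so a path
// that merely shares a textual prefix with the root is not pulled in.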
//
// We use the path-prefix-LIKE form instead of ltree `<@` because the
// schema stores path as text (the underlying ltree is materialised in
// the projects.path column). The LIKE pattern is anchored at the start
// and uses indexes built on path.
//
// Ordering: every SELECT uses ORDER BY id (or another stable tuple) so
// byte-determinism holds across runs.
// projectSubtreeProjectIDsSQL returns a parenthesised SQL subquery that
// yields one `id` column: the project ids belonging to the subtree rooted
// at $1. It is meant to be spliced in as the right-hand side of `IN`, and
// the caller must bind the root project's UUID to $1.
//
// With directOnly set, the subquery collapses to the root id alone. It is
// still shaped as a subquery so the surrounding SQL can be composed the
// same way in both modes.
func projectSubtreeProjectIDsSQL(directOnly bool) string {
	// Root only: no table access, just the bound id.
	const rootOnlySQL = `(SELECT $1::uuid AS id)`

	// Root plus descendants: r is the root row looked up by $1; a project
	// qualifies when its path equals the root's path or extends it past a
	// '.' separator.
	const subtreeSQL = `(
SELECT p.id
FROM paliad.projects p
JOIN paliad.projects r ON r.id = $1::uuid
WHERE p.path = r.path
OR p.path LIKE r.path || '.%'
)`

	if !directOnly {
		return subtreeSQL
	}
	return rootOnlySQL
}
// projectSheetQueries returns the ordered sheet registry for a
// project-scope export. rootID is bound to $1 in every scoped query;
// directOnly narrows the subtree to just the root project. The ref__*
// reference sheets are unscoped and carry no arguments.
//
// Sheet inclusion follows design §2.2. Same shape as personalSheetQueries
// but with subtree filtering instead of RLS-visibility and a tighter
// users-disclosure profile.
func projectSheetQueries(rootID uuid.UUID, directOnly bool) []sheetQuery {
// subtree is a parenthesised subquery over $1; it is spliced into the SQL
// below (sometimes several times per query) and every occurrence refers to
// the same single bind argument.
subtree := projectSubtreeProjectIDsSQL(directOnly)
queries := []sheetQuery{
// --- entity sheets (subtree-scoped) ---
{
SheetName: "projects",
SQL: `SELECT * FROM paliad.projects
WHERE id IN ` + subtree + `
ORDER BY id`,
Args: []any{rootID},
},
{
SheetName: "project_teams",
SQL: `SELECT * FROM paliad.project_teams
WHERE project_id IN ` + subtree + `
ORDER BY project_id, user_id`,
Args: []any{rootID},
},
{
SheetName: "project_partner_units",
SQL: `SELECT * FROM paliad.project_partner_units
WHERE project_id IN ` + subtree + `
ORDER BY project_id, partner_unit_id`,
Args: []any{rootID},
},
{
SheetName: "deadlines",
SQL: `SELECT * FROM paliad.deadlines
WHERE project_id IN ` + subtree + `
ORDER BY id`,
Args: []any{rootID},
},
{
SheetName: "appointments",
SQL: `SELECT * FROM paliad.appointments
WHERE project_id IN ` + subtree + `
ORDER BY id`,
Args: []any{rootID},
},
{
SheetName: "parties",
SQL: `SELECT * FROM paliad.parties
WHERE project_id IN ` + subtree + `
ORDER BY id`,
Args: []any{rootID},
},
// A note may hang off a project, a deadline, an appointment or a
// project event; COALESCE resolves whichever parent is set to its
// owning project before applying the subtree filter.
{
SheetName: "notes",
SQL: `SELECT * FROM paliad.notes
WHERE COALESCE(project_id,
(SELECT d.project_id FROM paliad.deadlines d WHERE d.id = notes.deadline_id),
(SELECT a.project_id FROM paliad.appointments a WHERE a.id = notes.appointment_id),
(SELECT pe.project_id FROM paliad.project_events pe WHERE pe.id = notes.project_event_id)
) IN ` + subtree + `
ORDER BY id`,
Args: []any{rootID},
},
// documents uses an explicit column list (metadata only) instead of
// SELECT *.
{
SheetName: "documents",
SQL: `SELECT id, project_id, title, doc_type, file_path, file_size, mime_type, uploaded_by, created_at, updated_at
FROM paliad.documents
WHERE project_id IN ` + subtree + `
ORDER BY id`,
Args: []any{rootID},
},
{
SheetName: "project_events",
SQL: `SELECT * FROM paliad.project_events
WHERE project_id IN ` + subtree + `
ORDER BY id`,
Args: []any{rootID},
},
{
SheetName: "approval_requests",
SQL: `SELECT * FROM paliad.approval_requests
WHERE project_id IN ` + subtree + `
ORDER BY id`,
Args: []any{rootID},
},
// Approval policies — m's Q4 lock: ship all three sources with
// `source` attribution column so an importer can reconstruct
// "what gate applies" without re-running paliad's resolver.
//
// Source 1: project rows for any project in the subtree.
// Source 2: project rows for ancestors of the root (so a
// descendant export still sees the gate inherited
// from above the subtree).
// Source 3: partner-unit-default rows for units attached to
// any subtree project.
//
// One UNION ALL query, with a `source` column tagged per branch
// ('project', 'ancestor', 'partner_unit_default'); branches are not
// de-duplicated against each other. The ancestor branch finds
// projects whose path is a strict '.'-delimited prefix of the root's
// path and excludes the root itself.
// We hand-pick the columns to keep the shape stable across the
// three sources (approval_policies.project_id is nullable when
// the row is a partner-unit-default, etc.).
{
SheetName: "approval_policies",
SQL: `
SELECT 'project'::text AS source,
id, project_id, partner_unit_id, entity_type, lifecycle_event,
required_role, requires_approval, min_role,
created_by, created_at, updated_at
FROM paliad.approval_policies
WHERE project_id IN ` + subtree + `
UNION ALL
SELECT 'ancestor'::text AS source,
ap.id, ap.project_id, ap.partner_unit_id, ap.entity_type, ap.lifecycle_event,
ap.required_role, ap.requires_approval, ap.min_role,
ap.created_by, ap.created_at, ap.updated_at
FROM paliad.approval_policies ap
JOIN paliad.projects r ON r.id = $1::uuid
WHERE ap.project_id IS NOT NULL
AND ap.project_id <> $1::uuid
AND ap.project_id IN (
SELECT pa.id
FROM paliad.projects pa
WHERE r.path LIKE pa.path || '.%'
)
UNION ALL
SELECT 'partner_unit_default'::text AS source,
ap.id, ap.project_id, ap.partner_unit_id, ap.entity_type, ap.lifecycle_event,
ap.required_role, ap.requires_approval, ap.min_role,
ap.created_by, ap.created_at, ap.updated_at
FROM paliad.approval_policies ap
WHERE ap.partner_unit_id IS NOT NULL
AND ap.partner_unit_id IN (
SELECT ppu.partner_unit_id
FROM paliad.project_partner_units ppu
WHERE ppu.project_id IN ` + subtree + `
)
ORDER BY source, id`,
Args: []any{rootID},
},
{
SheetName: "checklist_instances",
SQL: `SELECT * FROM paliad.checklist_instances
WHERE project_id IN ` + subtree + `
ORDER BY id`,
Args: []any{rootID},
},
// --- attached partner-unit subset ---
// Only units attached to any subtree project (avoids dumping
// the full org chart into a per-matter handover).
{
SheetName: "partner_units",
SQL: `SELECT * FROM paliad.partner_units pu
WHERE pu.id IN (
SELECT ppu.partner_unit_id
FROM paliad.project_partner_units ppu
WHERE ppu.project_id IN ` + subtree + `
)
ORDER BY pu.id`,
Args: []any{rootID},
},
{
SheetName: "partner_unit_members",
SQL: `SELECT * FROM paliad.partner_unit_members pum
WHERE pum.partner_unit_id IN (
SELECT ppu.partner_unit_id
FROM paliad.project_partner_units ppu
WHERE ppu.project_id IN ` + subtree + `
)
ORDER BY partner_unit_id, user_id`,
Args: []any{rootID},
},
// --- restricted users sheet ---
// Limit user disclosure to those referenced by some FK in the
// export. Keeps a per-matter handover from leaking the full
// firm roster (47 users → typically 3-5 per matter). Only
// id/email/display_name/office/profession are selected. FK sources:
// creators of projects, deadlines, appointments, project events and
// notes; team members; approval requesters and deciders; document
// uploaders; members of attached partner units.
{
SheetName: "users_referenced",
SQL: `SELECT id, email, display_name, office, profession
FROM paliad.users u
WHERE u.id IN (
SELECT created_by FROM paliad.projects WHERE id IN ` + subtree + `
UNION SELECT created_by FROM paliad.deadlines WHERE project_id IN ` + subtree + `
UNION SELECT created_by FROM paliad.appointments WHERE project_id IN ` + subtree + `
UNION SELECT created_by FROM paliad.project_events WHERE project_id IN ` + subtree + `
UNION SELECT user_id FROM paliad.project_teams WHERE project_id IN ` + subtree + `
UNION SELECT requested_by FROM paliad.approval_requests WHERE project_id IN ` + subtree + `
UNION SELECT decided_by FROM paliad.approval_requests WHERE project_id IN ` + subtree + ` AND decided_by IS NOT NULL
UNION SELECT created_by FROM paliad.notes WHERE COALESCE(project_id,
(SELECT d.project_id FROM paliad.deadlines d WHERE d.id = notes.deadline_id),
(SELECT a.project_id FROM paliad.appointments a WHERE a.id = notes.appointment_id),
(SELECT pe.project_id FROM paliad.project_events pe WHERE pe.id = notes.project_event_id)
) IN ` + subtree + `
UNION SELECT uploaded_by FROM paliad.documents WHERE project_id IN ` + subtree + ` AND uploaded_by IS NOT NULL
UNION SELECT user_id FROM paliad.partner_unit_members pum
WHERE pum.partner_unit_id IN (
SELECT ppu.partner_unit_id
FROM paliad.project_partner_units ppu
WHERE ppu.project_id IN ` + subtree + `
)
)
ORDER BY id`,
Args: []any{rootID},
},
// --- system_audit_log subset (the export's own audit trail) ---
// Includes prior export events scoped to this subtree's
// projects — lets a recipient see "who has previously
// exported this matter". The filter is on scope_root only, so any
// audit row whose scope_root is a subtree project is included,
// whatever its event_type.
{
SheetName: "system_audit_log_subset",
SQL: `SELECT * FROM paliad.system_audit_log
WHERE scope_root IN ` + subtree + `
ORDER BY created_at, id`,
Args: []any{rootID},
},
// --- reference data (same set as personal scope) ---
{SheetName: "ref__proceeding_types", SQL: `SELECT * FROM paliad.proceeding_types ORDER BY id`},
{SheetName: "ref__event_types", SQL: `SELECT * FROM paliad.event_types ORDER BY id`},
{SheetName: "ref__event_categories", SQL: `SELECT * FROM paliad.event_categories ORDER BY id`},
{SheetName: "ref__deadline_rules", SQL: `SELECT * FROM paliad.deadline_rules ORDER BY id`},
{SheetName: "ref__deadline_concepts", SQL: `SELECT * FROM paliad.deadline_concepts ORDER BY id`},
{SheetName: "ref__courts", SQL: `SELECT * FROM paliad.courts ORDER BY id`},
{SheetName: "ref__countries", SQL: `SELECT * FROM paliad.countries ORDER BY code`},
{SheetName: "ref__holidays", SQL: `SELECT * FROM paliad.holidays ORDER BY date, country`},
}
return queries
}
// ---------------------------------------------------------------------------
// Org-scope sheet registry (Slice 3 / Backup Mode — t-paliad-246).
// ---------------------------------------------------------------------------
//
// orgSheetQueries returns the ordered sheet registry for an org-scope
// export: 37 entity sheets followed by 12 ref__* reference sheets. None of
// the queries is scoped or takes bind arguments.
//
// Full-schema dump. Bypasses paliad.can_see_project — admin-only,
// gated at the handler layer (BackupRunner trusts the caller).
//
// Sheet ordering: entity sheets first (alphabetical), then ref__*
// reference sheets (alphabetical). The xlsx writer iterates the slice
// in order; downstream consumers get the same order across runs.
//
// Hard exclusions (per design §5.2 / m's Q3 decision):
//
// - paliadin_turns
// - paliadin_aichat_conversation
//
// AI conversation history is the most-sensitive personal data paliad
// carries; m's prior Q5 decision in t-paliad-214 made the exclusion
// structural. The two tables are absent from the registry — not just
// column-level redacted — so a future schema addition cannot
// accidentally re-include them.
//
// Also excluded unconditionally (operational / shadow):
//
// - *_pre_NNN shadow tables (CREATE TABLE … AS SELECT backups
// written by destructive migrations)
// - paliad_schema_migrations (operational)
// - auth.* (Supabase Auth schema — not ours)
//
// The PII column deny-regex (piiColumnDenyRegex) catches
// secret|token|password|api_key|private_key on every sheet as a
// belt-and-braces filter. user_caldav_config.password_encrypted is
// explicitly named in DropColumns too.
func orgSheetQueries() []sheetQuery {
return []sheetQuery{
// --- entity sheets (alphabetical) ---
// Every query carries an ORDER BY: `id` where the table has one,
// otherwise a composite key or a timestamp with `id` as tie-breaker.
{SheetName: "appointment_caldav_targets", SQL: `SELECT * FROM paliad.appointment_caldav_targets ORDER BY appointment_id, calendar_binding_id`},
{SheetName: "appointments", SQL: `SELECT * FROM paliad.appointments ORDER BY id`},
{SheetName: "approval_policies", SQL: `SELECT * FROM paliad.approval_policies ORDER BY id`},
{SheetName: "approval_requests", SQL: `SELECT * FROM paliad.approval_requests ORDER BY id`},
// backups is self-reflexive — including it makes "what backups
// have we taken" recoverable from any prior backup. Tiny table.
{SheetName: "backups", SQL: `SELECT * FROM paliad.backups ORDER BY started_at, id`},
{SheetName: "caldav_sync_log", SQL: `SELECT * FROM paliad.caldav_sync_log ORDER BY occurred_at, id`},
{SheetName: "checklist_instances", SQL: `SELECT * FROM paliad.checklist_instances ORDER BY id`},
{SheetName: "checklist_shares", SQL: `SELECT * FROM paliad.checklist_shares ORDER BY id`},
{SheetName: "checklists", SQL: `SELECT * FROM paliad.checklists ORDER BY id`},
{SheetName: "deadline_rule_audit", SQL: `SELECT * FROM paliad.deadline_rule_audit ORDER BY changed_at, id`},
{SheetName: "deadlines", SQL: `SELECT * FROM paliad.deadlines ORDER BY id`},
// documents: ai_extracted jsonb dropped (verbose AI prompts;
// matches the personal/project precedent). Binaries are not in
// the export — only metadata. This is the one entity sheet with an
// explicit column list instead of SELECT *.
{
SheetName: "documents",
SQL: `SELECT id, project_id, title, doc_type, file_path, file_size, mime_type, uploaded_by, created_at, updated_at
FROM paliad.documents
ORDER BY id`,
},
{SheetName: "email_broadcasts", SQL: `SELECT * FROM paliad.email_broadcasts ORDER BY id`},
{SheetName: "email_template_versions", SQL: `SELECT * FROM paliad.email_template_versions ORDER BY id`},
{SheetName: "email_templates", SQL: `SELECT * FROM paliad.email_templates ORDER BY id`},
{SheetName: "firm_dashboard_default", SQL: `SELECT * FROM paliad.firm_dashboard_default ORDER BY id`},
{SheetName: "invitations", SQL: `SELECT * FROM paliad.invitations ORDER BY sent_at, id`},
{SheetName: "notes", SQL: `SELECT * FROM paliad.notes ORDER BY id`},
{SheetName: "parties", SQL: `SELECT * FROM paliad.parties ORDER BY id`},
{SheetName: "partner_unit_events", SQL: `SELECT * FROM paliad.partner_unit_events ORDER BY id`},
{SheetName: "partner_unit_members", SQL: `SELECT * FROM paliad.partner_unit_members ORDER BY partner_unit_id, user_id`},
{SheetName: "partner_units", SQL: `SELECT * FROM paliad.partner_units ORDER BY id`},
{SheetName: "policy_audit_log", SQL: `SELECT * FROM paliad.policy_audit_log ORDER BY changed_at, id`},
{SheetName: "project_events", SQL: `SELECT * FROM paliad.project_events ORDER BY id`},
{SheetName: "project_partner_units", SQL: `SELECT * FROM paliad.project_partner_units ORDER BY project_id, partner_unit_id`},
{SheetName: "project_teams", SQL: `SELECT * FROM paliad.project_teams ORDER BY project_id, user_id`},
{SheetName: "projects", SQL: `SELECT * FROM paliad.projects ORDER BY id`},
{SheetName: "reminder_log", SQL: `SELECT * FROM paliad.reminder_log ORDER BY sent_at, id`},
{SheetName: "submission_drafts", SQL: `SELECT * FROM paliad.submission_drafts ORDER BY id`},
{SheetName: "system_audit_log", SQL: `SELECT * FROM paliad.system_audit_log ORDER BY created_at, id`},
// user_caldav_config is selected with SELECT *, so the stored
// credential column is removed via DropColumns after the query.
{
SheetName: "user_caldav_config",
SQL: `SELECT * FROM paliad.user_caldav_config ORDER BY user_id`,
DropColumns: []string{"password_encrypted"}, // belt-and-braces; piiColumnDenyRegex also catches it
},
{SheetName: "user_calendar_bindings", SQL: `SELECT * FROM paliad.user_calendar_bindings ORDER BY user_id, calendar_path`},
{SheetName: "user_card_layouts", SQL: `SELECT * FROM paliad.user_card_layouts ORDER BY id`},
{SheetName: "user_dashboard_layouts", SQL: `SELECT * FROM paliad.user_dashboard_layouts ORDER BY user_id`},
{SheetName: "user_pinned_projects", SQL: `SELECT * FROM paliad.user_pinned_projects ORDER BY user_id, project_id`},
{SheetName: "user_views", SQL: `SELECT * FROM paliad.user_views ORDER BY id`},
{SheetName: "users", SQL: `SELECT * FROM paliad.users ORDER BY id`},
// --- reference data (alphabetical, prefixed ref__) ---
// A superset of the reference sheets shipped by the personal and
// project registries: the link tables and trigger_events appear only
// here.
{SheetName: "ref__countries", SQL: `SELECT * FROM paliad.countries ORDER BY code`},
{SheetName: "ref__courts", SQL: `SELECT * FROM paliad.courts ORDER BY id`},
{SheetName: "ref__deadline_concept_event_types", SQL: `SELECT * FROM paliad.deadline_concept_event_types ORDER BY concept_id, event_type_id`},
{SheetName: "ref__deadline_concepts", SQL: `SELECT * FROM paliad.deadline_concepts ORDER BY id`},
{SheetName: "ref__deadline_event_types", SQL: `SELECT * FROM paliad.deadline_event_types ORDER BY rule_id, event_type_id`},
{SheetName: "ref__deadline_rules", SQL: `SELECT * FROM paliad.deadline_rules ORDER BY id`},
{SheetName: "ref__event_categories", SQL: `SELECT * FROM paliad.event_categories ORDER BY id`},
{SheetName: "ref__event_category_concepts", SQL: `SELECT * FROM paliad.event_category_concepts ORDER BY category_id, concept_id`},
{SheetName: "ref__event_types", SQL: `SELECT * FROM paliad.event_types ORDER BY id`},
{SheetName: "ref__holidays", SQL: `SELECT * FROM paliad.holidays ORDER BY date, country`},
{SheetName: "ref__proceeding_types", SQL: `SELECT * FROM paliad.proceeding_types ORDER BY id`},
{SheetName: "ref__trigger_events", SQL: `SELECT * FROM paliad.trigger_events ORDER BY id`},
}
}