From 4cd28bc89672b4c968401d704dd543da3cdf101a Mon Sep 17 00:00:00 2001 From: mAi Date: Tue, 26 May 2026 21:21:38 +0200 Subject: [PATCH] =?UTF-8?q?feat(db):=20mig=20152=20=E2=80=94=20dedupe=20id?= =?UTF-8?q?entical=20sequencing=5Frule=20clones=20(5=20archived)=20(t-pali?= =?UTF-8?q?ad-321=20/=20m/paliad#144=20follow-up)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mig 151 (t-paliad-319) archived 5 of 6 duplicate procedural_events for "Mängelbeseitigung / Zahlung" and reparented their sequencing_rules onto the canonical PE. The 6 sequencing_rules themselves were left active — and they are byte-for-byte clones (proceeding_type_id=NULL, rule_code=NULL, duration 14d, primary_party=NULL, condition_expr=NULL, …). The admin shows six indistinguishable rows for one legal concept. This migration archives 5 of 6, keeping the earliest-created row as canonical (ties on created_at broken by the lexicographically lowest UUID). Pre-write verification (Supabase MCP, 2026-05-26): - Exactly 1 clone-group surfaces under the full-signature query (procedural_event_id, proceeding_type_id, rule_code, duration_*, primary_party, condition_expr::text, trigger_event_id, alt_*, anchor_alt, combine_op, parent_id, is_spawn, spawn_*): 6 "Mängelbeseitigung / Zahlung" rows. - 0 paliad.deadlines reference any of the 5 to-be-archived rows (verified via deadlines.sequencing_rule_id JOIN; rule_id column was dropped in mig 140 / Slice B.4). - Other name-duplicates (Antrag auf Patentänderung×4, Beginn des Hauptsacheverfahrens×2, Berufungsbegründung-R.220.1×2, Berufungsschrift-R.220.1×2) do NOT collapse under this signature — their proceeding_type_id / rule_code / duration / primary_party differ. Legitimately distinct rules per proceeding. This mig leaves them alone. Migration shape (mirrors mig 151): 1. Build dedupe mapping (duplicate_id → canonical_id) into a TEMP table, ranking each clone group with ROW_NUMBER() OVER (PARTITION BY full-signature ORDER BY created_at, id::text). 2.
PRE NOTICE: surface every clone-group with its canonical + dups so the deploy log shows what's about to be touched (m may want to spot-check). 3. Snapshot the duplicates into paliad.sequencing_rules_pre_152 (precedent pre_091/093/095/098/140/151). 4. Reparent paliad.deadlines.sequencing_rule_id duplicate → canonical BEFORE archiving (defensive no-op today). 5. set_config('paliad.audit_reason', …) — defensive; sequencing_rules has no audit trigger yet (mig 151 §scope verified), but a future trigger would inherit the reason automatically. 6. UPDATE sequencing_rules SET is_active=false, lifecycle_state='archived' WHERE id IN dups. 7. POST assertions: expected archive count met, zero clone groups remaining in active+published, zero live deadlines pointing at an archived sequencing_rule. RAISE EXCEPTION on any mismatch. Down: best-effort revert (flips archived → published from snapshot). Doesn't undo the deadlines reparent (live data didn't need one; snapshot doesn't carry pre-state of deadlines). Build + vet clean. TestMigrations_NoDuplicateSlot passes. --- ..._identical_sequencing_rule_clones.down.sql | 17 ++ ...pe_identical_sequencing_rule_clones.up.sql | 240 ++++++++++++++++++ 2 files changed, 257 insertions(+) create mode 100644 internal/db/migrations/152_dedupe_identical_sequencing_rule_clones.down.sql create mode 100644 internal/db/migrations/152_dedupe_identical_sequencing_rule_clones.up.sql diff --git a/internal/db/migrations/152_dedupe_identical_sequencing_rule_clones.down.sql b/internal/db/migrations/152_dedupe_identical_sequencing_rule_clones.down.sql new file mode 100644 index 0000000..1fd9dc0 --- /dev/null +++ b/internal/db/migrations/152_dedupe_identical_sequencing_rule_clones.down.sql @@ -0,0 +1,17 @@ +-- 152_dedupe_identical_sequencing_rule_clones (down) — t-paliad-321 +-- +-- Best-effort revert from paliad.sequencing_rules_pre_152. Flips the +-- archived rows back to is_active=true / lifecycle_state='published'. 
+-- Does NOT undo the deadlines.sequencing_rule_id reparent — that would
+-- require remembering the previous pointer per row, which the snapshot
+-- on sequencing_rules doesn't carry. In live data the reparent was a
+-- no-op (zero deadlines pointed at duplicates), so this is fine.
+--
+-- Only rows that are still in the state the up migration left them in
+-- (is_active=false / lifecycle_state='archived') are flipped back. A
+-- snapshot row that was changed again after mig 152 ran is left alone
+-- instead of being force-republished.
+
+UPDATE paliad.sequencing_rules sr
+   SET is_active = true,
+       lifecycle_state = 'published',
+       updated_at = now()
+  FROM paliad.sequencing_rules_pre_152 snap
+ WHERE sr.id = snap.id
+   AND sr.is_active = false
+   AND sr.lifecycle_state = 'archived';
+
+-- Dropped last: the UPDATE above reads the snapshot.
+DROP TABLE IF EXISTS paliad.sequencing_rules_pre_152;
diff --git a/internal/db/migrations/152_dedupe_identical_sequencing_rule_clones.up.sql b/internal/db/migrations/152_dedupe_identical_sequencing_rule_clones.up.sql
new file mode 100644
index 0000000..ea9c3bb
--- /dev/null
+++ b/internal/db/migrations/152_dedupe_identical_sequencing_rule_clones.up.sql
@@ -0,0 +1,240 @@
+-- 152_dedupe_identical_sequencing_rule_clones — t-paliad-321 / m/paliad#144 follow-up
+--
+-- Purpose: mig 151 archived 5 of 6 duplicate procedural_events for
+-- "Mängelbeseitigung / Zahlung" and reparented their sequencing_rules
+-- onto the canonical PE. The 6 sequencing_rules themselves remained
+-- active. Because every one of them is a byte-for-byte clone (same
+-- proceeding_type_id=NULL, rule_code=NULL, duration 14d, primary_party=NULL,
+-- everything else NULL, lifecycle_state='published') and only sequence_order
+-- differs, the admin shows six indistinguishable rows for one legal
+-- concept. This mig archives 5 of the 6, keeping the earliest-created
+-- row as canonical (ties on created_at are broken by the
+-- lexicographically lowest UUID — see ORDER BY created_at, id::text
+-- in step 1).
+--
+-- Scope verified live before write (Supabase MCP, 2026-05-26):
+--   * Exactly 1 clone-group surfaces by the full-signature query
+--     below: 6 "Mängelbeseitigung / Zahlung" sequencing_rules with
+--     all-NULL discriminators and (duration_value=14, duration_unit='days').
+-- * 0 paliad.deadlines reference the 5 to-be-archived rows +-- (verified via deadlines.sequencing_rule_id JOIN; the column +-- formerly named deadlines.rule_id was dropped in mig 140 / B.4). +-- * Other name-groups in the live corpus — "Antrag auf +-- Patentänderung"×4, "Beginn des Hauptsacheverfahrens"×2, +-- "Berufungsbegründung-R.220.1"×2, "Berufungsschrift-R.220.1"×2 — +-- do NOT collapse under this signature because their +-- proceeding_type_id / rule_code / duration / primary_party +-- differ. They are legitimately distinct rules per proceeding; +-- this mig leaves them alone. +-- +-- Hard constraints honoured (mirrors mig 151): +-- * No deletions. Archived rows flip to is_active=false + +-- lifecycle_state='archived'. Rows stay in the table for audit. +-- * Reparent paliad.deadlines.sequencing_rule_id duplicate → +-- canonical BEFORE archiving, so no live deadline keeps pointing +-- at an archived sequencing_rule. (deadlines.rule_id column +-- dropped in mig 140; the back-link lives on sequencing_rule_id +-- now — same UUID semantics.) +-- * Snapshot the affected rows into paliad.sequencing_rules_pre_152 +-- in the same TX, mirroring precedent (migs 091/093/095/098/140/151). +-- * set_config('paliad.audit_reason') is defensively called even +-- though no audit trigger fires on sequencing_rules today (mig 151 +-- §comments documented this). Future audit trigger would inherit +-- the reason automatically. +-- +-- Generic-shape rationale: the audit query below uses the FULL +-- signature paliadin specified — procedural_event_id, proceeding_type_id, +-- rule_code, duration_value, duration_unit, primary_party, condition_expr, +-- trigger_event_id, alt_*, anchor_alt, combine_op, parent_id, is_spawn, +-- spawn_*. A NOTICE surfaces every group BEFORE the archive step so an +-- operator running the deploy logs sees what's about to be touched. 
+-- If new groups appear after future seeds, this mig is safe to re-run
+-- conceptually (it would archive any new clones) but only fires once
+-- via the applied_migrations protocol.
+
+-- ----------------------------------------------------------------
+-- 1. Build the dedupe mapping (duplicate_id → canonical_id) into a
+--    TEMP table used by every subsequent step.
+--
+--    Only active + published rows are considered. Rows sharing the
+--    full signature form one clone group (PARTITION BY treats NULLs
+--    as equal). Within a group the row that sorts first by
+--    (created_at, id::text) is the canonical; every other row of the
+--    group becomes one mapping entry. A row with rn > 1 necessarily
+--    sits in a group of at least two, so no separate group-size
+--    filter is needed.
+-- ----------------------------------------------------------------
+
+CREATE TEMP TABLE tmp_sr_dedupe ON COMMIT DROP AS
+WITH ranked AS (
+    SELECT
+        sr.id,
+        sr.procedural_event_id,
+        -- Position of the row inside its clone group (1 = canonical).
+        ROW_NUMBER() OVER by_age AS rn,
+        -- id of the group's first row, i.e. of the rn = 1 row.
+        FIRST_VALUE(sr.id) OVER by_age AS canonical_id
+      FROM paliad.sequencing_rules AS sr
+     WHERE sr.is_active = true
+       AND sr.lifecycle_state = 'published'
+    WINDOW
+        clone_signature AS (
+            PARTITION BY
+                sr.procedural_event_id,
+                sr.proceeding_type_id,
+                sr.rule_code,
+                sr.duration_value,
+                sr.duration_unit,
+                sr.primary_party,
+                sr.condition_expr::text,
+                sr.trigger_event_id,
+                sr.alt_duration_value,
+                sr.alt_duration_unit,
+                sr.alt_rule_code,
+                sr.anchor_alt,
+                sr.combine_op,
+                sr.parent_id,
+                sr.is_spawn,
+                sr.spawn_label,
+                sr.spawn_proceeding_type_id
+        ),
+        by_age AS (clone_signature ORDER BY sr.created_at, sr.id::text)
+)
+SELECT
+    r.id AS duplicate_id,
+    r.canonical_id,
+    r.procedural_event_id,
+    (SELECT pe.name
+       FROM paliad.procedural_events AS pe
+      WHERE pe.id = r.procedural_event_id) AS pe_name
+  FROM ranked AS r
+ WHERE r.rn > 1;
+
+-- ----------------------------------------------------------------
+-- 2. Surface every clone-group as a NOTICE before archiving.
+--    First the overall number of rows in the mapping, then one line
+--    per (pe_name, canonical_id) with the duplicate ids sorted as text.
+-- ----------------------------------------------------------------
+
+DO $$
+DECLARE
+    v_total int;
+    grp     record;
+BEGIN
+    SELECT COUNT(*) INTO v_total FROM tmp_sr_dedupe;
+    RAISE NOTICE '[mig 152] PRE: % sequencing_rules row(s) will be archived', v_total;
+
+    FOR grp IN
+        SELECT
+            pe_name,
+            canonical_id,
+            COUNT(*) AS dup_count,
+            array_agg(duplicate_id::text ORDER BY duplicate_id::text) AS dup_ids
+          FROM tmp_sr_dedupe
+         GROUP BY pe_name, canonical_id
+         ORDER BY pe_name
+    LOOP
+        RAISE NOTICE '[mig 152] % canonical=% duplicates=% ids=%',
+            grp.pe_name, grp.canonical_id, grp.dup_count, grp.dup_ids;
+    END LOOP;
+END $$;
+
+-- ----------------------------------------------------------------
+-- 3. Snapshot the rows about to be archived (only the duplicates;
+--    the canonicals stay in the live table). Matches precedent.
+-- ----------------------------------------------------------------
+
+CREATE TABLE paliad.sequencing_rules_pre_152 AS
+SELECT sr.*
+  FROM paliad.sequencing_rules sr
+  JOIN tmp_sr_dedupe d ON d.duplicate_id = sr.id;
+
+COMMENT ON TABLE paliad.sequencing_rules_pre_152 IS
+    'Snapshot of paliad.sequencing_rules rows archived by mig 152 '
+    '(identical clones — Mängelbeseitigung / Zahlung × 5). Mirrors '
+    'precedent pre_091/093/095/098/140/151. Read-only revert source. '
+    't-paliad-321 / m/paliad#144 follow-up.';
+
+-- ----------------------------------------------------------------
+-- 4. Defensive audit-reason. Set BEFORE the first data-changing
+--    statement so that every write of this migration — the deadlines
+--    reparent in step 5 as well as the archive in step 6 — runs with
+--    the reason in place. Sequencing_rules has no audit trigger today
+--    (mig 151 §scope verified), but set_config(..., true) lasts until
+--    the end of the transaction, so a trigger that reads
+--    paliad.audit_reason inherits the reason automatically.
+--    NOTE(review): whether paliad.deadlines carries an audit trigger
+--    is not visible from this file — confirm; the early call is
+--    harmless either way.
+-- ----------------------------------------------------------------
+
+SELECT set_config('paliad.audit_reason',
+                  'mig 152: archive identical sequencing_rule clones (mig 151 follow-up; t-paliad-321)',
+                  true);
+
+-- ----------------------------------------------------------------
+-- 5. Reparent paliad.deadlines.sequencing_rule_id duplicate → canonical
+--    BEFORE archiving. Today's live data has 0 deadlines pointing at
+--    any duplicate, but the statement is safe + defensive against a
+--    race between drift-check and apply.
+--
+--    procedural_event_id is taken straight from the mapping: it is
+--    part of the clone signature in step 1, so a duplicate and its
+--    canonical always share the same value.
+-- ----------------------------------------------------------------
+
+UPDATE paliad.deadlines d
+   SET sequencing_rule_id = m.canonical_id,
+       procedural_event_id = m.procedural_event_id,
+       updated_at = now()
+  FROM tmp_sr_dedupe m
+ WHERE d.sequencing_rule_id = m.duplicate_id;
+
+-- ----------------------------------------------------------------
+-- 6. Archive the duplicates.
+-- ----------------------------------------------------------------
+
+UPDATE paliad.sequencing_rules
+   SET is_active = false,
+       lifecycle_state = 'archived',
+       updated_at = now()
+ WHERE id IN (SELECT duplicate_id FROM tmp_sr_dedupe);
+
+-- ----------------------------------------------------------------
+-- 7. POST assertions. Each check raises an exception on mismatch.
+-- ----------------------------------------------------------------
+
+DO $$
+DECLARE
+    v_expected         int;
+    v_archived         int;
+    v_remaining_dupes  int;
+    v_orphan_deadlines int;
+BEGIN
+    -- Number of rows the mapping says had to be archived.
+    SELECT COUNT(*) INTO v_expected FROM tmp_sr_dedupe;
+
+    -- a. Did the expected number of rows get archived?
+    SELECT COUNT(*) INTO v_archived
+      FROM paliad.sequencing_rules
+     WHERE id IN (SELECT duplicate_id FROM tmp_sr_dedupe)
+       AND lifecycle_state = 'archived'
+       AND is_active = false;
+    IF v_archived <> v_expected THEN
+        RAISE EXCEPTION '[mig 152] FAILED POST: expected % rows archived, got %',
+            v_expected, v_archived;
+    END IF;
+
+    -- b. No clone group of size > 1 should remain in active+published.
+    --    Same full signature as the partition used to build the mapping.
+    SELECT COUNT(*) INTO v_remaining_dupes FROM (
+        SELECT 1
+          FROM paliad.sequencing_rules
+         WHERE is_active = true AND lifecycle_state = 'published'
+         GROUP BY procedural_event_id, proceeding_type_id, rule_code,
+                  duration_value, duration_unit, primary_party,
+                  condition_expr::text, trigger_event_id,
+                  alt_duration_value, alt_duration_unit, alt_rule_code,
+                  anchor_alt, combine_op, parent_id, is_spawn, spawn_label,
+                  spawn_proceeding_type_id
+        HAVING COUNT(*) > 1
+    ) g;
+    IF v_remaining_dupes > 0 THEN
+        RAISE EXCEPTION '[mig 152] FAILED POST: % clone group(s) still active+published after archive', v_remaining_dupes;
+    END IF;
+
+    -- c. No deadline points at a sequencing_rule archived by THIS
+    --    migration. Deliberately scoped to tmp_sr_dedupe: a deadline
+    --    that references a rule archived by some other change is not
+    --    something this migration touched, and must not abort it.
+    SELECT COUNT(*) INTO v_orphan_deadlines
+      FROM paliad.deadlines d
+      JOIN paliad.sequencing_rules sr ON sr.id = d.sequencing_rule_id
+     WHERE sr.lifecycle_state = 'archived'
+       AND sr.id IN (SELECT duplicate_id FROM tmp_sr_dedupe);
+    IF v_orphan_deadlines > 0 THEN
+        RAISE EXCEPTION '[mig 152] FAILED POST: % live deadline(s) still point at an archived sequencing_rule', v_orphan_deadlines;
+    END IF;
+
+    RAISE NOTICE '[mig 152] OK — archived=%, remaining clone groups=0, orphan deadlines=0',
+        v_archived;
+END $$;