diff --git a/internal/db/migrations/098_submission_codes_prefix_and_rename.up.sql b/internal/db/migrations/098_submission_codes_prefix_and_rename.up.sql
index 4894221..6980afe 100644
--- a/internal/db/migrations/098_submission_codes_prefix_and_rename.up.sql
+++ b/internal/db/migrations/098_submission_codes_prefix_and_rename.up.sql
@@ -226,11 +226,14 @@ BEGIN
     -- 6.1 Every active+published row has the proceeding-code-prefixed
     -- 4+-segment shape. Archived rows (`_archived_litigation` ones)
     -- keep their shorter shape by design — they're carved out.
+    -- Suffix segments may include digits (existing data — e.g. EPA rule
+    -- codes like `epa.opp.boa.r106` / `epa.grant.exa.r71_3` carry the
+    -- statutory rule number in the suffix). Allow [a-z_0-9] per segment.
     SELECT count(*) INTO v_bad_shape
       FROM paliad.deadline_rules
      WHERE is_active = true
        AND lifecycle_state = 'published'
-       AND submission_code !~ '^[a-z_]+\.[a-z_]+\.[a-z_]+\.[a-z_]+(\..*)?$';
+       AND submission_code !~ '^[a-z_0-9]+\.[a-z_0-9]+\.[a-z_0-9]+\.[a-z_0-9]+(\..*)?$';
 
     IF v_bad_shape <> 0 THEN
         RAISE EXCEPTION 'mig 098: expected every active+published deadline_rules row to match the 4+-segment submission_code shape, got % violators',
diff --git a/internal/services/submission_codes_shape_test.go b/internal/services/submission_codes_shape_test.go
index f5eeabf..d618bbb 100644
--- a/internal/services/submission_codes_shape_test.go
+++ b/internal/services/submission_codes_shape_test.go
@@ -14,13 +14,15 @@ import (
 
 // submissionCodeShapeRegex is the proceeding-code-prefixed shape
 // installed by mig 098 (t-paliad-209): the proceeding's 3-segment code
-// (`^[a-z_]+\.[a-z_]+\.[a-z_]+\.`) followed by at least one suffix
-// segment (and optional further dot-separated segments). The regex
-// allows underscores so the legacy archived bucket (`_archived_…`) and
-// hand-seeded test rules (e.g. `s11a.initial`) match alongside the
-// canonical taxonomy. Mirrors the assertion in mig 098 §6.1.
+// (`^[a-z_0-9]+\.[a-z_0-9]+\.[a-z_0-9]+\.`) followed by at least one
+// suffix segment (and optional further dot-separated segments). The
+// regex allows digits so EPA suffixes like `r106` / `r71_3` / `r116`
+// (statutory rule numbers in the suffix) pass alongside canonical
+// dotted-word codes. Underscores cover the legacy archived bucket
+// (`_archived_…`) and hand-seeded test rules. Mirrors the assertion in
+// mig 098 §6.1.
 var submissionCodeShapeRegex = regexp.MustCompile(
-	`^[a-z_]+\.[a-z_]+\.[a-z_]+\.[a-z_]+(\..*)?$`)
+	`^[a-z_0-9]+\.[a-z_0-9]+\.[a-z_0-9]+\.[a-z_0-9]+(\..*)?$`)
 
 // TestSubmissionCodeShape walks every active+published row in
 // paliad.deadline_rules and asserts that submission_code matches the