fix(t-paliad-136): Phase A — filter narrowing carries (concept, proc) tuples

The v3 B1 decision tree filter collapsed each leaf's
(concept_id, proceeding_type_code) tuple list down to a flat concept_id
slice in EventCategoryService.ConceptIDsForSlug, dropping the per-leaf
proceeding constraint. The search service then loaded pills by
concept_id only, so picking a UPC-specific leaf still surfaced DE/EPA/
DPMA pills for any shared concept (Klageerwiderung, Replik, Duplik,
Berufungsschrift). m's repro: choosing CMS-Eingang → Gegenseite →
UPC Verletzung leaked national submissions.

Confirmed via DB: at least 25 leaves were over-broad pre-fix.

Fix carries the tuple set end-to-end via a new subtreeFilter type with
parallel uuid[] / text[] arrays. The matview SQL now uses
unnest($cids, $procs) AS t(cid, pcode) to match each row against the
allowed tuples — a junction row with NULL proc encodes "any proc for
this concept" (used by cross-cutting concepts like Wiedereinsetzung).

EventCategoryService gains AllOutcomes() for browse-all so the root
view also respects junction tuples. allMappedConceptIDs is gone.

Tests: added 5 v4 subtests under TestDeadlineSearch covering m's
repro slug, multi-tuple narrowing, trigger-pill cross-cutting,
forum AND-narrowing, plus an invariant regression gate that walks
every leaf with non-NULL proc and asserts no pill leaks. Skipped
when TEST_DATABASE_URL is unset; existing v3 assertions unchanged.

No schema change. No migration. Ships independently of Phases B/C.
This commit is contained in:
m
2026-05-05 13:02:09 +02:00
parent 30ac337a78
commit b7470d7d77
3 changed files with 373 additions and 48 deletions

View File

@@ -249,31 +249,34 @@ func (s *DeadlineSearchService) Search(ctx context.Context, q string, opts Searc
qNorm := normalizeQuery(q)
browseMode := qNorm == "" && (opts.EventCategorySlug != "" || opts.BrowseAll)
// v3: resolve the event-category slug to a concept_id allow-list.
// When BrowseAll is set without a slug, the allow-list is the union
// of every concept reachable from any leaf — i.e. all rows of
// paliad.event_category_concepts.
var allowConceptIDs []string
// v4 (t-paliad-136): resolve the event-category slug — or the
// browse-all root — to a (concept_id, proceeding_type_code) tuple
// allow-list. The previous v3 implementation collapsed this to a
// flat concept_id slice and dropped the per-leaf proc constraint,
// which leaked DE/EPA/DPMA pills under "UPC infringement opposing
// party" leaves and similar. Carrying tuples end-to-end fixes the
// bug.
var subtree *subtreeFilter
if opts.EventCategorySlug != "" && s.eventCategory != nil {
ids, err := s.eventCategory.ConceptIDsForSlug(ctx, opts.EventCategorySlug)
outcomes, err := s.eventCategory.ConceptsForSlug(ctx, opts.EventCategorySlug)
if err != nil {
return nil, err
}
if len(ids) == 0 {
if len(outcomes) == 0 {
// Slug resolves to no concepts; return empty without hitting
// the matview.
return resp, nil
}
allowConceptIDs = ids
} else if opts.BrowseAll {
ids, err := s.allMappedConceptIDs(ctx)
subtree = newSubtreeFilter(outcomes)
} else if opts.BrowseAll && s.eventCategory != nil {
outcomes, err := s.eventCategory.AllOutcomes(ctx)
if err != nil {
return nil, err
}
if len(ids) == 0 {
if len(outcomes) == 0 {
return resp, nil
}
allowConceptIDs = ids
subtree = newSubtreeFilter(outcomes)
}
// v3: translate forum slugs to proceeding_code allow-list.
@@ -290,11 +293,11 @@ func (s *DeadlineSearchService) Search(ctx context.Context, q string, opts Searc
var ranks []rankRow
if browseMode {
// Browse mode: synthesize ranks from the allow-list directly.
ranks = s.browseRanks(ctx, allowConceptIDs, party, proc, source, forumCodes, limit)
ranks = s.browseRanks(ctx, subtree, party, proc, source, forumCodes, limit)
} else {
qLow := strings.ToLower(qNorm)
var err error
ranks, err = s.rankConcepts(ctx, qNorm, qLow, party, proc, source, allowConceptIDs, forumCodes, limit)
ranks, err = s.rankConcepts(ctx, qNorm, qLow, party, proc, source, subtree, forumCodes, limit)
if err != nil {
return nil, err
}
@@ -307,7 +310,7 @@ func (s *DeadlineSearchService) Search(ctx context.Context, q string, opts Searc
for i, r := range ranks {
conceptIDs[i] = r.ConceptID
}
pills, err := s.loadPills(ctx, conceptIDs, party, proc, source, forumCodes)
pills, err := s.loadPills(ctx, conceptIDs, party, proc, source, subtree, forumCodes)
if err != nil {
return nil, err
}
@@ -319,17 +322,75 @@ func (s *DeadlineSearchService) Search(ctx context.Context, q string, opts Searc
return resp, nil
}
// allMappedConceptIDs returns every distinct concept_id that has at
// least one row in paliad.event_category_concepts — the universe of
// concepts reachable from any leaf of the v3 decision tree. Drives B1
// browse-all mode (no slug picked yet, show the full landscape).
func (s *DeadlineSearchService) allMappedConceptIDs(ctx context.Context) ([]string, error) {
const sqlText = `SELECT DISTINCT concept_id::text FROM paliad.event_category_concepts`
var ids []string
if err := s.db.SelectContext(ctx, &ids, sqlText); err != nil {
return nil, fmt.Errorf("all mapped concept ids: %w", err)
// subtreeFilter expresses the per-leaf (concept_id, proceeding_type_code)
// tuple constraint in a form the matview SQL can apply.
//
// The tuples are stored as two parallel slices: index i of conceptIDs and
// index i of procCodes together form one allowed tuple. Both slices are
// passed to the SQL via unnest; each row of the matview must match at least
// one tuple to pass through. An empty proc code in the slice ('') means
// "any proceeding for this concept" — it is the encoding of a junction row
// with proceeding_type_code IS NULL, which the seed uses for cross-cutting
// concepts (Wiedereinsetzung, Weiterbehandlung, Versäumnisurteil-Einspruch,
// Schriftsatznachreichung).
//
// SQL match clause:
//
//	EXISTS (
//	  SELECT 1 FROM unnest($cids::uuid[], $procs::text[]) AS t(cid, pcode)
//	  WHERE t.cid = s.concept_id
//	    AND (t.pcode = '' OR t.pcode = s.proceeding_code)
//	)
//
// Trigger pills (kind='trigger', proceeding_code IS NULL) only surface
// when their concept appears with proc code '' in this filter — i.e. a
// junction row with NULL proc: for a NULL proceeding_code the comparison
// t.pcode = s.proceeding_code is never true, so only the '' branch can
// match. That matches the seed convention.
type subtreeFilter struct {
conceptIDs []string // concept UUIDs as text; parallel to procCodes
procCodes []string // '' encodes "any proc for this concept"
}
// newSubtreeFilter builds the parallel concept-ID / proc-code slices of a
// subtreeFilter from a slice of outcomes.
//
// Encoding: an outcome whose ProceedingTypeCode is nil becomes the tuple
// (ConceptID, ""), where "" stands for "any proceeding for this concept".
//
// Dedup rules:
//   - identical (concept, proc) tuples are emitted once, in first-seen order;
//   - when a concept has both a nil-proc outcome and one or more
//     proc-specific outcomes, only the nil-proc (unconstrained) tuple is
//     kept, because it subsumes the others and keeping them would only add
//     redundant work to the unnest.
//
// The returned filter is never nil; for empty input both slices are empty.
func newSubtreeFilter(outcomes []ConceptOutcome) *subtreeFilter {
	// Pass 1: record which concepts carry an unconstrained (nil-proc) row so
	// that pass 2 can drop their proc-specific rows regardless of input order.
	unconstrained := make(map[string]bool, len(outcomes))
	for _, o := range outcomes {
		if o.ProceedingTypeCode == nil {
			unconstrained[o.ConceptID] = true
		}
	}

	// A struct key cannot collide the way a separator-joined string could.
	type tuple struct{ conceptID, procCode string }
	seen := make(map[tuple]struct{}, len(outcomes))
	cids := make([]string, 0, len(outcomes))
	procs := make([]string, 0, len(outcomes))

	// Pass 2: emit each surviving tuple once.
	for _, o := range outcomes {
		var pc string // stays "" for a nil proc, i.e. "any proceeding"
		if o.ProceedingTypeCode != nil {
			if unconstrained[o.ConceptID] {
				continue // subsumed by this concept's unconstrained tuple
			}
			pc = *o.ProceedingTypeCode
		}
		key := tuple{conceptID: o.ConceptID, procCode: pc}
		if _, dup := seen[key]; dup {
			continue
		}
		seen[key] = struct{}{}
		cids = append(cids, o.ConceptID)
		procs = append(procs, pc)
	}
	return &subtreeFilter{conceptIDs: cids, procCodes: procs}
}
// args yields the (concept-ID array, proc-code array) pair to bind to the
// SQL placeholder pair. When the filter is nil or holds no tuples it yields
// two nil values instead, meaning "no narrowing applies"; this makes it safe
// to call on a nil *subtreeFilter.
func (f *subtreeFilter) args() (any, any) {
	if f != nil && len(f.conceptIDs) > 0 {
		return pq.Array(f.conceptIDs), pq.Array(f.procCodes)
	}
	return nil, nil
}
// translateForums maps a list of forum slugs to the union of their
@@ -357,13 +418,16 @@ func translateForums(slugs []string) []string {
return out
}
// browseRanks synthesizes a rank list from a concept-id allow-list
// browseRanks synthesizes a rank list from a subtree-filter tuple set
// (v3 B1 browse mode). No trigram scoring — order is by concept
// sort_order then name. Forum filter applies post-hoc to keep concepts
// that have at least one matching pill.
//
// v4: subtree filter enforces (concept_id, proceeding_code) tuples, not
// concept_id alone — see subtreeFilter doc.
func (s *DeadlineSearchService) browseRanks(
ctx context.Context,
conceptIDs []string,
subtree *subtreeFilter,
party, proc, source *string,
forumCodes []string,
limit int,
@@ -377,22 +441,27 @@ SELECT DISTINCT
s.concept_name_de,
ARRAY[]::text[] AS matched_aliases
FROM paliad.deadline_search s
WHERE s.concept_id = ANY($1::uuid[])
AND ($2::text IS NULL OR s.effective_party = $2)
AND ($3::text IS NULL OR s.proceeding_code = $3)
AND ($4::text IS NULL OR s.legal_source LIKE $4 || '%')
WHERE EXISTS (
SELECT 1 FROM unnest($1::uuid[], $2::text[]) AS t(cid, pcode)
WHERE t.cid = s.concept_id
AND (t.pcode = '' OR t.pcode = s.proceeding_code)
)
AND ($3::text IS NULL OR s.effective_party = $3)
AND ($4::text IS NULL OR s.proceeding_code = $4)
AND ($5::text IS NULL OR s.legal_source LIKE $5 || '%')
AND (
$5::text[] IS NULL
OR cardinality($5::text[]) = 0
$6::text[] IS NULL
OR cardinality($6::text[]) = 0
OR s.kind = 'trigger'
OR s.proceeding_code = ANY($5::text[])
OR s.proceeding_code = ANY($6::text[])
)
ORDER BY s.concept_sort_order ASC, s.concept_name_de ASC
LIMIT $6
LIMIT $7
`
cidArg, procArg := subtree.args()
var rows []rankRow
if err := s.db.SelectContext(ctx, &rows, sqlText,
pq.Array(conceptIDs),
cidArg, procArg,
party, proc, source,
nullableArray(forumCodes),
limit,
@@ -419,12 +488,13 @@ func (s *DeadlineSearchService) rankConcepts(
ctx context.Context,
q, qLow string,
party, proc, source *string,
allowConceptIDs []string,
subtree *subtreeFilter,
forumCodes []string,
limit int,
) ([]rankRow, error) {
// $1 q · $2 qLow · $3 party · $4 proc · $5 source ·
// $6 concept_allow uuid[]? · $7 forum_codes text[]? · $8 limit
// $6 subtree_cids uuid[]? · $7 subtree_procs text[]? ·
// $8 forum_codes text[]? · $9 limit
const sqlText = `
WITH matched AS (
SELECT
@@ -463,12 +533,19 @@ WITH matched AS (
AND ($3::text IS NULL OR s.effective_party = $3)
AND ($4::text IS NULL OR s.proceeding_code = $4)
AND ($5::text IS NULL OR s.legal_source LIKE $5 || '%')
AND ($6::uuid[] IS NULL OR s.concept_id = ANY($6::uuid[]))
AND (
$7::text[] IS NULL
OR cardinality($7::text[]) = 0
$6::uuid[] IS NULL
OR EXISTS (
SELECT 1 FROM unnest($6::uuid[], $7::text[]) AS t(cid, pcode)
WHERE t.cid = s.concept_id
AND (t.pcode = '' OR t.pcode = s.proceeding_code)
)
)
AND (
$8::text[] IS NULL
OR cardinality($8::text[]) = 0
OR s.kind = 'trigger'
OR s.proceeding_code = ANY($7::text[])
OR s.proceeding_code = ANY($8::text[])
)
)
SELECT
@@ -482,13 +559,14 @@ SELECT
FROM matched m
GROUP BY m.concept_id
ORDER BY score DESC, concept_sort_order ASC, concept_name_de ASC
LIMIT $8
LIMIT $9
`
cidArg, procArg := subtree.args()
var rows []rankRow
if err := s.db.SelectContext(ctx, &rows, sqlText,
q, qLow,
party, proc, source,
nullableArray(allowConceptIDs),
cidArg, procArg,
nullableArray(forumCodes),
limit,
); err != nil {
@@ -501,8 +579,12 @@ func (s *DeadlineSearchService) loadPills(
ctx context.Context,
conceptIDs []string,
party, proc, source *string,
subtree *subtreeFilter,
forumCodes []string,
) ([]pillRow, error) {
// $1 concept_ids uuid[] · $2 party · $3 proc · $4 source ·
// $5 subtree_cids uuid[]? · $6 subtree_procs text[]? ·
// $7 forum_codes text[]?
const sqlText = `
SELECT
s.kind,
@@ -535,16 +617,27 @@ SELECT
AND ($3::text IS NULL OR s.proceeding_code = $3)
AND ($4::text IS NULL OR s.legal_source LIKE $4 || '%')
AND (
$5::text[] IS NULL
OR cardinality($5::text[]) = 0
$5::uuid[] IS NULL
OR EXISTS (
SELECT 1 FROM unnest($5::uuid[], $6::text[]) AS t(cid, pcode)
WHERE t.cid = s.concept_id
AND (t.pcode = '' OR t.pcode = s.proceeding_code)
)
)
AND (
$7::text[] IS NULL
OR cardinality($7::text[]) = 0
OR s.kind = 'trigger'
OR s.proceeding_code = ANY($5::text[])
OR s.proceeding_code = ANY($7::text[])
)
ORDER BY s.concept_id, s.kind, s.proceeding_display_order, s.proceeding_code NULLS LAST, s.rule_local_code
`
cidArg, procArg := subtree.args()
var rows []pillRow
if err := s.db.SelectContext(ctx, &rows, sqlText,
pq.Array(conceptIDs), party, proc, source, nullableArray(forumCodes),
pq.Array(conceptIDs), party, proc, source,
cidArg, procArg,
nullableArray(forumCodes),
); err != nil {
return nil, fmt.Errorf("load pills: %w", err)
}

View File

@@ -3,6 +3,7 @@ package services
import (
"context"
"os"
"strings"
"testing"
"github.com/jmoiron/sqlx"
@@ -234,6 +235,210 @@ func TestDeadlineSearch(t *testing.T) {
}
}
})
// v4 (t-paliad-136): event-category narrowing must apply per-leaf
// (concept_id, proceeding_type_code) tuples, not just concept_id. The
// v3 implementation collapsed tuples to flat concept_ids and surfaced
// pills for every proceeding the matview had a row for. m's repro:
// picking "CMS-Eingang → Gegenseite → UPC Verletzung" leaked DE/EPA/
// DPMA pills.
ec := NewEventCategoryService(pool)
svc.SetEventCategoryService(ec)
t.Run("v4 event_category_slug narrows pills to per-leaf proceeding (UPC infringement subtree)", func(t *testing.T) {
	opts := SearchOptions{
		EventCategorySlug: "cms-eingang.gegenseite.upc-inf",
		Limit:             200,
	}
	resp, err := svc.Search(ctx, "", opts)
	if err != nil {
		t.Fatalf("search: %v", err)
	}

	// The seed maps every concept under this subtree to UPC_INF or
	// UPC_APP, so each rule pill must carry a UPC proceeding code — no
	// DE/EPA/DPMA codes may leak through.
	allowedRulePrefix := []string{"UPC_"}
	hasAllowedPrefix := func(code string) bool {
		for _, prefix := range allowedRulePrefix {
			if strings.HasPrefix(code, prefix) {
				return true
			}
		}
		return false
	}

	for _, card := range resp.Cards {
		for _, pill := range card.Pills {
			switch {
			case pill.Kind != "rule":
				// Only rule pills are subject to the proceeding check.
			case pill.Proceeding == nil:
				t.Errorf("rule pill on %q has no proceeding", card.Concept.Slug)
			case !hasAllowedPrefix(pill.Proceeding.Code):
				t.Errorf("subtree narrowing leaked non-UPC pill on %q: proc=%s rule=%s",
					card.Concept.Slug, pill.Proceeding.Code, pill.RuleLocalCode)
			}
		}
	}
})
t.Run("v4 event_category_slug honours per-tuple narrowing (klageerwiderung-mit-ccr leaf)", func(t *testing.T) {
	opts := SearchOptions{
		EventCategorySlug: "cms-eingang.gegenseite.upc-inf.klageerwiderung-mit-ccr",
		Limit:             200,
	}
	resp, err := svc.Search(ctx, "", opts)
	if err != nil {
		t.Fatalf("search: %v", err)
	}

	// For this leaf the junction maps three concepts, each with UPC_INF:
	// defence-to-counterclaim-for-revocation, application-to-amend and
	// reply-to-defence. So every rule pill must be UPC_INF.
	for _, card := range resp.Cards {
		for _, pill := range card.Pills {
			if pill.Kind != "rule" {
				continue
			}
			if pill.Proceeding != nil && pill.Proceeding.Code == "UPC_INF" {
				continue
			}
			code := "(nil)"
			if pill.Proceeding != nil {
				code = pill.Proceeding.Code
			}
			t.Errorf("klageerwiderung-mit-ccr leaf leaked non-UPC_INF pill on %q: proc=%s",
				card.Concept.Slug, code)
		}
	}
})
t.Run("v4 trigger-only concept under leaf with NULL proc surfaces", func(t *testing.T) {
	// frist-verpasst.epa maps wiedereinsetzung and weiterbehandlung
	// with NULL proceeding_type_code (cross-cutting). Both must
	// appear and both must surface their trigger pills.
	resp, err := svc.Search(ctx, "", SearchOptions{
		EventCategorySlug: "frist-verpasst.epa",
		Limit:             50,
	})
	if err != nil {
		t.Fatalf("search: %v", err)
	}

	// mustHaveCard returns the card with the given concept slug or aborts
	// the subtest, listing the slugs that were returned instead.
	mustHaveCard := func(slug string) ConceptCard {
		for _, c := range resp.Cards {
			if c.Concept.Slug == slug {
				return c
			}
		}
		t.Fatalf("missing card %q under frist-verpasst.epa; got %v", slug, conceptSlugs(resp.Cards))
		return ConceptCard{}
	}

	// Assert BOTH cross-cutting concepts, as the contract above states;
	// checking only one would let the other regress unnoticed.
	for _, slug := range []string{"wiedereinsetzung", "weiterbehandlung"} {
		card := mustHaveCard(slug)
		// Trigger pills (no proceeding) must be present.
		hasTrigger := false
		for _, p := range card.Pills {
			if p.Kind == "trigger" {
				hasTrigger = true
				break
			}
		}
		if !hasTrigger {
			t.Errorf("%s card under frist-verpasst.epa has no trigger pills", slug)
		}
	}
})
t.Run("v4 forum filter ANDs against subtree narrowing", func(t *testing.T) {
	// The UPC_INF subtree combined with a forum chip that excludes
	// UPC_INF is a self-contradictory selection; the correct UX is that
	// no rule pill survives both filters.
	opts := SearchOptions{
		EventCategorySlug: "cms-eingang.gegenseite.upc-inf",
		Forums:            []string{"epa_opp"},
		Limit:             200,
	}
	resp, err := svc.Search(ctx, "", opts)
	if err != nil {
		t.Fatalf("search: %v", err)
	}
	for _, card := range resp.Cards {
		for _, pill := range card.Pills {
			if pill.Kind != "rule" {
				continue
			}
			t.Errorf("AND-narrowing produced a rule pill where forum + subtree contradict: %q proc=%v",
				card.Concept.Slug, pill.Proceeding)
		}
	}
})
// Invariant test: walk every leaf with at least one non-NULL
// proceeding_type_code in the junction and assert that the search
// result for that leaf only surfaces pills whose (concept, proc)
// is authorised by an outcome row. This is the regression gate
// that would have caught the v3 bug at PR time.
t.Run("v4 invariant: per-leaf pills are authorised by junction tuples", func(t *testing.T) {
	leafRows, err := pool.QueryxContext(ctx, `
SELECT DISTINCT ec.slug
FROM paliad.event_categories ec
JOIN paliad.event_category_concepts ecc ON ecc.event_category_id = ec.id
WHERE ec.is_active AND ec.is_leaf
AND ecc.proceeding_type_code IS NOT NULL
`)
	if err != nil {
		t.Fatalf("list leaves: %v", err)
	}
	// Deferred so the rows are released on every exit path, including
	// t.Fatalf; it is a no-op once Next has drained the result set.
	defer leafRows.Close()

	var slugs []string
	for leafRows.Next() {
		var s string
		if err := leafRows.Scan(&s); err != nil {
			t.Fatalf("scan leaf: %v", err)
		}
		slugs = append(slugs, s)
	}
	// Next returning false can mean "error mid-iteration"; without this
	// check a truncated leaf list would silently weaken the gate.
	if err := leafRows.Err(); err != nil {
		t.Fatalf("iterate leaves: %v", err)
	}
	if len(slugs) == 0 {
		t.Fatalf("expected at least one leaf with non-NULL proc; got 0")
	}

	for _, slug := range slugs {
		outcomes, err := ec.ConceptsForSlug(ctx, slug)
		if err != nil {
			t.Fatalf("ConceptsForSlug(%q): %v", slug, err)
		}
		// Tuple validator: a (concept, proc) pair is authorised when the
		// leaf has an outcome for that concept with either a NULL proc
		// (any proceeding) or exactly this proc.
		isAllowed := func(conceptID, proc string) bool {
			for _, o := range outcomes {
				if o.ConceptID != conceptID {
					continue
				}
				if o.ProceedingTypeCode == nil {
					return true
				}
				if *o.ProceedingTypeCode == proc {
					return true
				}
			}
			return false
		}
		resp, err := svc.Search(ctx, "", SearchOptions{
			EventCategorySlug: slug,
			Limit:             200,
		})
		if err != nil {
			t.Fatalf("search %q: %v", slug, err)
		}
		for _, c := range resp.Cards {
			for _, p := range c.Pills {
				if p.Kind != "rule" {
					continue
				}
				// A rule pill without a proceeding is checked as proc "",
				// which only a NULL-proc outcome can authorise.
				proc := ""
				if p.Proceeding != nil {
					proc = p.Proceeding.Code
				}
				if !isAllowed(c.Concept.ID, proc) {
					t.Errorf("leaf %s leaked unauthorised pill: concept=%s proc=%s rule=%s",
						slug, c.Concept.Slug, proc, p.RuleLocalCode)
				}
			}
		}
	}
})
}
func findCardBySlug(t *testing.T, resp *SearchResponse, slug string) ConceptCard {

View File

@@ -211,6 +211,33 @@ func (s *EventCategoryService) ConceptIDsForSlug(ctx context.Context, slug strin
return out, nil
}
// AllOutcomes returns the distinct (concept_id, proceeding_type_code)
// tuples present in paliad.event_category_concepts across the entire
// taxonomy. Drives B1 browse-all mode (no slug picked yet, show every
// concept-context the tree can reach).
//
// Distinct from "every concept_id ever mapped" because a concept can
// appear at the root view in MULTIPLE proceeding contexts that the tree
// authors intentionally surfaced — e.g. opposition under both EPA_OPP
// and DPMA_OPP. We respect those tuples even at the root so the
// result-card pill set matches the junction's design.
//
// Each tuple carries the smallest sort_order found among its junction
// rows. Rows are ordered by that sort_order, then concept_id, then
// proceeding_type_code (NULL first), so the result order is fully
// deterministic. A query failure is returned wrapped with context.
func (s *EventCategoryService) AllOutcomes(ctx context.Context) ([]ConceptOutcome, error) {
	// GROUP BY already yields exactly one row per (concept_id,
	// proceeding_type_code), so an additional DISTINCT would only add a
	// redundant de-duplication step. The trailing ORDER BY term breaks ties
	// between tuples of the same concept that share a sort_order.
	const sqlText = `
SELECT
    ecc.concept_id::text AS concept_id,
    ecc.proceeding_type_code AS proceeding_type_code,
    min(ecc.sort_order) AS sort_order
FROM paliad.event_category_concepts ecc
GROUP BY ecc.concept_id, ecc.proceeding_type_code
ORDER BY sort_order ASC, concept_id ASC, proceeding_type_code ASC NULLS FIRST
`
	var rows []ConceptOutcome
	if err := s.db.SelectContext(ctx, &rows, sqlText); err != nil {
		return nil, fmt.Errorf("event_category all outcomes: %w", err)
	}
	return rows, nil
}
// ProceedingCodesForSlug returns the distinct proceeding_type_code
// values associated with the slug's reachable concept set. Used by the
// search service to AND the user's forum filter against the leaf's