Files
paliad/internal/services/project_code.go
mAi 47b869dddf hotfix(projects): drop ltree operators on text path — production outage
Production-down: project tree returned the
"Projektverwaltung zurzeit nicht verfügbar" message because every
PopulateProjectCodes call raised:

  ERROR service: populate project codes: bulk fetch:
  pq: operator does not exist: text @> text at position 13:38 (42883)

Root cause: paliad.projects.path is stored as TEXT (dot-separated
UUIDs), not as the ltree extension type. The rest of the codebase
treats it accordingly — can_see_project uses
string_to_array(path, '.')::uuid[]; export_service.go uses LIKE
patterns; export_service.go even spells it out:
"Subtree-aware queries via paliad.projects.path (ltree as text)."

The new project-code helper (t-paliad-222 / m/paliad#50) was the only
caller using ltree operators (@>, nlevel) against this text column.
Postgres correctly rejected text @> text — no such operator exists.

Fix: rewrite both queries (BuildProjectCode + PopulateProjectCodes) to
walk ancestors via string_to_array(path, '.')::uuid[], consistent with
the existing visibility predicate. Ordering uses array_position
instead of nlevel. Query shape validated against the live DB.

Pure-function tests (assemble + segment) untouched and passing. The
gap that let this ship: no integration test exercises the actual SQL
— it only tests the pure assembler. Filing a follow-up issue for a
real-DB regression test.
2026-05-21 14:52:50 +02:00

327 lines
11 KiB
Go

package services
import (
"context"
"fmt"
"regexp"
"strings"
"unicode"
"github.com/google/uuid"
"github.com/jmoiron/sqlx"
"github.com/lib/pq"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
"mgit.msbls.de/m/paliad/internal/models"
)
// Project codes — t-paliad-222 / m/paliad#50.
//
// BuildProjectCode assembles a dotted code from the ancestor chain of
// a project. Each ancestor contributes one segment derived from its
// type-specific metadata. Missing segments (NULL ancestor field,
// unfilled opponent_code, etc.) are skipped silently — there is no
// placeholder.
//
// client → reference if set, else slug(title), capped at 8 chars
// litigation → opponent_code (the slug the user typed at litigation
// creation), empty → skipped
// patent → last 3 digits of patent_number (full digit-stream when
// shorter), empty → skipped
// case → uppercase tail of proceeding_types.code (jurisdiction
// segment dropped), empty → skipped
// project → "" (generic projects don't contribute a segment)
//
// Custom override: if the target row's `reference` column is non-empty,
// it wins outright — the helper returns the literal `reference` string
// without walking the ancestor chain.
//
// Example: Client EXMPL → Litigation OPNT → Patent EP3456789 → Case
// `upc.inf.cfi` → "EXMPL.OPNT.789.INF.CFI".
//
// Collision handling: codes are display-only (no uniqueness
// constraint). Two cases that derive to the same code both return the
// same string. v1 contract — users disambiguate via `reference` when it
// matters.
// projectChainRow is one row of the ancestor walk. Includes only the
// columns BuildProjectCode needs; trimmed for cheap projection.
type projectChainRow struct {
ID uuid.UUID `db:"id"`
Type string `db:"type"`
Title string `db:"title"`
Reference *string `db:"reference"`
OpponentCode *string `db:"opponent_code"`
PatentNumber *string `db:"patent_number"`
ProceedingTypeID *int `db:"proceeding_type_id"`
ProceedingCode *string `db:"proceeding_code"`
}
// BuildProjectCode walks the ancestor chain via paliad.projects.path
// and returns the assembled code. One DB round-trip per call; suitable
// for per-row use in single-project projection paths.
//
// paliad.projects.path is stored as TEXT (dot-separated UUIDs), not as
// the ltree extension type — see export_service.go comment "ltree as
// text" and can_see_project's string_to_array decomposition. Ancestor
// walks use the same string_to_array(path, '.')::uuid[] pattern as the
// canonical visibility predicate; ltree operators (@>, nlevel) would
// raise "operator does not exist: text @> text" at runtime.
//
// For list endpoints with many rows, the call still scales fine for
// firm-scale datasets (order-of-100s); if profiling later flags it as
// a hotspot, introduce a materialised view per the design doc §3.2 Q8.
func BuildProjectCode(ctx context.Context, db sqlx.QueryerContext, projectID uuid.UUID) (string, error) {
const query = `
SELECT p.id, p.type, p.title, p.reference, p.opponent_code,
p.patent_number, p.proceeding_type_id,
pt.code AS proceeding_code
FROM paliad.projects target
JOIN paliad.projects p
ON p.id = ANY(string_to_array(target.path, '.')::uuid[])
LEFT JOIN paliad.proceeding_types pt ON pt.id = p.proceeding_type_id
WHERE target.id = $1
ORDER BY array_position(string_to_array(target.path, '.')::uuid[], p.id)
`
rows := []projectChainRow{}
if err := sqlx.SelectContext(ctx, db, &rows, query, projectID); err != nil {
return "", fmt.Errorf("build project code: load chain: %w", err)
}
if len(rows) == 0 {
return "", nil
}
return assembleProjectCode(rows), nil
}
// PopulateProjectCodes assigns .Code on every project in `targets` via
// a single bulk round-trip. Used by List / ListChildren / ListAncestors
// projection paths to avoid N+1 BuildProjectCode calls.
//
// Empty slice → no-op. Rows that can't be matched (orphaned) get an
// empty code rather than an error.
func PopulateProjectCodes(ctx context.Context, db sqlx.QueryerContext, targets []models.Project) error {
if len(targets) == 0 {
return nil
}
ids := make([]string, len(targets))
for i, t := range targets {
ids[i] = t.ID.String()
}
// One query: for each target id, fetch the full ancestor chain
// joined to proceeding_types, ordered so we can group in Go.
//
// Ancestor walk uses string_to_array(path, '.')::uuid[] — same shape
// as can_see_project. paliad.projects.path is TEXT, so ltree
// operators (@>, nlevel) would fail with "operator does not exist:
// text @> text". See BuildProjectCode doc comment for context.
const query = `
WITH targets AS (
SELECT id, path
FROM paliad.projects
WHERE id = ANY($1::uuid[])
)
SELECT t.id AS target_id,
p.id, p.type, p.title, p.reference, p.opponent_code,
p.patent_number, p.proceeding_type_id,
pt.code AS proceeding_code,
array_position(string_to_array(t.path, '.')::uuid[], p.id) AS chain_level
FROM targets t
JOIN paliad.projects p
ON p.id = ANY(string_to_array(t.path, '.')::uuid[])
LEFT JOIN paliad.proceeding_types pt ON pt.id = p.proceeding_type_id
ORDER BY t.id, chain_level
`
type bulkRow struct {
TargetID uuid.UUID `db:"target_id"`
projectChainRow
ChainLevel int `db:"chain_level"`
}
rows := []bulkRow{}
if err := sqlx.SelectContext(ctx, db, &rows, query, pq.StringArray(ids)); err != nil {
return fmt.Errorf("populate project codes: bulk fetch: %w", err)
}
chains := make(map[uuid.UUID][]projectChainRow, len(targets))
for _, r := range rows {
chains[r.TargetID] = append(chains[r.TargetID], r.projectChainRow)
}
for i := range targets {
targets[i].Code = assembleProjectCode(chains[targets[i].ID])
}
return nil
}
// assembleProjectCode is the pure code-assembly step, split out from
// the DB hop so it can be table-tested without fixtures.
//
// Custom override: non-empty `reference` on the target row (last in
// chain) wins; the function returns it verbatim without computing the
// other segments.
func assembleProjectCode(chain []projectChainRow) string {
if len(chain) == 0 {
return ""
}
target := chain[len(chain)-1]
if target.Reference != nil {
if v := strings.TrimSpace(*target.Reference); v != "" {
return v
}
}
segments := make([]string, 0, len(chain))
for _, p := range chain {
seg := projectCodeSegment(p)
if seg == "" {
continue
}
segments = append(segments, seg)
}
return strings.Join(segments, ".")
}
// projectCodeSegment returns the per-row segment string for the dotted
// project code. Empty string → row contributes no segment (skipped by
// the assembler). Pure; never touches the DB. Table-tested.
func projectCodeSegment(p projectChainRow) string {
switch p.Type {
case "client":
if p.Reference != nil {
if v := sanitizeClientShort(*p.Reference); v != "" {
return v
}
}
return sanitizeClientShort(p.Title)
case "litigation":
if p.OpponentCode != nil {
return strings.TrimSpace(*p.OpponentCode)
}
return ""
case "patent":
if p.PatentNumber != nil {
return patentLast3(*p.PatentNumber)
}
return ""
case "case":
if p.ProceedingCode != nil {
return proceedingTail(*p.ProceedingCode)
}
return ""
default:
// 'project' (generic) and any future types contribute nothing.
return ""
}
}
// sanitizeClientShort produces an 8-char uppercase slug from a client
// reference / title. Strips diacritics, replaces non-alphanumerics
// with nothing, trims, caps at 8 chars. Empty input → "".
//
// Examples (verified by table test):
// "EXMPL" → "EXMPL"
// "Example Co." → "EXAMPLEC"
// "Müller GmbH" → "MULLERGM"
// " " → ""
func sanitizeClientShort(s string) string {
s = strings.TrimSpace(s)
if s == "" {
return ""
}
// Strip diacritics: NFD-decompose, drop combining marks, NFC-recompose.
t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
stripped, _, err := transform.String(t, s)
if err != nil {
stripped = s
}
var b strings.Builder
b.Grow(len(stripped))
for _, r := range stripped {
if unicode.IsLetter(r) || unicode.IsDigit(r) {
b.WriteRune(unicode.ToUpper(r))
}
}
out := b.String()
if len(out) > 8 {
out = out[:8]
}
return out
}
// patentDigitsPattern matches a run of digits inside a patent number.
// Pre-compiled once to avoid per-call regex compilation cost.
var patentDigitsPattern = regexp.MustCompile(`\d+`)
// patentKindCodeSuffix matches the trailing kind code on a patent
// publication number (A1, A2, B1, B2, C, T3, etc.). Stripped before
// digit extraction so the kind-code's optional digit doesn't sneak
// into the patent number proper.
//
// EP / WO conventions allow A, B, C, T, U as the letter; the digit is
// optional. The regex anchors at end-of-string and tolerates trailing
// whitespace.
var patentKindCodeSuffix = regexp.MustCompile(`[A-Z][0-9]?\s*$`)
// patentLast3 extracts the last 3 digits of a patent number, returning
// the full digit-stream if the patent has fewer than 3 digits total.
//
// Strips a trailing kind-code suffix (A1, B2, C, T3 …) first so its
// optional digit doesn't pollute the result, then collapses all digit
// runs in the remainder to handle spaced / slashed formats. Examples:
//
// "EP1234567" → "567"
// "EP 1 234 567" → "567"
// "EP3456789A1" → "789"
// "EP1234567 B1" → "567"
// "WO2020/123456A1" → "456"
// "DE12" → "12"
// "EP" → ""
// "" → ""
func patentLast3(s string) string {
s = strings.ToUpper(strings.TrimSpace(s))
if s == "" {
return ""
}
// Strip the trailing kind code (one or two chars at end).
s = patentKindCodeSuffix.ReplaceAllString(s, "")
matches := patentDigitsPattern.FindAllString(s, -1)
if len(matches) == 0 {
return ""
}
digits := strings.Join(matches, "")
if len(digits) >= 3 {
return digits[len(digits)-3:]
}
return digits
}
// proceedingTail takes a proceeding_types.code (e.g. "upc.inf.cfi") and
// returns the uppercase tail with the leading jurisdiction segment
// dropped. The jurisdiction is implied by the ancestor client / patent
// context, so it's redundant in the code.
//
// "upc.inf.cfi" → "INF.CFI"
// "upc.rev.cfi" → "REV.CFI"
// "upc.apl.merits" → "APL.MERITS"
// "de.inf.lg" → "INF.LG"
// "de.inf.olg" → "INF.OLG"
// "single" → "" (no tail after dropping the only segment)
// "" → ""
func proceedingTail(code string) string {
code = strings.TrimSpace(code)
if code == "" {
return ""
}
parts := strings.Split(code, ".")
if len(parts) < 2 {
return ""
}
tail := parts[1:]
out := make([]string, len(tail))
for i, p := range tail {
out[i] = strings.ToUpper(p)
}
return strings.Join(out, ".")
}