Production-down: project tree returned the "Projektverwaltung zurzeit nicht verfügbar" message because every PopulateProjectCodes call raised: ERROR service: populate project codes: bulk fetch: pq: operator does not exist: text @> text at position 13:38 (42883) Root cause: paliad.projects.path is stored as TEXT (dot-separated UUIDs), not as the ltree extension type. The rest of the codebase treats it accordingly — can_see_project uses string_to_array(path, '.')::uuid[]; export_service.go uses LIKE patterns; export_service.go even spells it out: "Subtree-aware queries via paliad.projects.path (ltree as text)." The new project-code helper (t-paliad-222 / m/paliad#50) was the only caller using ltree operators (@>, nlevel) against this text column. Postgres correctly rejected text @> text — no such operator exists. Fix: rewrite both queries (BuildProjectCode + PopulateProjectCodes) to walk ancestors via string_to_array(path, '.')::uuid[], consistent with the existing visibility predicate. Ordering uses array_position instead of nlevel. Query shape validated against the live DB. Pure-function tests (assemble + segment) untouched and passing. The gap that let this ship: no integration test exercises the actual SQL — it only tests the pure assembler. Filing a follow-up issue for a real-DB regression test.
327 lines
11 KiB
Go
327 lines
11 KiB
Go
package services
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"regexp"
|
|
"strings"
|
|
"unicode"
|
|
|
|
"github.com/google/uuid"
|
|
"github.com/jmoiron/sqlx"
|
|
"github.com/lib/pq"
|
|
"golang.org/x/text/runes"
|
|
"golang.org/x/text/transform"
|
|
"golang.org/x/text/unicode/norm"
|
|
|
|
"mgit.msbls.de/m/paliad/internal/models"
|
|
)
|
|
|
|
// Project codes — t-paliad-222 / m/paliad#50.
//
// BuildProjectCode assembles a dotted code from the ancestor chain of
// a project. Each ancestor contributes one segment derived from its
// type-specific metadata. Missing segments (NULL ancestor field,
// unfilled opponent_code, etc.) are skipped silently — there is no
// placeholder.
//
//	client     → reference if set, else slug(title), capped at 8 chars
//	litigation → opponent_code (the slug the user typed at litigation
//	             creation), empty → skipped
//	patent     → last 3 digits of patent_number (full digit-stream when
//	             shorter), empty → skipped
//	case       → uppercase tail of proceeding_types.code (jurisdiction
//	             segment dropped), empty → skipped
//	project    → "" (generic projects don't contribute a segment)
//
// Custom override: if the target row's `reference` column is non-empty,
// it wins outright — the helper returns the literal `reference` string
// without walking the ancestor chain.
//
// Example: Client EXMPL → Litigation OPNT → Patent EP3456789 → Case
// `upc.inf.cfi` → "EXMPL.OPNT.789.INF.CFI".
//
// Collision handling: codes are display-only (no uniqueness
// constraint). Two cases that derive to the same code both return the
// same string. v1 contract — users disambiguate via `reference` when it
// matters.
|
|
|
|
// projectChainRow is one row of the ancestor walk. Includes only the
|
|
// columns BuildProjectCode needs; trimmed for cheap projection.
|
|
type projectChainRow struct {
|
|
ID uuid.UUID `db:"id"`
|
|
Type string `db:"type"`
|
|
Title string `db:"title"`
|
|
Reference *string `db:"reference"`
|
|
OpponentCode *string `db:"opponent_code"`
|
|
PatentNumber *string `db:"patent_number"`
|
|
ProceedingTypeID *int `db:"proceeding_type_id"`
|
|
ProceedingCode *string `db:"proceeding_code"`
|
|
}
|
|
|
|
// BuildProjectCode walks the ancestor chain via paliad.projects.path
|
|
// and returns the assembled code. One DB round-trip per call; suitable
|
|
// for per-row use in single-project projection paths.
|
|
//
|
|
// paliad.projects.path is stored as TEXT (dot-separated UUIDs), not as
|
|
// the ltree extension type — see export_service.go comment "ltree as
|
|
// text" and can_see_project's string_to_array decomposition. Ancestor
|
|
// walks use the same string_to_array(path, '.')::uuid[] pattern as the
|
|
// canonical visibility predicate; ltree operators (@>, nlevel) would
|
|
// raise "operator does not exist: text @> text" at runtime.
|
|
//
|
|
// For list endpoints with many rows, the call still scales fine for
|
|
// firm-scale datasets (order-of-100s); if profiling later flags it as
|
|
// a hotspot, introduce a materialised view per the design doc §3.2 Q8.
|
|
func BuildProjectCode(ctx context.Context, db sqlx.QueryerContext, projectID uuid.UUID) (string, error) {
|
|
const query = `
|
|
SELECT p.id, p.type, p.title, p.reference, p.opponent_code,
|
|
p.patent_number, p.proceeding_type_id,
|
|
pt.code AS proceeding_code
|
|
FROM paliad.projects target
|
|
JOIN paliad.projects p
|
|
ON p.id = ANY(string_to_array(target.path, '.')::uuid[])
|
|
LEFT JOIN paliad.proceeding_types pt ON pt.id = p.proceeding_type_id
|
|
WHERE target.id = $1
|
|
ORDER BY array_position(string_to_array(target.path, '.')::uuid[], p.id)
|
|
`
|
|
rows := []projectChainRow{}
|
|
if err := sqlx.SelectContext(ctx, db, &rows, query, projectID); err != nil {
|
|
return "", fmt.Errorf("build project code: load chain: %w", err)
|
|
}
|
|
if len(rows) == 0 {
|
|
return "", nil
|
|
}
|
|
return assembleProjectCode(rows), nil
|
|
}
|
|
|
|
// PopulateProjectCodes assigns .Code on every project in `targets` via
|
|
// a single bulk round-trip. Used by List / ListChildren / ListAncestors
|
|
// projection paths to avoid N+1 BuildProjectCode calls.
|
|
//
|
|
// Empty slice → no-op. Rows that can't be matched (orphaned) get an
|
|
// empty code rather than an error.
|
|
func PopulateProjectCodes(ctx context.Context, db sqlx.QueryerContext, targets []models.Project) error {
|
|
if len(targets) == 0 {
|
|
return nil
|
|
}
|
|
ids := make([]string, len(targets))
|
|
for i, t := range targets {
|
|
ids[i] = t.ID.String()
|
|
}
|
|
|
|
// One query: for each target id, fetch the full ancestor chain
|
|
// joined to proceeding_types, ordered so we can group in Go.
|
|
//
|
|
// Ancestor walk uses string_to_array(path, '.')::uuid[] — same shape
|
|
// as can_see_project. paliad.projects.path is TEXT, so ltree
|
|
// operators (@>, nlevel) would fail with "operator does not exist:
|
|
// text @> text". See BuildProjectCode doc comment for context.
|
|
const query = `
|
|
WITH targets AS (
|
|
SELECT id, path
|
|
FROM paliad.projects
|
|
WHERE id = ANY($1::uuid[])
|
|
)
|
|
SELECT t.id AS target_id,
|
|
p.id, p.type, p.title, p.reference, p.opponent_code,
|
|
p.patent_number, p.proceeding_type_id,
|
|
pt.code AS proceeding_code,
|
|
array_position(string_to_array(t.path, '.')::uuid[], p.id) AS chain_level
|
|
FROM targets t
|
|
JOIN paliad.projects p
|
|
ON p.id = ANY(string_to_array(t.path, '.')::uuid[])
|
|
LEFT JOIN paliad.proceeding_types pt ON pt.id = p.proceeding_type_id
|
|
ORDER BY t.id, chain_level
|
|
`
|
|
type bulkRow struct {
|
|
TargetID uuid.UUID `db:"target_id"`
|
|
projectChainRow
|
|
ChainLevel int `db:"chain_level"`
|
|
}
|
|
|
|
rows := []bulkRow{}
|
|
if err := sqlx.SelectContext(ctx, db, &rows, query, pq.StringArray(ids)); err != nil {
|
|
return fmt.Errorf("populate project codes: bulk fetch: %w", err)
|
|
}
|
|
|
|
chains := make(map[uuid.UUID][]projectChainRow, len(targets))
|
|
for _, r := range rows {
|
|
chains[r.TargetID] = append(chains[r.TargetID], r.projectChainRow)
|
|
}
|
|
for i := range targets {
|
|
targets[i].Code = assembleProjectCode(chains[targets[i].ID])
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// assembleProjectCode is the pure code-assembly step, split out from
|
|
// the DB hop so it can be table-tested without fixtures.
|
|
//
|
|
// Custom override: non-empty `reference` on the target row (last in
|
|
// chain) wins; the function returns it verbatim without computing the
|
|
// other segments.
|
|
func assembleProjectCode(chain []projectChainRow) string {
|
|
if len(chain) == 0 {
|
|
return ""
|
|
}
|
|
target := chain[len(chain)-1]
|
|
if target.Reference != nil {
|
|
if v := strings.TrimSpace(*target.Reference); v != "" {
|
|
return v
|
|
}
|
|
}
|
|
segments := make([]string, 0, len(chain))
|
|
for _, p := range chain {
|
|
seg := projectCodeSegment(p)
|
|
if seg == "" {
|
|
continue
|
|
}
|
|
segments = append(segments, seg)
|
|
}
|
|
return strings.Join(segments, ".")
|
|
}
|
|
|
|
// projectCodeSegment returns the per-row segment string for the dotted
|
|
// project code. Empty string → row contributes no segment (skipped by
|
|
// the assembler). Pure; never touches the DB. Table-tested.
|
|
func projectCodeSegment(p projectChainRow) string {
|
|
switch p.Type {
|
|
case "client":
|
|
if p.Reference != nil {
|
|
if v := sanitizeClientShort(*p.Reference); v != "" {
|
|
return v
|
|
}
|
|
}
|
|
return sanitizeClientShort(p.Title)
|
|
case "litigation":
|
|
if p.OpponentCode != nil {
|
|
return strings.TrimSpace(*p.OpponentCode)
|
|
}
|
|
return ""
|
|
case "patent":
|
|
if p.PatentNumber != nil {
|
|
return patentLast3(*p.PatentNumber)
|
|
}
|
|
return ""
|
|
case "case":
|
|
if p.ProceedingCode != nil {
|
|
return proceedingTail(*p.ProceedingCode)
|
|
}
|
|
return ""
|
|
default:
|
|
// 'project' (generic) and any future types contribute nothing.
|
|
return ""
|
|
}
|
|
}
|
|
|
|
// sanitizeClientShort produces an 8-char uppercase slug from a client
|
|
// reference / title. Strips diacritics, replaces non-alphanumerics
|
|
// with nothing, trims, caps at 8 chars. Empty input → "".
|
|
//
|
|
// Examples (verified by table test):
|
|
// "EXMPL" → "EXMPL"
|
|
// "Example Co." → "EXAMPLEC"
|
|
// "Müller GmbH" → "MULLERGM"
|
|
// " " → ""
|
|
func sanitizeClientShort(s string) string {
|
|
s = strings.TrimSpace(s)
|
|
if s == "" {
|
|
return ""
|
|
}
|
|
// Strip diacritics: NFD-decompose, drop combining marks, NFC-recompose.
|
|
t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
|
|
stripped, _, err := transform.String(t, s)
|
|
if err != nil {
|
|
stripped = s
|
|
}
|
|
var b strings.Builder
|
|
b.Grow(len(stripped))
|
|
for _, r := range stripped {
|
|
if unicode.IsLetter(r) || unicode.IsDigit(r) {
|
|
b.WriteRune(unicode.ToUpper(r))
|
|
}
|
|
}
|
|
out := b.String()
|
|
if len(out) > 8 {
|
|
out = out[:8]
|
|
}
|
|
return out
|
|
}
|
|
|
|
// patentDigitsPattern matches one maximal run of ASCII digits. It is
// used with FindAllString to collect every digit run of a patent
// number. Compiled once at package scope so callers never pay for
// regex compilation.
var patentDigitsPattern = regexp.MustCompile(`\d+`)
|
|
|
|
// patentKindCodeSuffix matches a trailing kind code on a patent
// publication number (A1, A2, B1, B2, C, T3, etc.): one uppercase
// letter, an optional single digit, then optional whitespace up to the
// end of the string. It is removed before digit extraction so that the
// kind code's optional digit is not mistaken for part of the patent
// number proper.
//
// EP / WO conventions use A, B, C, T, U as the letter; the pattern
// itself accepts any A–Z. Only uppercase letters match, so callers
// uppercase the input first.
//
// NOTE(review): the pattern cannot tell a kind code from the end of a
// very short number — in "DE1" it matches "E1", and in "EP" it matches
// "P".
var patentKindCodeSuffix = regexp.MustCompile(`[A-Z][0-9]?\s*$`)
|
|
|
|
// patentLast3 extracts the last 3 digits of a patent number, returning
|
|
// the full digit-stream if the patent has fewer than 3 digits total.
|
|
//
|
|
// Strips a trailing kind-code suffix (A1, B2, C, T3 …) first so its
|
|
// optional digit doesn't pollute the result, then collapses all digit
|
|
// runs in the remainder to handle spaced / slashed formats. Examples:
|
|
//
|
|
// "EP1234567" → "567"
|
|
// "EP 1 234 567" → "567"
|
|
// "EP3456789A1" → "789"
|
|
// "EP1234567 B1" → "567"
|
|
// "WO2020/123456A1" → "456"
|
|
// "DE12" → "12"
|
|
// "EP" → ""
|
|
// "" → ""
|
|
func patentLast3(s string) string {
|
|
s = strings.ToUpper(strings.TrimSpace(s))
|
|
if s == "" {
|
|
return ""
|
|
}
|
|
// Strip the trailing kind code (one or two chars at end).
|
|
s = patentKindCodeSuffix.ReplaceAllString(s, "")
|
|
matches := patentDigitsPattern.FindAllString(s, -1)
|
|
if len(matches) == 0 {
|
|
return ""
|
|
}
|
|
digits := strings.Join(matches, "")
|
|
if len(digits) >= 3 {
|
|
return digits[len(digits)-3:]
|
|
}
|
|
return digits
|
|
}
|
|
|
|
// proceedingTail converts a proceeding_types.code such as
// "upc.inf.cfi" into its uppercase tail: everything after the first
// dot. The leading segment is the jurisdiction, which is implied by the
// ancestor client / patent context and therefore redundant in the
// project code.
//
// Surrounding whitespace is trimmed first. A code without any dot has
// no tail and yields "".
//
//	"upc.inf.cfi"    → "INF.CFI"
//	"upc.rev.cfi"    → "REV.CFI"
//	"upc.apl.merits" → "APL.MERITS"
//	"de.inf.lg"      → "INF.LG"
//	"de.inf.olg"     → "INF.OLG"
//	"single"         → "" (no tail after dropping the only segment)
//	""               → ""
func proceedingTail(code string) string {
	trimmed := strings.TrimSpace(code)

	firstDot := strings.IndexByte(trimmed, '.')
	if firstDot < 0 {
		// Empty input or a single segment: nothing follows the jurisdiction.
		return ""
	}
	// The dots separating later segments are unaffected by ToUpper, so
	// the tail can be uppercased in one pass.
	return strings.ToUpper(trimmed[firstDot+1:])
}
|