package services import ( "context" "fmt" "regexp" "strings" "unicode" "github.com/google/uuid" "github.com/jmoiron/sqlx" "github.com/lib/pq" "golang.org/x/text/runes" "golang.org/x/text/transform" "golang.org/x/text/unicode/norm" "mgit.msbls.de/m/paliad/internal/models" ) // Project codes — t-paliad-222 / m/paliad#50. // // BuildProjectCode assembles a dotted code from the ancestor chain of // a project. Each ancestor contributes one segment derived from its // type-specific metadata. Missing segments (NULL ancestor field, // unfilled opponent_code, etc.) are skipped silently — there is no // placeholder. // // client → reference if set, else slug(title), capped at 8 chars // litigation → opponent_code (the slug the user typed at litigation // creation), empty → skipped // patent → last 3 digits of patent_number (full digit-stream when // shorter), empty → skipped // case → uppercase tail of proceeding_types.code (jurisdiction // segment dropped), empty → skipped // project → "" (generic projects don't contribute a segment) // // Custom override: if the target row's `reference` column is non-empty, // it wins outright — the helper returns the literal `reference` string // without walking the ancestor chain. // // Example: Client EXMPL → Litigation OPNT → Patent EP3456789 → Case // `upc.inf.cfi` → "EXMPL.OPNT.789.INF.CFI". // // Collision handling: codes are display-only (no uniqueness // constraint). Two cases that derive to the same code both return the // same string. v1 contract — users disambiguate via `reference` when it // matters. // projectChainRow is one row of the ancestor walk. Includes only the // columns BuildProjectCode needs; trimmed for cheap projection. 
type projectChainRow struct {
	// ID is the project row's primary key (projects.id).
	ID uuid.UUID `db:"id"`

	// Type selects which segment rule applies in projectCodeSegment:
	// "client", "litigation", "patent" or "case". Any other value
	// (including generic "project") contributes no segment.
	Type string `db:"type"`

	// Title is the fallback source for the client segment when the
	// client row has no usable reference.
	Title string `db:"title"`

	// Reference is nullable. On the target row (last in the chain) a
	// non-blank value overrides the whole code; on a client row it is
	// the preferred source for the client segment.
	Reference *string `db:"reference"`

	// OpponentCode is nullable; source of the litigation segment.
	OpponentCode *string `db:"opponent_code"`

	// PatentNumber is nullable; source of the patent segment.
	PatentNumber *string `db:"patent_number"`

	// ProceedingTypeID is nullable; the queries LEFT JOIN
	// paliad.proceeding_types on it to obtain ProceedingCode.
	ProceedingTypeID *int `db:"proceeding_type_id"`

	// ProceedingCode is proceeding_types.code from that LEFT JOIN; nil
	// when the row has no matching proceeding type. Source of the case
	// segment.
	ProceedingCode *string `db:"proceeding_code"`
}

// BuildProjectCode walks the ancestor chain via paliad.projects.path
// and returns the assembled code. One DB round-trip per call; suitable
// for per-row use in single-project projection paths.
//
// paliad.projects.path is stored as TEXT (dot-separated UUIDs), not as
// the ltree extension type — see export_service.go comment "ltree as
// text" and can_see_project's string_to_array decomposition. Ancestor
// walks use the same string_to_array(path, '.')::uuid[] pattern as the
// canonical visibility predicate; ltree operators (@>, nlevel) would
// raise "operator does not exist: text @> text" at runtime.
//
// For list endpoints with many rows, the call still scales fine for
// firm-scale datasets (order-of-100s); if profiling later flags it as
// a hotspot, introduce a materialised view per the design doc §3.2 Q8.
func BuildProjectCode(ctx context.Context, db sqlx.QueryerContext, projectID uuid.UUID) (string, error) { const query = ` SELECT p.id, p.type, p.title, p.reference, p.opponent_code, p.patent_number, p.proceeding_type_id, pt.code AS proceeding_code FROM paliad.projects target JOIN paliad.projects p ON p.id = ANY(string_to_array(target.path, '.')::uuid[]) LEFT JOIN paliad.proceeding_types pt ON pt.id = p.proceeding_type_id WHERE target.id = $1 ORDER BY array_position(string_to_array(target.path, '.')::uuid[], p.id) ` rows := []projectChainRow{} if err := sqlx.SelectContext(ctx, db, &rows, query, projectID); err != nil { return "", fmt.Errorf("build project code: load chain: %w", err) } if len(rows) == 0 { return "", nil } return assembleProjectCode(rows), nil } // PopulateProjectCodes assigns .Code on every project in `targets` via // a single bulk round-trip. Used by List / ListChildren / ListAncestors // projection paths to avoid N+1 BuildProjectCode calls. // // Empty slice → no-op. Rows that can't be matched (orphaned) get an // empty code rather than an error. func PopulateProjectCodes(ctx context.Context, db sqlx.QueryerContext, targets []models.Project) error { if len(targets) == 0 { return nil } ids := make([]string, len(targets)) for i, t := range targets { ids[i] = t.ID.String() } // One query: for each target id, fetch the full ancestor chain // joined to proceeding_types, ordered so we can group in Go. // // Ancestor walk uses string_to_array(path, '.')::uuid[] — same shape // as can_see_project. paliad.projects.path is TEXT, so ltree // operators (@>, nlevel) would fail with "operator does not exist: // text @> text". See BuildProjectCode doc comment for context. 
const query = ` WITH targets AS ( SELECT id, path FROM paliad.projects WHERE id = ANY($1::uuid[]) ) SELECT t.id AS target_id, p.id, p.type, p.title, p.reference, p.opponent_code, p.patent_number, p.proceeding_type_id, pt.code AS proceeding_code, array_position(string_to_array(t.path, '.')::uuid[], p.id) AS chain_level FROM targets t JOIN paliad.projects p ON p.id = ANY(string_to_array(t.path, '.')::uuid[]) LEFT JOIN paliad.proceeding_types pt ON pt.id = p.proceeding_type_id ORDER BY t.id, chain_level ` type bulkRow struct { TargetID uuid.UUID `db:"target_id"` projectChainRow ChainLevel int `db:"chain_level"` } rows := []bulkRow{} if err := sqlx.SelectContext(ctx, db, &rows, query, pq.StringArray(ids)); err != nil { return fmt.Errorf("populate project codes: bulk fetch: %w", err) } chains := make(map[uuid.UUID][]projectChainRow, len(targets)) for _, r := range rows { chains[r.TargetID] = append(chains[r.TargetID], r.projectChainRow) } for i := range targets { targets[i].Code = assembleProjectCode(chains[targets[i].ID]) } return nil } // assembleProjectCode is the pure code-assembly step, split out from // the DB hop so it can be table-tested without fixtures. // // Custom override: non-empty `reference` on the target row (last in // chain) wins; the function returns it verbatim without computing the // other segments. func assembleProjectCode(chain []projectChainRow) string { if len(chain) == 0 { return "" } target := chain[len(chain)-1] if target.Reference != nil { if v := strings.TrimSpace(*target.Reference); v != "" { return v } } segments := make([]string, 0, len(chain)) for _, p := range chain { seg := projectCodeSegment(p) if seg == "" { continue } segments = append(segments, seg) } return strings.Join(segments, ".") } // projectCodeSegment returns the per-row segment string for the dotted // project code. Empty string → row contributes no segment (skipped by // the assembler). Pure; never touches the DB. Table-tested. 
func projectCodeSegment(p projectChainRow) string { switch p.Type { case "client": if p.Reference != nil { if v := sanitizeClientShort(*p.Reference); v != "" { return v } } return sanitizeClientShort(p.Title) case "litigation": if p.OpponentCode != nil { return strings.TrimSpace(*p.OpponentCode) } return "" case "patent": if p.PatentNumber != nil { return patentLast3(*p.PatentNumber) } return "" case "case": if p.ProceedingCode != nil { return proceedingTail(*p.ProceedingCode) } return "" default: // 'project' (generic) and any future types contribute nothing. return "" } } // sanitizeClientShort produces an 8-char uppercase slug from a client // reference / title. Strips diacritics, replaces non-alphanumerics // with nothing, trims, caps at 8 chars. Empty input → "". // // Examples (verified by table test): // "EXMPL" → "EXMPL" // "Example Co." → "EXAMPLEC" // "Müller GmbH" → "MULLERGM" // " " → "" func sanitizeClientShort(s string) string { s = strings.TrimSpace(s) if s == "" { return "" } // Strip diacritics: NFD-decompose, drop combining marks, NFC-recompose. t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC) stripped, _, err := transform.String(t, s) if err != nil { stripped = s } var b strings.Builder b.Grow(len(stripped)) for _, r := range stripped { if unicode.IsLetter(r) || unicode.IsDigit(r) { b.WriteRune(unicode.ToUpper(r)) } } out := b.String() if len(out) > 8 { out = out[:8] } return out } // patentDigitsPattern matches a run of digits inside a patent number. // Pre-compiled once to avoid per-call regex compilation cost. var patentDigitsPattern = regexp.MustCompile(`\d+`) // patentKindCodeSuffix matches the trailing kind code on a patent // publication number (A1, A2, B1, B2, C, T3, etc.). Stripped before // digit extraction so the kind-code's optional digit doesn't sneak // into the patent number proper. // // EP / WO conventions allow A, B, C, T, U as the letter; the digit is // optional. 
The regex anchors at end-of-string and tolerates trailing // whitespace. var patentKindCodeSuffix = regexp.MustCompile(`[A-Z][0-9]?\s*$`) // patentLast3 extracts the last 3 digits of a patent number, returning // the full digit-stream if the patent has fewer than 3 digits total. // // Strips a trailing kind-code suffix (A1, B2, C, T3 …) first so its // optional digit doesn't pollute the result, then collapses all digit // runs in the remainder to handle spaced / slashed formats. Examples: // // "EP1234567" → "567" // "EP 1 234 567" → "567" // "EP3456789A1" → "789" // "EP1234567 B1" → "567" // "WO2020/123456A1" → "456" // "DE12" → "12" // "EP" → "" // "" → "" func patentLast3(s string) string { s = strings.ToUpper(strings.TrimSpace(s)) if s == "" { return "" } // Strip the trailing kind code (one or two chars at end). s = patentKindCodeSuffix.ReplaceAllString(s, "") matches := patentDigitsPattern.FindAllString(s, -1) if len(matches) == 0 { return "" } digits := strings.Join(matches, "") if len(digits) >= 3 { return digits[len(digits)-3:] } return digits } // proceedingTail takes a proceeding_types.code (e.g. "upc.inf.cfi") and // returns the uppercase tail with the leading jurisdiction segment // dropped. The jurisdiction is implied by the ancestor client / patent // context, so it's redundant in the code. // // "upc.inf.cfi" → "INF.CFI" // "upc.rev.cfi" → "REV.CFI" // "upc.apl.merits" → "APL.MERITS" // "de.inf.lg" → "INF.LG" // "de.inf.olg" → "INF.OLG" // "single" → "" (no tail after dropping the only segment) // "" → "" func proceedingTail(code string) string { code = strings.TrimSpace(code) if code == "" { return "" } parts := strings.Split(code, ".") if len(parts) < 2 { return "" } tail := parts[1:] out := make([]string, len(tail)) for i, p := range tail { out[i] = strings.ToUpper(p) } return strings.Join(out, ".") }