Merge branch 'mai/artemis/issue-10-anti-ai-lint': Anti-AI-Lint im Build (#10)
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
FROM alpine:3.21 AS builder
|
||||
|
||||
RUN apk add --no-cache bash yq coreutils findutils
|
||||
RUN apk add --no-cache bash yq coreutils findutils python3
|
||||
|
||||
WORKDIR /src
|
||||
COPY . .
|
||||
|
||||
26
README.md
26
README.md
@@ -31,10 +31,32 @@ build/ # Generated output (gitignored)
|
||||
### Build
|
||||
|
||||
```bash
|
||||
./build.sh
|
||||
./build.sh # build + anti-AI text lint
|
||||
./build.sh --skip-lint # build only (emergencies)
|
||||
```
|
||||
|
||||
Requires `yq` for YAML parsing. Outputs to `build/` directory.
|
||||
Requires `yq` for YAML parsing and `python3` for the lint step. Outputs to `build/`.
|
||||
|
||||
### Anti-AI text lint
|
||||
|
||||
Every build runs `tools/anti-ai-lint.py` against `build/<domain>/index.html`,
|
||||
flagging text fingerprints typical of LLM-generated content (vocab and structure
|
||||
patterns from `tools/anti-ai-blacklist.yaml`). Severity `warn` prints a message;
|
||||
`fail` aborts the build.
|
||||
|
||||
Whitelist a hit:
|
||||
|
||||
- HTML comment in the affected page:
|
||||
`<!-- anti-ai-allow: revolutionär, em-dash-3-bullet -->`
|
||||
- Per-site override in `site.yaml`:
|
||||
```yaml
|
||||
anti_ai_allow:
|
||||
- revolutionär
|
||||
- em-dash-3-bullet
|
||||
```
|
||||
|
||||
The blacklist source is `docs/geo-seo-guideline.md` §3.6. Test the linter with
|
||||
`tools/test-anti-ai-lint.sh`.
|
||||
|
||||
### Deploy
|
||||
|
||||
|
||||
25
build.sh
25
build.sh
@@ -5,6 +5,19 @@ set -euo pipefail
|
||||
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
|
||||
BUILD_DIR="$SCRIPT_DIR/build"
|
||||
|
||||
# Command-line handling. --skip-lint sets skip_lint=1, -h/--help prints the
# usage text and exits 0, anything else is rejected with exit status 2.
skip_lint=0
for arg in "$@"; do
  case "$arg" in
    --skip-lint)
      skip_lint=1
      ;;
    -h | --help)
      printf '%s\n' "Usage: $0 [--skip-lint]"
      printf '%s\n' " --skip-lint Skip the anti-AI text lint step (emergencies only)."
      exit 0
      ;;
    *)
      printf '%s\n' "Unknown argument: $arg" >&2
      exit 2
      ;;
  esac
done
|
||||
|
||||
echo "=== Onepager Build ==="
|
||||
|
||||
# Clean build directory
|
||||
@@ -50,6 +63,14 @@ echo "[3/3] Copying shared assets..."
|
||||
cp -r "$SCRIPT_DIR/shared" "$BUILD_DIR/shared"
|
||||
echo " -> shared/ copied"
|
||||
|
||||
# 4. Report
|
||||
echo "[4/4] Build complete"
|
||||
# 4. Anti-AI text lint
# Runs tools/anti-ai-lint.py against the build directory unless --skip-lint
# was given (skip_lint=1). When python3 is not installed the step is skipped
# rather than failing; that notice is a diagnostic, so it is written to stderr
# and stays visible even when stdout is redirected or captured.
if [ "$skip_lint" -eq 1 ]; then
  echo "[4/4] Anti-AI lint skipped (--skip-lint)"
elif ! command -v python3 >/dev/null 2>&1; then
  echo "[4/4] python3 not found — skipping anti-AI lint" >&2
else
  echo "[4/4] Anti-AI text lint..."
  python3 "$SCRIPT_DIR/tools/anti-ai-lint.py" "$BUILD_DIR"
fi
|
||||
|
||||
echo "=== Build complete: $count sites ==="
|
||||
|
||||
@@ -248,7 +248,7 @@ Mehrere dieser Wörter im selben Absatz sind das stärkste Tell.
|
||||
|
||||
**Praktische Umsetzung:**
|
||||
|
||||
- Lint-Skript im Build (`scripts/anti-ai-lint.sh`) das Vokabel-Blacklist über alle `index.html` und `*.md` läuft, mit Schwellenwert (z. B. mehr als 3 Marker pro 500 Wörter → Warnung).
|
||||
- Lint-Skript im Build: für onepager implementiert in `tools/anti-ai-lint.py` mit Vokabel-Blacklist in `tools/anti-ai-blacklist.yaml`. Läuft am Ende von `build.sh` über alle `build/<domain>/index.html`. Severity `warn` (Build geht durch) vs. `fail` (Build bricht ab). Whitelist via `<!-- anti-ai-allow: term -->`-Kommentar oder `anti_ai_allow:`-Liste in `site.yaml`. Notfall-Override: `build.sh --skip-lint`.
|
||||
- Bei AI-generierten Drafts: bewusst gegen die Blacklist redigieren.
|
||||
- Vor Veröffentlichung laut lesen. Wenn es klingt wie ein Pressemitteilungs-Bot, ist es einer.
|
||||
|
||||
|
||||
97
tools/anti-ai-blacklist.yaml
Normal file
97
tools/anti-ai-blacklist.yaml
Normal file
@@ -0,0 +1,97 @@
|
||||
# Anti-AI lint rules: textual fingerprints typical of LLM-generated content.
|
||||
#
|
||||
# Severity:
|
||||
# warn — build proceeds, message printed
|
||||
# fail — build aborts (exit 1) unless build.sh --skip-lint
|
||||
#
|
||||
# Whitelisting matches:
|
||||
# In an HTML file: <!-- anti-ai-allow: term -->
|
||||
# <!-- anti-ai-allow: term1, term2 -->
|
||||
# Per site (site.yaml): anti_ai_allow:
|
||||
# - leverage
|
||||
# - em-dash-3-bullet
|
||||
#
|
||||
# Vocab matches are case-insensitive substring matches against the visible
|
||||
# text of the rendered HTML (script/style/comments stripped). Pattern matches
|
||||
# are regex (Python re), case-insensitive by default, against the same.
|
||||
#
|
||||
# Source: docs/geo-seo-guideline.md §3.6 (Wikipedia AI-content signals).
|
||||
|
||||
vocab:
|
||||
de:
|
||||
warn:
|
||||
- "nahtlos"
|
||||
- "robust"
|
||||
- "umfassend"
|
||||
- "ganzheitlich"
|
||||
- "fungiert als"
|
||||
- "dient als Brücke"
|
||||
- "Symbiose"
|
||||
- "im Bereich der"
|
||||
- "in der heutigen schnelllebigen"
|
||||
- "ein Meilenstein"
|
||||
- "ein Beweis für"
|
||||
- "hat Spuren hinterlassen"
|
||||
- "Es ist wichtig zu erwähnen"
|
||||
- "Es ist wichtig zu beachten"
|
||||
- "bahnbrechend"
|
||||
- "revolutionär"
|
||||
fail:
|
||||
- "in der sich entwickelnden Landschaft"
|
||||
- "Herausforderungen und Zukunftsaussichten"
|
||||
- "Herausforderungen und Perspektiven"
|
||||
|
||||
en:
|
||||
warn:
|
||||
- "delve"
|
||||
- "tapestry"
|
||||
- "testament"
|
||||
- "intricate"
|
||||
- "garnered"
|
||||
- "bolstered"
|
||||
- "enduring"
|
||||
- "robust"
|
||||
- "comprehensive"
|
||||
- "meticulous"
|
||||
- "interplay"
|
||||
- "pivotal"
|
||||
- "underscore"
|
||||
- "moreover"
|
||||
- "furthermore"
|
||||
- "additionally"
|
||||
- "crucial"
|
||||
- "showcasing"
|
||||
- "highlighting"
|
||||
- "leverage"
|
||||
- "streamline"
|
||||
- "holistic"
|
||||
- "seamless"
|
||||
- "unleash"
|
||||
- "ecosystem"
|
||||
- "in the realm of"
|
||||
- "dive into"
|
||||
- "It's important to note that"
|
||||
- "It is important to note that"
|
||||
- "In this article, we'll"
|
||||
fail:
|
||||
- "in today's evolving landscape"
|
||||
- "in the ever-evolving landscape"
|
||||
- "Challenges and Future Prospects"
|
||||
|
||||
patterns:
|
||||
- name: em-dash-3-bullet
|
||||
description: |
|
||||
Three "Word: text — Word: text — Word: …" segments in one block.
|
||||
Classic AI bullet pattern.
|
||||
regex: '(\w[\w\s]{0,30}:\s+[^—\n]{2,80}—\s*){2,}\w[\w\s]{0,30}:'
|
||||
severity: warn
|
||||
|
||||
- name: not-only-but-also
|
||||
description: '"not only X, but also Y" / "nicht nur X, sondern auch Y" filler.'
|
||||
regex: '\b(?:not only|nicht nur)\b[^.,;\n]{1,80}\b(?:but also|sondern auch)\b'
|
||||
severity: warn
|
||||
|
||||
- name: as-an-ai
  description: Leftover AI self-disclosure.
  # The German article is optional and, when present, must be followed by
  # whitespace. This matches "als KI", "als eine KI" and "als ein Sprachmodell";
  # the previous form (eine?|eine\s+) could not match "als ein Sprachmodell".
  regex: '\b(?:as an? (?:AI|language model)|als (?:eine?\s+)?(?:KI|Sprachmodell))\b'
  severity: fail
|
||||
294
tools/anti-ai-lint.py
Executable file
294
tools/anti-ai-lint.py
Executable file
@@ -0,0 +1,294 @@
|
||||
#!/usr/bin/env python3
|
||||
"""anti-ai-lint — flag AI-text fingerprints in built sites.
|
||||
|
||||
Reads tools/anti-ai-blacklist.yaml, walks build/<domain>/index.html, prints
|
||||
findings. Exits 1 if any finding has severity=fail, else 0.
|
||||
|
||||
Usage:
|
||||
tools/anti-ai-lint.py [--blacklist PATH] [--sources sites/] [--quiet]
|
||||
[--json] [BUILD_DIR]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from html.parser import HTMLParser
|
||||
|
||||
|
||||
def _ansi(code: str) -> str:
|
||||
return code if sys.stdout.isatty() else ""
|
||||
|
||||
|
||||
RED = _ansi("\033[31m")
|
||||
YELLOW = _ansi("\033[33m")
|
||||
GREEN = _ansi("\033[32m")
|
||||
DIM = _ansi("\033[2m")
|
||||
BOLD = _ansi("\033[1m")
|
||||
RESET = _ansi("\033[0m")
|
||||
|
||||
|
||||
class TextExtractor(HTMLParser):
    """Extract visible text and in-page allow directives from an HTML document.

    After the document has been fed:

    * ``fragments`` holds ``(line, text)`` pairs, one per non-blank text node
      that is not inside a ``SKIP_TAGS`` element.
    * ``allows`` holds the names listed in ``<!-- anti-ai-allow: ... -->``
      comments, each stored both verbatim and lower-cased.
    * ``html_lang`` is the primary language subtag of the first
      ``<html lang="...">`` start tag (lower-cased), or ``None``.
    """

    SKIP_TAGS = {"script", "style", "noscript", "template"}

    def __init__(self) -> None:
        super().__init__(convert_charrefs=True)
        self.skip_depth = 0  # > 0 while inside at least one SKIP_TAGS element
        self.fragments: list[tuple[int, str]] = []
        self.allows: set[str] = set()
        self.html_lang: str | None = None

    def handle_starttag(self, tag: str, attrs) -> None:
        # Only the first non-empty lang attribute on <html> is recorded.
        if tag == "html" and self.html_lang is None:
            for k, v in attrs:
                if k == "lang" and v:
                    self.html_lang = v.lower().split("-")[0]
                    break
        if tag in self.SKIP_TAGS:
            self.skip_depth += 1

    def handle_startendtag(self, tag: str, attrs) -> None:
        # Self-closing — never enters skip depth, no data either.
        pass

    def handle_endtag(self, tag: str) -> None:
        # The depth never goes below zero, so a stray closing tag is harmless.
        if tag in self.SKIP_TAGS and self.skip_depth > 0:
            self.skip_depth -= 1

    def handle_data(self, data: str) -> None:
        if self.skip_depth == 0 and data.strip():
            line, _ = self.getpos()
            self.fragments.append((line, data))

    def handle_comment(self, data: str) -> None:
        """Record the names listed in an ``anti-ai-allow:`` comment.

        Entries are separated by commas. Each entry is stored as a whole
        phrase (whitespace runs collapsed to one space), so multi-word
        blacklist terms such as "in the realm of" can be allowed. The single
        whitespace-separated words of every entry are stored as well, which
        keeps space-separated directives working as before.
        """
        m = re.search(r"anti-ai-allow\s*:\s*(.+)", data, re.IGNORECASE)
        if not m:
            return
        for entry in m.group(1).split(","):
            words = entry.split()
            if not words:
                continue
            for token in [" ".join(words), *words]:
                self.allows.add(token)
                self.allows.add(token.lower())
|
||||
|
||||
|
||||
def load_blacklist(path: str) -> dict:
    """Convert the YAML blacklist to JSON via ``yq`` and parse it with ``json``.

    Args:
        path: Path of the YAML rule file.

    Returns:
        The parsed rules as a dict. A file with no content (``yq`` prints
        ``null``) yields an empty dict, i.e. no rules.

    Exits:
        With status 2 and a message on stderr when ``yq`` is missing, fails,
        prints something that is not valid JSON, or the top level of the file
        is not a mapping. Status 2 keeps tool errors distinct from status 1,
        which signals fail-level findings.
    """

    def abort(message: str) -> None:
        print(message, file=sys.stderr)
        sys.exit(2)

    try:
        out = subprocess.check_output(
            ["yq", "-o=json", path],
            stderr=subprocess.PIPE,
            text=True,
        )
    except FileNotFoundError:
        abort("ERROR: yq not found in PATH (required to parse YAML blacklist)")
    except subprocess.CalledProcessError as e:
        abort(f"ERROR: yq failed to parse {path}: {(e.stderr or '').strip()}")

    try:
        data = json.loads(out)
    except json.JSONDecodeError as e:
        abort(f"ERROR: yq produced invalid JSON for {path}: {e}")

    if data is None:
        # Empty or comment-only rule file: nothing to lint against.
        return {}
    if not isinstance(data, dict):
        abort(f"ERROR: blacklist {path} must be a mapping at top level")
    return data
|
||||
|
||||
|
||||
def site_allow_yaml(site_yaml: str) -> list[str]:
    """Return the ``anti_ai_allow`` entries of a site.yaml.

    Yields an empty list when the file does not exist or ``yq`` exits with an
    error. Entries are whitespace-stripped; blank lines are dropped.
    """
    if os.path.isfile(site_yaml):
        query = ["yq", "-r", "(.anti_ai_allow // []) | .[]", site_yaml]
        try:
            listing = subprocess.check_output(
                query,
                stderr=subprocess.DEVNULL,
                text=True,
            )
        except subprocess.CalledProcessError:
            return []
        stripped = (row.strip() for row in listing.splitlines())
        return [row for row in stripped if row]
    return []
|
||||
|
||||
|
||||
def lint_file(html_path: str, blacklist: dict, extra_allows: list[str]) -> list[dict]:
    """Lint one HTML file against the blacklist.

    Args:
        html_path: File to lint; read as UTF-8 with undecodable bytes replaced.
        blacklist: Parsed rules; the ``vocab`` and ``patterns`` keys are read.
        extra_allows: Additional allowed names, merged (verbatim and
            lower-cased) with the allow directives found in the page itself.

    Returns:
        A list of finding dicts with the keys ``kind``, ``name``, ``severity``,
        ``line`` and ``snippet``; vocab findings also carry ``lang``. Every
        vocab term and every pattern is reported at most once per file, at the
        first text fragment that matches. A parser exception yields a single
        ``parse`` warning. A pattern whose regex is missing, not a string, or
        does not compile yields a ``config`` warning instead of aborting.
    """
    with open(html_path, "r", encoding="utf-8", errors="replace") as handle:
        markup = handle.read()

    extractor = TextExtractor()
    try:
        extractor.feed(markup)
        extractor.close()
    except Exception as exc:
        # Malformed HTML — record a single warning and skip.
        return [{
            "kind": "parse",
            "name": "html-parse-error",
            "severity": "warn",
            "line": 0,
            "snippet": str(exc)[:120],
        }]

    allowed = set(extractor.allows)
    for tok in extra_allows:
        allowed.update((tok, tok.lower()))

    def is_allowed(label: str) -> bool:
        return label in allowed or label.lower() in allowed

    findings: list[dict] = []
    reported: set[tuple[str, str]] = set()  # (kind, name) — one report per file

    # Vocab is checked in BOTH languages, independent of the page language.
    vocab = blacklist.get("vocab") or {}
    for lang in ("de", "en"):
        per_lang = vocab.get(lang) or {}
        for severity in ("warn", "fail"):
            for term in per_lang.get(severity) or []:
                needle = term.lower()
                key = ("vocab", needle)
                if key in reported or is_allowed(term):
                    continue
                for line_no, text in extractor.fragments:
                    if needle in text.lower():
                        findings.append({
                            "kind": "vocab",
                            "lang": lang,
                            "name": term,
                            "severity": severity,
                            "line": line_no,
                            "snippet": text.strip()[:120],
                        })
                        reported.add(key)
                        break

    # Patterns
    for pat in blacklist.get("patterns") or []:
        source = pat.get("regex")
        name = pat.get("name") or (source[:40] if isinstance(source, str) else "")
        key = ("pattern", name)
        if key in reported or is_allowed(name):
            continue

        if not isinstance(source, str):
            # A rule without a usable regex is a configuration mistake; report
            # it like an invalid regex rather than crashing the whole run.
            findings.append({
                "kind": "config",
                "name": name or "<unnamed>",
                "severity": "warn",
                "line": 0,
                "snippet": "missing or non-string regex",
            })
            continue

        flags = re.MULTILINE
        if not pat.get("case_sensitive"):
            flags |= re.IGNORECASE
        try:
            rx = re.compile(source, flags)
        except re.error as exc:
            findings.append({
                "kind": "config",
                "name": name,
                "severity": "warn",
                "line": 0,
                "snippet": f"invalid regex: {exc}",
            })
            continue

        for line_no, text in extractor.fragments:
            match = rx.search(text)
            if match:
                findings.append({
                    "kind": "pattern",
                    "name": name,
                    "severity": pat.get("severity", "warn"),
                    "line": line_no,
                    "snippet": (text.strip()[:120] or match.group(0)[:120]),
                })
                reported.add(key)
                break

    return findings
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point.

    Lints ``<build_dir>/<entry>/index.html`` for every entry that has such a
    file, prints either a per-site text report or a JSON document, and returns
    the process exit status: 2 when the build dir or blacklist is missing,
    1 when at least one fail-level finding was reported, otherwise 0.
    """
    tools_dir = os.path.dirname(os.path.abspath(__file__))
    repo_root = os.path.dirname(tools_dir)

    cli = argparse.ArgumentParser(description="Flag AI-text fingerprints in built sites.")
    cli.add_argument("build_dir", nargs="?", default=os.path.join(repo_root, "build"))
    cli.add_argument("--blacklist", default=os.path.join(tools_dir, "anti-ai-blacklist.yaml"))
    cli.add_argument("--sources", default=os.path.join(repo_root, "sites"),
                     help="sites/ root (for per-site site.yaml allow lists)")
    cli.add_argument("--quiet", action="store_true",
                     help="Suppress warnings; only show fails.")
    cli.add_argument("--json", action="store_true", help="Emit JSON report.")
    opts = cli.parse_args()

    if not os.path.isdir(opts.build_dir):
        print(f"ERROR: build dir not found: {opts.build_dir}", file=sys.stderr)
        return 2
    if not os.path.isfile(opts.blacklist):
        print(f"ERROR: blacklist not found: {opts.blacklist}", file=sys.stderr)
        return 2

    blacklist = load_blacklist(opts.blacklist)

    counts = {"warn": 0, "fail": 0}
    flagged = 0
    scanned = 0
    report: list[dict] = []

    for entry in sorted(os.listdir(opts.build_dir)):
        page = os.path.join(opts.build_dir, entry, "index.html")
        if not os.path.isfile(page):
            continue
        scanned += 1

        allow_extra = site_allow_yaml(os.path.join(opts.sources, entry, "site.yaml"))
        findings = lint_file(page, blacklist, allow_extra)
        if not findings:
            continue

        flagged += 1
        fails = [item for item in findings if item["severity"] == "fail"]
        counts["fail"] += len(fails)
        counts["warn"] += sum(1 for item in findings if item["severity"] == "warn")

        if opts.json:
            report.append({"site": entry, "findings": findings})
            continue

        # --quiet hides warnings but fail-level findings are always shown.
        shown = fails if opts.quiet else findings
        if not shown:
            continue
        print(f"{BOLD}{entry}{RESET}")
        for item in shown:
            is_fail = item["severity"] == "fail"
            color = RED if is_fail else YELLOW
            tag = "FAIL" if is_fail else "warn"
            lang = f" ({item['lang']})" if "lang" in item else ""
            print(
                f" {color}{tag}{RESET} {item['kind']}{lang}: "
                f"{BOLD}{item['name']}{RESET} "
                f"{DIM}line {item['line']}: {item['snippet']}{RESET}"
            )

    if opts.json:
        document = {
            "summary": {
                "sites_total": scanned,
                "sites_with_findings": flagged,
                "warn": counts["warn"],
                "fail": counts["fail"],
            },
            "sites": report,
        }
        json.dump(document, sys.stdout, indent=2, ensure_ascii=False)
        print()
    else:
        if counts["fail"] > 0:
            tag, color = "FAIL", RED
        elif counts["warn"] > 0:
            tag, color = "WARN", YELLOW
        else:
            tag, color = "OK", GREEN
        print(
            f"\n{color}anti-ai-lint: {tag}{RESET} — "
            f"{flagged}/{scanned} sites flagged "
            f"({counts['fail']} fail, {counts['warn']} warn)"
        )

    return 1 if counts["fail"] > 0 else 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
107
tools/test-anti-ai-lint.sh
Executable file
107
tools/test-anti-ai-lint.sh
Executable file
@@ -0,0 +1,107 @@
|
||||
#!/bin/bash
# Self-test for tools/anti-ai-lint.py.
# Writes a synthetic AI-text page into a temporary build tree. The rest of the
# script asserts that the linter flags it and that whitelist comments
# suppress the hits.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
LINT="${SCRIPT_DIR}/anti-ai-lint.py"

# Scratch directory, removed again on every exit path.
tmp="$(mktemp -d)"
cleanup() {
  rm -rf "$tmp"
}
trap cleanup EXIT

mkdir -p "${tmp}/build/synthetic-ai.test"

# Line 4 of the fixture is the <title> line; the whitelist step inserts its
# comment right after it.
cat <<'HTML' > "${tmp}/build/synthetic-ai.test/index.html"
<!DOCTYPE html>
<html lang="en">
<head>
<title>Synthetic AI sample</title>
<style>.foo { color: red; } /* leverage in CSS comment must be ignored */</style>
<script>const x = "leverage"; // in JS, must be ignored</script>
</head>
<body>
<h1>In today's evolving landscape</h1>
<p>This is a comprehensive, robust, holistic solution that lets us leverage emerging trends.</p>
<p>We delve into the intricate tapestry of AI to navigate this pivotal moment.</p>
<h2>Challenges and Future Prospects</h2>
<ul>
<li>Effizienz: hoch — Skalierbarkeit: gut — Sicherheit: solide</li>
</ul>
</body>
</html>
HTML
|
||||
|
||||
# expect_finding <json> <name>
# Succeeds when the JSON report holds exactly one finding called <name>
# across all sites; otherwise prints a message to stderr and returns 1.
expect_finding() {
  local report_json=$1 wanted_name=$2
  python3 -c '
import json
import sys

report, wanted = json.loads(sys.argv[1]), sys.argv[2]
count = sum(
    1
    for site in report["sites"]
    for finding in site["findings"]
    if finding["name"] == wanted
)
if count != 1:
    print(f"expected exactly 1 finding for {wanted!r}, got {count}", file=sys.stderr)
    sys.exit(1)
' "$report_json" "$wanted_name"
}
|
||||
|
||||
# expect_no_finding <json> <name>
# Succeeds when the JSON report holds no finding called <name>; otherwise
# prints the offending findings to stderr and returns 1.
expect_no_finding() {
  local report_json=$1 unwanted_name=$2
  python3 -c '
import json
import sys

report = json.loads(sys.argv[1])
unwanted = sys.argv[2]
found = []
for site in report["sites"]:
    for finding in site["findings"]:
        if finding["name"] == unwanted:
            found.append(finding)
if found:
    print(f"unexpected finding for {unwanted!r}: {found}", file=sys.stderr)
    sys.exit(1)
' "$report_json" "$unwanted_name"
}
|
||||
|
||||
# The linter's stderr is deliberately left attached in every step: only stdout
# is captured as the JSON report, and an error such as a missing yq should be
# visible instead of silently producing an empty report.

echo "[1] expecting FAIL on synthetic AI fixture..."
rc=0
report=$(python3 "$LINT" --json "$tmp/build") || rc=$?
if [ "$rc" -ne 1 ]; then
  echo "FAIL: expected exit 1, got $rc" >&2
  echo "$report" >&2
  exit 1
fi
for term in "in today's evolving landscape" "Challenges and Future Prospects" \
  "leverage" "comprehensive" "delve" "em-dash-3-bullet"; do
  expect_finding "$report" "$term" || exit 1
done
echo " OK"

echo "[2] expecting whitelist comment to suppress hits..."
# Insert the allow comment after line 4 of the fixture. awk plus mv is used
# instead of `sed -i '4a\ ...'`, whose in-place flag and one-line append form
# are GNU extensions.
fixture_html="$tmp/build/synthetic-ai.test/index.html"
awk -v comment='<!-- anti-ai-allow: leverage, comprehensive, delve, em-dash-3-bullet -->' \
  '{ print } NR == 4 { print comment }' "$fixture_html" > "$fixture_html.new"
mv "$fixture_html.new" "$fixture_html"
# Exit status 1 is expected here (fail-level hits remain); step 3 checks it.
report=$(python3 "$LINT" --json "$tmp/build") || true
for term in "leverage" "comprehensive" "delve" "em-dash-3-bullet"; do
  expect_no_finding "$report" "$term" || exit 1
done
# fail-level "in today's evolving landscape" should still be reported
expect_finding "$report" "in today's evolving landscape" || exit 1
echo " OK"

echo "[3] expecting fail-level hit still triggers exit 1..."
rc=0
python3 "$LINT" "$tmp/build" >/dev/null 2>&1 || rc=$?
if [ "$rc" -ne 1 ]; then
  echo "FAIL: expected exit 1, got $rc" >&2
  exit 1
fi
echo " OK"

echo "[4] expecting clean exit on neutral fixture..."
# Swap the synthetic site for a neutral one.
rm -rf "$tmp/build/synthetic-ai.test"
mkdir -p "$tmp/build/clean.test"
echo '<!DOCTYPE html><html lang="de"><body><p>Ein einfacher Satz ohne KI-Vokabular.</p></body></html>' \
  > "$tmp/build/clean.test/index.html"
rc=0
out=$(python3 "$LINT" "$tmp/build" 2>&1) || rc=$?
if [ "$rc" -ne 0 ]; then
  echo "FAIL: clean fixture should exit 0, got $rc" >&2
  echo "$out" >&2
  exit 1
fi
echo " OK"

echo
echo "all tests passed"
|
||||
Reference in New Issue
Block a user