Files
paliad/internal/handlers/files.go
mAi d86cac0b53 feat(submissions): t-paliad-230 format-only .dotm→.docx convert
m's 2026-05-21 scope reduction of the t-paliad-215 submission generator:
ship a demo that hands the lawyer the firm style template as a clean
.docx. No variable-merge engine, no per-submission template registry,
no fallback chain — the merge slice is deferred to a future task.

Replaces the previous engine (template registry + variable bag +
{{placeholder}} renderer + dual project_events/documents writes) with:

* services.ConvertDotmToDocx — single-function .dotm/.docm/.dotx → .docx
  format converter that strips word/vbaProject.bin, word/vbaData.xml,
  word/customizations.xml, and word/_rels/vbaProject.bin.rels, rewrites
  [Content_Types].xml (demotes the macro/template main type to plain
  docx, drops the .bin Default Extension and the macro Overrides), and
  rewrites word/_rels/document.xml.rels to drop the vbaProject +
  keyMapCustomizations relationships. Idempotent on a plain .docx.
  archive/zip + regexp (standard library only) — no new third-party dependencies.

* handlers/submissions.go — POST /api/projects/{id}/submissions/{code}
  /generate fetches the cached HL Patents Style .dotm (via a new
  fetchHLPatentsStyleBytes accessor on files.go that shares the same
  cache as /files/{slug}), converts, writes one paliad.system_audit_log
  row (event_type='submission.generated', metadata={submission_code,
  rule_name, filename}), and streams the .docx as an attachment. GET
  /api/projects/{id}/submissions still lists filing rules but
  has_template is unconditionally true (one universal template).

* Filename per design §7: {rule.name}-{project.case_number}-{YYYY-MM-DD}
  .docx, with Umlauts ASCII-folded and slashes → underscores.

Drops services/submission_templates.go, services/submission_vars.go,
and the wiring in cmd/server/main.go + handlers/handlers.go that bound
them together. Frontend client switched to POST.

Verified the converter against the real HL Patents Style.dotm (361 KB
input → 243 KB output, 46 parts in output zip):

  unzip -tq /tmp/hl-patents-style.converted.docx   → No errors
  python3 -c "import zipfile, xml.etree.ElementTree as ET; \
              z=zipfile.ZipFile('/tmp/hl-patents-style.converted.docx'); \
              [ET.fromstring(z.read(p)) for p in z.namelist() if p.endswith('.xml')]"
  uv run --with python-docx python3 -c "import docx; \
              d=docx.Document('/tmp/hl-patents-style.converted.docx'); \
              print(len(d.paragraphs), 'paragraphs', len(d.styles), 'styles')"
              → 236 paragraphs, 168 styles, 1 section

All assertions passed: every Override in [Content_Types].xml resolves
to a real part, every internal Target in document.xml.rels resolves,
zero macro-related residue, and the document body + styles + theme
survive untouched.

go test -run TestBootSmoke ./cmd/server/... clean (route additions
register without conflict on the Go ServeMux).
2026-05-21 15:23:24 +02:00

289 lines
7.0 KiB
Go

package handlers
import (
"context"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"net/url"
"sync"
"time"
"mgit.msbls.de/m/paliad/internal/branding"
)
// Upstream location and revalidation cadence for the file proxy.
const (
// giteaBaseURL is the root of the Gitea instance. giteaLatestSHA prefixes
// its commits-API path with it.
giteaBaseURL = "https://mgit.msbls.de"
// checkInterval is how old a cache entry's lastChecked timestamp must be
// before a request starts a background revalidation of that entry.
checkInterval = 5 * time.Minute
)
// fileEntry describes one downloadable file: where its bytes live upstream
// and how it is presented to the client.
type fileEntry struct {
// RawURL is the full URL that giteaDownload issues its GET against.
RawURL string
// DownloadName is the filename placed in the Content-Disposition header;
// it is also the name used in the refresh log messages.
DownloadName string
// ContentType is sent verbatim as the Content-Type response header.
ContentType string
// RepoOwner and RepoName identify the repository in the commits-API URL
// built by giteaLatestSHA.
RepoOwner string
RepoName string
// FilePath is the unescaped in-repo path; giteaLatestSHA query-escapes it
// before putting it into the commits-API "path" parameter.
FilePath string
}
// fileRegistry maps the public download slug to the upstream Gitea object.
//
// RawURL / FilePath reference the actual file in mWorkRepo and must match the
// blob's name there exactly; renaming would 404 the proxy. DownloadName is
// what the browser saves the file as — that's a branding surface, so it
// renders branding.Name instead of the upstream filename.
//
// The URL slug ("hl-patents-style.dotm") is preserved as a stable public
// identifier so existing bookmarks keep working post-rebrand.
var fileRegistry = map[string]fileEntry{
"hl-patents-style.dotm": {
// RawURL is the percent-encoded form of the same path that FilePath
// spells out in plain text; keep the two in sync when either changes.
RawURL: "https://mgit.msbls.de/m/mWorkRepo/raw/branch/main/6%20-%20material/Templates/Word/HL%20Patents%20Style.dotm",
DownloadName: branding.Name + " Patents Style.dotm",
ContentType: "application/vnd.ms-word.template.macroEnabled.12",
RepoOwner: "m",
RepoName: "mWorkRepo",
// Plain (unescaped) path; giteaLatestSHA escapes it for the query string.
FilePath: "6 - material/Templates/Word/HL Patents Style.dotm",
},
}
// cacheEntry is the in-memory cache slot for one registered file. Because it
// embeds a mutex it is always handled through a pointer.
type cacheEntry struct {
// mu guards every other field of the entry.
mu sync.RWMutex
// data holds the most recently downloaded file bytes; an empty slice means
// nothing has been cached yet.
data []byte
// sha is the commit SHA recorded alongside data. It is empty when the SHA
// lookup failed during the initial fetch, which makes the next refresh
// re-download unconditionally.
sha string
// lastChecked is when the entry was last fetched or revalidated, including
// failed revalidation attempts.
lastChecked time.Time
// checking is true while a fileCheckAndRefresh run is in flight for this
// entry, so concurrent triggers return early.
checking bool
}
// Package-level state shared by the file proxy.
var (
// giteaToken, when non-empty, is sent as "Authorization: token <value>" on
// every Gitea request. It is not assigned anywhere in the code shown here.
giteaToken string
// fileCache maps a download slug to its cache slot; guarded by fileCacheMu.
fileCache = make(map[string]*cacheEntry)
// fileCacheMu serializes all reads and writes of the fileCache map itself
// (not of the entries it points to, which carry their own lock).
fileCacheMu sync.Mutex
// httpClient is the shared client for Gitea requests; its 30-second
// timeout bounds each request.
httpClient = &http.Client{Timeout: 30 * time.Second}
)
// getCacheEntry returns the cache slot registered under name, creating and
// registering an empty one on first use. Lookup and insertion both happen
// under fileCacheMu, so callers racing on the same name get the same slot
// for as long as that slot stays in the map.
func getCacheEntry(name string) *cacheEntry {
	fileCacheMu.Lock()
	defer fileCacheMu.Unlock()

	if existing, found := fileCache[name]; found {
		return existing
	}

	fresh := new(cacheEntry)
	fileCache[name] = fresh
	return fresh
}
// handleFileDownload serves a registered file as an attachment out of the
// in-memory cache.
//
// The "filename" path value is looked up in fileRegistry; unknown names get a
// 404. With a cold cache the file is fetched from Gitea synchronously, and a
// failed (or empty) fetch is answered with 502 Bad Gateway. With a warm cache
// the cached bytes are served right away, and if the entry was last checked
// at least checkInterval ago a background revalidation is started.
func handleFileDownload(w http.ResponseWriter, r *http.Request) {
	filename := r.PathValue("filename")
	entry, ok := fileRegistry[filename]
	if !ok {
		http.NotFound(w, r)
		return
	}

	ce := getCacheEntry(filename)

	ce.mu.RLock()
	hasData := len(ce.data) > 0
	needsCheck := time.Since(ce.lastChecked) >= checkInterval
	ce.mu.RUnlock()

	if !hasData {
		if err := fileFetch(ce, entry); err != nil {
			log.Printf("file proxy: fetch %s failed: %v", filename, err)
			http.Error(w, "Failed to fetch file", http.StatusBadGateway)
			return
		}
	} else if needsCheck {
		go fileCheckAndRefresh(ce, entry)
	}

	// Take a snapshot of the slice under the read lock and release the lock
	// before any network I/O. Holding it across w.Write would let one slow
	// client delay a refresh that needs the write lock, and a waiting writer
	// in turn blocks every new reader. The snapshot is safe to use unlocked
	// because the code in this file only ever replaces ce.data with a new
	// slice; it never modifies the bytes in place.
	ce.mu.RLock()
	data := ce.data
	ce.mu.RUnlock()

	// Mirror fetchHLPatentsStyleBytes: a zero-length payload after a
	// "successful" fetch is an upstream problem, not a file worth serving.
	if len(data) == 0 {
		log.Printf("file proxy: %s cache empty after fetch", filename)
		http.Error(w, "Failed to fetch file", http.StatusBadGateway)
		return
	}

	w.Header().Set("Content-Type", entry.ContentType)
	w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, entry.DownloadName))
	w.Header().Set("Content-Length", fmt.Sprintf("%d", len(data)))

	// A write error here means the client went away mid-transfer; the status
	// line is already sent, so there is nothing left to report or recover.
	_, _ = w.Write(data)
}
// handleFileRefresh empties the file cache: every slot currently present in
// fileCache is swapped for a fresh, zero-valued cacheEntry, so the next
// request for each file finds no cached data. It then responds 200 through
// writeJSON with a fixed acknowledgement payload.
func handleFileRefresh(w http.ResponseWriter, r *http.Request) {
	func() {
		fileCacheMu.Lock()
		defer fileCacheMu.Unlock()

		for slug := range fileCache {
			fileCache[slug] = new(cacheEntry)
		}
	}()

	ack := map[string]string{"ok": "true", "message": "Cache cleared"}
	writeJSON(w, http.StatusOK, ack)
}
// fetchHLPatentsStyleBytes returns a private copy of the HL Patents Style
// .dotm bytes from the file cache, so the caller may modify the result
// without affecting the cached data.
//
// The cache slot is obtained through getCacheEntry under hlPatentsStyleSlug,
// i.e. the same cache map the download handler uses. When the slot is empty
// the file is fetched from Gitea synchronously before returning. When it
// holds data that was last checked at least checkInterval ago, the existing
// bytes are returned and a background revalidation is started.
//
// An error is returned when the slug is not in fileRegistry, when the
// synchronous fetch fails, or when the slot is still empty afterwards.
//
// ctx is currently unused: it is reserved for future timeout pass-through,
// and today fileFetch relies on the package httpClient timeout.
func fetchHLPatentsStyleBytes(ctx context.Context) ([]byte, error) {
	_ = ctx

	entry, registered := fileRegistry[hlPatentsStyleSlug]
	if !registered {
		return nil, fmt.Errorf("file proxy: %s not registered", hlPatentsStyleSlug)
	}

	slot := getCacheEntry(hlPatentsStyleSlug)

	slot.mu.RLock()
	warm := len(slot.data) > 0
	stale := time.Since(slot.lastChecked) >= checkInterval
	slot.mu.RUnlock()

	switch {
	case !warm:
		if err := fileFetch(slot, entry); err != nil {
			return nil, err
		}
	case stale:
		go fileCheckAndRefresh(slot, entry)
	}

	slot.mu.RLock()
	defer slot.mu.RUnlock()

	size := len(slot.data)
	if size == 0 {
		return nil, fmt.Errorf("file proxy: %s cache empty after fetch", hlPatentsStyleSlug)
	}

	snapshot := make([]byte, size)
	copy(snapshot, slot.data)
	return snapshot, nil
}
// fileFetch downloads entry from Gitea synchronously and stores the bytes,
// the commit SHA and the current time in ce. It is the cold-cache path used
// on the first request for a file.
//
// The SHA lookup is best-effort: if it fails, the failure is logged and the
// entry is stored with an empty SHA, which makes the next
// fileCheckAndRefresh run re-download the file. Only a failed download is
// returned as an error, and in that case ce is left untouched.
func fileFetch(ce *cacheEntry, entry fileEntry) error {
	// Keep the SHA lookup ahead of the download. If a commit lands between
	// the two calls, the recorded SHA is merely older than the data and costs
	// one redundant re-download later; the reverse order could pair a newer
	// SHA with older bytes and hide the update.
	sha, err := giteaLatestSHA(entry)
	if err != nil {
		log.Printf("file proxy: SHA lookup for %s failed: %v", entry.DownloadName, err)
		sha = ""
	}

	data, err := giteaDownload(entry)
	if err != nil {
		return err
	}

	ce.mu.Lock()
	ce.data = data
	ce.sha = sha
	ce.lastChecked = time.Now()
	ce.mu.Unlock()
	return nil
}
// fileCheckAndRefresh revalidates ce against Gitea and replaces the cached
// bytes when the latest commit SHA for the file differs from the recorded one
// (or when no SHA was recorded).
//
// Only one run per entry proceeds at a time: a call that finds ce.checking
// already set returns immediately. Every outcome — SHA lookup failure,
// unchanged SHA, download failure, successful update — stamps
// ce.lastChecked with the current time. Failures are logged and leave the
// cached data and SHA as they were.
func fileCheckAndRefresh(ce *cacheEntry, entry fileEntry) {
	// Claim the in-flight flag, or back off if another run holds it.
	ce.mu.Lock()
	alreadyRunning := ce.checking
	if !alreadyRunning {
		ce.checking = true
	}
	ce.mu.Unlock()
	if alreadyRunning {
		return
	}
	defer func() {
		ce.mu.Lock()
		ce.checking = false
		ce.mu.Unlock()
	}()

	// markChecked records "we looked just now" without touching data or sha.
	markChecked := func() {
		ce.mu.Lock()
		ce.lastChecked = time.Now()
		ce.mu.Unlock()
	}

	latestSHA, err := giteaLatestSHA(entry)
	if err != nil {
		log.Printf("file proxy: SHA check for %s failed: %v", entry.DownloadName, err)
		markChecked()
		return
	}

	ce.mu.RLock()
	cachedSHA := ce.sha
	ce.mu.RUnlock()

	// An empty cached SHA never counts as a match, forcing a download.
	if cachedSHA != "" && cachedSHA == latestSHA {
		markChecked()
		return
	}

	fresh, err := giteaDownload(entry)
	if err != nil {
		log.Printf("file proxy: download %s failed: %v", entry.DownloadName, err)
		markChecked()
		return
	}

	// Publish bytes, SHA and timestamp together under one write lock.
	ce.mu.Lock()
	ce.data, ce.sha, ce.lastChecked = fresh, latestSHA, time.Now()
	ce.mu.Unlock()

	log.Printf("file proxy: updated %s (SHA: %.8s)", entry.DownloadName, latestSHA)
}
// giteaLatestSHA asks the Gitea commits API for the most recent commit on
// "main" that touched entry.FilePath (limit=1) and returns that commit's SHA.
//
// The request carries an "Authorization: token …" header when giteaToken is
// set. An error is returned when the request cannot be built or sent, when
// the response status is not 200, when the body is not the expected JSON
// array, or when the array is empty.
func giteaLatestSHA(entry fileEntry) (string, error) {
	endpoint := fmt.Sprintf(
		"%s/api/v1/repos/%s/%s/commits?path=%s&limit=1&sha=main",
		giteaBaseURL,
		entry.RepoOwner,
		entry.RepoName,
		url.QueryEscape(entry.FilePath),
	)

	req, err := http.NewRequest(http.MethodGet, endpoint, nil)
	if err != nil {
		return "", err
	}
	if token := giteaToken; token != "" {
		req.Header.Set("Authorization", "token "+token)
	}

	res, err := httpClient.Do(req)
	if err != nil {
		return "", err
	}
	defer res.Body.Close()

	if code := res.StatusCode; code != http.StatusOK {
		return "", fmt.Errorf("gitea API returned %d", code)
	}

	// Only the "sha" field of each commit object is of interest.
	type commitRef struct {
		SHA string `json:"sha"`
	}
	var history []commitRef
	if err := json.NewDecoder(res.Body).Decode(&history); err != nil {
		return "", err
	}
	if len(history) == 0 {
		return "", fmt.Errorf("no commits for path %s", entry.FilePath)
	}
	return history[0].SHA, nil
}
// giteaDownload GETs entry.RawURL and returns the complete response body.
//
// The request carries an "Authorization: token …" header when giteaToken is
// set. Any status other than 200 is reported as an error; transport and
// read errors are passed through unchanged.
func giteaDownload(entry fileEntry) ([]byte, error) {
	req, err := http.NewRequest(http.MethodGet, entry.RawURL, nil)
	if err != nil {
		return nil, err
	}
	if token := giteaToken; token != "" {
		req.Header.Set("Authorization", "token "+token)
	}

	res, err := httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	if code := res.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("gitea raw returned %d", code)
	}

	body, err := io.ReadAll(res.Body)
	return body, err
}