#!/usr/bin/env bash
# =============================================================================
# RAGWeed v10  --  Installer
# Node.js rewrite: direct HNSW + SQLite, no ChromaDB, no Python at runtime
# Copyright (c) Fred Cohen, 2026 - ALL RIGHTS RESERVED
# Patents: https://all.net/Notices.html
# =============================================================================
# Installer version + embedded file manifest (TYPE|path|lines|md5|flags).
# NOTE(review): these lines appear machine-managed (bump.js / patch.js are in
# the manifest) -- presumably updated by tooling, not by hand; verify before editing.
RAGWEED_INSTALL_VERSION="1.0.102"
RAGWEED_MANIFEST_BEGIN="v1.0.102-20260319-000014-473"
# RAGWEED-PKG|$PROJECT_DIR/package.json|21|57a8c1072a816d04fec333f3e0553d20|
# RAGWEED-PCK|$SCRIPTS_DIR/pickle.js|54|9c15c3609811c7e44cee6f01c65ac9bb|
# RAGWEED-COL|$SCRIPTS_DIR/collections.js|240|372db0663f4331bc2cad3f07d9a700a7|
# RAGWEED-S7B|$PROJECT_DIR/step7b-init.mjs|50|5457c6d9ff77880b0fd06dd873543867|install_only
# RAGWEED-QRY|$SCRIPTS_DIR/query.js|2515|ab9b6d5a11ba85a31ffa9f979cfd28c4|
# RAGWEED-WEB|$SCRIPTS_DIR/web.js|1527|12abf3735570b57e83a4f50e10d2056c|
# RAGWEED-HTM|$SCRIPTS_DIR/index.html|62|d4bf019580d59440bd8d5330ad2df49f|
# RAGWEED-APP|$SCRIPTS_DIR/app.js|996|1dacda0b2030d2ba23d14535813700c3|
# RAGWEED-LFJ|$SCRIPTS_DIR/lf.js|316|967a01aeddd118e9eccc24b02e69f698|
# RAGWEED-RUN|$PROJECT_DIR/run.sh|158|783e3320706f626dc634bbb5164b7857|
# RAGWEED-BMP|$PROJECT_DIR/bump.js|272|5cea8af5552e9672a31f53fbc6c16ef6|
# RAGWEED-IPF|$PROJECT_DIR/inject_preflight.js|354|00c604e58d40c1d6d3c77ddb1b919392|
# RAGWEED-PCH|$PROJECT_DIR/patch.js|40|57f31fb337fec225740923d1e774716c|
# RAGWEED-MAN|$PROJECT_DIR/docs/ragweed.1|305|e4fc83482ed8042edee435e0c5b0899b|
# RAGWEED-WBC|$SCRIPTS_DIR/webc.js|2535|2c2c3f1747b2d35751f310dd6565e490|
# RAGWEED-ING|$SCRIPTS_DIR/ingest.js|2836|531d20bea02f32f15242cf3aa5b9c31e|
# RAGWEED-SYS|$PROJECT_DIR/system_prompt.txt|5|396380bf9067f44e5fc98d8888471803|conditional
# RAGWEED-ANP|$SCRIPTS_DIR/annotation_prompt.txt|1|a09af496c744ed3c8bfe04f60724a3e0|
# RAGWEED-HLP|$SCRIPTS_DIR/help.txt|56|445af9b867dfa0549c0cb7c183be3b2e|
# RAGWEED-ATR|$SCRIPTS_DIR/annotation_test_runner.cjs|293|19a3077881b71f41fa3dfd8273e3e1e7|
# RAGWEED-CFG|$SCRIPTS_DIR/config.sh|447|29cc35b08dcabe93a6f0e1804198bff7|
RAGWEED_MANIFEST_END="v1.0.102-20260319-000014-473"
# Strict mode: abort on errors and unset vars; fail a pipeline if any stage
# fails. Several later sections temporarily disable -e around commands that
# are expected to return non-zero (nvm, backup loop, npm spinner).
set -euo pipefail

echo ""
echo "RAGWeed Installer v${RAGWEED_INSTALL_VERSION}"
echo ""

# PROJECT_DIR = absolute directory containing this installer.
PROJECT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Saved so PREFLIGHT A can extract this installer's own verification section.
_INSTALLER_PATH="$0"
SCRIPTS_DIR="$PROJECT_DIR/scripts"
LOGS_DIR="$PROJECT_DIR/logs"
CACHE_DIR="$PROJECT_DIR/cache"
# NOTE(review): name is historical -- header says "no ChromaDB"; this dir now
# holds the HNSW/SQLite index files (see CHROMA_PATH default written to Config).
CHROMA_DIR="$PROJECT_DIR/chromadb"
NODE_MIN_VERSION=18   # minimum accepted Node.js major version
NVM_DIR="$HOME/.nvm"  # where nvm installs itself and Node versions

# Plain-text status prefixes -- deliberately no ANSI colors anywhere.
ok()   { printf '  ok  %s\n' "$*"; }
warn() { printf '  !!  %s\n' "$*"; }
err()  { printf '  XX  %s\n' "$*"; }
info() { printf '   i  %s\n' "$*"; }
# ph MSG -- print a blank line, then MSG framed by two rule lines.
ph()   {
    printf '\n%s\n  %s\n%s\n' \
        "================================================================" \
        "$*" \
        "================================================================"
}

# env_get KEY -- print the value of the last KEY= line in Config, with all
# double quotes stripped; prints nothing (status 0) when key/file is missing.
env_get() { sed -n "s/^${1}=//p" "$PROJECT_DIR/Config" 2>/dev/null | tail -n 1 | tr -d '"' || true; }
# env_set KEY VALUE -- create or update KEY in $PROJECT_DIR/Config in place,
# replacing every existing KEY= line or appending one if absent.
env_set() {
    local key="$1" val="$2" envf="$PROJECT_DIR/Config"
    touch "$envf"
    # BUGFIX: escape sed-replacement metacharacters (backslash, &, and the |
    # delimiter) so values like "a|b&c" are written literally instead of
    # corrupting the sed expression.
    local esc
    esc=$(printf '%s' "$val" | sed -e 's/[\\&|]/\\&/g')
    if grep -qE "^${key}=" "$envf" 2>/dev/null; then
        sed -i "s|^${key}=.*|${key}=${esc}|" "$envf"
    else
        echo "${key}=${val}" >> "$envf"
    fi
}

# =============================================================================
# OS version check  --  Ubuntu >= 20.04 required (the minimum actually
# enforced below).
# BUGFIX: earlier messages claimed 22.04(.5) while the check enforced 20.04;
# all user-facing text now matches the enforced minimum.
# =============================================================================
ph "OS version check"

if [ ! -f /etc/os-release ]; then
    err "Cannot determine OS  --  /etc/os-release not found"
    err "RAGWeed requires Ubuntu 20.04 LTS or later."
    exit 1
fi

_os_id=$(grep "^ID=" /etc/os-release | cut -d= -f2 | tr -d '"')
_os_ver=$(grep "^VERSION_ID=" /etc/os-release | cut -d= -f2 | tr -d '"')

if [ "$_os_id" != "ubuntu" ]; then
    err "Unsupported OS: $_os_id"
    err "RAGWeed requires Ubuntu 20.04 or later."
    err "Please upgrade your OS and re-run this installer."
    exit 1
fi

_os_major=$(echo "$_os_ver" | cut -d. -f1)
_os_minor=$(echo "$_os_ver" | cut -d. -f2)
# Use 4 rather than 04: a leading zero would be read as octal in any future
# $(( )) arithmetic. The [ -ge ] comparisons below treat "04" as decimal.
_req_major=20; _req_minor=4
_os_ok=0

if   [ "$_os_major" -gt "$_req_major" ]; then
    _os_ok=1
elif [ "$_os_major" -eq "$_req_major" ] && [ "$_os_minor" -ge "$_req_minor" ]; then
    _os_ok=1
fi

if [ "$_os_ok" -eq 0 ]; then
    err "Ubuntu $_os_ver does not meet the minimum required version 20.04"
    err "Please run:  sudo do-release-upgrade"
    err "Or download Ubuntu 20.04 LTS from https://ubuntu.com/download/server"
    exit 1
fi

ok "Ubuntu $_os_ver"

# =============================================================================
# Node.js  --  verify or install before preflight steps that use it
# =============================================================================
ph "Node.js (>= $NODE_MIN_VERSION required)"

# curl is needed for every download below  --  bootstrap it before anything else.
if ! command -v curl &>/dev/null; then
    info "curl not found  --  installing..."
    sudo apt-get install -y curl 2>&1 | tail -1
    if command -v curl &>/dev/null; then
        ok "curl installed"
    else
        err "curl install failed  --  cannot continue"
        exit 1
    fi
fi

# Accept a preinstalled Node only when its major version meets the minimum;
# otherwise fall through to the nvm install path below (_node_ok stays 0).
_node_ok=0
if command -v node &>/dev/null; then
    _have_major=$(node --version | tr -d 'v' | cut -d. -f1)
    if [ "$_have_major" -ge "$NODE_MIN_VERSION" ]; then
        ok "node $(node --version) already installed"
        _node_ok=1
    else
        warn "node $(node --version) too old (need >= v$NODE_MIN_VERSION)  --  upgrading via nvm"
    fi
fi

if [ $_node_ok -eq 0 ]; then
    info "Installing Node.js LTS via nvm..."
    export NVM_DIR="$HOME/.nvm"
    # Detect whether SSL cert verification works  --  old Ubuntu installs often have
    # an outdated CA bundle. If it fails, use -k (insecure) for all curl calls.
    # NOTE(review): this probe also fails when the network is down or slow
    # (>5s), which flips on insecure mode unnecessarily -- confirm acceptable.
    _curl_ok=0
    curl -fsSL --max-time 5 https://raw.githubusercontent.com/ -o /dev/null 2>/dev/null && _curl_ok=1
    if [ $_curl_ok -eq 0 ]; then
        warn "SSL certificate verification failed  --  using -k (insecure) for downloads."
        warn "To fix permanently: sudo apt-get install -y ca-certificates && sudo update-ca-certificates"
        # _CURL_INSECURE is read again in STEP 6 to disable npm strict-ssl.
        _CURL_INSECURE=1
    else
        _CURL_INSECURE=0
    fi
    # If SSL certs are broken, write 'insecure' to ~/.curlrc before any curl/nvm
    # calls. This is the only reliable way to make ALL curl invocations inside
    # nvm's install script and nvm itself use -k  --  env vars are not sufficient.
    # _curlrc_patched remembers whether WE added the line, so a user's own
    # pre-existing 'insecure' entry is left untouched during cleanup below.
    _curlrc_patched=0
    if [ "$_CURL_INSECURE" = "1" ]; then
        if ! grep -q '^insecure' "$HOME/.curlrc" 2>/dev/null; then
            echo 'insecure' >> "$HOME/.curlrc"
            _curlrc_patched=1
        fi
    fi
    # Install nvm if missing
    if [ ! -f "$NVM_DIR/nvm.sh" ]; then
        curl -fsSL https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash
    fi
    # Load nvm  --  must unset any nvm function that bash may have half-loaded
    unset -f nvm 2>/dev/null || true
    # shellcheck source=/dev/null
    # set +e: nvm.sh sets up shell functions and may return non-zero
    set +e
    source "$NVM_DIR/nvm.sh"
    set -e

    # Select the highest Node LTS version compatible with this system.
    # Strategy: install latest LTS, test if the binary actually executes,
    # then step down through v22/v20/v18 until one works. No distro assumptions.
    # Use filesystem glob to find the binary  --  never rely on nvm current.
    _pick_node_bin() {
        # Find the newest installed node binary under a given major version prefix
        ls "$NVM_DIR"/versions/node/v${1}*/bin/node 2>/dev/null | sort -V | tail -1
    }
    set +e
    nvm install --lts 2>&1
    # Find whatever was just installed  --  highest version in the filesystem
    _node_bin=$(ls "$NVM_DIR"/versions/node/*/bin/node 2>/dev/null | sort -V | tail -1)
    # Smoke-test the binary: on a too-old libc it fails at exec time.
    "$_node_bin" --version >/dev/null 2>&1; _lts_ok=$?
    if [ "$_lts_ok" != "0" ]; then
        warn "Newest Node LTS won't run on this system (libc too old). Trying v22/v20/v18..."
        _node_bin=''
        for _try_maj in 22 20 18; do
            nvm install $_try_maj 2>&1
            _try_bin=$(_pick_node_bin $_try_maj)
            if [ -n "$_try_bin" ] && "$_try_bin" --version >/dev/null 2>&1; then
                _node_bin="$_try_bin"
                warn "Using Node v$_try_maj (compatible with this system's libc)."
                break
            fi
        done
    fi
    set -e

    # Remove the insecure line we added  --  leave ~/.curlrc clean
    if [ "$_curlrc_patched" = "1" ]; then
        sed -i '/^insecure$/d' "$HOME/.curlrc" 2>/dev/null || true
    fi

    # glibc check above guarantees nvm binaries will run  --  no fallback needed.
    # NOTE(review): the glibc check actually runs LATER (STEP 1), not above --
    # the effective safety net here is the explicit --version probe below.
    if [ -z "$_node_bin" ] || ! "$_node_bin" --version >/dev/null 2>&1; then
        err "Node.js install failed. Check nvm output above for details."
        exit 1
    fi
    # Parse the major version; default to 0 so the -ge test below fails safely.
    _node_ver=$( "$_node_bin" --version 2>/dev/null | tr -d 'v' | cut -d. -f1 )
    _node_ver=${_node_ver:-0}
    if [ "$_node_ver" -ge "$NODE_MIN_VERSION" ] 2>/dev/null; then
        # Put the chosen Node first on PATH for the rest of this installer run.
        export PATH="$(dirname "$_node_bin"):$PATH"
        hash -r 2>/dev/null || true
        ok "node $( "$_node_bin" --version ) installed via nvm"
    else
        err "Node.js v${_node_ver} is below minimum required v${NODE_MIN_VERSION}"
        exit 1
    fi
fi

# npm  --  find via nvm path if not in PATH yet
if ! command -v npm &>/dev/null; then
    # Try to find npm next to the node binary we just installed.
    # BUGFIX: guard the assignment -- under set -e with pipefail, a missing
    # glob makes ls fail and silently killed the script here before the
    # err message below could ever print.
    _npm_bin=$(ls "$NVM_DIR"/versions/node/*/bin/npm 2>/dev/null | sort -V | tail -1) || true
    if [ -n "$_npm_bin" ] && [ -x "$_npm_bin" ]; then
        export PATH="$(dirname "$_npm_bin"):$PATH"
        hash -r 2>/dev/null || true
    else
        err "npm not found after node install"; exit 1
    fi
fi
ok "npm $(npm --version)"

# Always write local/env.sh so node is available in new terminals without
# this installer ever touching ~/.bashrc. The heredoc delimiter is quoted,
# so the $ signs are written literally -- the file content is unchanged.
mkdir -p "$PROJECT_DIR/local"
cat > "$PROJECT_DIR/local/env.sh" << 'ENVEOF'
# RAGWeed environment -- source this to get node/npm in new terminals
export NVM_DIR="$HOME/.nvm"
[ -s "$NVM_DIR/nvm.sh" ] && . "$NVM_DIR/nvm.sh"
ENVEOF
ok "local/env.sh written"

# =============================================================================
# PREFLIGHT A: Backup existing installed files to OLD/<new-version>/BEFORE/
# (a snapshot of what was on disk BEFORE this install ran -- the directory is
# keyed by the version being installed, see _backup_dest_dir below)
# =============================================================================
ph "PREFLIGHT A: Backing up previous installation"

_OLD_DIR="$PROJECT_DIR/OLD"

# Extract version string from an installed file by type.
# Matches: const VERSION = 'x.y.z'  OR  // VERSION: x.y.z  OR  # VERSION: x.y.z
# Prints the version found, "template" for index.html, "none" for unversioned
# files, or empty string if the pattern is not found.
# _extract_file_version PATH -- dispatch on the file extension and print the
# first version string found (trying each known marker pattern in order),
# "template" for HTML, "none" for unversioned file types, or nothing.
_extract_file_version() {
    local fpath="$1" ext="${1##*.}" _v="" _pat
    case "$ext" in
        js|mjs)
            for _pat in "(?<=const VERSION = ')\d+\.\d+\.\d+(?=')" \
                        "(?<=// VERSION: )\d+\.\d+\.\d+"; do
                _v=$(grep -oP "$_pat" "$fpath" 2>/dev/null | head -1)
                [ -n "$_v" ] && break
            done
            echo "$_v" ;;
        sh)
            for _pat in "(?<=RAGWEED_VERSION=\")\d+\.\d+\.\d+(?=\")" \
                        "(?<=RAGWEED_INSTALL_VERSION=\")\d+\.\d+\.\d+(?=\")" \
                        "(?<=# RAGWEED_VERSION=)\d+\.\d+\.\d+" \
                        "(?<=# VERSION: )\d+\.\d+\.\d+"; do
                _v=$(grep -oP "$_pat" "$fpath" 2>/dev/null | head -1)
                [ -n "$_v" ] && break
            done
            echo "$_v" ;;
        json)
            grep -oP "(?<=\"version\": \")\d+\.\d+\.\d+(?=\")" "$fpath" 2>/dev/null | head -1 || true ;;
        html)
            echo "template" ;;
        txt|1|Config)
            echo "none" ;;
        *)
            echo "" ;;
    esac
}

# All files managed by this installer, relative to PROJECT_DIR.
# Consumed only by the PREFLIGHT A backup loop below; entries that do not
# exist on disk yet are skipped there.
# NOTE(review): "Config" and "scripts/prompts.json" are not in the manifest
# header at the top of this file -- presumably created/maintained at runtime;
# they are still backed up when present.
_MANAGED_FILES=(
    "run.sh"
    "Config"
    "package.json"
    "scripts/pickle.js"
    "scripts/collections.js"
    "scripts/query.js"
    "scripts/web.js"
    "scripts/index.html"
    "scripts/app.js"
    "scripts/lf.js"
    "scripts/webc.js"
    "scripts/ingest.js"
    "scripts/config.sh"
    "system_prompt.txt"
    "scripts/annotation_prompt.txt"
    "scripts/help.txt"
    "scripts/prompts.json"
    "scripts/annotation_test_runner.cjs"
    "bump.js"
    "inject_preflight.js"
    "docs/ragweed.1"
    "patch.js"
)

_backup_errors=0
_backup_count=0
# _backup_version = version currently on disk (for mismatch warnings);
# run.sh is treated as the canonical carrier of the installed version.
_backup_version=""
_run_sh="$PROJECT_DIR/run.sh"
if [ -f "$_run_sh" ]; then
    _backup_version="$(_extract_file_version "$_run_sh")" || true
fi

set +e  # backup loop must not die on version misses

# BEFORE snapshot goes into the NEW version's directory -- what was on disk before this install
_backup_dest_dir="$_OLD_DIR/${RAGWEED_INSTALL_VERSION}/BEFORE"
mkdir -p "$_backup_dest_dir"
for _rel in "${_MANAGED_FILES[@]}"; do
    _fpath="$PROJECT_DIR/$_rel"
    [ -f "$_fpath" ] || continue   # file not yet written -- skip

    _fver="$(_extract_file_version "$_fpath")" || true

    # Warn on version mismatch but always back up
    case "$_fver" in
        ""|template|none) ;;   # unversioned file types can never mismatch
        *)
            if [ -n "$_backup_version" ] && [ "$_fver" != "$_backup_version" ]; then
                warn "$_rel: version mismatch  --  file says $_fver, expected $_backup_version"
                _backup_errors=$((_backup_errors + 1))
            fi
            ;;
    esac

    # Preserve the relative directory layout inside the snapshot.
    _dest_subdir="$_backup_dest_dir/$(dirname "$_rel")"
    mkdir -p "$_dest_subdir"
    cp -a "$_fpath" "$_dest_subdir/"
    _backup_count=$((_backup_count + 1))
done

# Also back up installer-managed subdirectories in full
for _subdir in scripts docs local; do
    [ -d "$PROJECT_DIR/$_subdir" ] && cp -a "$PROJECT_DIR/$_subdir/." "$_backup_dest_dir/$_subdir/" 2>/dev/null || true
done

if [ $_backup_count -gt 0 ]; then
    ok "$_backup_count files in BEFORE snapshot: OLD/${RAGWEED_INSTALL_VERSION}/BEFORE/"
    if [ $_backup_errors -gt 0 ]; then
        warn "$_backup_errors file(s) had version mismatches  --  see warnings above"
    fi
    # Also extract current verification section into BEFORE for later diffing
    # (the section markers are presumably injected into this installer by
    # inject_preflight.js -- they are not present in this part of the file).
    sed -n '/^# ============ START VERIFICATION SECTION ============/,/^# ============ END VERIFICATION SECTION ============/p' \
        "$_INSTALLER_PATH" > "$_backup_dest_dir/verification_section.sh" 2>/dev/null
else
    info "Fresh install  --  no BEFORE snapshot"
fi
set -e  # restore strict mode

# =============================================================================
# PREFLIGHT B: Extract all heredoc files to their final locations
# (NOTE: this section's body is presumably injected by tooling -- see
# inject_preflight.js in the manifest header)
# =============================================================================
echo ""
echo "RAGWeed v${RAGWEED_INSTALL_VERSION}  --  installer"
echo ""

# _apt_install NAME PKG [PKG...]
# Install PKGs only when command NAME is absent. Tries plain apt-get first
# (works as root or where apt allows it), then sudo when available, and
# otherwise warns and continues without the tool.
_apt_install() {
    local label="$1"; shift
    command -v "$label" &>/dev/null && return 0
    info "Installing $label..."
    apt-get install -y "$@" 2>/dev/null && return 0
    if command -v sudo &>/dev/null && sudo apt-get install -y "$@" 2>/dev/null; then
        return 0
    fi
    warn "$label not found and could not be installed -- some file types may not be processed"
    return 1
}

ph "STEP 1: System Dependencies"

# sqlite3 CLI (for diagnostics  --  not runtime)
if command -v sqlite3 &>/dev/null; then
    ok "sqlite3 $(sqlite3 --version | awk '{print $1}')"
else
    info "Installing sqlite3..."
    sudo apt-get install -y sqlite3 2>/dev/null || warn "sqlite3 install failed  --  diagnostics will be limited"
fi

# curl was bootstrapped before the Node.js section; just report its version.
ok "curl $(curl --version | head -1 | awk '{print $2}')"

# zstd (required by Ollama installer for binary extraction)
if ! command -v zstd &>/dev/null; then
    _apt_install zstd zstd || { err "zstd required for Ollama install but could not be installed"; exit 1; }
fi
ok "zstd $(zstd --version | head -1 | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1)"

# glibc version check  --  must pass before we attempt any Node.js install.
# NOTE(review): Node.js is actually installed EARLIER in this script (the
# "Node.js" section above) -- this check no longer runs first; confirm the
# intended ordering.
# Node.js v18+ prebuilt binaries require glibc 2.25+ (Ubuntu 18.04+).
# Parse: 'ldd (Ubuntu GLIBC 2.23-0ubuntu3) 2.23' -> minor=23
_glibc_str=$(ldd --version 2>/dev/null | head -1 || echo '')
_glibc_ver=$(echo "$_glibc_str" | awk '{print $NF}')
_glibc_major=$(echo "$_glibc_ver" | cut -d. -f1)
_glibc_minor=$(echo "$_glibc_ver" | cut -d. -f2)
# Default to 0 so a parse failure fails the version gate loudly below.
_glibc_major=${_glibc_major:-0}; _glibc_minor=${_glibc_minor:-0}
# Require glibc >= 2.25
if [ "$_glibc_major" -lt 2 ] 2>/dev/null || \
   { [ "$_glibc_major" -eq 2 ] 2>/dev/null && [ "$_glibc_minor" -lt 25 ] 2>/dev/null; }; then
    err "glibc $_glibc_ver detected  --  Node.js v18+ requires glibc 2.25+."
    err "This system cannot run RAGWeed. Minimum: Ubuntu 18.04 LTS."
    err ""
    err "To upgrade Ubuntu in-place:"
    err "  sudo apt-get update && sudo apt-get upgrade -y && sudo do-release-upgrade"
    err "  (then re-run this installer)"
    err ""
    err "Or install a fresh Ubuntu 20.04+ and re-run this installer."
    exit 1
fi
ok "glibc $_glibc_ver"

# unrtf (lightweight RTF-to-text converter)
if command -v unrtf &>/dev/null; then
    ok "unrtf $(unrtf --version 2>&1 | head -1 | grep -oE '[0-9]+\.[0-9.]+')"
else
    info "Installing unrtf..."
    sudo apt-get install -y unrtf || warn "unrtf install failed  --  RTF files will fall back to LibreOffice"
fi

# detex ships inside texlive-binaries -- check first, only install if missing
if command -v detex &>/dev/null; then
    ok "detex available"
else
    # Try without sudo first (root/container case), then with sudo; never fatal.
    apt-get install -y texlive-binaries 2>/dev/null || \
    sudo apt-get install -y texlive-binaries 2>/dev/null || true
    command -v detex &>/dev/null && ok "detex installed" || \
    warn "detex not available -- .tex files will fall back to plain text read"
fi

# ffmpeg (required for whisper-cli to process MP4/non-WAV audio)
if command -v ffmpeg &>/dev/null; then
    ok "ffmpeg $(ffmpeg -version 2>&1 | head -1 | grep -oE '[0-9]+\.[0-9]+' | head -1)"
else
    info "Installing ffmpeg..."
    sudo apt-get install -y ffmpeg || warn "ffmpeg install failed  --  MP4 transcription will not work"
    command -v ffmpeg &>/dev/null && ok "ffmpeg installed" || warn "ffmpeg not found after install attempt"
fi

# =============================================================================
# =============================================================================
# STEP 1b: whisper.cpp (C++ speech-to-text  --  no Python)
# =============================================================================
ph "STEP 1b: whisper.cpp"
# Build tree lives inside the project so it is snapshotted/removed with it.
_WHISPER_DIR="$PROJECT_DIR/whisper-cpp"
_WHISPER_BIN="$_WHISPER_DIR/build/bin/whisper-cli"
_WHISPER_MODEL="$_WHISPER_DIR/models/ggml-base.en.bin"
_WHISPER_MODEL_URL="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin"

# Migrate from old hidden location if present
if [ -d "$HOME/.whisper-cpp" ] && [ ! -d "$_WHISPER_DIR" ]; then
    mv "$HOME/.whisper-cpp" "$_WHISPER_DIR"
    ok "Migrated ~/.whisper-cpp -> $PROJECT_DIR/whisper-cpp"
fi

# Three tiers: system-wide binary, previously built local binary, build now.
if command -v whisper-cli &>/dev/null; then
    ok "whisper-cli already on PATH: $(which whisper-cli)"
elif [ -x "$_WHISPER_BIN" ]; then
    ok "whisper-cli already built: $_WHISPER_BIN"
    # Libraries are not installed system-wide; point the loader at the build tree.
    export PATH="$_WHISPER_DIR/build/bin:$PATH"
    export LD_LIBRARY_PATH="$_WHISPER_DIR/build/src:$_WHISPER_DIR/build/ggml/src${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
else
    info "Building whisper.cpp from source..."
    if ! command -v git &>/dev/null; then
        info "Installing git..."
        sudo apt-get install -y git || { warn "git not available  --  skipping whisper.cpp build"; }
    fi
    if ! command -v cmake &>/dev/null; then
        info "Installing cmake + build-essential..."
        sudo apt-get install -y cmake build-essential || { warn "cmake not available  --  skipping whisper.cpp build"; }
    fi
    if command -v git &>/dev/null && command -v cmake &>/dev/null; then
        mkdir -p "$_WHISPER_DIR"
        if [ ! -d "$_WHISPER_DIR/.git" ]; then
            git clone --depth 1 https://github.com/ggml-org/whisper.cpp "$_WHISPER_DIR" 2>&1 | tail -3
        else
            info "whisper.cpp source already cloned  --  skipping clone"
        fi
        # NOTE(review): with set -e and pipefail active, a clone/cmake failure
        # aborts the whole installer here instead of reaching the "build
        # failed" warn below -- confirm that hard-stop is intended.
        if [ -d "$_WHISPER_DIR" ]; then
            cmake -S "$_WHISPER_DIR" -B "$_WHISPER_DIR/build" -DCMAKE_BUILD_TYPE=Release 2>&1 | tail -2
            cmake --build "$_WHISPER_DIR/build" --config Release -j$(nproc) 2>&1 | tail -3
            if [ -x "$_WHISPER_BIN" ]; then
                ok "whisper-cli built: $_WHISPER_BIN"
                export PATH="$_WHISPER_DIR/build/bin:$PATH"
                export LD_LIBRARY_PATH="$_WHISPER_DIR/build/src:$_WHISPER_DIR/build/ggml/src${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
            else
                warn "whisper.cpp build failed  --  audio files will not be transcribed"
            fi
        fi
    else
        warn "Build tools unavailable  --  skipping whisper.cpp (audio files will not be transcribed)"
    fi
fi

# Download base.en model if not present
# (only attempted when some whisper-cli binary exists to use it)
if [ -x "$_WHISPER_BIN" ] || command -v whisper-cli &>/dev/null; then
    if [ -f "$_WHISPER_MODEL" ]; then
        ok "whisper model: $_WHISPER_MODEL"
    else
        info "Downloading whisper base.en model (~150MB)..."
        mkdir -p "$_WHISPER_DIR/models"
        if curl -L --progress-bar "$_WHISPER_MODEL_URL" -o "$_WHISPER_MODEL"; then
            ok "whisper model downloaded"
        else
            # Remove any partial download so the next run retries cleanly.
            warn "Model download failed  --  set WHISPER_MODEL= in Config to point to an existing ggml model"
            rm -f "$_WHISPER_MODEL" 2>/dev/null || true
        fi
    fi
fi

# =============================================================================
# STEP 3: Directory Structure
# =============================================================================
ph "STEP 3: Directory Structure"

# Create every runtime directory up front; mkdir -p is idempotent.
mkdir -p "$SCRIPTS_DIR" "$LOGS_DIR" "$CACHE_DIR" \
         "$PROJECT_DIR/history" "$PROJECT_DIR/source"
ok "directories ready"

# =============================================================================
# STEP 4: .env  --  create or preserve
# =============================================================================
ph "STEP 4: Configuration (.env) + hardware profile"

ENV_FILE="$PROJECT_DIR/Config"
# One-time migration: releases before the rename kept settings in .env.
if [ -f "$PROJECT_DIR/.env" ] && [ ! -f "$ENV_FILE" ]; then
    mv "$PROJECT_DIR/.env" "$ENV_FILE"
    ok "Migrated .env -> Config"
fi


_ram_kb=$(grep MemTotal /proc/meminfo 2>/dev/null | awk '{print $2}' || echo 4000000)
_ram_gb=$(( _ram_kb / 1024 / 1024 ))
_swap_kb=$(grep SwapTotal /proc/meminfo 2>/dev/null | awk '{print $2}' || echo 0)
_cores=$(nproc 2>/dev/null || echo 2)

if   [ "$_ram_gb" -ge 8 ]; then _profile="high"
elif [ "$_ram_gb" -ge 4 ]; then _profile="medium"
else                             _profile="low"
fi

case "$_profile" in
  high)   _cs_text=2048; _cs_pdf=1024; _cs_av=512;  _overlap=50; _etimeout=120; _ebackoff=2  ;;
  medium) _cs_text=1024; _cs_pdf=768;  _cs_av=384;  _overlap=50; _etimeout=180; _ebackoff=5  ;;
  low)    _cs_text=512;  _cs_pdf=512;  _cs_av=256;  _overlap=50;  _etimeout=300; _ebackoff=10 ;;
esac

ph "  Hardware: ${_ram_gb}GB RAM  ${_cores} cores  swap=$(( _swap_kb/1024 ))MB  profile=${_profile}"
ph "  Chunks:   text=${_cs_text}  pdf=${_cs_pdf}  av=${_cs_av}  overlap=${_overlap}  embed_timeout=${_etimeout}s"

# Fresh install: write Config from the profile-tuned template. The ENVEOF
# delimiter is intentionally UNquoted so ${_cs_*}/${_profile}/${_ram_gb}/...
# expand; everything else in the heredoc is written literally.
if [ ! -f "$ENV_FILE" ]; then
    info "Creating Config with hardware-tuned defaults (profile: $_profile)"
    cat > "$ENV_FILE" << ENVEOF
LLM_PROVIDER=claude
ANTHROPIC_API_KEY=
ANTHROPIC_MODEL=claude-sonnet-4-6
OPENAI_API_KEY=
OPENAI_MODEL=gpt-4o
GEMINI_API_KEY=
GEMINI_MODEL=gemini-2.5-flash
LOCAL_LLM_MODEL=
ANNOTATION_MODEL=
ANNOTATION_TIMEOUT_S=120
ANNOTATION_LOCAL_CONCURRENCY=1
ANNOTATION_TEST_THRESHOLD=95
ANNOTATION_TEST_STOP_AT_THRESHOLD=yes
ANNOTATION_TEST_FIT_ONLY=yes
OLLAMA_HOST=http://localhost:11434
EMBED_MODEL=nomic-embed-text
EMBED_OLLAMA_HOST=
CHROMA_PATH=./chromadb
SOURCE_DIR=./source
TOP_K=64
HNSW_EF=512
MIN_SCORE=0.0
MAX_TOKENS=4096
CONTEXT_CHUNKS=64
ACTIVE_COLLECTIONS=
DEBUG_LEVEL=0
# ── Annotation (per-query LLM relevance notes) ────────────────────────────────
# ANNOTATION_PROVIDER=   # defaults to LLM_PROVIDER if unset
# ANNOTATION_MODEL=      # defaults to active model if unset
ANNOTATION_CONCURRENCY=4
WEB_PORT=3000
WEB_PASSWORD=
# ── Ingest tuning (profile=${_profile}: ${_ram_gb}GB RAM, ${_cores} cores) ──────
CHUNK_SIZE=${_cs_text}
CHUNK_SIZE_PDF=${_cs_pdf}
CHUNK_SIZE_AV=${_cs_av}
CHUNK_OVERLAP_PCT=${_overlap}
EMBED_TIMEOUT_S=${_etimeout}
EMBED_BACKOFF_S=${_ebackoff}
INGEST_PROFILE=${_profile}
ENVEOF
    ok "Config created (profile: $_profile  RAM: ${_ram_gb}GB)"
else
    ok "Config exists  --  preserved"
    # Upgrade path: append only keys that are missing, never overwriting
    # values the user already set.
    # NOTE(review): TOP_K defaults to 8 here but the fresh-install template
    # above writes TOP_K=64 -- confirm which default is intended.
    declare -A _newkeys=(
        [CHUNK_SIZE_PDF]=${_cs_pdf}
        [CHUNK_SIZE_AV]=${_cs_av}
        [EMBED_TIMEOUT_S]=${_etimeout}
        [EMBED_BACKOFF_S]=${_ebackoff}
        [INGEST_PROFILE]=${_profile}
        [LLM_PROVIDER]=claude
        [ANTHROPIC_MODEL]=claude-sonnet-4-6
        [OPENAI_MODEL]=gpt-4o
        [GEMINI_MODEL]=gemini-2.5-flash
        [EMBED_MODEL]=nomic-embed-text
        [CHROMA_PATH]=./chromadb
        [SOURCE_DIR]=./source
        [TOP_K]=8
        [MIN_SCORE]=0.0
        [ACTIVE_COLLECTIONS]=
        [DEBUG_LEVEL]=0
        [CHUNK_SIZE]=${_cs_text}
        [CHUNK_OVERLAP_PCT]=${_overlap}
    )
    for key in "${!_newkeys[@]}"; do
        if ! grep -qE "^${key}=" "$ENV_FILE"; then
            echo "${key}=${_newkeys[$key]}" >> "$ENV_FILE"
            info "Added: $key=${_newkeys[$key]}"
        fi
    done
fi


# =============================================================================
# STEP 5: Ollama
# =============================================================================
ph "STEP 5: Ollama (local LLM + embeddings)"

OLLAMA_MIN="0.1.0"   # NOTE(review): appears unused in this installer -- no version gate below
_ollama_ok=0

# Detect or install the ollama binary.
if command -v ollama &>/dev/null; then
    _ollama_ver=$(ollama --version 2>/dev/null | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || echo "0.0.0")
    ok "ollama $_ollama_ver found"
    _ollama_ok=1
else
    info "Ollama not found  --  installing... (this may take a minute)"
    curl -fsSL https://ollama.ai/install.sh | sh
    if command -v ollama &>/dev/null; then
        ok "ollama installed"
        _ollama_ok=1
    else
        warn "ollama install failed  --  local LLM and local embeddings will not be available"
    fi
fi

if [ $_ollama_ok -eq 1 ]; then
    # Ensure ollama service is running: probe the local API, start if needed.
    if ! curl -s http://localhost:11434/api/tags &>/dev/null; then
        info "Starting ollama service..."
        ollama serve &>/dev/null &
        sleep 3
        if curl -s http://localhost:11434/api/tags &>/dev/null; then
            ok "ollama service started"
        else
            warn "ollama service may not be running  --  start with: ollama serve"
        fi
    else
        ok "ollama service running"
    fi

    # Check/pull embed model from Config
    EMBED_MODEL=$(env_get EMBED_MODEL || echo "nomic-embed-text")
    if [ -n "$EMBED_MODEL" ]; then
        if ollama list 2>/dev/null | grep -q "^${EMBED_MODEL}"; then
            ok "embed model '$EMBED_MODEL' available"
        else
            info "Pulling embed model '$EMBED_MODEL'... (this may take several minutes)"
            ollama pull "$EMBED_MODEL" && ok "embed model '$EMBED_MODEL' pulled" \
                || warn "Could not pull '$EMBED_MODEL'  --  embeddings will fail until available"
        fi
    fi

    # Scan existing chromadb collections for any additional embed models needed.
    # Collections are permanent  --  if they were ingested with a different model, we
    # must keep that model available. Pull any that are missing.
    _chroma_dir="${PROJECT_DIR}/chromadb"
    if [ -d "$_chroma_dir" ]; then
        # dim->model map (must match _DIM_TO_MODEL in query.js)
        _dim_to_model() {
            case "$1" in
                384)  echo "all-minilm" ;;
                768)  echo "nomic-embed-text" ;;
                1024) echo "mxbai-embed-large" ;;
                1536) echo "text-embedding-ada-002" ;;
                3072) echo "text-embedding-3-large" ;;
                *)    echo "" ;;
            esac
        }
        _needed_models=""
        for _meta in "$_chroma_dir"/*/index_meta.json; do
            [ -f "$_meta" ] || continue
            # Read the stored embedding dimensionality from each collection's metadata.
            _dim=$(node -e "try{const d=JSON.parse(require('fs').readFileSync('$_meta','utf8'));process.stdout.write(String(d.dimensionality||''));}catch(_){}" 2>/dev/null || echo "")
            [ -z "$_dim" ] && continue
            _model=$(_dim_to_model "$_dim")
            [ -z "$_model" ] && continue
            # Only add to needed list if not already covered by EMBED_MODEL
            if [ "$_model" != "$EMBED_MODEL" ]; then
                _needed_models="$_needed_models $_model"
            fi
        done
        # Deduplicate and pull any missing (grep -v returns 1 on empty  --  use set +e)
        set +e
        _needed_models=$(echo "$_needed_models" | tr ' ' '\n' | sort -u | grep -v '^$')
        set -e
        if [ -n "$_needed_models" ]; then
            info "Existing collections need additional embed models: $( echo $_needed_models | tr '\n' ' ')"
            # BUGFIX: read -r so backslashes in a line are taken literally
            # rather than interpreted as escape characters.
            echo "$_needed_models" | while read -r _m; do
                if ollama list 2>/dev/null | grep -q "^${_m}"; then
                    ok "  $_m already available"
                else
                    info "  Pulling $_m (needed by existing collection)..."
                    ollama pull "$_m" && ok "  $_m pulled" \
                        || warn "  Could not pull '$_m'  --  queries against collections using this model will fail"
                fi
            done
        fi
    fi
fi

# =============================================================================
# STEP 6: npm packages
# =============================================================================
ph "STEP 6: npm packages"

# package.json is (re)written verbatim on every install; the quoted heredoc
# delimiter prevents any shell expansion inside the JSON.
cat > "$PROJECT_DIR/package.json" << 'RAGWEED-PKG-v1.0.102-20260319-000014-473'
{
  "name": "ragweed",
  "version": "1.0.102",
  "description": "RAGWeed  --  direct HNSW/SQLite retrieval, no ChromaDB",
  "type": "module",
  "main": "scripts/query.js",
  "scripts": {
    "query": "node scripts/query.js",
    "web":   "node scripts/web.js"
  },
  "dependencies": {
    "better-sqlite3":       "11.7.0",
    "@anthropic-ai/sdk":    "0.30.0",
    "openai":               "4.52.0",
    "@google/genai":        "0.7.0",
    "dotenv":               "16.4.5",
    "pdf-parse":            "1.1.1",
    "mammoth":              "1.8.0",
    "adm-zip":              "0.5.16"
  }
}
RAGWEED-PKG-v1.0.102-20260319-000014-473

# better-sqlite3 v11+ ships prebuilt binaries for Node 18-24  --  no compilation.
# Other packages are pure JS. npm install should complete in under 30 seconds.
info "Running npm install... (should be fast  --  prebuilt binaries available)"
cd "$PROJECT_DIR"
# If SSL certs are broken, disable strict-ssl for npm too. _CURL_INSECURE is
# set only on the nvm install path, hence the :-0 default here.
# NOTE(review): the message below says "STEP 2" but no step carries that
# label in this file -- detection happens in the Node.js/nvm section above.
if [ "${_CURL_INSECURE:-0}" = "1" ]; then
    npm config set strict-ssl false 2>/dev/null || true
    info "npm strict-ssl disabled (SSL cert issue detected in STEP 2)"
fi

# Helper: run npm install in background with a live 1-second ticker.
# Runs with errexit disabled internally  --  kill -0 and wait both return
# non-zero on a finished/failed process, which would abort the script.
# Returns npm's exit status; output is captured in logs/npm-install.log.
_run_npm_with_spinner() {
    set +e
    mkdir -p "$LOGS_DIR"
    rm -f "$LOGS_DIR/npm-install.log"
    npm install --foreground-scripts > "$LOGS_DIR/npm-install.log" 2>&1 &
    local _pid=$! _secs=0 _last=""
    while kill -0 $_pid 2>/dev/null; do
        sleep 1
        _secs=$((_secs + 1))
        # Last interesting log line, trimmed to fit the 70-column ticker.
        _last=$(grep -Ev "^(npm warn deprecated|npm notice)" "$LOGS_DIR/npm-install.log" 2>/dev/null \
                | grep -v "^$" | tail -1 \
                | sed "s|$PROJECT_DIR/||g" | cut -c1-70)
        printf "\r   ...  %3ds  %-70s" "$_secs" "$_last"
    done
    printf "\n"
    wait $_pid
    local _exit=$?
    # BUGFIX: restore errexit before returning  --  previously the function
    # left `set +e` active, silently disabling strict mode for the remainder
    # of the installer after its first call.
    set -e
    return $_exit
}

# Run npm install; on failure show the log tail and retry exactly once.
_npm_first_ok=0
if _run_npm_with_spinner; then _npm_first_ok=1; fi
if [ "$_npm_first_ok" -eq 0 ]; then
    warn "npm install failed  --  last 20 lines:"
    tail -20 "$LOGS_DIR/npm-install.log" | sed "s/^/    /"
    warn "Retrying once (this may take a few minutes)..."
    if ! _run_npm_with_spinner; then
        err "npm install failed on retry  --  check $LOGS_DIR/npm-install.log"
        exit 1
    fi
fi

# Verify better-sqlite3 native binding is actually loadable.
# node-v137 = Node 24 ABI. better-sqlite3 v11+ ships prebuilt binaries for
# Node 24 so no compilation should occur. If binding still missing, reinstall.
_node_abi=$(node -e "process.stdout.write(process.versions.modules)" 2>/dev/null || echo "0")
info "Node ABI: $_node_abi  (Node 24 = 137)"
_sqlite3_ok=0
# errexit off: the heredoc'd node process exits 1 when the binding is missing,
# and we want that recorded as _sqlite3_ok=0, not an installer abort.
set +e
node --input-type=module << 'BINDCHECK' && _sqlite3_ok=1
import { createRequire } from 'module';
const r = createRequire(import.meta.url);
try { r('better-sqlite3'); process.stdout.write("  ok  better-sqlite3 binding loadable\n"); process.exit(0); }
catch(e) { process.stdout.write("  !!  better-sqlite3 binding missing: " + e.message.split("\n")[0] + "\n"); process.exit(1); }
BINDCHECK
set -e

# If the binding failed to load, reinstall better-sqlite3 from scratch and
# re-verify. The re-verification is now fatal on failure: previously its exit
# status was silently discarded by the set +e / set -e pair, so a still-broken
# binding let the installer continue and report success.
if [ "$_sqlite3_ok" -eq 0 ]; then
    info "better-sqlite3 binding missing  --  reinstalling clean (prebuilt binary will be downloaded)..."
    cd "$PROJECT_DIR"
    # Remove the broken install entirely  --  npm rebuild cannot fix a missing binary
    rm -rf node_modules/better-sqlite3
    # Reinstall just this package from scratch with full native build
    set +e
    npm install better-sqlite3 --foreground-scripts > "$LOGS_DIR/npm-install.log" 2>&1 &
    _npm_pid=$! _npm_secs=0
    while kill -0 "$_npm_pid" 2>/dev/null; do
        sleep 1
        _npm_secs=$((_npm_secs + 1))
        _last=$(grep -Ev "^(npm warn|npm notice)" "$LOGS_DIR/npm-install.log" 2>/dev/null | grep -v "^$" | tail -1 | cut -c1-70)
        printf "\r   ...  %3ds  %-70s" "$_npm_secs" "$_last"
    done
    printf "\n"
    wait "$_npm_pid"; _rebuild_exit=$?
    set -e
    if [ "$_rebuild_exit" -ne 0 ]; then
        warn "npm install better-sqlite3 failed  --  check $LOGS_DIR/npm-install.log:"
        tail -20 "$LOGS_DIR/npm-install.log" | sed "s/^/    /"
        err "better-sqlite3 could not be built for Node $(node --version)  --  cannot continue"; exit 1
    fi
    # Verify the binding is now present; abort the install if it still is not.
    set +e
    node --input-type=module << 'BINDCHECK2'
import { createRequire } from 'module';
const r = createRequire(import.meta.url);
try { r('better-sqlite3'); process.stdout.write("  ok  better-sqlite3 binding loadable after reinstall\n"); process.exit(0); }
catch(e) { process.stdout.write("  XX  better-sqlite3 still broken: " + e.message.split("\n")[0] + "\n"); process.exit(1); }
BINDCHECK2
    _verify_exit=$?
    set -e
    if [ "$_verify_exit" -ne 0 ]; then
        err "better-sqlite3 binding still not loadable after reinstall  --  check $LOGS_DIR/npm-install.log"
        exit 1
    fi
fi
ok "npm packages installed"

# pdf-parse v1.1.1 ships test PDF fixtures that cause a ~10s delay on first require.
# Delete them now so startup is fast.
rm -rf "$PROJECT_DIR/node_modules/pdf-parse/test" 2>/dev/null || true
ok "pdf-parse test fixtures removed (startup speed fix)"

# Verify other critical packages load. Best-effort: previously the node
# process's exit status was silently discarded; now we at least warn on
# failure (non-fatal, to preserve the original best-effort intent).
set +e
node --input-type=module << 'NODECHECK'
import { createRequire } from 'module';
const r = createRequire(import.meta.url);
let ok = true;
for (const pkg of ['dotenv']) {
    try { r(pkg); process.stdout.write(`  ✓  ${pkg}\n`); }
    catch(e) { process.stdout.write(`  ✗  ${pkg}: ${e.message}\n`); ok = false; }
}
process.exit(ok ? 0 : 1);
NODECHECK
_nodecheck_exit=$?
set -e
if [ "$_nodecheck_exit" -ne 0 ]; then
    warn "some npm packages failed to load  --  see output above"
fi


# =============================================================================
# STEP 7: Write scripts/pickle.js (shared pickle parser utility)
# =============================================================================
ph "STEP 7: Writing scripts/pickle.js"

cat > "$SCRIPTS_DIR/pickle.js" << 'RAGWEED-PCK-v1.0.102-20260319-000014-473'
// RAGWeed -- collection metadata reader (replaces pickle parser)
// Reads index_meta.json and derives dimensionality from binary file size.
// label_to_id is no longer needed -- labels are 1-based embeddings.id integers.
import fs   from 'fs';
import path from 'path';

// Derive dimensionality from data_level0.bin file size and element count.
// The file is assumed to hold one fixed-size record per element: 128 bytes of
// link data plus a 12- or 8-byte header plus dim float32 values (4 bytes
// each). NOTE(review): these layout constants are inferred from the math
// below -- confirm against the hnswlib level-0 serialization format.
// Returns defaultDim when the size does not divide evenly, when no plausible
// dimension (64..8192) results, or on any fs error.
function dimFromBinary(segDir, defaultDim, elementCount) {
    try {
        const fileSize = fs.statSync(path.join(segDir, 'data_level0.bin')).size;
        if (elementCount && elementCount > 0) {
            const spe = fileSize / elementCount;  // bytes per element
            if (Number.isInteger(spe)) {
                // Probe the 12-byte-header layout first, then the 8-byte one.
                const dim12 = (spe - 128 - 12) / 4;
                if (Number.isInteger(dim12) && dim12 >= 64 && dim12 <= 8192) return dim12;
                const dim8 = (spe - 128 - 8) / 4;
                if (Number.isInteger(dim8) && dim8 >= 64 && dim8 <= 8192) return dim8;
            }
        }
    } catch(_) {}
    return defaultDim;
}

// Read collection metadata from index_meta.json.
// Returns object with: dimensionality, total_elements, name, embed_model,
//   hnsw_entry_point, hnsw_max_layer, M, ef_construction.
// label_to_id is always {} -- no longer used (labels = 1-based embeddings.id).
// Missing or corrupt index_meta.json is tolerated: total_elements and
// dimensionality are derived from the binary file size where possible.
export function readPickleMeta(segDir, defaultDim) {
    const metaFile = path.join(segDir, 'index_meta.json');
    let existing = {};
    if (fs.existsSync(metaFile)) {
        try { existing = JSON.parse(fs.readFileSync(metaFile, 'utf8')); } catch(_) {}
    }

    // Derive total_elements from binary file size (most reliable)
    // Record sizes probed: 132 + dim*4 + 8 and 132 + dim*4 + 12 bytes.
    // NOTE(review): spe12 (= 144 + dim*4) matches neither layout probed by
    // dimFromBinary (136/140 + dim*4) -- verify the 132-byte base against
    // the actual serialization format; the two functions should agree.
    if (!existing.total_elements || existing.total_elements === 0) {
        try {
            const fsize = fs.statSync(path.join(segDir, 'data_level0.bin')).size;
            const dim   = existing.dimensionality || defaultDim;
            const spe8  = 132 + dim * 4 + 8;
            const spe12 = 132 + dim * 4 + 12;
            if (fsize % spe8  === 0) existing.total_elements = fsize / spe8;
            else if (fsize % spe12 === 0) existing.total_elements = fsize / spe12;
        } catch(_) {}
    }

    if (!existing.dimensionality) {
        existing.dimensionality = dimFromBinary(segDir, defaultDim, existing.total_elements || 0);
    }

    // Cleared unconditionally: downstream code must not rely on these maps.
    existing.label_to_id = {};
    existing.id_to_label = {};
    return existing;
}
RAGWEED-PCK-v1.0.102-20260319-000014-473
ok "scripts/pickle.js written"

# =============================================================================
# STEP 7c: Writing scripts/collections.js (shared collection resolution library)
# =============================================================================
ph "STEP 7c: Writing scripts/collections.js"

cat > "$SCRIPTS_DIR/collections.js" << 'RAGWEED-COL-v1.0.102-20260319-000014-473'
#!/usr/bin/env node
// VERSION: 1.0.102
const VERSION = '1.0.102';
// RAGWeed v10 -- Shared collection name resolution and segDir mapping.
// Single source of truth for all components: query.js, web.js, webc.js, step7b-init.mjs
// RULE: READ-ONLY. Never writes index_meta.json, collection_names.json, or any other file.
'use strict';
import fs   from 'fs';
import path from 'path';
import { createRequire } from 'module';
import { fileURLToPath } from 'url';
const require    = createRequire(import.meta.url);
const __filename = fileURLToPath(import.meta.url);
const __dirname  = path.dirname(__filename);

// ── WAL scanner  --  finds names even when SQLite body is malformed ──────────────
// Scans a SQLite WAL file for pages mentioning any of the given segment
// UUIDs, harvests printable-ASCII strings from those pages, and picks the
// first identifier-shaped string as that segment's collection name.
// WAL layout: 32-byte file header with the page size as a big-endian u32 at
// offset 8, followed by frames of (24-byte frame header + one page).
// Heuristic and best-effort: any error returns whatever was resolved so far.
export function scanWalForNames(walPath, segUuids) {
    const result = {};  // uuid -> first plausible collection name found
    try {
        const walBuf = fs.readFileSync(walPath);
        if (walBuf.length < 32) return result;  // too small for a WAL header
        const pageSize = walBuf.readUInt32BE(8) || 4096;
        const uuidBufs = segUuids.map(u => ({ uuid: u, buf: Buffer.from(u, 'utf8') }));
        let off = 32;  // skip the WAL file header
        while (off + 24 + pageSize <= walBuf.length) {
            const page = walBuf.slice(off + 24, off + 24 + pageSize);  // skip frame header
            for (const { uuid, buf } of uuidBufs) {
                if (result[uuid]) continue;         // already resolved
                const idx = page.indexOf(buf);
                if (idx < 0) continue;              // page does not mention this uuid
                // Collect all printable-ASCII runs (>= 2 chars) on the page
                const strings = [];
                let run = '';
                for (let b = 0; b < page.length; b++) {
                    const c = page[b];
                    if (c >= 0x20 && c <= 0x7e) { run += String.fromCharCode(c); }
                    else { if (run.length >= 2) strings.push(run); run = ''; }
                }
                if (run.length >= 2) strings.push(run);
                // Reject SQL schema noise, UUID-shaped strings, and pure
                // numbers; accept the first identifier-shaped candidate.
                const sqlKeywords = new Set(['CREATE','TABLE','INSERT','SELECT','INDEX','UNIQUE',
                    'PRIMARY','KEY','TEXT','INTEGER','REAL','BLOB','NULL','NOT','DEFAULT',
                    'VECTOR','METADATA','HNSWConfigurationInternal','topic','SegmentScope']);
                for (const s of strings) {
                    const t = s.trim();
                    if (t.length < 2 || t.length > 48) continue;
                    if (/^[0-9a-f]{8}-[0-9a-f]{4}-/.test(t)) continue;
                    if (sqlKeywords.has(t)) continue;
                    if (/^[0-9]+$/.test(t)) continue;
                    if (/^[a-zA-Z][a-zA-Z0-9._\-]{0,31}$/.test(t)) { result[uuid] = t; break; }
                }
            }
            off += 24 + pageSize;
        }
    } catch(_) {}
    return result;
}

// ── resolveNames  --  builds uuid→name map from all sources (READ-ONLY) ─────────
// Priority order:
//   1. index_meta.json name field (written by v10.8.13 from healthy SQLite)
//   2. collection_names.json
//   3. COLLECTION_NAME_MAP env var
//   4. SQLite bulk query (collections JOIN segments)
//   5. SQLite per-UUID fallback (for malformed DB)
//   6. WAL scan
// Does NOT write any files.
// Params: chromaDir - chroma data directory; envPath - optional Config path
//         (defaults to <chromaDir>/../Config).
// Returns: { [segmentUuid]: collectionName }
export function resolveNames(chromaDir, envPath) {
    const nameMap = {}; // uuid → name

    // Source 1: index_meta.json name fields (highest priority)
    try {
        for (const entry of fs.readdirSync(chromaDir, { withFileTypes: true })) {
            if (!entry.isDirectory()) continue;
            const mf = path.join(chromaDir, entry.name, 'index_meta.json');
            if (!fs.existsSync(mf)) continue;
            try {
                const m = JSON.parse(fs.readFileSync(mf, 'utf8'));
                if (m.name) nameMap[entry.name] = m.name;
            } catch(_) {}
        }
    } catch(_) {}

    // Source 2: collection_names.json
    try {
        const nf = path.join(chromaDir, 'collection_names.json');
        if (fs.existsSync(nf)) {
            const saved = JSON.parse(fs.readFileSync(nf, 'utf8'));
            for (const [k, v] of Object.entries(saved))
                if (v && typeof v === 'string' && !nameMap[k]) nameMap[k] = v;
        }
    } catch(_) {}

    // Source 3: COLLECTION_NAME_MAP env var  (format: "uuid=name,uuid=name")
    try {
        const ep = envPath || path.join(path.resolve(chromaDir, '..'), 'Config');
        if (fs.existsSync(ep)) {
            const line = fs.readFileSync(ep, 'utf8').split('\n')
                .find(l => l.startsWith('COLLECTION_NAME_MAP='));
            if (line) {
                for (const pair of line.slice('COLLECTION_NAME_MAP='.length).split(',')) {
                    const eq = pair.indexOf('=');
                    if (eq < 0) continue;
                    const uuid = pair.slice(0, eq).trim(), name = pair.slice(eq + 1).trim();
                    if (uuid && name && !nameMap[uuid]) nameMap[uuid] = name;
                }
            }
        }
    } catch(_) {}

    // Sources 4+5: SQLite  --  bulk first (both plural and old Python singular table names),
    // then per-UUID fallback for malformed DB
    const dbPath = path.join(chromaDir, 'chroma.sqlite3');
    if (fs.existsSync(dbPath)) {
        const Database = require('better-sqlite3');
        let db = null;
        try {
            // Prefer read-write (lets us checkpoint the WAL into the main DB);
            // fall back to read-only if the file is locked or write-protected.
            try { db = new Database(dbPath, { fileMustExist: true }); db.pragma('wal_checkpoint(TRUNCATE)'); }
            catch(_) { try { db = new Database(dbPath, { readonly: true, fileMustExist: true }); } catch(_2) {} }
            if (db) {
                let bulkOk = false;
                // Try plural table names (JS-written) then old Python singular table names
                for (const sql of [
                    "SELECT c.name, s.id FROM collections c JOIN segments s ON s.collection=c.id WHERE s.scope='VECTOR'",
                    "SELECT c.name, s.id FROM collections c JOIN segments s ON s.collection=c.id",
                    "SELECT name, id FROM collections",
                    "SELECT c.name, s.id FROM collection c JOIN segment s ON s.collection=c.id WHERE s.scope='VECTOR'",
                    "SELECT c.name, s.id FROM collection c JOIN segment s ON s.collection=c.id",
                    "SELECT name, id FROM collection",
                ]) {
                    try {
                        const rows = db.prepare(sql).all();
                        for (const r of rows) if (r.id && r.name && !nameMap[r.id]) nameMap[r.id] = r.name;
                        if (rows.length > 0) { bulkOk = true; }
                    } catch(_) {}
                }
                if (!bulkOk) {
                    // Per-UUID fallback. Directory names are BOUND as parameters,
                    // never interpolated into the SQL text, so unusual characters
                    // in a directory name cannot break or inject into the query.
                    for (const entry of fs.readdirSync(chromaDir, { withFileTypes: true })) {
                        if (!entry.isDirectory() || nameMap[entry.name]) continue;
                        for (const sql of [
                            "SELECT c.name FROM collections c JOIN segments s ON s.collection=c.id WHERE s.id=? AND s.scope='VECTOR'",
                            "SELECT c.name FROM collections c JOIN segments s ON s.collection=c.id WHERE s.id=?",
                            "SELECT c.name FROM collection c JOIN segment s ON s.collection=c.id WHERE s.id=? AND s.scope='VECTOR'",
                            "SELECT c.name FROM collection c JOIN segment s ON s.collection=c.id WHERE s.id=?",
                        ]) {
                            try {
                                const r = db.prepare(sql).get(entry.name);
                                if (r && r.name) { nameMap[entry.name] = r.name; break; }
                            } catch(_) {}
                        }
                    }
                }
                db.close();
            }
        } catch(_) { try { db && db.close(); } catch(_2) {} }
    }

    // Source 6: WAL scan -- only for segment dirs still unresolved
    const walPath = path.join(chromaDir, 'chroma.sqlite3-wal');
    if (fs.existsSync(walPath)) {
        const unresolved = [];
        try {
            for (const e of fs.readdirSync(chromaDir, { withFileTypes: true }))
                if (e.isDirectory() && !nameMap[e.name]) unresolved.push(e.name);
        } catch(_) {}
        if (unresolved.length > 0) {
            const walNames = scanWalForNames(walPath, unresolved);
            for (const [k, v] of Object.entries(walNames)) if (!nameMap[k]) nameMap[k] = v;
        }
    }

    return nameMap;
}

// ── buildSegDirMap  --  name→{segDir,meta} map ───────────────────────────────────
// Used by query.js and web.js (via query.js) at runtime.
// Scans chromaDir once, resolves each segment directory (or symlink alias) to
// a collection name, and deduplicates when several directories map to the
// same name. Returns { [name]: { segDir, meta } }; meta.empty is set when the
// directory has no data_level0.bin.
export function buildSegDirMap(chromaDir, envPath) {
    const map = {};
    if (!fs.existsSync(chromaDir)) return map;
    const nameMap = resolveNames(chromaDir, envPath);
    for (const entry of fs.readdirSync(chromaDir, { withFileTypes: true })) {
        // Accept both real directories AND symlinks (JS ingest creates name symlinks)
        if (!entry.isDirectory() && !entry.isSymbolicLink()) continue;
        const segDir = path.join(chromaDir, entry.name);
        // Resolve symlinks: fs.existsSync follows symlinks automatically
        const hasIndex = fs.existsSync(path.join(segDir, 'data_level0.bin'));
        let meta = {};
        try {
            const mf = path.join(segDir, 'index_meta.json');
            if (fs.existsSync(mf)) meta = JSON.parse(fs.readFileSync(mf, 'utf8'));
        } catch(_) {}
        // Name: resolved map first (covers all sources), then directory name if not UUID-shaped
        const isUuidShaped = /^[0-9a-f]{8}/i.test(entry.name) && entry.name.length <= 36;
        const name = nameMap[entry.name] || (!isUuidShaped ? entry.name : null);
        if (!name) continue;  // unnamed UUID dir: skip entirely
        // Deduplicate: if this name already has an entry, keep the one with a BIN file.
        // For old Python collections there is no index_meta.json so label_to_id is never
        // populated  --  use hasIndex as the tiebreaker instead.
        if (!hasIndex) meta = { ...meta, empty: true };
        const existing = map[name];
        if (existing) {
            // Keep whichever has the actual index; skip symlinks that point to same data
            const existingHasIndex = !existing.meta.empty;
            if (existingHasIndex && !hasIndex) continue; // existing is better
            if (!existingHasIndex && hasIndex) { map[name] = { segDir, meta }; continue; }
            // Both have index: prefer the one with more labels (JS-ingested has index_meta)
            const lc = meta.label_to_id ? Object.keys(meta.label_to_id).length : 0;
            const le = existing.meta.label_to_id ? Object.keys(existing.meta.label_to_id).length : 0;
            if (lc > le) map[name] = { segDir, meta };
        } else {
            map[name] = { segDir, meta };
        }
    }
    return map;
}

// ── getCollectionNames  --  sorted array of known collection names ───────────────
// Primary source: every per-segment rag.sqlite3 'collections' table; falls
// back to a pure filesystem scan via buildSegDirMap when none yields names.
// Callers that also need JS-ingested collections (not yet in segmap) should
// merge with their own SQLite connection after calling this.
// Fixes over previous revision: better-sqlite3 is require()d once instead of
// per directory, and db.close() is now in a finally so a failing prepare()/
// all() cannot leak the database handle.
export function getCollectionNames(chromaDir, envPath) {
    if (!fs.existsSync(chromaDir)) return [];
    let Database = null;
    try { Database = require('better-sqlite3'); } catch(_) {}
    const names = new Set();
    if (Database) {
        try {
            for (const d of fs.readdirSync(chromaDir, { withFileTypes: true })) {
                if (!d.isDirectory()) continue;
                const ragPath = path.join(chromaDir, d.name, 'rag.sqlite3');
                if (!fs.existsSync(ragPath)) continue;
                try {
                    const db = new Database(ragPath, { readonly: true, fileMustExist: true });
                    try {
                        const rows = db.prepare('SELECT name FROM collections').all();
                        for (const r of rows) if (r.name) names.add(r.name);
                    } finally {
                        db.close();
                    }
                } catch(_) {}
            }
        } catch(_) {}
    }
    if (names.size) return Array.from(names).sort();
    // Fallback: filesystem scan via segmap
    return Object.keys(buildSegDirMap(chromaDir, envPath)).sort();
}
RAGWEED-COL-v1.0.102-20260319-000014-473
ok "scripts/collections.js written"

# =============================================================================
# STEP 7b: Pre-populate index_meta.json for all existing collections
# =============================================================================
ph "STEP 7b: Pre-populating index_meta.json (label_to_id) for all collections"

# Write STEP 7b as a real file so dynamic import of pickle.js resolves correctly.
# node --input-type=module runs in [eval] context  --  import.meta.url is undefined
# and dynamic imports of local paths fail. A real .mjs file has a real URL.
cat > "$PROJECT_DIR/step7b-init.mjs" << 'RAGWEED-S7B-v1.0.102-20260319-000014-473'
// step7b-init.mjs -- install-time sanity pass over existing collections.
// READ-ONLY: reports dim/element/label counts per segment; writes nothing.
import fs   from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
const __dirname   = path.dirname(fileURLToPath(import.meta.url));
const PROJECT_DIR = path.resolve(__dirname);
const CHROMA_DIR  = process.env.CHROMA_DIR || path.join(PROJECT_DIR, 'chromadb');
const EMBED_MODEL = (process.env.EMBED_MODEL || 'nomic-embed-text').toLowerCase();
const SCRIPTS_DIR = path.join(PROJECT_DIR, 'scripts');

// Known embedding models → output dimensionality. Used as the default when a
// segment's dimensionality cannot be derived from its binary file.
const DIM_MAP = {
  'nomic-embed-text': 768, 'mxbai-embed-large': 1024, 'all-minilm': 384,
  'text-embedding-ada-002': 1536, 'text-embedding-3-small': 1536, 'text-embedding-3-large': 3072,
};
let defaultDim = 768;
// Substring match so versioned model names (e.g. "nomic-embed-text:v1.5") resolve.
for (const [k, v] of Object.entries(DIM_MAP)) { if (EMBED_MODEL.includes(k)) { defaultDim = v; break; } }

if (!fs.existsSync(CHROMA_DIR)) { console.log('  i  No chromadb directory found -- skipping'); process.exit(0); }

// Delegated to collections.js and pickle.js -- single source of truth.
// Dynamic import with absolute paths: this file lives in PROJECT_DIR while
// the helpers live in SCRIPTS_DIR.
const { readPickleMeta }     = await import(path.join(SCRIPTS_DIR, 'pickle.js'));
const { resolveNames }       = await import(path.join(SCRIPTS_DIR, 'collections.js'));
const nameMap = resolveNames(CHROMA_DIR);
console.log('  i  Resolved names: ' + Object.values(nameMap).sort().join(', '));

// -- Process each segment directory (READ-ONLY -- never writes index_meta.json) -
let processed = 0, skipped = 0, failed = 0;
for (const entry of fs.readdirSync(CHROMA_DIR, { withFileTypes: true })) {
  if (!entry.isDirectory()) continue;
  const segDir  = path.join(CHROMA_DIR, entry.name);
  const binFile = path.join(segDir, 'data_level0.bin');
  if (!fs.existsSync(binFile)) continue;  // no index yet: nothing to report

  // RULE: Never write index_meta.json. Read name from existing file only.
  try {
    const meta = readPickleMeta(segDir, defaultDim);
    const labelCount = Object.keys(meta.label_to_id || {}).length;
    // Fall back to the first 8 chars of the segment UUID when unnamed
    const displayName = nameMap[entry.name] || entry.name.slice(0, 8);
    if (labelCount > 0) {
      console.log('  ok  ' + displayName + ': dim=' + meta.dimensionality + ' elements=' + meta.total_elements + ' labels=' + labelCount);
      processed++;
    } else {
      console.log('  i   ' + displayName + ': no pickle -- label_to_id built by ingest');
      skipped++;
    }
  } catch(e) {
    console.log('  !!  ' + entry.name.slice(0, 8) + ': ' + e.message);
    failed++;
  }
}
console.log('  i  ' + processed + ' collections populated, ' + skipped + ' JS-ingested (no pickle), ' + failed + ' failed');
RAGWEED-S7B-v1.0.102-20260319-000014-473

set +e
node "$PROJECT_DIR/step7b-init.mjs"
set -e
rm -f "$PROJECT_DIR/step7b-init.mjs"

# =============================================================================
# STEP 8: Verify HNSW binary files
# =============================================================================
ph "STEP 8: Verifying HNSW index files"

# Walk every segment directory and check that both HNSW files exist.
# BUG FIX: the sidecar-name extraction used an ESM `import` statement under
# plain `node -e`, which evaluates CommonJS -- it always threw, the error was
# masked by 2>/dev/null, and cname stayed empty. Now uses require(), and the
# path is passed via the environment (not string interpolation) so paths
# containing quotes/backslashes cannot break the inline script.
_missing=0
_present=0
for d in "$CHROMA_DIR"/*/; do
    [ -d "$d" ] || continue
    [ -L "${d%/}" ] && continue   # skip symlinks  --  they are human-readable aliases only
    uuid=$(basename "$d")
    # Get collection name from JSON sidecar if present
    cname=""
    if [ -f "$d/index_meta.json" ]; then
        cname=$(RAGWEED_META_FILE="$d/index_meta.json" node -e '
            const fs = require("fs");
            try {
                const m = JSON.parse(fs.readFileSync(process.env.RAGWEED_META_FILE, "utf8"));
                process.stdout.write(m.name || "");
            } catch (e) {}
        ' 2>/dev/null || echo "")
    fi
    label="${cname:-${uuid:0:8}}"
    missing_files=""
    for f in data_level0.bin length.bin; do
        [ -f "$d/$f" ] || missing_files="$missing_files $f"
    done
    if [ -z "$missing_files" ]; then
        ok "$label  --  HNSW files present"
        _present=$((_present+1))
    else
        warn "$label  --  missing:$missing_files"
        _missing=$((_missing+1))
    fi
done
[ "$_missing" -eq 0 ] && ok "All $_present collections have complete HNSW indexes" \
    || warn "$_missing collection(s) have incomplete indexes  --  they will be skipped at runtime"

# =============================================================================
# STEP 9: Write scripts/query.js
# =============================================================================
ph "STEP 9: Writing scripts/query.js"

cat > "$SCRIPTS_DIR/query.js" << 'RAGWEED-QRY-v1.0.102-20260319-000014-473'
#!/usr/bin/env node
// VERSION: 1.0.102
const VERSION = '1.0.102';
// RAGWeed v10 -- TUI query engine
// Direct HNSW + SQLite retrieval. No ChromaDB. No Python at runtime.
// TUI: raw terminal writes via process.stdout, readline -- works over SSH.
'use strict';
import 'dotenv/config';
import fs   from 'fs';
import path from 'path';
import os   from 'os';
import readline from 'readline';
import { createRequire } from 'module';
import { fileURLToPath } from 'url';
const require   = createRequire(import.meta.url);
const __filename = fileURLToPath(import.meta.url);
const __dirname  = path.dirname(__filename);


// ── LLM input context window sizes (tokens) ─────────────────────────────────
// Used to greedily pack as many chunks as fit without overflowing the context.
// Conservative figures  --  actual limits are often higher but headroom is good.
const _MODEL_CTX = {
    // Claude
    'claude-opus-4-6':         180000,
    'claude-sonnet-4-6':       180000,
    'claude-haiku-4-5':        180000,
    'claude-3-5-sonnet':       180000,
    'claude-3-5-haiku':        180000,
    'claude-3-opus':           180000,
    // OpenAI
    'gpt-4o':                  120000,
    'gpt-4o-mini':             120000,
    'gpt-4-turbo':             120000,
    'o1':                      120000,
    'o3-mini':                 120000,
    // Gemini
    'gemini-2.5-pro':          900000,
    'gemini-2.5-flash':        900000,
    'gemini-2.0-flash':        900000,
    'gemini-1.5-pro':          900000,
};
// Rough token estimator: 4 chars per token is accurate enough for budget math
function _estTokens(s) { return Math.ceil((s || '').length / 4); }
// Look up the context window (tokens) for a model name. Substring match lets
// versioned names (e.g. "gpt-4o-2024-05-13") resolve to their base entry.
// NOTE(review): very short keys like 'o1' can false-match unrelated names
// that merely contain "o1"; insertion order of _MODEL_CTX decides ties.
function _modelCtx(modelName) {
    if (!modelName) return 100000;
    const mn = modelName.toLowerCase();
    // includes() subsumes the former redundant startsWith() check
    for (const [k, v] of Object.entries(_MODEL_CTX))
        if (mn.includes(k)) return v;
    return 100000;  // safe default for unknown models
}

// annotateAndFilter: shared annotation+filter logic ─────────────────────────────────────────────
// Annotates every node against `query` with up to `concurrency` parallel
// annotateNodes calls, then drops nodes whose annotation marks them IRRELEVANT.
// Params:
//   query        - user query string forwarded to annotateNodes
//   nodes        - array of retrieved nodes
//   llmInfo      - LLM handle/config; annotation is skipped when llmInfo.error is set
//   concurrency  - max simultaneous annotateNodes calls
//   onAnnotation - optional (i, text, irrelevant) callback per result (for SSE)
//   signal       - optional AbortSignal; aborting stops scheduling new calls
//   shouldStop   - optional () => bool; scheduling pauses while it returns true
// Returns { annotations, filteredNodes, filteredIndices, blocked }
//   blocked is true only when EVERY annotation call failed.
// onAnnotation(i, text, irrelevant) called as each annotation arrives (for SSE)
async function annotateAndFilter(query, nodes, llmInfo, concurrency, onAnnotation, signal, shouldStop) {
    // Filter on IRRELEVANT keyword (normalized: strip punctuation/whitespace/case) or very short response
    const _normalize = t => t.trim().replace(/[^a-zA-Z]/g, '').toUpperCase();
    const _isIrrel = ann => {
        if (!ann) return false;
        const _t = ann.trim();
        // Anything under 10 chars is treated as a non-answer and filtered out
        if (_t.length < 10) return true;
        // Normalize away punctuation/spaces/case -- catches "IRRELEVANT.", "Irrelevant!" etc.
        return _normalize(_t) === 'IRRELEVANT';
    };
    const annotations = new Array(nodes.length).fill('');
    if (!llmInfo.error) {
        let _failed = 0;
        // Hand-rolled concurrency pool: next() tops the pool up to `concurrency`
        // in-flight calls; each completion decrements `active` and re-enters next().
        await new Promise(resolve => {
            let active = 0, idx = 0;
            function next() {
                if (signal?.aborted) { resolve(); return; }
                if (shouldStop?.()) {
                    // Paused: poll every 50ms until aborted or resumed
                    const iv = setInterval(() => {
                        if (signal?.aborted) { clearInterval(iv); resolve(); return; }
                        if (!shouldStop()) { clearInterval(iv); next(); }
                    }, 50);
                    return;
                }
                while (active < concurrency && idx < nodes.length) {
                    if (signal?.aborted) { resolve(); return; }
                    if (shouldStop?.()) {
                        // Active calls still running -- they will call next() when done, which will hit the top-of-next check above
                        return;
                    }
                    const i = idx++; active++;
                    annotateNodes(query, [nodes[i]], llmInfo, 1).then(arr => {
                        const text = arr[0] || '';
                        annotations[i] = text;
                        if (onAnnotation) onAnnotation(i, text, _isIrrel(text));
                        active--; next();
                    }).catch(() => { _failed++; active--; next(); });
                }
                // Pool drained with nothing in flight -> all work done
                if (active === 0) resolve();
            }
            next();
        });
        // Every call failed: report blocked so callers can fall back rather
        // than silently filtering everything out
        if (_failed === nodes.length) return { annotations, filteredNodes: [], filteredIndices: new Set(), blocked: true };
    }
    const filteredIndices = new Set();
    nodes.forEach((n, i) => { if (_isIrrel(annotations[i])) filteredIndices.add(i); });
    const filteredNodes = nodes.filter((n, i) => !filteredIndices.has(i));
    return { annotations, filteredNodes, filteredIndices, blocked: false };
}

export { VERSION, cfg, cfgInt, logErr, dbg, loadCost, saveCost, costStr,
         loadCollection, getAllCollectionNames, parseActiveCollections, invalidateSegDirMap,
         queryCollections, retrieveNodes, getLLM, formatSources, annotateNodes, annotateAndFilter,
         historySaveEntry, historyUpdateEntry, historySaveIndex, historyLoadIndex, historyLoadEntry, historyEntryPath,
         synthesizeCited };
const PROJECT_DIR = path.resolve(path.dirname(new URL(import.meta.url).pathname), '..');
import { readPickleMeta } from './pickle.js';

// -- Config --------------------------------------------------------------------
function cfg(key, def='') {
    const v = process.env[key]; return (v !== undefined && v !== '') ? v : def;
}
// cfgLive: like cfg() but also checks the .env file directly if process.env is empty.
// If a value is found in Config but not in process.env (e.g. saved by webc after startup),
// it is promoted into process.env via cfgSet so subsequent calls find it immediately.
// NOTE(review): `key` is inserted into a RegExp unescaped -- safe only while
// callers pass plain env-var names (no regex metacharacters); confirm callers.
function cfgLive(key, def='') {
    const inProc = process.env[key];
    if (inProc) return inProc;
    try {
        const envFile = path.join(PROJECT_DIR, 'Config');
        if (fs.existsSync(envFile)) {
            // Multiline match: find the "KEY=value" line anywhere in the file
            const m = fs.readFileSync(envFile, 'utf8')
                        .match(new RegExp('^' + key + '=(.*)$', 'm'));
            if (m) {
                // Strip optional surrounding quotes before promoting the value
                const val = m[1].replace(/^["']|["']$/g, '').trim();
                if (val) { cfgSet(key, val); return val; }
            }
        }
    } catch(_) {}
    return def;
}
// cfgSet: set a config value in-process AND persist it to the Config file
// (creating the file if missing). Errors are logged, never thrown.
// NOTE(review): `key` is inserted into a RegExp unescaped -- safe only while
// keys are plain env-var names.
function cfgSet(key, value) {
    // Update in-process env so subsequent cfg()/cfgLive() calls see it
    process.env[key] = value;
    // Persist to Config file
    try {
        const envFile = path.join(PROJECT_DIR, 'Config');
        let content = fs.existsSync(envFile) ? fs.readFileSync(envFile, 'utf8') : '';
        const re = new RegExp(`^${key}=.*$`, 'm');
        if (re.test(content)) {
            // Replacer FUNCTION, not a string: a plain replacement string would
            // expand special patterns ($&, $1, $') occurring in the value and
            // silently corrupt the Config file.
            content = content.replace(re, () => `${key}=${value}`);
        } else {
            content += `
${key}=${value}`;
        }
        fs.writeFileSync(envFile, content);
    } catch(e) { logErr(`cfgSet: ${e.message}`); }
}
// cfgInt/cfgFloat: numeric config readers. Unparsable values fall back to
// def via an explicit NaN check -- the previous `|| def` form also treated a
// legitimate explicit 0 as "unset" and returned the default.
function cfgInt(key, def=0) { const n = parseInt(cfg(key, String(def)), 10); return Number.isNaN(n) ? def : n; }
function cfgFloat(key, def=0) { const n = parseFloat(cfg(key, String(def))); return Number.isNaN(n) ? def : n; }

const DEBUG_LEVEL  = cfgInt('DEBUG_LEVEL', 0);
const TOP_K        = cfgInt('TOP_K', 8);
const HNSW_EF      = cfgInt('HNSW_EF', 512);
const MIN_SCORE    = cfgFloat('MIN_SCORE', 0.0);
const CHROMA_PATH  = path.resolve(PROJECT_DIR, cfg('CHROMA_PATH', './chromadb'));
const HISTORY_DIR  = path.join(PROJECT_DIR, 'history');
const LOGS_DIR     = path.join(PROJECT_DIR, 'logs');
const COST_FILE    = path.join(PROJECT_DIR, 'cache', 'cost.json');
const CACHE_DIR    = path.join(PROJECT_DIR, 'cache');
const SYS_PROMPT_FILE = path.join(PROJECT_DIR, 'system_prompt.txt');

const DEFAULT_SYSTEM_PROMPT = `You are a research assistant with access to a curated knowledge base.
Answer questions based strictly on the retrieved sources provided.
Use plain prose. Cite sources inline as [N].`;

// Pricing (per million tokens) -- update as needed
const PRICE = {
    'claude-sonnet-4-6':  { in: 3.00,  out: 15.00 },
    'claude-opus-4-6':    { in: 15.00, out: 75.00  },
    'gpt-4o':             { in: 2.50,  out: 10.00  },
    'gpt-4o-mini':        { in: 0.15,  out: 0.60   },
};

// -- Logging -------------------------------------------------------------------
fs.mkdirSync(LOGS_DIR, { recursive: true });
// Per-session log file: query-YYYYMMDD-HHMMSS.log, plus always-appended query.log
const _now = new Date();
const _ts = _now.toISOString().replace(/[-:.TZ]/g,'').slice(0,14);
const SESSION_LOG = path.join(LOGS_DIR, `query-${_ts}.log`);
const LOG_FILE    = path.join(LOGS_DIR, 'query.log');  // cumulative
try {
    const stamp = `\n[START] RAGWeed v${VERSION} ${_now.toISOString()}\n`;
    fs.appendFileSync(LOG_FILE,    stamp);
    fs.appendFileSync(SESSION_LOG, stamp);
} catch(_) {}

// Append one ISO-timestamped line to both the cumulative log and the
// per-session log. Best-effort: logging must never crash the app, so write
// failures are swallowed.
function logToFile(msg) {
    const stamp = new Date().toISOString();
    const body = msg.endsWith('\n') ? msg.slice(0, -1) : msg;
    const line = `${stamp} ${body}\n`;
    for (const target of [LOG_FILE, SESSION_LOG]) {
        try { fs.appendFileSync(target, line); } catch (_) {}
    }
}
// dbg: verbose diagnostics -- written only when DEBUG_LEVEL >= 2.
function dbg(msg) {
    if (DEBUG_LEVEL >= 2) logToFile(`[DBG] ${msg}`);
}
// logErr: error diagnostics -- always written, independent of DEBUG_LEVEL.
function logErr(msg) {
    // Always log errors to file regardless of DEBUG_LEVEL
    logToFile(`[ERR] ${msg}`);
}

const PROMPTS_FILE = path.join(PROJECT_DIR, 'scripts', 'prompts.json');

function _readPrompts() {
    // Preferred source: the unified prompts.json store.
    try { return JSON.parse(fs.readFileSync(PROMPTS_FILE, 'utf8')); } catch(_) {}
    // Legacy fallback: synthesize the same shape from the old one-file-per-prompt layout.
    const prompts = {};
    try {
        prompts.system_default = fs.readFileSync(SYS_PROMPT_FILE, 'utf8').trim();
    } catch(_) {}
    try {
        const annotFile = path.join(PROJECT_DIR, 'scripts', 'annotation_prompt.txt');
        prompts.annotation_default = fs.readFileSync(annotFile, 'utf8').trim();
    } catch(_) {}
    return prompts;
}

// -- System prompt -------------------------------------------------------------
function loadSystemPrompt(provider) {
    // Resolution order: provider-specific key -> default key -> legacy
    // system_prompt.txt -> built-in DEFAULT_SYSTEM_PROMPT.
    const prompts = _readPrompts();
    const candidates = [];
    if (provider) candidates.push(prompts['system_' + provider]);
    candidates.push(prompts['system_default']);
    for (const c of candidates) {
        if (c && c.trim()) return c.trim();
    }
    try {
        if (fs.existsSync(SYS_PROMPT_FILE))
            return fs.readFileSync(SYS_PROMPT_FILE, 'utf8').trim();
    } catch(_) {}
    return DEFAULT_SYSTEM_PROMPT;
}

function loadAnnotPrompt(provider) {
    // Resolution order: provider-specific key -> default key -> legacy
    // annotation_prompt.txt -> null (annotateOne then uses its built-in default).
    const prompts = _readPrompts();
    const candidates = [];
    if (provider) candidates.push(prompts['annotation_' + provider]);
    candidates.push(prompts['annotation_default']);
    for (const c of candidates) {
        if (c && c.trim()) return c.trim();
    }
    try {
        const legacy = path.join(PROJECT_DIR, 'scripts', 'annotation_prompt.txt');
        return fs.readFileSync(legacy, 'utf8').trim();
    } catch(_) {}
    return null; // annotateOne will use its built-in default
}

// -- History -------------------------------------------------------------------
// On-disk history layout: index.json lists entries; each entry lives in qNNNNNN.json.
const HISTORY_INDEX_FILE = path.join(HISTORY_DIR, 'index.json');
fs.mkdirSync(HISTORY_DIR, { recursive: true });

function historyLoadIndex() {
    // Load history/index.json, self-healing: entries whose per-entry file is
    // gone (partial delete / crash) are dropped and the index rewritten.
    // Any read/parse failure yields an empty index.
    try {
        if (!fs.existsSync(HISTORY_INDEX_FILE)) return [];
        const raw = JSON.parse(fs.readFileSync(HISTORY_INDEX_FILE, 'utf8'));
        const alive = raw.filter((e) => {
            const name = e.file || `q${String(e.qid).padStart(6,'0')}.json`;
            return fs.existsSync(path.join(HISTORY_DIR, name));
        });
        if (alive.length !== raw.length) {
            // Quietly persist the repaired index; failure here is non-fatal.
            try { fs.writeFileSync(HISTORY_INDEX_FILE, JSON.stringify(alive, null, 2)); } catch(_) {}
        }
        return alive;
    } catch(_) {}
    return [];
}
function historySaveIndex(index) {
    // Best-effort pretty-printed write of history/index.json; errors ignored.
    try {
        fs.writeFileSync(HISTORY_INDEX_FILE, JSON.stringify(index, null, 2));
    } catch(_) {}
}
function historyEntryPath(qid) {
    // Entry filename: zero-padded 6-digit qid, e.g. q000042.json.
    const padded = String(qid).padStart(6, '0');
    return path.join(HISTORY_DIR, `q${padded}.json`);
}
function historySaveEntry(question, answer, sources, srcChunks, srcUrls, meta, status, rawNodes, srcAnnotations) {
    // Persist one Q&A record to its own file and register it in the index.
    // Returns the new qid (one past the last indexed entry, starting at 1).
    const index = historyLoadIndex();
    const lastQid = index.length ? index[index.length - 1].qid : 0;
    const qid = lastQid + 1;

    // Local ISO-8601 timestamp with numeric zone offset, e.g. 2026-03-19T12:00:00+0100.
    const localStamp = () => {
        const d   = new Date();
        const off = -d.getTimezoneOffset();
        const hh  = String(Math.floor(Math.abs(off) / 60)).padStart(2, '0');
        const mm  = String(Math.abs(off) % 60).padStart(2, '0');
        return d.toISOString().slice(0, 19) + (off >= 0 ? '+' : '-') + hh + mm;
    };

    // Optional fields (annotations, non-ok status, raw nodes) are only
    // written when they carry information, keeping entries compact.
    const entry = {
        qid, ts: localStamp(),
        question, answer, sources, src_chunks: srcChunks, src_urls: srcUrls || [],
        ...(srcAnnotations && srcAnnotations.some(a => a) ? { src_annotations: srcAnnotations } : {}),
        meta,
        ...(status && status !== 'ok' ? { status } : {}),
        ...(rawNodes && rawNodes.length ? { raw_nodes: rawNodes } : {}),
    };
    try {
        fs.writeFileSync(historyEntryPath(qid), JSON.stringify(entry, null, 2));
        index.push({ qid, file: `q${String(qid).padStart(6,'0')}.json`, ts: entry.ts, q: question.slice(0, 120) });
        historySaveIndex(index);
    } catch(_) {}
    return qid;
}

// historyUpdateEntry -- merge `patch` into an existing on-disk entry (used by retry).
function historyUpdateEntry(qid, patch) {
    // Returns true on success, false when the qid is unknown or the write fails.
    try {
        const indexed = historyLoadIndex().find(e => e.qid === qid);
        if (!indexed) return false;
        const file = path.join(HISTORY_DIR, indexed.file);
        let current = {};
        if (fs.existsSync(file)) current = JSON.parse(fs.readFileSync(file, 'utf8'));
        fs.writeFileSync(file, JSON.stringify({ ...current, ...patch }, null, 2));
        return true;
    } catch(e) {
        logErr(`historyUpdateEntry qid=${qid}: ${e.message}`);
        return false;
    }
}
function historyLoadEntry(index, pos) {
    // Load the full entry at index[pos]; null on out-of-range position,
    // missing file, or parse error.
    if (pos < 0 || pos >= index.length) return null;
    try {
        const file = path.join(HISTORY_DIR, index[pos].file);
        if (!fs.existsSync(file)) return null;
        return JSON.parse(fs.readFileSync(file, 'utf8'));
    } catch(_) {}
    return null;
}

// -- Cost tracking -------------------------------------------------------------
// Ensure the cache directory exists before cost tracking touches cache/cost.json.
fs.mkdirSync(CACHE_DIR, { recursive: true });

// -- Query input history persistence --------------------------------------
// Interactive query-line history, persisted across sessions.
const QUERY_HIST_FILE = path.join(PROJECT_DIR, 'logs', 'query_hist.json');
const QUERY_HIST_MAX  = 500;  // hard cap; oldest lines dropped on save
function loadQueryHist() {
    // Read persisted query-line history; any failure yields an empty list.
    try {
        if (fs.existsSync(QUERY_HIST_FILE)) {
            const parsed = JSON.parse(fs.readFileSync(QUERY_HIST_FILE, 'utf8'));
            if (Array.isArray(parsed)) return parsed;
        }
    } catch(_) {}
    return [];
}
function saveQueryHist(hist) {
    // Persist at most the newest QUERY_HIST_MAX lines; errors are non-fatal.
    try {
        fs.writeFileSync(QUERY_HIST_FILE, JSON.stringify(hist.slice(-QUERY_HIST_MAX)));
    } catch(_) {}
}

function loadCost() {
    // Load cumulative token counts from cache/cost.json.
    // Fix: the previous `parseInt(d.in||0)` had no radix and could return NaN
    // for a corrupt value; that NaN would then poison every subsequent cost
    // accumulation written back by saveCost(). Coerce defensively instead.
    const toCount = (v) => {
        const n = Math.trunc(Number(v));
        return Number.isFinite(n) ? n : 0;
    };
    try {
        if (fs.existsSync(COST_FILE)) {
            const d = JSON.parse(fs.readFileSync(COST_FILE, 'utf8'));
            return { in: toCount(d.in), out: toCount(d.out) };
        }
    } catch(_) {}
    return { in: 0, out: 0 };
}
function saveCost(inTok, outTok) {
    // Overwrite cache/cost.json with the new cumulative totals; errors ignored.
    try {
        fs.writeFileSync(COST_FILE, JSON.stringify({ in: inTok, out: outTok }));
    } catch(_) {}
}
function costStr(inTok, outTok, model, provider) {
    // Dollar cost string for a request, e.g. "$0.0123".
    // Local/Ollama models cost nothing; unknown cloud models fall back to a
    // conservative Sonnet-class rate.
    if (provider === 'local') return '$0.0000';
    const rate = (model && PRICE[model]) || { in: 3.00, out: 15.00 };
    const dollars = (inTok * rate.in + outTok * rate.out) / 1_000_000;
    return `$${dollars.toFixed(4)}`;
}

// -- SQLite (chroma.sqlite3) ---------------------------------------------------
// Lazily-opened chroma.sqlite3 connection; `false` is a sticky "open failed" marker.
let _dbCon = null;
function dbDiag() {
    // One-shot diagnostic dump: logs the embeddings/embedding_metadata schema
    // plus a few sample rows so SQL issues are debuggable from the log alone.
    try {
        const db = getDb();
        const tables = db.prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name IN ('embeddings','embedding_metadata') ORDER BY name").all();
        tables.forEach(t => logToFile(`[SCHEMA] ${t.sql}`));
        const rows = db.prepare("SELECT id, segment_id, embedding_id FROM embeddings LIMIT 3").all();
        rows.forEach(r => logToFile(`[SAMPLE] embeddings: id=${r.id} seg=${r.segment_id?.slice(0,8)} eid=${r.embedding_id?.slice(0,16)}`));
        const keys = db.prepare("SELECT DISTINCT key FROM embedding_metadata LIMIT 10").all();
        logToFile(`[SCHEMA] embedding_metadata keys: ${keys.map(r=>r.key).join(', ')}`);
    } catch(e) { logToFile(`[DIAG] dbDiag error: ${e.message}`); }
}
// Guard so dbDiag() runs at most once per process (flipped in loadCollection).
let _dbDiagDone = false;

function getDb() {
    // Open (and memoize) a read-only connection to chroma.sqlite3.
    // `_dbCon === false` is a sticky failure marker so we never retry forever.
    if (_dbCon) return _dbCon;
    if (_dbCon === false) throw new Error('SQLite unavailable (malformed or missing)');

    const dbPath = path.join(CHROMA_PATH, 'chroma.sqlite3');
    dbg(`[DB] getDb: dbPath=${dbPath} exists=${fs.existsSync(dbPath)}`);
    if (!fs.existsSync(dbPath)) {
        _dbCon = false;
        throw new Error('chroma.sqlite3 not found at ' + dbPath);
    }

    const Database = require('better-sqlite3');
    // First attempt: immutable read-only URI (forward slashes for URI form).
    const uriPath = 'file:' + dbPath.replace(/\\/g, '/') + '?immutable=1&mode=ro';
    dbg(`[DB] trying URI: ${uriPath}`);
    try {
        _dbCon = new Database(uriPath, { uri: true, readonly: true, fileMustExist: true });
        dbg('[DB] opened OK with immutable=1 URI');
        return _dbCon;
    } catch(e1) {
        dbg(`[DB] immutable URI FAILED: ${e1.message}`);
    }
    // Second attempt: plain readonly open on the filesystem path.
    dbg('[DB] trying plain readonly...');
    try {
        _dbCon = new Database(dbPath, { readonly: true, fileMustExist: true });
        dbg('[DB] opened OK plain readonly');
    } catch(e2) {
        dbg(`[DB] plain readonly FAILED: ${e2.message}`);
        _dbCon = false;
        throw new Error('SQLite open failed: ' + e2.message);
    }
    return _dbCon;
}


// -- Pure-JS ChromaDB HNSW reader ---------------------------------------------
// ChromaDB's hnswlib fork stores indexes as split files:
//   header.bin      -- ChromaDB metadata (NOT standard hnswlib header)
//   data_level0.bin -- raw elements: [M0 neighbor int32s][dim float32s][int64 label]
//   link_lists.bin  -- upper-level links (not needed for search)
//   length.bin      -- 8 bytes: current element count
//
// When the graph is present we traverse it (greedy level-0 search); otherwise
// we fall back to a brute-force dot-product (cosine) scan of every vector.
// Brute-force cost: dot product of N vectors ~= N*dim multiplications
// (10k docs ~= 2ms, 50k ~= 8ms, 200k ~= 30ms at dim=768 -- all acceptable).

// name -> descriptor from loadVectors(): { dim, labels: Int32Array, count,
// hasGraph, entryPoint, binPath, spe, neighborBytes } (no vectors in RAM).
const _loadedIndexes = {};

// Read the index_meta.json to get M (neighbors per node) and dim
// Then derive: M0 = 2*M, size_data_per_element = M0*4 + dim*4 + 8
function loadVectors(segDir, dim, meta) {
    // Build a zero-RAM index descriptor for one HNSW segment. Nothing is read
    // into memory here; the search worker streams neighbor slots and vectors
    // from data_level0.bin on demand during traversal.
    const dataFile = path.join(segDir, 'data_level0.bin');
    const count = meta.total_elements || 0;
    if (count === 0) return { count: 0, labels: new Int32Array(0) };

    // Per-element layout: [neighbor slot][dim float32s][int64 label].
    // Default neighbor slot assumes M0 = 32 neighbors (132 bytes).
    const vectorBytes     = dim * 4;
    const labelBytes      = 8;
    const defaultM0       = 32;
    const defaultNbrBytes = defaultM0 * 4 + 4;  // 132 bytes
    const defaultSpe      = defaultNbrBytes + vectorBytes + labelBytes;

    // The true size_data_per_element is fileSize/count; trust that over the
    // default whenever it divides evenly and leaves room for the vector.
    let spe = defaultSpe;
    let nbrBytes = defaultNbrBytes;
    try {
        const derived = fs.statSync(dataFile).size / count;
        if (Number.isInteger(derived) && derived > vectorBytes + 4) {
            if (derived !== defaultSpe) logToFile(`[INFO] '${meta.name}' spe auto-corrected: ${defaultSpe}->${derived}`);
            spe = derived;
            nbrBytes = derived - vectorBytes - labelBytes;
        }
    } catch(e) { return { count: 0, labels: new Int32Array(0) }; }

    // Spot-check element 0's neighbor slot: a non-zero first int32 means a
    // graph was written, so the worker can traverse instead of brute-forcing.
    let hasGraph = false;
    try {
        const probe = Buffer.allocUnsafe(4);
        const fd = fs.openSync(dataFile, 'r');
        fs.readSync(fd, probe, 0, 4, 0);
        fs.closeSync(fd);
        hasGraph = probe.readInt32LE(0) !== 0;
    } catch(_) {}

    // Labels are 1-based element ids (label N maps to embeddings.id N).
    const labels = new Int32Array(count);
    for (let i = 0; i < count; i++) labels[i] = i + 1;

    const entryPoint = (meta.hnsw_entry_point !== undefined) ? meta.hnsw_entry_point : 0;
    dbg(`loadVectors: '${meta.name||'?'}' count=${count} dim=${dim} spe=${spe} hasGraph=${hasGraph} (zero-RAM)`);
    return { dim, labels, count, hasGraph, entryPoint, binPath: dataFile, spe, neighborBytes: nbrBytes };
}

// L2-normalize a vector in place (so dot product = cosine similarity)
function l2normalize(vec) {
    let norm = 0;
    for (let i = 0; i < vec.length; i++) norm += vec[i] * vec[i];
    norm = Math.sqrt(norm);
    if (norm < 1e-10) return vec;
    const out = new Float32Array(vec.length);
    for (let i = 0; i < vec.length; i++) out[i] = vec[i] / norm;
    return out;
}

// Cosine similarity search  --  offloaded to a Worker thread so the event loop
// stays free to flush SSE status messages while the dot-product loop runs.
// The query vector is copied to the worker as a plain array via postMessage.
const { Worker, isMainThread, parentPort, workerData } = require('worker_threads');

// Worker code inlined as a string so no extra file is needed.
// The worker handles one search request per message and replies with the
// top-K [similarity, elementIndex] pairs, sorted descending. All vector and
// neighbor data is read from data_level0.bin on demand (zero-RAM). Two
// strategies: greedy HNSW level-0 traversal when a graph exists, otherwise a
// brute-force scan with a size-K min-heap.
// NOTE: this string is evaluated via `new Worker(src, { eval: true })` -- it
// must not reference anything from this module's scope.
const _WORKER_SRC = `
const { parentPort } = require('worker_threads');
const fs = require('fs');

parentPort.on('message', ({ dim, count, query, topK, ef, hasGraph, entryPoint, binPath, spe, neighborBytes }) => {
    // Normalize query
    let norm = 0;
    for (let i = 0; i < query.length; i++) norm += query[i]*query[i];
    norm = Math.sqrt(norm);
    const inv = norm > 1e-10 ? 1/norm : 1;
    const q = new Float32Array(query.length);
    for (let i = 0; i < query.length; i++) q[i] = query[i]*inv;

    const M0      = Math.floor(neighborBytes / 4) - 1;
    const vecBuf  = Buffer.allocUnsafe(dim * 4);
    const nbrBuf  = Buffer.allocUnsafe(neighborBytes);
    let fd = null;
    try { fd = fs.openSync(binPath, 'r'); } catch(_) {}

    // Read neighbor slot for node i: returns { nCount, neighbors[] }
    function readNbr(i) {
        fs.readSync(fd, nbrBuf, 0, neighborBytes, i * spe);
        const nCount = Math.min(nbrBuf.readInt32LE(M0 * 4), M0);
        const nbrs = [];
        for (let j = 0; j < nCount; j++) nbrs.push(nbrBuf.readInt32LE(j * 4));
        return nbrs;
    }

    // Read and normalize vector for node i, return dot product with query
    function readSim(i) {
        fs.readSync(fd, vecBuf, 0, dim * 4, i * spe + neighborBytes);
        let n = 0;
        const v = new Float32Array(dim);
        for (let d = 0; d < dim; d++) { v[d] = vecBuf.readFloatLE(d * 4); n += v[d]*v[d]; }
        n = Math.sqrt(n);
        let dot = 0;
        if (n > 1e-10) { for (let d = 0; d < dim; d++) dot += (v[d]/n)*q[d]; }
        return dot;
    }

    let heap;
    if (hasGraph && fd !== null) {
        // ── HNSW graph traversal: all reads from disk ──
        const efVal   = Math.max(ef || 512, topK);  // never less than topK
        const visited = new Uint8Array(count);
        const ep      = entryPoint || 0;
        const epSim   = readSim(ep);
        let cands  = [{ sim: epSim, id: ep }];
        let result = [{ sim: epSim, id: ep }];
        visited[ep] = 1;
        let ci = 0;
        while (ci < cands.length) {
            const cur = cands[ci++];
            if (result.length >= efVal && cur.sim < result[result.length-1].sim) break;
            const nbrs = readNbr(cur.id);
            for (const nb of nbrs) {
                if (nb < 0 || nb >= count || visited[nb]) continue;
                visited[nb] = 1;
                const sim = readSim(nb);
                if (result.length < efVal || sim > result[result.length-1].sim) {
                    let ci2 = cands.length;
                    cands.push({ sim, id: nb });
                    while (ci2 > 0 && cands[ci2-1].sim < sim) { cands[ci2]=cands[ci2-1]; ci2--; }
                    cands[ci2] = { sim, id: nb };
                    let ri = result.length;
                    result.push({ sim, id: nb });
                    while (ri > 0 && result[ri-1].sim < sim) { result[ri]=result[ri-1]; ri--; }
                    result[ri] = { sim, id: nb };
                    if (result.length > efVal) result.pop();
                }
            }
        }
        heap = result.slice(0, topK).map(r => [r.sim, r.id]);
    } else {
        // ── Brute-force fallback ──
        const heapA = [];
        function heapPush(s,i){ heapA.push([s,i]); let x=heapA.length-1; while(x>0){const p=(x-1)>>1;if(heapA[p][0]<=heapA[x][0])break;const t=heapA[p];heapA[p]=heapA[x];heapA[x]=t;x=p;} }
        function heapPushPop(s,i){ heapA[0]=[s,i]; let x=0,n=heapA.length; while(true){let m=x,l=2*x+1,r=2*x+2;if(l<n&&heapA[l][0]<heapA[m][0])m=l;if(r<n&&heapA[r][0]<heapA[m][0])m=r;if(m===x)break;const t=heapA[m];heapA[m]=heapA[x];heapA[x]=t;x=m;} }
        for (let i = 0; i < count; i++) {
            const s = readSim(i);
            if (heapA.length < topK) heapPush(s,i);
            else if (s > heapA[0][0]) heapPushPop(s,i);
        }
        heap = heapA;
    }
    if (fd !== null) try { fs.closeSync(fd); } catch(_) {}
    heap.sort((a,b) => b[0]-a[0]);
    parentPort.postMessage({ heap });
});
`;

// searchVectorsAsync: spawns a fresh Worker per call.
// No pool  --  avoids the listener-race bug where two parallel once('message') calls
// on the same pooled worker both resolve to the first result, leaving the second hanging.
async function searchVectorsAsync(index, queryVec, k, ef) {
    // Run the vector search on a Worker thread so the event loop stays free.
    // Resolves with [{ label, distance }] (distance = 1 - cosine similarity),
    // best first. Falls back to the synchronous brute-force scan on spawn
    // failure, worker error, or a 30s timeout -- this promise never rejects.
    const { dim, labels, count, hasGraph, entryPoint, binPath, spe, neighborBytes } = index;
    if (count === 0) return [];
    const topK = Math.min(k, count);

    return new Promise((resolve) => {
        let settled = false;
        let w;
        // Single-shot resolver: guarantees the worker is terminated and the
        // promise resolves at most once regardless of which path fires first.
        const done = (result) => {
            if (settled) return;
            settled = true;
            try { if (w) w.terminate(); } catch(_) {}
            resolve(result);
        };
        // Safety net: if the worker never answers, do the scan in-process.
        const timeout = setTimeout(() => {
            logErr(`searchVectorsAsync timeout count=${count} dim=${dim}  --  sync fallback`);
            done(searchVectorsSync(index, queryVec, topK));
        }, 30000);
        try {
            w = new Worker(_WORKER_SRC, { eval: true });
        } catch(e) {
            clearTimeout(timeout);
            logErr('Worker spawn failed: ' + e.message);
            return resolve(searchVectorsSync(index, queryVec, topK));
        }
        w.once('message', ({ heap }) => {
            clearTimeout(timeout);
            dbg(`searchVectors: top3=${heap.slice(0,3).map(e=>e[0].toFixed(4)).join(',')}`);
            // heap entries are [similarity, elementIndex]; map index -> label.
            done(heap.map(([score, i]) => ({ label: labels[i], distance: 1.0 - score })));
        });
        w.once('error', (err) => {
            clearTimeout(timeout);
            logErr('Worker error: ' + err.message);
            done(searchVectorsSync(index, queryVec, topK));
        });
        w.postMessage({ dim, count, query: Array.from(queryVec), topK, ef: ef||512, hasGraph: !!hasGraph, entryPoint: entryPoint||0, binPath: binPath||null, spe: spe||3212, neighborBytes: neighborBytes||132 });
    });
}

// Synchronous fallback -- brute-force disk reads (used on worker timeout)
function searchVectorsSync(index, queryVec, topK) {
    const { dim, labels, count, binPath, spe, neighborBytes } = index;
    let norm=0; for(let i=0;i<queryVec.length;i++) norm+=queryVec[i]*queryVec[i]; norm=Math.sqrt(norm);
    const inv=norm>1e-10?1/norm:1;
    const q=new Float32Array(queryVec.length); for(let i=0;i<q.length;i++) q[i]=queryVec[i]*inv;
    const heap=[];
    function heapPush(s,i){heap.push([s,i]);let x=heap.length-1;while(x>0){const p=(x-1)>>1;if(heap[p][0]<=heap[x][0])break;const t=heap[p];heap[p]=heap[x];heap[x]=t;x=p;}}
    function heapPushPop(s,i){heap[0]=[s,i];let x=0,n=heap.length;while(true){let m=x,l=2*x+1,r=2*x+2;if(l<n&&heap[l][0]<heap[m][0])m=l;if(r<n&&heap[r][0]<heap[m][0])m=r;if(m===x)break;const t=heap[m];heap[m]=heap[x];heap[x]=t;x=m;}}
    let fd=null; try { fd=fs.openSync(binPath,'r'); } catch(_) {}
    const vbuf=Buffer.allocUnsafe(dim*4);
    for(let i=0;i<count;i++){
        if(fd!==null) fs.readSync(fd,vbuf,0,dim*4,i*spe+neighborBytes);
        let dot=0,n=0;
        const v=new Float32Array(dim);
        for(let d=0;d<dim;d++){v[d]=vbuf.readFloatLE(d*4);n+=v[d]*v[d];}
        n=Math.sqrt(n); if(n>1e-10){for(let d=0;d<dim;d++){dot+=(v[d]/n)*q[d];}}
        if(heap.length<topK)heapPush(dot,i);else if(dot>heap[0][0])heapPushPop(dot,i);
    }
    if(fd!==null) try{fs.closeSync(fd);}catch(_){}
    heap.sort((a,b)=>b[0]-a[0]);
    return heap.map(([score,i])=>({label:labels[i],distance:1.0-score}));
}

// -- Collection loading ---------------------------------------------------------
// name -> collection handle from loadCollection() (memoized for process lifetime).
const _collectionCache = {};

// Deterministic UUID from seed -- matches ingest.js so query can find JS-ingested segments
function deterministicUuid(seed) {
    const h = Array.from(seed).reduce((a, c) => Math.imul(31, a) + c.charCodeAt(0) | 0, 0x811c9dc5);
    const hex = (Math.abs(h) * 0x100000001 + 0x123456789abcdef).toString(16).padStart(16, '0');
    const h2  = Array.from(seed + '2').reduce((a, c) => Math.imul(37, a) + c.charCodeAt(0) | 0, 0xdeadbeef);
    const hex2 = (Math.abs(h2) * 0x100000001 + 0xfedcba987654321).toString(16).padStart(16, '0');
    const full = (hex + hex2).slice(0, 32);
    return `${full.slice(0,8)}-${full.slice(8,12)}-4${full.slice(13,16)}-${full.slice(16,20)}-${full.slice(20,32)}`;
}

async function loadCollection(name) {
    // Resolve a collection by name to a searchable handle, trying three
    // segment-directory discovery strategies in order (deterministic UUID,
    // segmap, chroma.sqlite3). The handle is cached for the process lifetime.
    if (_collectionCache[name]) return _collectionCache[name];

    dbg(`[LC] loadCollection('${name}') start`);

    // Expected embedding dimensionality, inferred from the configured model
    // by substring match; defaults to 768 for unknown models.
    const DIM_MAP = {
        'nomic-embed-text': 768, 'mxbai-embed-large': 1024, 'all-minilm': 384,
        'text-embedding-ada-002': 1536, 'text-embedding-3-small': 1536,
        'text-embedding-3-large': 3072,
    };
    const embedModel = (cfg('EMBED_MODEL', 'nomic-embed-text') || '').toLowerCase();
    let defaultDim = 768;
    for (const [key, val] of Object.entries(DIM_MAP)) {
        if (embedModel.includes(key)) { defaultDim = val; break; }
    }

    // Step 1: find segDir -- deterministic UUID dir is authoritative (matches ingest.js)
    let segDir = null;
    const detSegUuid = deterministicUuid(name + ':vector');
    const detSegDir  = path.join(CHROMA_PATH, detSegUuid);
    if (fs.existsSync(path.join(detSegDir, 'data_level0.bin'))) {
        segDir = detSegDir;
        logToFile(`[LC] '${name}': found at deterministic dir ${detSegUuid.slice(0,8)}`);
    }

    // Step 2: segmap fallback (follows symlinks -- works for any dir layout)
    if (!segDir) {
        const segMap = getSegDirMap();
        if (segMap[name] && fs.existsSync(path.join(segMap[name].segDir, 'data_level0.bin'))) {
            segDir = segMap[name].segDir;
            logToFile(`[LC] '${name}': found via segmap at ${path.basename(segDir)}`);
        }
    }

    // Step 3: chroma.sqlite3 last resort (old unmigrated collections)
    if (!segDir) {
        try {
            const db = getDb();
            const collRow = db.prepare('SELECT id FROM collections WHERE name = ?').get(name);
            if (collRow) {
                // Prefer the VECTOR-scoped segment; fall back to any segment.
                let segRow = db.prepare(`SELECT id FROM segments WHERE collection = ? AND scope = 'VECTOR'`).get(collRow.id);
                if (!segRow) segRow = db.prepare('SELECT id FROM segments WHERE collection = ?').get(collRow.id);
                if (segRow) {
                    const d = path.join(CHROMA_PATH, segRow.id);
                    if (fs.existsSync(path.join(d, 'data_level0.bin'))) {
                        segDir = d;
                        logToFile(`[LC] '${name}': found via chroma.sqlite3 at ${segRow.id.slice(0,8)}`);
                    }
                }
            }
        } catch(_) {}
    }

    // No index found anywhere: return an empty stub (NOT cached, so a later
    // call can pick the collection up once ingestion finishes).
    if (!segDir) {
        dbg(`loadCollection: '${name}' has no HNSW index yet -- skipping (collection is being built)`);
        return { count: 0, search: async () => [] };
    }

    // Step 4: read meta
    const meta = readPickleMeta(segDir, defaultDim);
    dbg(`loadCollection: '${name}' dim=${meta.dimensionality} elements=${meta.total_elements} labels=${Object.keys(meta.label_to_id||{}).length}`);

    // Step 5: build the zero-RAM index descriptor (no vectors are loaded;
    // loadVectors is synchronous -- the await is harmless).
    const index = await loadVectors(segDir, meta.dimensionality, meta);
    _loadedIndexes[name] = index;
    dbg(`loadCollection: '${name}' segUuid=${detSegUuid.slice(0,8)} dim=${meta.dimensionality} elements=${index.count}`);
    if (!_dbDiagDone) { _dbDiagDone = true; dbDiag(); }

    // Step 6: open rag.sqlite3 for metadata queries (optional; absent for old
    // collections, which then fall back to the shared chroma.sqlite3).
    let collDb = null;
    const ragDbPath = path.join(segDir, 'rag.sqlite3');
    if (fs.existsSync(ragDbPath)) {
        try {
            const Database = require('better-sqlite3');
            collDb = new Database(ragDbPath, { readonly: true, fileMustExist: true });
            logToFile(`[LC] '${name}': opened rag.sqlite3`);
        } catch(e) {
            logToFile(`[LC] '${name}': failed to open rag.sqlite3: ${e.message} -- falling back to chroma.sqlite3`);
        }
    }

    const segUuid = path.basename(segDir);
    const coll = { meta, collUuid: detSegUuid, segUuid, segmentId: segUuid, name, index, db: collDb, loaded: true };
    _collectionCache[name] = coll;
    return coll;
}

function getDocumentsByLabels(labels, segmentId, collDb) {
    // labels: 1-based integer HNSW labels (label N = embeddings.id N)
    // collDb: per-collection rag.sqlite3 connection (preferred); falls back
    //         to the shared chroma.sqlite3 when absent
    // Returns: array of { id, label, text, metadata }
    if (!labels.length) return [];

    let db = collDb;
    if (!db) {
        try { db = getDb(); } catch(e) {
            logToFile('[DOCS] SQLite unavailable, returning empty docs: ' + e.message);
            return [];
        }
    }
    dbg(`getDocumentsByLabels: ${labels.length} labels, sample=${labels.slice(0,3)}`);

    // Fetch in chunks of 500 to stay well under SQLite's bound-parameter limit.
    const CHUNK = 500;
    const allRows = [];
    for (let off = 0; off < labels.length; off += CHUNK) {
        const chunk = labels.slice(off, off + CHUNK);
        const ph = chunk.map(() => '?').join(',');
        try {
            const r = db.prepare(`
                SELECT e.id as label, e.embedding_id as eid, em.key, em.string_value
                FROM embeddings e
                LEFT JOIN embedding_metadata em ON em.id = e.id
                WHERE e.id IN (${ph})
                  AND (em.key IS NULL OR em.key IN ('chroma:document','source_file_name','file_name','page_label','_collection','ole_parent_name','ocr_type','source_rel_path'))
                ORDER BY e.id
            `).all(...chunk);
            allRows.push(...r);
        } catch(e) {
            logErr(`getDocumentsByLabels SQL: ${e.message}`);
        }
    }
    dbg(`getDocumentsByLabels: SQL returned ${allRows.length} rows for ${labels.length} labels`);

    // Fold the (label, key, value) rows into one document object per label:
    // 'chroma:document' carries the text; every other key is metadata.
    const docsByLabel = {};
    for (const row of allRows) {
        const key = String(row.label);
        if (!docsByLabel[key]) docsByLabel[key] = { id: row.eid, label: row.label, text: '', metadata: {} };
        const doc = docsByLabel[key];
        if (row.key === 'chroma:document') doc.text = row.string_value || '';
        else if (row.key) doc.metadata[row.key] = row.string_value || '';
    }
    return Object.values(docsByLabel);
}

// -- Embedding -----------------------------------------------------------------
// Dim-to-model map: given the dimensionality stored in the collection, pick the right embed model.
// RULE  --  backward compatibility: always adapt to what the collection says, never require manual fix.
const _DIM_TO_MODEL = {
    384:  'all-minilm',
    768:  'nomic-embed-text',
    1024: 'mxbai-embed-large',
    1536: 'text-embedding-ada-002',
    3072: 'text-embedding-3-large',
};
const _MODEL_TO_DIM = {
    'nomic-embed-text': 768, 'mxbai-embed-large': 1024, 'all-minilm': 384,
    'text-embedding-ada-002': 1536, 'text-embedding-3-small': 1536, 'text-embedding-3-large': 3072,
};
function _dimForModel(modelName) {
    const lower = (modelName||'').toLowerCase();
    for (const [m,d] of Object.entries(_MODEL_TO_DIM)) { if (lower.includes(m)) return d; }
    return null;
}

// In-process embedding cache: keyed by "model:text", stores the raw embedding
// array (plain JS numbers) returned by the Ollama API.
// Eliminates redundant Ollama calls for repeated or retried queries.
// Cap at 200 entries (LRU-lite: clear oldest half when full).
const _embedCache = new Map();   // "model:text" -> embedding array
const _EMBED_CACHE_MAX = 200;    // evict the oldest half when this cap is hit

// embedQuery(text, targetDim?)  --  auto-selects correct model for targetDim,
// returns cached embedding if available.
async function embedQuery(text, targetDim) {
    // Embed `text` via Ollama, auto-selecting the model that matches the
    // collection's dimensionality (targetDim). Returns the embedding array;
    // results are memoized per (model, text). Throws when the required model
    // is not installed in Ollama or the embedding call fails.
    const cfgModel = cfg('EMBED_MODEL', 'nomic-embed-text');
    let model = cfgModel;

    if (targetDim) {
        const cfgDim = _dimForModel(cfgModel);
        if (cfgDim && cfgDim !== targetDim) {
            // The configured model can't produce targetDim -- pick the one that can.
            const autoModel = _DIM_TO_MODEL[targetDim];
            if (autoModel) {
                // Verify the required model is actually available in Ollama before using it
                const ollamaBase = (cfg('EMBED_OLLAMA_HOST','') || cfg('OLLAMA_HOST','http://localhost:11434')).replace(/\/+$/,'');
                let modelAvailable = false;
                try {
                    const r = await fetch(`${ollamaBase}/api/tags`, { signal: AbortSignal.timeout(5000) });
                    if (r.ok) {
                        const d = await r.json();
                        modelAvailable = (d.models||[]).some(m => (m.name||'').startsWith(autoModel));
                    }
                } catch(_) {}
                if (!modelAvailable) {
                    const msg = `Collection requires embed model '${autoModel}' (dim=${targetDim}) but it is not available in Ollama.`
                        + ` Re-run the installer to pull it automatically, or: ollama pull ${autoModel}`;
                    logErr(`[EMBED] ${msg}`);
                    throw new Error(msg);
                }
                logToFile(`[EMBED] EMBED_MODEL=${cfgModel} (dim=${cfgDim}) but collection needs dim=${targetDim}  --  using model='${autoModel}'`);
                model = autoModel;
            } else {
                // Unknown dim -- no model mapping. Use configured model and warn.
                // The query vector dim will not match the index dim, so scores will be
                // meaningless for this collection. It will still return results but ranked
                // by a mismatched embedding space. Better than crashing the whole query.
                logToFile(`[EMBED] No model known for dim=${targetDim}  --  using configured model '${cfgModel}' (dim=${cfgDim||'?'}). Scores for this collection will be unreliable.`);
            }
        }
    }

    // Cache hit: skip the network round-trip entirely.
    const cacheKey = `${model}:${text}`;
    if (_embedCache.has(cacheKey)) {
        dbg(`embedQuery: cache hit model=${model} dim=${targetDim||'?'}`);
        return _embedCache.get(cacheKey);
    }

    const t0 = Date.now();
    const base = cfg('EMBED_OLLAMA_HOST', cfg('OLLAMA_HOST', 'http://localhost:11434'));
    const resp = await fetch(`${base}/api/embeddings`, {
        method:  'POST',
        headers: { 'Content-Type': 'application/json' },
        body:    JSON.stringify({ model, prompt: text }),
    });
    if (!resp.ok) throw new Error(`Ollama embedding HTTP ${resp.status} model=${model}: ${resp.statusText}`);
    const data = await resp.json();
    if (!data.embedding) throw new Error(`No embedding in Ollama response for model=${model}`);
    const emb = data.embedding;
    logToFile(`[EMBED] model=${model} dim=${emb.length} elapsed=${Date.now()-t0}ms`);
    // Dim mismatch is logged but not fatal: downstream warns again in directRetrieve.
    if (targetDim && emb.length !== targetDim) {
        logErr(`[EMBED] Model '${model}' returned dim=${emb.length} but expected dim=${targetDim}`);
    }

    // Store in cache
    if (_embedCache.size >= _EMBED_CACHE_MAX) {
        // Evict oldest half (Map iteration order = insertion order)
        const keys = Array.from(_embedCache.keys()).slice(0, _EMBED_CACHE_MAX / 2);
        for (const k of keys) _embedCache.delete(k);
    }
    _embedCache.set(cacheKey, emb);
    return emb;
}

// -- Retrieval -----------------------------------------------------------------
async function directRetrieve(coll, queryVec, topK, ef) {
    // Search one collection's in-memory HNSW index for `queryVec` and hydrate
    // the hits into { score, text, metadata, id } nodes.
    // score = 1 - distance (cosine space). Returns [] for an empty request.
    const { meta, name, segmentId } = coll;
    const total = meta.total_elements || 1;
    const limit = Math.min(topK, total);
    if (limit === 0) return [];

    dbg(`directRetrieve: col='${name}' topK=${limit} emb_len=${queryVec.length}`);

    // Vectors were pre-normalized at ingest (ChromaDB cosine convention), so a
    // plain dot-product search is equivalent to cosine similarity.
    const index = _loadedIndexes[name];
    if (!index) throw new Error(`Index for '${name}' not loaded`);
    // Dimension mismatch warning  --  self-healed by embedQuery auto-model selection
    if (index.dim && queryVec.length !== index.dim) {
        logErr(`[WARN] dim mismatch: query=${queryVec.length} index=${index.dim} col='${name}'  --  results may be NaN`);
    }
    const hits = await searchVectorsAsync(index, queryVec, limit, ef);

    // Fetch documents for all hit labels in one batch, then index by label
    // for O(1) lookup while assembling results.
    const hitLabels = hits.map(h => h.label);
    const docs = getDocumentsByLabels(hitLabels, segmentId, coll.db || null);
    const byLabel = {};
    for (const doc of docs) byLabel[doc.label] = doc;

    const out = [];
    for (const hit of hits) {
        const doc = byLabel[hit.label];
        if (!doc) continue;  // label in index but missing from the doc store
        out.push({
            score:    1.0 - hit.distance,
            text:     doc.text,
            metadata: { ...doc.metadata, _collection: name },
            id:       doc.id,
        });
    }
    dbg(`directRetrieve: '${name}' got ${out.length} results`);
    return out;
}

// Retrieve and rank context nodes for `queryStr` across `collections`.
// Pipeline: embed once per unique index dimension -> search every collection
// (parallel, or sequential when `signal`/`shouldStop` is supplied so aborts
// take effect between collections) -> merge + sort + dedupe -> relative-score
// normalisation -> MIN_SCORE filter -> greedy token-budget packing.
// Returns { nodes, status: 'ok', errors } on success, or sentinel objects
// ({ text, nodes: [], usage, status }) when nothing survives retrieval.
// `onStatus` (optional) receives human-readable progress strings.
async function retrieveNodes(collections, queryStr, llm, topK, signal, onStatus, shouldStop) {
    // Normalize query to lowercase for consistent embedding -- neural models are case-sensitive
    queryStr = queryStr.toLowerCase();
    const _ncoll = Object.keys(collections).length;
    // Backward compat: group collections by dim, embed once per unique dim (parallel)
    const _collVals = Object.values(collections);
    const _dimGroups = {};
    for (const c of _collVals) {
        const d = c.index?.dim || c.meta?.dimensionality || 0;
        if (!_dimGroups[d]) _dimGroups[d] = [];
        _dimGroups[d].push(c);
    }
    // dim=0 entries (no loaded index) are excluded -- handled per-collection below.
    const _uniqueDims = Object.keys(_dimGroups).map(Number).filter(d => d > 0);
    logToFile(`[QUERY] Unique dims: ${_uniqueDims.join(',')} across ${_ncoll} collections`);
    onStatus?.('Embedding query...');
    const _queryVecs = {};  // dim -> embedding array
    await Promise.all(_uniqueDims.map(async (d) => {
        try {
            _queryVecs[d] = await embedQuery(queryStr, d);
            logToFile(`[QUERY] Embedded dim=${d}: ${_queryVecs[d].length} floats`);
        } catch(e) {
            // A failed embed is non-fatal: collections of that dim are skipped.
            logErr(`[QUERY] Failed to embed for dim=${d}: ${e.message}  --  collections with this dim will be skipped`);
        }
    }));
    onStatus?.(`Searching ${_ncoll} collection${_ncoll===1?'':'s'} in parallel...`);
    const allNodes = [];
    const errors   = [];
    let   _collDone = 0;

    // Run collection searches -- sequential when signal present (allows abort between collections)
    // parallel otherwise (faster: total time ≈ slowest collection)
    const _collEntries = Object.entries(collections);
    onStatus?.(`Searching ${_ncoll} collection${_ncoll===1?'':'s'}...`);
    const _searchOne = async ([name, coll]) => {
        if (signal?.aborted || shouldStop?.()) return;
        try {
            const t0 = Date.now();
            const _cDim = coll.index?.dim || coll.meta?.dimensionality || 0;
            let queryVec = _queryVecs[_cDim];
            if (!queryVec) {
                if (_cDim === 0) {
                    // dim=0 means collection has no valid index -- skip entirely, don't pollute scores
                    logToFile(`[QUERY] '${name}': dim=0 -- no index loaded, skipping`);
                    _collDone++;
                    onStatus?.(`Searched ${_collDone}/${_ncoll}: ${name} (skipped -- not indexed)`);
                    return;
                }
                // No exact-dim embedding available -- use any available vec (scores unreliable)
                queryVec = Object.values(_queryVecs)[0];
                if (queryVec) logToFile(`[QUERY] '${name}': dim=${_cDim} has no matching embed model -- using dim=${queryVec.length} fallback. Scores unreliable.`);
                else { errors.push(`${name}: no query embedding available`); return; }
            }
            const nodes = await directRetrieve(coll, queryVec, topK, cfgInt('HNSW_EF', 512));
            const elapsed = Date.now()-t0;
            logToFile(`[QUERY] ${name}: ${nodes.length} nodes in ${elapsed}ms`);
            _collDone++;
            onStatus?.(`Searched ${_collDone}/${_ncoll}: ${name} (${elapsed}ms, ${nodes.length} hits)`);
            allNodes.push(...nodes);
        } catch(e) {
            // Per-collection failure is recorded but never aborts the whole query.
            _collDone++;
            logErr(`[QUERY-ERR] ${name}: ${e.message}`);
            errors.push(`${name}: ${e.message}`);
            onStatus?.(`Searched ${_collDone}/${_ncoll}: ${name} (error)`);
        }
    };
    if (signal || shouldStop) {
        // Sequential -- check stop/abort before AND after each collection
        for (const entry of _collEntries) {
            if (shouldStop?.()) break;
            await _searchOne(entry);
            if (shouldStop?.()) break;
        }
    } else {
        await Promise.all(_collEntries.map(_searchOne));
    }

    if (!allNodes.length) {
        if (errors.length) return { text: `Retrieval error -- ${errors.join('; ')}`, nodes: [], usage: {}, status: 'retrieval_error' };
        return { text: 'No relevant content found.', nodes: [], usage: {}, status: 'no_results' };
    }

    // Sort by score descending, deduplicate by text content
    allNodes.sort((a,b) => b.score - a.score);
    const seen = new Set();
    // Dedupe key is the first 200 chars -- near-identical chunks collapse to one.
    const deduped = allNodes.filter(n => {
        const key = n.text.slice(0,200);
        if (seen.has(key)) return false;
        seen.add(key); return true;
    });

    // Relative score: normalise so floor=0, best match=1.0
    // Gives a meaningful signal independent of embedding model's absolute scale
    if (deduped.length > 0) {
        const _maxS = deduped[0].score;  // already sorted descending
        const _minS = deduped[deduped.length - 1].score;
        const _rng  = _maxS - _minS;
        deduped.forEach(n => { n.rel_score = _rng > 1e-6 ? (n.score - _minS) / _rng : 1.0; });
    }
    // Filter on rel_score (0=floor, 1=best) so MIN_SCORE is meaningful
    // regardless of embedding model or collection domain.
    // MIN_SCORE=0 returns all; MIN_SCORE=0.10 drops the bottom 10% of results.
    const filtered = MIN_SCORE > 0 ? deduped.filter(n => (n.rel_score ?? 1.0) >= MIN_SCORE) : deduped;
    // Greedy context packer: take chunks in rel_score order until the LLM's
    // input window is full. Budget = model_ctx - MAX_TOKENS (reserved for output)
    // - fixed overhead (system prompt + query + framing ~2000 tokens).
    // CONTEXT_CHUNKS is a hard cap on chunk count (safety valve only).
    const _maxOut  = parseInt(cfg('MAX_TOKENS', '4096'));
    const _ctxCap  = parseInt(cfg('CONTEXT_CHUNKS', String(topK)));
    const _mdlCtx  = _modelCtx(llm && llm.model ? llm.model : cfg('ANTHROPIC_MODEL','claude-sonnet-4-6'));
    const _budget  = _mdlCtx - _maxOut - 2000;  // 2000 = system prompt + query + framing
    let   _used    = 0;
    const topNodes = [];
    for (const n of filtered) {
        if (topNodes.length >= _ctxCap) break;
        const _t = _estTokens(n.text) + 40;  // 40 = per-chunk header tokens
        // First chunk is always admitted even if it alone exceeds the budget.
        if (topNodes.length > 0 && _used + _t > _budget) break;
        topNodes.push(n);
        _used += _t;
    }
    logToFile(`[QUERY] allNodes=${allNodes.length} deduped=${deduped.length} filtered=${filtered.length} topNodes=${topNodes.length} tokensUsed=${_used}/${_budget} modelCtx=${_mdlCtx} scores=${topNodes.slice(0,3).map(n=>n.score.toFixed(3)).join(',')}`);

    if (topNodes.length === 0) {
        if (filtered.length === 0 && deduped.length > 0) {
            // Everything was filtered by MIN_SCORE  --  log the actual score range to help diagnosis
            const allScores = deduped.map(n => n.score);
            const maxS = Math.max(...allScores), minS = Math.min(...allScores);
            const nanCount = allScores.filter(s => isNaN(s)).length;
            if (nanCount > 0) {
                logErr(`[QUERY] ${nanCount}/${deduped.length} nodes have NaN scores  --  likely embedding dimension mismatch. Check EMBED_MODEL env var.`);
            } else {
                logToFile(`[QUERY] MIN_SCORE=${MIN_SCORE} (rel) filtered all ${deduped.length} nodes (raw score range: ${minS.toFixed(3)}..${maxS.toFixed(3)}). Lower MIN_SCORE or check embedding model.`);
            }
        }
        return { text: 'No relevant content found.', nodes: [], usage: {}, status: 'no_results' };
    }

    // retrieveNodes stops here -- returns ranked nodes without synthesis
    return { nodes: topNodes, status: 'ok', errors };
}

// queryCollections: full retrieve + synthesize (used when not annotating)
async function queryCollections(collections, queryStr, llm, topK, signal, onStatus, ctxMessages=[]) {
    // Full query path: retrieve ranked nodes, then synthesize a cited answer.
    // Any non-ok retrieval result is returned verbatim to the caller.
    const retrieved = await retrieveNodes(collections, queryStr, llm, topK, signal, onStatus);
    if (retrieved.status !== 'ok') return retrieved;
    const nodes = retrieved.nodes;
    if (!nodes.length) return { text: 'No relevant content found.', nodes: [], usage: {}, status: 'no_results' };
    try {
        onStatus?.('Generating LLM response...');
        const { text, usage } = await synthesizeCited(llm, queryStr, nodes, signal, ctxMessages);
        return { text, nodes, usage, status: 'ok' };
    } catch(e) {
        // Log the full error chain (message + cause + stack), but still hand
        // the retrieved nodes back so the caller can display the sources.
        const causeTxt = e.cause ? ` | cause: ${e.cause.message||e.cause}` : '';
        const stackTxt = e.stack ? `\n${e.stack}` : '';
        logErr(`synthesis error: ${e.message}${causeTxt}${stackTxt}`);
        return { text: `Error generating response: ${e.message}${causeTxt}`, nodes, usage: {}, status: 'llm_error' };
    }
}

// -- LLM providers -------------------------------------------------------------
async function getLLM(provider, model) {
    // Resolve a provider name (+ optional model override) into a client
    // descriptor: { llm, label, model, provider } on success, or
    // { llm: null, label: '[ERROR] ...', error: true } on failure.
    // SDKs are imported lazily so only the selected provider's package loads.
    const p = provider || cfg('LLM_PROVIDER', 'claude');
    if (p === 'claude') {
        const apiKey = cfgLive('ANTHROPIC_API_KEY');
        if (!apiKey) return { llm: null, label: '[ERROR] ANTHROPIC_API_KEY not set', error: true };
        const mdl = model || cfg('ANTHROPIC_MODEL', 'claude-sonnet-4-6');
        const { default: Anthropic } = await import('@anthropic-ai/sdk');
        return { llm: new Anthropic({ apiKey }), label: `Claude: ${mdl}`, model: mdl, provider: p };
    }
    if (p === 'openai') {
        const apiKey = cfgLive('OPENAI_API_KEY');
        if (!apiKey) return { llm: null, label: '[ERROR] OPENAI_API_KEY not set', error: true };
        const mdl = model || cfg('OPENAI_MODEL', 'gpt-4o');
        const { default: OpenAI } = await import('openai');
        return { llm: new OpenAI({ apiKey }), label: `OpenAI: ${mdl}`, model: mdl, provider: p };
    }
    if (p === 'gemini') {
        // Either env var name is accepted for the Google key.
        const apiKey = cfgLive('GEMINI_API_KEY') || cfgLive('GOOGLE_API_KEY');
        if (!apiKey) return { llm: null, label: '[ERROR] GEMINI_API_KEY not set', error: true };
        const mdl = model || cfg('GEMINI_MODEL', 'gemini-2.5-flash');
        const { GoogleGenAI } = await import('@google/genai');
        return { llm: new GoogleGenAI({ apiKey }), label: `Gemini: ${mdl}`, model: mdl, provider: p };
    }
    if (p === 'local' || p === 'ollama') {
        // No SDK for local models -- the llm object is a plain tag + model name.
        const mdl = model || cfg('LOCAL_LLM_MODEL', '');
        if (!mdl) return { llm: null, label: '[ERROR] LOCAL_LLM_MODEL not set', error: true };
        return { llm: { _type: 'ollama', model: mdl }, label: `Local: ${mdl}`, model: mdl, provider: 'local' };
    }
    return { llm: null, label: `[ERROR] Unknown provider: ${p}`, error: true };
}

// ── Annotation: one-sentence per-source relevance note ───────────────────────
// Fires parallel LLM calls (concurrency-limited) to annotate each node.
// Returns string[] -- one annotation per node, empty string on failure.
async function annotateNodes(query, nodes, llmInfo, concurrency) {
    // Annotate each retrieved node with a one-sentence relevance note via the
    // active LLM provider (claude/openai/gemini/local). Returns a string[]
    // aligned with `nodes`; an unsupported provider or a failed call leaves ''
    // at that position. `concurrency` caps in-flight LLM requests (default 4).
    concurrency = concurrency || 4;
    const results = new Array(nodes.length).fill('');
    // Default instruction, used when loadAnnotPrompt() provides nothing.
    // Hoisted out of annotateOne -- it is a pure constant, no need to rebuild
    // the string for every node.
    const ANNOT_PROMPT_DEFAULT =
        'Judge relevance by conceptual and semantic content, not literal word matches -- ignore spelling variations, capitalisation differences, and phrasing differences. ' +
        'Write one concise sentence (3-5 lines maximum) explaining how this excerpt relates to the query. ' +
        'Only respond IRRELEVANT (one word, nothing more) if the excerpt has no meaningful connection to the subject matter of the query. ' +
        'If the excerpt contains one or more relevant quotes, include them verbatim in the sentence. ' +
        'Output only the sentence or the word IRRELEVANT, no preamble, no extra commentary.';
    async function annotateOne(n, i) {
        // Entire body is inside try/catch so ANY failure (including prompt
        // construction, e.g. missing metadata, or loadAnnotPrompt throwing)
        // logs and leaves '' instead of rejecting. Previously the prompt-build
        // code sat outside the try block, so such a throw rejected the promise.
        try {
            const col   = n.metadata._collection || '?';
            const fname = n.metadata.source_file_name || n.metadata.file_name || 'unknown';
            const page  = n.metadata.page_label ? ` p.${n.metadata.page_label}` : '';
            const { llm, model, provider } = llmInfo;
            const _loaded = loadAnnotPrompt(provider);
            const _annotInstruction = _loaded || ANNOT_PROMPT_DEFAULT;
            const prompt =
                `Query: ${query}\n\n` +
                `Source: [${col}] ${fname}${page}\n\n` +
                `Excerpt:\n${n.text.slice(0, 1200)}\n\n` +
                _annotInstruction;
            let text = '';
            if (provider === 'claude') {
                const resp = await llm.messages.create({
                    model, max_tokens: 200, temperature: 1,
                    messages: [{ role: 'user', content: prompt }],
                });
                text = resp.content.map(b => b.text || '').join('').trim();
            } else if (provider === 'openai') {
                const resp = await llm.chat.completions.create({
                    model, max_tokens: 200, temperature: 1,
                    messages: [{ role: 'user', content: prompt }],
                });
                text = (resp.choices[0]?.message?.content || '').trim();
            } else if (provider === 'gemini') {
                const resp = await llm.models.generateContent({ model, contents: prompt,
                    config: { temperature: 1 } });
                text = (resp.text || '').trim();
            } else if (provider === 'local') {
                const base = cfg('OLLAMA_HOST', 'http://localhost:11434');
                // Dynamic num_ctx: estimate tokens from prompt length (~3.5 chars/token)
                // plus 500 headroom for output and model overhead. Capped at 4096.
                // (Renamed from _estTokens -- that shadowed the module-level
                // _estTokens() helper used elsewhere in this file.)
                const _estPromptTokens = Math.ceil(prompt.length / 3.5);
                const _numCtx          = Math.min(_estPromptTokens + 500, 4096);
                // Append /no_think to disable thinking mode on qwen3 and compatible models.
                // Has no effect on models that don't support it.
                const _prompt = prompt + '\n/no_think';
                const _timeout = parseInt(cfg('ANNOTATION_TIMEOUT_S', '120')) * 1000;
                const r = await fetch(`${base}/api/chat`, {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    signal: AbortSignal.timeout(_timeout),
                    body: JSON.stringify({ model: llm.model, stream: false,
                        options: { temperature: 1, num_ctx: _numCtx, num_predict: 300 },
                        messages: [{ role: 'user', content: _prompt }] }),
                });
                if (r.ok) {
                    const d = await r.json();
                    text = (d.message?.content || '').trim();
                    // Strip any residual <think>...</think> blocks from the response
                    text = text.replace(/<think>[\s\S]*?<\/think>/gi, '').trim();
                    // Normalize: if response is just IRRELEVANT (with any punctuation/case), standardize it
                    if (text.replace(/[^a-zA-Z]/g, '').toUpperCase() === 'IRRELEVANT') text = 'IRRELEVANT';
                }
            }
            results[i] = text;
        } catch(e) {
            logErr(`annotateNodes[${i}]: ${e.message}`);
        }
    }
    // Concurrency-limited worker pool.
    // BUGFIX: the continuation must run whether the task fulfills OR rejects;
    // the old `.then(onFulfilled)` with no rejection handler meant a rejected
    // annotateOne left `active` stuck > 0 and the pool never resolved (hang)
    // plus an unhandled rejection.
    const queue = nodes.map((n, i) => () => annotateOne(n, i));
    await new Promise(resolve => {
        let active = 0, idx = 0;
        const step = () => { active--; next(); };
        function next() {
            while (active < concurrency && idx < queue.length) {
                active++;
                queue[idx++]().then(step, step);
            }
            if (active === 0) resolve();
        }
        next();
    });
    return results;
}

// Synthesize a cited answer from retrieved nodes via the active LLM provider.
// Builds a numbered-source context block plus strict citation instructions,
// then dispatches to claude/openai/gemini/local. Returns { text, usage } where
// usage = { input_tokens, output_tokens } (0s when the provider omits counts).
// `signal` aborts cloud calls; `annotations[i]`, when present, is appended to
// source i's header. An unknown provider returns { text: '', usage: {} }.
async function synthesizeCited(llmInfo, query, nodes, signal, ctxMessages=[], annotations=[]) {
    const systemPrompt = loadSystemPrompt(llmInfo?.provider);
    // One block per node: "[N] SOURCE: collection/file p.X" + optional annotation + text.
    const ctxParts = nodes.map((n, i) => {
        const col   = n.metadata._collection || '?';
        const fname = n.metadata.source_file_name || n.metadata.file_name || 'unknown';
        const page  = n.metadata.page_label ? ` p.${n.metadata.page_label}` : '';
        const annot = annotations[i] ? `\nANNOTATION: ${annotations[i]}` : '';
        return `[${i+1}] SOURCE: ${col}/${fname}${page}${annot}\n${n.text}`;
    });
    const context = ctxParts.join('\n\n---\n\n');
    const userMsg = [
        `RETRIEVED SOURCES:\n\n${context}`,
        `QUERY: ${query}`,
        `INSTRUCTIONS:`,
        `- Answer using ONLY the retrieved sources above.`,
        `- Use plain prose. Do NOT use ## headers or bullet points.`,
        `- For emphasis use _underscores_ around a word.`,
        `- Do NOT use ** for bold.`,
        `- Place a citation [N] immediately after each claim that uses source N.`,
        `- Do NOT group citations at the end of paragraphs.`,
        `- Do NOT add a references section.`,
        `- If no source supports a point, omit it.`,
    ].join('\n');

    const { llm, model, provider } = llmInfo;
    let text = '', usage = {};

    if (provider === 'claude') {
        const resp = await llm.messages.create({
            model, max_tokens: parseInt(cfg('MAX_TOKENS','4096')),
            temperature: 0,
            system: systemPrompt,
            messages: [...ctxMessages, { role: 'user', content: userMsg }],
        }, { signal });
        text  = resp.content.map(b => b.text || '').join('');
        usage = { input_tokens: resp.usage?.input_tokens||0, output_tokens: resp.usage?.output_tokens||0 };

    } else if (provider === 'openai') {
        const resp = await llm.chat.completions.create({
            model, max_tokens: parseInt(cfg('MAX_TOKENS','4096')),
            temperature: 0,
            messages: [
                { role: 'system',    content: systemPrompt },
                ...ctxMessages,
                { role: 'user',      content: userMsg },
            ],
        }, { signal });
        text  = resp.choices[0]?.message?.content || '';
        usage = { input_tokens: resp.usage?.prompt_tokens||0, output_tokens: resp.usage?.completion_tokens||0 };

    } else if (provider === 'gemini') {
        // NOTE(review): unlike the other providers, this call does not receive
        // ctxMessages or the abort signal -- confirm whether that is intentional.
        const resp = await llm.models.generateContent({
            model, contents: userMsg,
            config: { systemInstruction: systemPrompt, temperature: 0 },
        });
        text  = resp.text || '';
        usage = { input_tokens: resp.usageMetadata?.promptTokenCount||0, output_tokens: resp.usageMetadata?.candidatesTokenCount||0 };

    } else if (provider === 'local') {
        const base = cfg('OLLAMA_HOST', 'http://localhost:11434');
        const ollamaTimeout = cfgInt('OLLAMA_TIMEOUT_S', 360) * 1000;
        // NOTE(review): this timeout signal is created once and shared by every
        // retry attempt below, so the budget spans all attempts combined.
        const ollamaAbort = signal
            ? AbortSignal.any([signal, AbortSignal.timeout(ollamaTimeout)])
            : AbortSignal.timeout(ollamaTimeout);
        // Build a dispatcher with no header/body timeout for slow local models
        // Build undici Agent with no timeouts so slow/loading models don't get
        // cut off. Falls back to plain fetch if undici isn't available.
        let ollamaDispatcher;
        try {
            const { Agent } = await import('undici');
            ollamaDispatcher = new Agent({ headersTimeout: 0, bodyTimeout: 0, connectTimeout: 30000 });
        } catch(_) { /* undici not available */ }

        // NOTE(review): `temperature` sits at the top level of this body;
        // Ollama's /api/chat documents sampling params under `options` --
        // verify this top-level field is actually honored.
        const _ollamaBody = JSON.stringify({
            model: llm.model, stream: false, temperature: 0,
            messages: [
                { role: 'system', content: systemPrompt },
                ...ctxMessages,
                { role: 'user',   content: userMsg },
            ],
        });

        // Retry loop: "Headers Timeout Error" means Ollama was still loading the
        // model when we connected. Wait briefly and retry  --  model will be ready.
        let r, _attempt = 0;
        while (true) {
            _attempt++;
            try {
                r = await fetch(`${base}/api/chat`, {
                    method: 'POST',
                    signal: ollamaAbort,
                    ...(ollamaDispatcher ? { dispatcher: ollamaDispatcher } : {}),
                    headers: { 'Content-Type': 'application/json' },
                    body: _ollamaBody,
                });
                break; // success  --  exit retry loop
            } catch (fetchErr) {
                const msg = fetchErr.message || '';
                const cause = fetchErr.cause?.message || '';
                // Retry only connection-class failures (model still loading /
                // connection dropped); anything else propagates immediately.
                const isTimeout = msg.includes('Headers Timeout') || cause.includes('Headers Timeout')
                               || msg.includes('fetch failed') || cause.includes('ECONNRESET')
                               || cause.includes('socket hang up');
                if (isTimeout && _attempt <= 3) {
                    const wait = _attempt * 5000; // 5s, 10s, 15s
                    logToFile(`[LLM] Ollama fetch attempt ${_attempt} failed (${cause||msg})  --  retrying in ${wait/1000}s`);
                    await new Promise(res => setTimeout(res, wait));
                    continue;
                }
                throw fetchErr; // non-retryable or too many attempts
            }
        }

        if (!r.ok) {
            const errText = await r.text().catch(() => '');
            // Detect embedding-only model used as chat model
            if (r.status === 400 && errText.includes('does not support chat')) {
                throw new Error(`Model '${llm.model}' is an embedding-only model and cannot generate responses. Switch to a chat model (e.g. llama3, mistral) in the provider selector.`);
            }
            throw new Error(`Ollama HTTP ${r.status}: ${errText.slice(0,200)}`);
        }
        const d = await r.json();
        if (d.error) throw new Error(`Ollama error: ${d.error}`);
        text  = d.message?.content || '';
        usage = { input_tokens: d.prompt_eval_count||0, output_tokens: d.eval_count||0 };
        if (!text) logErr(`Ollama returned empty response for model=${llm.model} done=${d.done} reason=${d.done_reason||'?'}`);
    }

    return { text, usage };
}


// ── Path map (written at ingest time by ingest.js, maps collection/filename -> [rel_paths]) ─
let _pathMap = null;  // process-lifetime cache of the parsed path map
function loadPathMap() {
    // Lazy-load data/path_map.json once; a missing or unreadable file yields {}.
    if (_pathMap) return _pathMap;
    try {
        const mapFile = path.join(PROJECT_DIR, 'data', 'path_map.json');
        if (fs.existsSync(mapFile)) {
            _pathMap = JSON.parse(fs.readFileSync(mapFile, 'utf8'));
        }
    } catch(_) { /* path map is optional -- swallow read/parse errors */ }
    return _pathMap || {};
}
function relPathForSource(col, fname) {
    // Resolve a (collection, filename) pair to a source-relative path.
    // Lookup order: "col/fname" key, bare filename key, then fname itself.
    const pathMap = loadPathMap();
    const mapKey = col + '/' + fname;
    const candidates = pathMap[mapKey] || pathMap[fname] || [];
    const chosen = candidates[0] || fname;
    logToFile(`[URL] col=${col} fname=${fname} key=${mapKey} paths=${JSON.stringify(candidates)} result=${chosen} pmKeys=${Object.keys(pathMap).length}`);
    return chosen;
}
function formatSources(nodes, annotations) {
    // Flatten retrieved nodes into parallel display arrays:
    //   lines  -- "[collection] filename p.X [OLE:..] [OCR] [NN%]" labels
    //   chunks -- raw chunk text
    //   urls   -- paths relative to the collection source dir
    //   annotations -- per-node note, or '' when absent
    const lines  = [];
    const chunks = [];
    const urls   = [];
    const annots = [];
    nodes.forEach((n, i) => {
        const colName  = n.metadata._collection || '?';
        const fileName = n.metadata.source_file_name || n.metadata.file_name || 'unknown';
        const pageTag  = n.metadata.page_label ? ` p.${n.metadata.page_label}` : '';
        // Relative score only: [100%] = best match this query, [0%] = weakest.
        // Raw cosine similarity omitted  --  it is model-dependent and not user-meaningful.
        const relPct = n.rel_score != null ? Math.round(n.rel_score * 100) : null;
        const simTag = relPct != null ? ` [${relPct}%]` : '';
        const oleTag = n.metadata.ole_parent_name ? ` [OLE:${n.metadata.ole_parent_name}]` : '';
        const ocrTag = n.metadata.ocr_type ? ' [OCR]' : '';
        lines.push(`[${colName}] ${fileName}${pageTag}${oleTag}${ocrTag}${simTag}`);
        chunks.push(n.text);
        // Store relative path only -- web.js prepends base URL from live config at serve time.
        // Prefer node metadata; otherwise consult the path map and strip any
        // leading "collection/" prefix; finally fall back to the bare filename.
        let relPath = n.metadata.source_rel_path;
        if (!relPath) {
            relPath = relPathForSource(colName, fileName);
            const prefix = colName + '/';
            if (relPath.startsWith(prefix)) relPath = relPath.slice(prefix.length);
        }
        urls.push(relPath);
        annots.push((annotations && annotations[i]) ? annotations[i] : '');
    });
    return { lines, chunks, urls, annotations: annots };
}

// -- Active collections --------------------------------------------------------
function parseActiveCollections() {
    // Parse ACTIVE_COLLECTIONS (comma-separated). Empty/unset -> null,
    // which callers interpret as "all collections".
    const raw = cfg('ACTIVE_COLLECTIONS', '');
    if (!raw.trim()) return null;
    const names = [];
    for (const part of raw.split(',')) {
        const trimmed = part.trim();
        if (trimmed) names.push(trimmed);
    }
    return names;
}

// -- Collection resolution delegated to collections.js -------------------------
import { buildSegDirMap as _buildSegDirMap, getCollectionNames as _getCollectionNames } from './collections.js';

// Segment-directory map, built lazily and cached for the process lifetime.
let _segDirMap = null;
function getSegDirMap() {
    if (_segDirMap) return _segDirMap;
    _segDirMap = _buildSegDirMap(CHROMA_PATH);
    return _segDirMap;
}
// Drop the cache so the next access rescans (e.g. after on-disk changes).
function invalidateSegDirMap() { _segDirMap = null; }

function getAllCollectionNames() {
    // ACTIVE_COLLECTIONS in Config is authoritative (maintained by webc
    // rename/config); fall back to a filesystem scan when it is unset/empty.
    const configured = parseActiveCollections();
    return (configured && configured.length)
        ? configured.sort()
        : _getCollectionNames(CHROMA_PATH);
}

// -- TUI -----------------------------------------------------------------------
// Raw terminal output -- no external libraries. Works over SSH.

// No colors, no ANSI attributes -- plain text only
// These are deliberately empty strings: the TUI concatenates them into its
// output, so keeping the names (instead of deleting them) means styling can
// be re-enabled later without touching every call site.
const ESC        = '';
const RESET_ATTR = '';
const BOLD_ATTR  = '';
const DIM_ATTR   = '';
const UL_ATTR    = '';
const FG_CYAN    = '';
const FG_GREEN   = '';
const FG_YELLOW  = '';
const FG_RED     = '';
const BG_BLUE    = '';

// Terminal size, falling back to the classic 80x24 when stdout is not a TTY
// or the properties are unavailable.
function termSize() {
    try {
        return { cols: process.stdout.columns || 80, rows: process.stdout.rows || 24 };
    } catch(_) {
        return { cols: 80, rows: 24 };
    }
}
// Raw ANSI cursor/screen control. Escape codes are still used for layout
// (positioning, clearing) even though text styling is disabled above.
function clearScreen() {
    process.stdout.write('\x1b[2J\x1b[H');
}
function moveTo(row, col) {
    process.stdout.write(`\x1b[${row};${col}H`);
}
function clearLine() {
    process.stdout.write('\x1b[2K');
}
function hideCursor() {
    process.stdout.write('\x1b[?25l');
}
function showCursor() {
    process.stdout.write('\x1b[?25h');
}
// Thin stdout wrappers used throughout the TUI.
function write(s) {
    process.stdout.write(s);
}
function writeln(s='') {
    process.stdout.write(s + '\n');
}

// Wrap text to width, preserving words
function wrapText(text, width) {
    // Greedy word wrap: pack words onto a line until adding the next word
    // (plus its separating space) would exceed `width`. A single word longer
    // than `width` still gets its own (over-long) line.
    const wrapped = [];
    let line = '';
    for (const word of text.split(' ')) {
        const needed = line.length + word.length + (line ? 1 : 0);
        if (needed > width) {
            if (line) wrapped.push(line);
            line = word;
        } else {
            line = line ? `${line} ${word}` : word;
        }
    }
    if (line) wrapped.push(line);
    return wrapped;
}

// Render inline _word_ as underlined, [N] as bold cyan
function renderInline(text, width, qid) {
    // Prefix bare citation markers [N] with the query id -> [qid.N].
    // Styling (_underline_, bold-cyan citations) is intentionally a no-op:
    // the TUI is plain-text only. `width` is currently unused but kept so
    // call sites stay compatible.
    if (qid) {
        text = text.replace(/\[(\d+)\]/g, (_m, num) => `[${qid}.${num}]`);
    }
    return text;
}

class QueryTUI {
    constructor() {
        // All state is private-by-convention (_ prefix); mutated by
        // handleInput() and the query pipeline, consumed by render().
        this._history   = [];   // historyLoadIndex() result
        this._histPos   = -1;   // position in query input history
        this._queryHist = loadQueryHist();  // typed queries (up/down arrow), persisted
        this._queryHistPos = -1;
        this._inputCursor  = 0;           // cursor position in _inputBuf
        this._collections = {};  // loaded: name -> coll
        this._collNames  = [];   // names of active collections (loaded or not)
        this._llmInfo    = null; // { label } -- display string for provider/model
        this._cost       = loadCost();  // cumulative token counts { in, out }, persisted
        this._statusMsg  = '';   // shown on the status row by render()
        this._scrollTop  = 0;   // scroll position in response pane
        this._currentEntry = null;  // history entry currently displayed
        this._entryPos   = -1;  // position in history for PgUp/PgDn
        this._srcMode    = false;  // true = source-only view (!src)
        this._inputBuf   = '';     // current input line being typed
        this._provider   = cfg('LLM_PROVIDER', 'claude');
        this._topK       = cfgInt('TOP_K', 8);  // retrieval depth
        this._running    = true;
        this._querying   = false;  // true while a query is in flight
        this._abortCtrl  = null;   // AbortController for !cancel
    }

    async init() {
        // Interactive-mode startup: load the history index, kick off
        // collection loading in the background (TUI stays responsive), and
        // derive an LLM display label synchronously -- no network round-trip.
        this._history = historyLoadIndex();
        this._entryPos = this._history.length - 1;

        // Load collections
        const names = parseActiveCollections() || getAllCollectionNames();
        this._collNames = names;
        this.setStatus('Loading collections...');
        this.render();

        // Load collections in background -- don't block the TUI
        let loaded = 0;
        const loadErrors = [];
        const loadAll = async () => {
            for (const name of names) {
                try {
                    this._collections[name] = await loadCollection(name);
                    loaded++;
                    this.setStatus(`Loaded ${loaded}/${names.length}: ${name}`);
                    this.render();
                } catch(e) {
                    // Keep the status line short; full error goes to the log.
                    const msg = e.message.slice(0, 80);
                    logErr(`load '${name}': ${e.message}\n${e.stack}`);
                    loadErrors.push(`${name}: ${msg}`);
                    this.setStatus(`FAIL ${name}: ${msg}`);
                    this.render();
                }
            }
            if (loadErrors.length) {
                this.setStatus(`${loaded}/${names.length} loaded -- ${loadErrors.length} failed (see logs/query.log)`);
            } else {
                this.setStatus(`${loaded}/${names.length} collection(s) ready`);
            }
            this.render();
        };
        // Start loading but don't await -- TUI remains responsive
        loadAll().catch(e => logErr(`loadAll: ${e.message}`));
        // _llmInfo is updated lazily when provider changes -- don't re-fetch on every render
        if (!this._llmInfo || this._llmInfoProvider !== this._provider) {
            // Derive label synchronously without awaiting
            const p = this._provider || 'claude';
            const m = p === 'local'   ? (cfg('LOCAL_LLM_MODEL','?') )
                    : p === 'claude'  ? cfg('ANTHROPIC_MODEL','claude-sonnet-4-6')
                    : p === 'openai'  ? cfg('OPENAI_MODEL','gpt-4o')
                    : p === 'gemini'  ? cfg('GEMINI_MODEL','gemini-2.0-flash')
                    : p;
            const pLabel = p === 'local' ? `Local: ${m}` : `${p.charAt(0).toUpperCase()+p.slice(1)}: ${m}`;
            this._llmInfo = { label: pLabel };
            this._llmInfoProvider = p;   // cache key: re-derive only when provider changes
        }
        this.render();
    }

    // Queue a status-bar message; displayed on the next render().
    setStatus(msg) { this._statusMsg = msg; }

    async initOneShot(colNames) {
        // Minimal init for one-shot mode -- no TUI rendering, awaits all collections
        const names = colNames && colNames.length > 0 ? colNames
                    : (parseActiveCollections() || getAllCollectionNames());
        for (const name of names) {
            try { this._collections[name] = await loadCollection(name); }
            catch(e) { writeln('Error loading collection ' + name + ': ' + e.message); process.exit(1); }
        }
    }

    render() {
        if (!process.stdout.isTTY) return;
        const { cols, rows } = termSize();
        clearScreen();

        // -- Header (3 lines) ------------------------------------------
        const costTxt = costStr(this._cost.in, this._cost.out, cfg('ANTHROPIC_MODEL','claude-sonnet-4-6'));
        const collTxt = this._collNames.join(', ').slice(0, cols - 40) || '(none)';
        const llmTxt  = this._llmInfo?.label || '...';
        const hdr     = `  ${collTxt}  |  top-${this._topK}  |  ${costTxt} total  |  ${llmTxt}`;
        writeln(('--- RAGWeed  v' + VERSION + ' ').padEnd(cols, '-'));
        writeln(hdr.slice(0, cols));
        writeln('-'.repeat(cols));

        // -- Response area ---------------------------------------------
        // Reserve 5 lines at bottom: separator, status, separator, "> input", help
        const reserved   = 5;
        const maxRespRows = Math.max(4, rows - 3 - reserved);
        const entry = this._currentEntry;

        if (entry) {
            const qLabel = `You #${entry.qid}  ${entry.ts}`;
            writeln(qLabel);
            for (const l of wrapText(entry.question, cols)) writeln(l);
            writeln();
            writeln(`RAGWeed  #${entry.qid}  ${entry.sources?.length||0} sources`);

            if (this._srcMode && entry.sources) {
                for (let i = 0; i < entry.sources.length; i++) {
                    writeln(`[${i+1}] ${entry.sources[i]}`);
                }
            } else {
                const answerLines = [];
                for (const para of (entry.answer||'').split('\n')) {
                    answerLines.push(...wrapText(para || ' ', cols));
                }
                // Dynamic split: answer gets top portion, sources get rest
                // Answer: up to 8 lines (or fewer if short), sources: rest of space
                const totalRows   = maxRespRows - 2;  // -2 for separator + spacing
                const srcCount    = entry.sources?.length || 0;
                const ansMax      = Math.min(8, Math.floor(totalRows * 0.4));
                const respRows    = Math.min(answerLines.length, ansMax);
                const srcAreaRows = totalRows - respRows - 1;  // -1 for separator
                const srcVisible  = Math.max(4, srcAreaRows);

                // Answer
                const visible = answerLines.slice(this._scrollTop, this._scrollTop + respRows);
                for (const l of visible) writeln(renderInline(l, cols, entry.qid));
                if (answerLines.length > respRows) {
                    writeln(`  ^v scroll answer (${answerLines.length} lines)  -- !src for source-only view`);
                }

                // Source list
                if (srcCount) {
                    writeln('.'.repeat(cols));
                    const srcStart = this._srcScrollTop || 0;
                    const srcs = entry.sources;
                    for (let i = srcStart; i < Math.min(srcs.length, srcStart + srcVisible); i++) {
                        const label = `  [${entry.qid}.${i+1}]`;
                        writeln((label + ` ${srcs[i]}`).slice(0, cols));
                    }
                    if (srcs.length > srcStart + srcVisible) {
                        const remaining = srcs.length - srcStart - srcVisible;
                        writeln(`  ... ${remaining} more -- PgDn to scroll`);
                    } else if (srcStart > 0) {
                        writeln(`  ... PgUp for previous sources`);
                    }
                }
            }
        } else {
            writeln('  No entries yet -- type a question and press Enter');
        }

        // -- Status + input at fixed bottom rows -----------------------
        const histTxt = this._history.length
            ? `Entry ${this._entryPos+1} of ${this._history.length}`
            : 'No history';
        moveTo(rows - 4, 1); clearLine();
        writeln('-'.repeat(cols));
        moveTo(rows - 3, 1); clearLine();
        writeln(`  ${histTxt}  |  ${this._statusMsg.slice(0, cols - 25)}`);
        moveTo(rows - 2, 1); clearLine();
        writeln('-'.repeat(cols));
        moveTo(rows - 1, 1); clearLine();
        // Show block cursor at end of input (hidden while query is running)
        if (!this._querying) {
            const pre  = this._inputBuf.slice(0, this._inputCursor || 0);
            const cur  = this._inputBuf[this._inputCursor || 0] || ' ';
            const post = this._inputBuf.slice((this._inputCursor || 0) + 1);
            write(`> ${pre}[7m${cur}[m${post}`);  // reverse-video cursor char
        } else {
            write(`> ${this._inputBuf}`);
        }
        moveTo(rows, 1); clearLine();
        write(`  Enter=query  q=quit  ?=help  !src=sources  !cfg=config  PgUp/PgDn=history`);
    }

    async handleInput(line) {
        const cmd = line.trim();
        if (!cmd) return;

        // quit commands
        if (cmd === 'q' || cmd === 'quit' || cmd === 'exit' || cmd === '!quit') {
            showCursor();
            clearScreen();
            process.exit(0);
        }

        if (cmd === '?') { await this._showHelp(); return; }
        if (cmd === '!src') { this._srcMode = !this._srcMode; this.render(); return; }
        if (cmd === '!cfg') { await this._configMenu(); return; }
        if (cmd === '!retry' || cmd === '!retry full') {
            const _mode = cmd === '!retry full' ? 'full' : 'auto';
            const entry = this._currentEntry;
            if (!entry) { this.setStatus('No current entry to retry'); this.render(); return; }
            if ((entry.status || 'ok') === 'ok') { this.setStatus('Entry succeeded -- nothing to retry'); this.render(); return; }
            const hasNodes = entry.raw_nodes && entry.raw_nodes.length > 0;
            const useMode  = (_mode === 'full' || !hasNodes) ? 'full' : 'synthesis';
            this.setStatus(`Retrying (${useMode})...`); this.render();
            if (useMode === 'synthesis') {
                this._querying = true;
                this._abortCtrl = new AbortController();
                this._queryStart = Date.now();
                this._timerInterval = setInterval(() => {
                    const s = ((Date.now() - this._queryStart) / 1000).toFixed(1);
                    this.setStatus(`Re-synthesizing... ${s}s`); this.render();
                }, 100);
                try {
                    const { llm, label, error, model, provider } = await getLLM(this._provider);
                    if (error || !llm) { this.setStatus(label); return; }
                    const llmInfo = { llm, label, model, provider };
                    const { text, usage } = await synthesizeCited(
                        llmInfo, entry.question, entry.raw_nodes, this._abortCtrl?.signal
                    );
                    const elapsed = ((Date.now() - this._queryStart) / 1000).toFixed(1);
                    const { lines: srcLines, chunks: srcChunks, urls: srcUrls, annotations: srcAnnotations } = formatSources(entry.raw_nodes, entry.src_annotations || []);
                    const inTok  = usage.input_tokens  || 0;
                    const outTok = usage.output_tokens || 0;
                    this._cost.in  += inTok; this._cost.out += outTok;
                    saveCost(this._cost.in, this._cost.out);
                    historyUpdateEntry(entry.qid, {
                        answer: text, sources: srcLines, src_chunks: srcChunks, src_urls: srcUrls,
                        src_annotations: srcAnnotations,
                        status: 'ok', raw_nodes: [],
                        meta: { ...entry.meta, provider, model, elapsed,
                                in_tokens: inTok, out_tokens: outTok,
                                retried_at: new Date().toISOString() },
                    });
                    this._history = historyLoadIndex();
                    this._currentEntry = historyLoadEntry(this._history, this._entryPos);
                    this.setStatus(`Retry done  ${elapsed}s  ${srcLines.length} sources  ${costStr(inTok, outTok, model||'')} this query`);
                } catch(e) {
                    if (e?.name === 'AbortError') { this.setStatus('Retry cancelled'); }
                    else { logErr(`retry synthesis: ${e.message}`); this.setStatus(`Retry error: ${e.message.slice(0,80)}`); }
                } finally {
                    this._querying = false; this._abortCtrl = null;
                    if (this._timerInterval) { clearInterval(this._timerInterval); this._timerInterval = null; }
                }
                this.render(); return;
            }
            await this._runQuery(entry.question); // full retry -- new entry
            return;
        }
        if (cmd === '!cancel') { if (this._abortCtrl) { this._abortCtrl.abort(); this._abortCtrl = null; } return; }
        // -- Collection management ------------------------------------------
        if (cmd === 'list') {
            const allNames = getAllCollectionNames();
            const active = parseActiveCollections() || allNames;
            this.setStatus(`Collections: ${allNames.map(n => (active.includes(n)?'[x]':'[ ]')+' '+n).join('  ')}`);
            this.render(); return;
        }
        if (cmd.startsWith('add ')) {
            const name = cmd.slice(4).trim();
            const active = parseActiveCollections() || getAllCollectionNames();
            if (!active.includes(name)) {
                const updated = [...active, name];
                process.env.ACTIVE_COLLECTIONS = updated.join(',');
                this.setStatus(`Added '${name}' -- re-run to load`);
            } else {
                this.setStatus(`'${name}' already active`);
            }
            this.render(); return;
        }
        if (cmd.startsWith('drop ')) {
            const name = cmd.slice(5).trim();
            const active = parseActiveCollections() || getAllCollectionNames();
            const updated = active.filter(n => n !== name);
            process.env.ACTIVE_COLLECTIONS = updated.join(',');
            this.setStatus(`Dropped '${name}' -- re-run to apply`);
            this.render(); return;
        }
        if (cmd.startsWith('c ')) {
            const name = cmd.slice(2).trim();
            const active = Object.keys(this._collections);
            if (this._collections[name]) {
                delete this._collections[name];
                this.setStatus(`Collection '${name}' disabled for this session`);
            } else {
                this.setStatus(`Loading '${name}'...`); this.render();
                try {
                    this._collections[name] = await loadCollection(name);
                    this.setStatus(`Collection '${name}' enabled`);
                } catch(e) { this.setStatus(`Failed: ${e.message}`); }
            }
            this.render(); return;
        }
        if (cmd === 'c') {
            const active = Object.keys(this._collections);
            const all = getAllCollectionNames();
            this.setStatus(`Active: ${active.join(', ')}  |  All: ${all.join(', ')}`);
            this.render(); return;
        }

        // -- LLM provider (r = remote) ----------------------------------
        if (cmd === 'r' || cmd === '!r') {
            const providers = ['claude','openai','gemini','local'];
            clearScreen();
            writeln('Select LLM provider:');
            providers.forEach((p, i) => writeln(`  ${i+1}  ${p}${p === this._provider ? '  <-- current' : ''}`));
            writeln('');
            writeln('  Enter number (or Enter to cancel): ');
            const pick = await new Promise(res => {
                let buf = '';
                const onKey = (chunk) => {
                    const k = chunk.toString();
                    if (k === '\r' || k === '\n') {
                        process.stdin.off('data', onKey);
                        res(buf.trim());
                    } else if (k === '\x7f') {
                        buf = buf.slice(0,-1);
                    } else if (k >= ' ') {
                        buf += k;
                        process.stdout.write(k);
                    }
                };
                process.stdin.on('data', onKey);
            });
            const n = parseInt(pick);
            if (n >= 1 && n <= providers.length) {
                this._provider = providers[n-1];
                cfgSet('LLM_PROVIDER', this._provider);
                this._llmInfo = null;  // force label refresh
                this.setStatus(`Provider: ${this._provider}`);
            } else if (pick) {
                this.setStatus(`Invalid choice: ${pick}`);
            }
            this.render(); return;
        }
        if (cmd.startsWith('r ') || cmd.startsWith('!r ')) {
            const prov = cmd.split(' ')[1];
            if (['claude','openai','gemini','local'].includes(prov)) {
                this._provider = prov;
                this.setStatus(`Provider: ${prov}`);
            } else {
                this.setStatus(`Unknown provider '${prov}' -- use: claude openai gemini local`);
            }
            this.render(); return;
        }

        // -- Local model (l = local) ------------------------------------
        if (cmd === 'l' || cmd === '!l') {
            let models = [];
            try {
                const resp = await fetch('http://localhost:11434/api/tags');
                const data = await resp.json();
                models = (data.models || []).map(m => m.name);
            } catch(e) {
                this.setStatus(`Ollama not reachable: ${e.message}`);
                this.render(); return;
            }
            if (!models.length) {
                this.setStatus('No Ollama models found');
                this.render(); return;
            }
            const cur = cfg('LOCAL_LLM_MODEL', 'llama3');
            clearScreen();
            writeln('Select local Ollama model:');
            models.forEach((m, i) => writeln(`  ${i+1}  ${m}${m === cur ? '  <-- current' : ''}`));
            writeln('');
            writeln('  Enter number (or Enter to cancel): ');
            const pick = await new Promise(res => {
                let buf = '';
                const onKey = (chunk) => {
                    const k = chunk.toString();
                    if (k === '\r' || k === '\n') {
                        process.stdin.off('data', onKey);
                        res(buf.trim());
                    } else if (k === '\x7f') {
                        buf = buf.slice(0,-1);
                    } else if (k >= ' ') {
                        buf += k;
                        process.stdout.write(k);
                    }
                };
                process.stdin.on('data', onKey);
            });
            const n = parseInt(pick);
            if (n >= 1 && n <= models.length) {
                cfgSet('LOCAL_LLM_MODEL', models[n-1]);
                cfgSet('LLM_PROVIDER', 'local');
                this._provider = 'local';
                this._llmInfo = null;  // force label refresh
                this.setStatus(`Switched to local: ${models[n-1]}`);
            } else if (pick) {
                this.setStatus(`Invalid choice: ${pick}`);
            }
            this.render(); return;
        }
        if (cmd.startsWith('l ') || cmd.startsWith('!l ')) {
            const model = cmd.split(' ')[1];
            process.env.LOCAL_LLM_MODEL = model;
            this._provider = 'local';
            this.setStatus(`Switched to local model: ${model}`);
            this.render(); return;
        }

        // -- Cost tracking ----------------------------------------------
        if (cmd === '$') {
            this.setStatus(`Cost: in=${this._cost.in} out=${this._cost.out} tokens  ${costStr(this._cost.in, this._cost.out, cfg('ANTHROPIC_MODEL','claude-sonnet-4-6'))}`);
            this.render(); return;
        }
        if (cmd === '!$') {
            this._cost = { in: 0, out: 0 }; saveCost(0, 0);
            this.setStatus('Cost reset to zero');
            this.render(); return;
        }

        // -- Context ----------------------------------------------------
        if (cmd === 'x' || cmd === '!x') {
            this._useCtx = !this._useCtx;
            this.setStatus(`Conversation context: ${this._useCtx ? 'ON' : 'OFF'}`);
            this.render(); return;
        }
        if (cmd === '!ctx clear') {
            this._ctxHistory = [];
            this.setStatus('Context history cleared');
            this.render(); return;
        }

        // -- History navigation -----------------------------------------
        if (cmd === '#' || cmd === '##') {
            if (this._history.length) {
                this._entryPos = this._history.length - 1;
                this._currentEntry = historyLoadEntry(this._history, this._entryPos);
                this._scrollTop = 0; this._srcMode = false; this._srcScrollTop = 0;
            }
            this.render(); return;
        }
        // #N -- jump to query N
        if (/^#\d+$/.test(cmd)) {
            const n = parseInt(cmd.slice(1)) - 1;
            if (n >= 0 && n < this._history.length) {
                this._entryPos = n;
                this._currentEntry = historyLoadEntry(this._history, n);
                this._scrollTop = 0; this._srcMode = false; this._srcScrollTop = 0;
            } else { this.setStatus(`No entry #${n+1}`); }
            this.render(); return;
        }
        // #N.M or .M -- view source passage
        const srcMatch = cmd.match(/^(?:#(\d+))?\.(\d+)$/) || cmd.match(/^(\d+)\.(\d+)$/);
        if (srcMatch) {
            const entryN = srcMatch[1] ? parseInt(srcMatch[1]) - 1 : this._entryPos;
            const passN  = parseInt(srcMatch[2]) - 1;
            const entry  = historyLoadEntry(this._history, entryN);
            if (entry?.src_chunks?.[passN]) {
                await this._showOverlay(entry, passN);
            } else { this.setStatus(`Source ${cmd} not found`); this.render(); }
            return;
        }
        if (cmd.startsWith('!quote ')) {
            const n = parseInt(cmd.slice(7)) - 1;
            if (this._currentEntry?.src_chunks?.[n]) {
                await this._showOverlay(this._currentEntry, n);
            }
            return;
        }
        // .N -- show source N text
        if (/^\.\d+$/.test(cmd)) {
            const n = parseInt(cmd.slice(1)) - 1;
            if (this._currentEntry?.src_chunks?.[n]) {
                await this._showOverlay(this._currentEntry, n);
            } else {
                writeln(`Source ${cmd} not available`);
            }
            return;
        }

        // Minimum query length: must be at least 3 chars and contain a space or be >= 4 chars
        // Single letters, two-char strings, and single words under 4 chars are not queries
        const words = cmd.trim().split(/\s+/);
        if (cmd.length < 3 || (words.length === 1 && cmd.length < 4)) {
            this.setStatus(`Unknown command: '${cmd}' -- type a question or '?' for help`);
            this.render();
            return;
        }
        await this._runQuery(cmd);
    }

    async _runQuery(queryStr) {
        // Full query pipeline: retrieval over loaded collections + LLM
        // synthesis, with a live elapsed-time ticker, abort support
        // (!cancel), cost accounting, and history persistence.
        this._querying = true;
        this._abortCtrl = new AbortController();
        this._queryStart = Date.now();
        // Status-row ticker while the query is in flight (cleared in finally).
        this._timerInterval = setInterval(() => {
            const s = ((Date.now() - this._queryStart) / 1000).toFixed(1);
            this.setStatus(`Querying... ${s}s`);
            this.render();
        }, 100);
        this.render();

        const t0 = this._queryStart;
        try {
            const { llm, label, error, model, provider } = await getLLM(this._provider);
            if (error || !llm) {
                // getLLM's label carries the error message in the failure case.
                this.setStatus(label);
                this.render();
                return;
            }
            const llmInfo = { llm, label, model, provider };
            const result  = await queryCollections(
                this._collections, queryStr, llmInfo, this._topK,
                this._abortCtrl?.signal,
                // Progress callback: phase name + elapsed seconds on status row.
                (phase) => { this.setStatus(`${phase}  ${((Date.now()-this._queryStart)/1000).toFixed(1)}s`); this.render(); }
            );
            const elapsed = ((Date.now() - t0) / 1000).toFixed(1);

            const { lines: srcLines, chunks: srcChunks, urls: srcUrls } = formatSources(result.nodes);
            const usage = result.usage || {};
            const inTok  = usage.input_tokens  || 0;
            const outTok = usage.output_tokens || 0;

            this._cost.in  += inTok;
            this._cost.out += outTok;
            saveCost(this._cost.in, this._cost.out);

            // On llm_error, keep the raw retrieval nodes with the entry so
            // '!retry' can re-synthesize without re-searching.
            const _qStatus = result.status || 'ok';
            const _rawNodes = (_qStatus === 'llm_error') ? result.nodes : [];
            const qid = historySaveEntry(queryStr, result.text, srcLines, srcChunks, srcUrls, {
                provider, model, elapsed,
                in_tokens: inTok, out_tokens: outTok,
            }, _qStatus, _rawNodes);

            // Jump the view to the new entry and reset scroll/view state.
            this._history = historyLoadIndex();
            this._entryPos = this._history.length - 1;
            this._currentEntry = historyLoadEntry(this._history, this._entryPos);
            this._scrollTop = 0; this._srcScrollTop = 0;
            this._srcMode   = false;
            this._queryHist.push(queryStr);
            saveQueryHist(this._queryHist);
            this._queryHistPos = -1;
            if (_qStatus !== 'ok') {
                const _retryHint = _qStatus === 'llm_error'
                    ? '  -- type !retry to re-synthesize, or !retry full to re-search'
                    : '  -- type !retry to search again';
                this.setStatus(`${_qStatus}  ${elapsed}s${_retryHint}`);
            } else {
                this.setStatus(`Done  ${elapsed}s  ${srcLines.length} sources  ${costStr(inTok, outTok, model||'')} this query`);
            }
        } catch(e) {
            // Treat aborts (via !cancel) as a normal cancellation, not an error.
            if (e?.name === 'AbortError' || e?.message?.includes('abort') || e?.message?.includes('cancel')) {
                this.setStatus('Cancelled');
            } else {
                logErr(`query error: ${e.message}\n${e.stack}`);
                this.setStatus(`Error: ${e.message.slice(0,80)}`);
            }
        } finally {
            this._querying   = false;
            this._abortCtrl  = null;
            if (this._timerInterval) { clearInterval(this._timerInterval); this._timerInterval = null; }
        }
        this.render();
    }

    async _showOverlay(entry, passN) {
        // Draw source text in an overlay box without clearing the full screen.
        // Blocks (via a Promise) until the user closes the overlay, then
        // repaints the TUI underneath.
        const { cols, rows } = termSize();
        const srcText = entry.src_chunks?.[passN] || '';
        const srcLabel = entry.sources?.[passN] || `Source ${passN+1}`;
        const qid = entry.qid;
        const boxW = Math.min(cols - 4, 100);           // cap width at 100 cols
        const boxX = Math.floor((cols - boxW) / 2) + 1; // horizontally centered
        const boxY = 3;                                 // below the header
        const boxH = rows - boxY - 3;

        // Draw box frame: top border, title row, divider, empty body, bottom border
        const horiz = '-'.repeat(boxW - 2);
        moveTo(boxY, boxX);     write(`+${horiz}+`);
        moveTo(boxY+1, boxX);   write(`| [${qid}.${passN+1}] ${srcLabel.slice(0, boxW-8).padEnd(boxW-4)} |`);
        moveTo(boxY+2, boxX);   write(`+${horiz}+`);
        for (let r = 0; r < boxH - 4; r++) {
            moveTo(boxY+3+r, boxX);
            write('|' + ' '.repeat(boxW-2) + '|');
        }
        moveTo(boxY+boxH-1, boxX); write(`+${horiz}+`);

        // Fill content: wrap the source text to the box's inner width
        const textLines = [];
        for (const para of srcText.split('\n')) {
            textLines.push(...wrapText(para || ' ', boxW - 4));
        }
        let scrollY = 0;
        const contentRows = boxH - 5;

        const drawContent = () => {
            // Repaint only the interior (scroll window) plus the footer line.
            for (let r = 0; r < contentRows; r++) {
                moveTo(boxY+3+r, boxX+1);
                const line = textLines[scrollY+r] || '';
                write(' ' + line.slice(0, boxW-4).padEnd(boxW-3));
            }
            moveTo(boxY+boxH-2, boxX+1);
            const pct = textLines.length <= contentRows ? '' : ` (${scrollY+1}-${Math.min(scrollY+contentRows,textLines.length)} of ${textLines.length})`;
            write(` Esc/q/Enter=close  ^v=scroll${pct}`.padEnd(boxW-2).slice(0, boxW-2));
        };
        drawContent();

        // Key handler for overlay: Esc/q/Enter/Space close; arrows and
        // PgUp/PgDn ('\x1b[5~' / '\x1b[6~') scroll. A lone '\x1b' chunk is
        // Esc; arrow keys arrive as multi-byte chunks so they don't collide.
        await new Promise(res => {
            const onKey = (chunk) => {
                const k = chunk.toString();
                if (k === '\x1b' || k === 'q' || k === '\r' || k === '\n' || k === ' ') {
                    process.stdin.off('data', onKey); res();
                } else if (k === '\x1b[A' && scrollY > 0) {          // up arrow
                    scrollY = Math.max(0, scrollY - 1); drawContent();
                } else if (k === '\x1b[B') {                          // down arrow
                    scrollY = Math.min(Math.max(0, textLines.length - contentRows), scrollY + 1); drawContent();
                } else if (k === '\x1b[5~') {                         // PgUp
                    scrollY = Math.max(0, scrollY - contentRows); drawContent();
                } else if (k === '\x1b[6~') {                         // PgDn
                    scrollY = Math.min(Math.max(0, textLines.length - contentRows), scrollY + contentRows); drawContent();
                }
            };
            process.stdin.on('data', onKey);
        });
        this.render();
    }

    async _showHelp() {
        clearScreen();
        const H = [
            'RAGWeed v' + VERSION + ' -- Commands',
            '',
            'Queries',
            '  <question>              send a RAG query to active collections',
            '',
            'LLM Provider',
            '  r                       cycle provider (claude/openai/gemini/local)',
            '  r <1|2|3|4>             1=claude  2=openai  3=gemini  4=local',
            '  l                       list local Ollama models',
            '  l <name>                switch to named Ollama model',
            '',
            'Collections',
            '  c                       show active vs available collections',
            '  c <name>                toggle collection on/off this session',
            '  list                    list all collections with chunk counts',
            '  add <name>              add collection to active set',
            '  drop <name>             remove collection from active set',
            '',
            'History & Navigation',
            '  PgUp / !l               previous history entry',
            '  PgDn / !r               next history entry',
            '  ##                      jump to latest entry',
            '  #N                      jump to entry N',
            '  #N.M  /  .M             view source M of entry N (or current)',
            '',
            'Context & Cost',
            '  x                       toggle conversation context on/off',
            '  !ctx clear              clear context history',
            '  $  /  !cost             show token cost this session',
            '  !$  /  !reset           reset cost counter',
            '',
            'Sources',
            '  !src                    toggle source-only view',
            '  .N                      show raw text of source N',
            '',
            'Other',
            '  !cfg                    open configuration menu',
            '  !top N                  set top-k retrieval count',
            '  !cancel                 abort in-progress query',
            '  ?                       this help',
            '  q / quit                exit',
            '',
            'Indexing  (run from shell, not TUI)',
            '  ./run.sh ingest                    index all collections',
            '  ./run.sh ingest -c <name>          index one collection',
            '  ./run.sh ingest --zip always       extract ZIP files too',
            '  ./run.sh ingest --list             show collection status',
            '',
            '  Press any key to return...',
        ];
        for (const l of H) writeln(l);
        // Wait for keypress then restore TUI
        await new Promise(res => process.stdin.once('data', res));
        this._scrollTop = 0;
        this.render();
    }

    // Open the configuration menu: suspend the TUI (restore cursor, leave raw
    // mode), run scripts/config.sh interactively, re-read the Config file into
    // process.env, reload provider/model/topK and the active collections, then
    // restore the TUI. Statement order matters: stdin must leave raw mode and
    // be paused BEFORE spawnSync inherits the terminal.
    async _configMenu() {
        // Suspend TUI, run config.sh, then resume
        showCursor();
        clearScreen();
        process.stdin.setRawMode(false);
        process.stdin.pause();

        // Run config.sh synchronously
        const { spawnSync } = require('child_process');
        spawnSync('bash', [path.join(PROJECT_DIR, 'scripts', 'config.sh')], {
            stdio: 'inherit',
            env: { ...process.env, PROJECT_DIR }
        });
        // NOTE(review): spawnSync status/error is not checked -- a failed
        // config.sh run is silently ignored. Confirm that is intended.

        // Reload Config after config changes
        try {
            const envLines = fs.readFileSync(path.join(PROJECT_DIR, 'Config'), 'utf8').split('\n');
            for (const line of envLines) {
                // Only UPPER_SNAKE keys are honoured; the replace strips one
                // leading and one trailing quote character (single or double).
                const m = line.match(/^([A-Z_][A-Z0-9_]*)=(.*)$/);
                if (m) process.env[m[1]] = m[2].replace(/^['"]|['"]$/g,'');
            }
        } catch(_) {}

        // Reload provider/model/topK from updated env
        this._provider  = cfg('LLM_PROVIDER', 'claude');
        this._llmInfo   = await getLLM(this._provider);
        this._topK      = parseInt(cfg('TOP_K', '64')) || 64;
        // Reload collections from updated ACTIVE_COLLECTIONS
        const newNames = parseActiveCollections() || getAllCollectionNames();
        this._collNames   = newNames;
        this._collections = {};
        let loaded = 0;
        for (const name of newNames) {
            try {
                this._collections[name] = await loadCollection(name);
                loaded++;
                this.setStatus(`Reloading ${loaded}/${newNames.length}: ${name}`);
                this.render();
            } catch(e) {
                // A collection that fails to load is dropped from the active set
                dbg(`[RELOAD] failed ${name}: ${e.message}`);
            }
        }

        // Restore TUI
        process.stdin.resume();
        process.stdin.setRawMode(true);
        hideCursor();
        clearScreen();
        this.render();
    }

    // Main entry point. Three modes, decided by the CLI arguments:
    //   - query string present  -> one-shot: answer on stdout, then return
    //   - no query, not a TTY   -> print usage and return
    //   - no query, TTY         -> interactive raw-mode TUI (never returns;
    //                              exits only via signal/quit handlers)
    async run() {
        // Parse args -- presence of a query string triggers one-shot mode, no args = TUI
        const rawArgs = process.argv.slice(2);
        const colOverrides = [];
        const queryArgs = [];
        for (let i = 0; i < rawArgs.length; i++) {
            // "-c <name>" may repeat; everything else joins the query string
            if (rawArgs[i] === '-c' && i + 1 < rawArgs.length) {
                colOverrides.push(rawArgs[++i]);
            } else {
                queryArgs.push(rawArgs[i]);
            }
        }
        const query = queryArgs.join(' ');

        if (query) {
            // One-shot mode: query string present -- run, print result, exit
            await this.initOneShot(colOverrides);
            const { llm, label, error, model, provider } = await getLLM(this._provider);
            // On LLM setup failure, getLLM puts the error text in `label`
            if (error) { writeln(label); return; }
            const result = await queryCollections(this._collections, query, { llm, label, model, provider }, this._topK);
            writeln(result.text);
            return;
        }

        if (!process.stdout.isTTY) {
            writeln('Usage: ./run.sh query [-c <collection>] [-c <collection>] "question"');
            return;
        }

        // Set raw mode for key-by-key input
        process.stdin.setRawMode(true);
        process.stdin.resume();
        process.stdin.setEncoding('utf8');
        process.stdin.on('error', (err) => {
            // EIO/EPIPE mean the terminal went away -- exit quietly
            if (err.code === 'EIO' || err.code === 'EPIPE') { showCursor(); process.exit(0); }
            logErr(`stdin error: ${err.message}`);
        });
        process.on('SIGTERM', () => { showCursor(); clearScreen(); process.exit(0); });
        process.on('SIGINT', () => {
            // First Ctrl-C during a query aborts the query; otherwise exit
            if (this._querying && this._abortCtrl) {
                // Abort the running query
                this._abortCtrl.abort();
                this.setStatus('Cancelled');
                // _querying will be cleared in the finally block
            } else {
                showCursor(); clearScreen(); process.exit(0);
            }
        });

        hideCursor();
        clearScreen();
        write(`RAGWeed v${VERSION} -- loading collections...\n`);
        write(`  (type q to quit, other commands available when load completes)\n`);

        // Register early quit handler so keyboard works during loading
        const earlyQuit = (chunk) => {
            const k = chunk.toString();
            // NOTE(review): the two `k === ''` comparisons look like literal
            // control bytes (likely ^C and ESC) that were lost in transit --
            // as written they can never match. Confirm against upstream.
            if (k === 'q' || k === '' || k === '') {
                showCursor(); clearScreen(); process.exit(0);
            }
        };
        process.stdin.on('data', earlyQuit);

        await this.init();

        process.stdin.off('data', earlyQuit);

        // Load last history entry for display
        if (this._history.length > 0) {
            this._entryPos   = this._history.length - 1;
            this._currentEntry = historyLoadEntry(this._history, this._entryPos);
        }
        this.render();

        // Key input handler
        let inputBuf = '';
        let escSeq   = '';

        // Dispatch a fully-assembled escape sequence or regular key
        const dispatchKey = async (seq) => {
            // Arrow up -- scroll typed query history
            if (seq === '\x1b[A') {
                if (this._querying) return;
                if (this._queryHist.length) {
                    // -1 means "not browsing"; first Up jumps to newest entry
                    this._queryHistPos = this._queryHistPos === -1
                        ? this._queryHist.length - 1
                        : Math.max(0, this._queryHistPos - 1);
                    this._inputBuf = this._queryHist[this._queryHistPos];
                    this._inputCursor = this._inputBuf.length;
                }
                this.render(); return;
            }
            // Arrow down -- scroll typed query history
            if (seq === '\x1b[B') {
                if (this._querying) return;
                if (this._queryHistPos !== -1) {
                    this._queryHistPos++;
                    // Past the newest entry: clear the input and stop browsing
                    this._inputBuf = this._queryHistPos < this._queryHist.length
                        ? this._queryHist[this._queryHistPos] : '';
                    this._inputCursor = this._inputBuf.length;
                    if (this._queryHistPos >= this._queryHist.length) this._queryHistPos = -1;
                }
                this.render(); return;
            }
            // PgUp -- scroll source list up, or go to previous history entry
            // NOTE(review): '\x1b[5;' can never be produced by the CSI parser
            // below (it requires a terminating letter or '~') -- confirm
            // whether it was meant to cover modifier variants like '\x1b[5;2~'.
            if (seq === '\x1b[5~' || seq === '\x1b[5;' || seq === '\x1bOy') {
                const srcs = this._currentEntry?.sources;
                if (!this._srcMode && srcs?.length && (this._srcScrollTop || 0) > 0) {
                    this._srcScrollTop = Math.max(0, (this._srcScrollTop || 0) - 4);
                } else if (this._entryPos > 0) {
                    this._entryPos--;
                    this._currentEntry = historyLoadEntry(this._history, this._entryPos);
                    this._scrollTop = 0; this._srcScrollTop = 0; this._srcMode = false;
                }
                this.render(); return;
            }
            // PgDn -- scroll source list down, or go to next history entry
            if (seq === '\x1b[6~' || seq === '\x1b[6;' || seq === '\x1bOs') {
                const srcs = this._currentEntry?.sources;
                const { rows } = termSize();
                const maxRespRows = rows - 7;
                const srcVisible = Math.max(2, 6);  // conservative estimate
                const srcStart = this._srcScrollTop || 0;
                if (!this._srcMode && srcs?.length && srcStart + srcVisible < srcs.length) {
                    this._srcScrollTop = srcStart + 4;
                } else if (this._entryPos < this._history.length - 1) {
                    this._entryPos++;
                    this._currentEntry = historyLoadEntry(this._history, this._entryPos);
                    this._scrollTop = 0; this._srcScrollTop = 0; this._srcMode = false;
                }
                this.render(); return;
            }
            // Ignore other escape sequences
            if (seq.startsWith('\x1b')) return;

            // Regular key
            if (seq === '\x03') {
                // Ctrl-C: abort a running query, else exit the program
                if (this._querying && this._abortCtrl) {
                    this._abortCtrl.abort();
                    this.setStatus('Cancelling...');
                    this.render();
                    return;
                }
                showCursor(); clearScreen(); process.exit(0);
            }
            if (seq === '\r' || seq === '\n') {
                // Enter: submit the current input line as a command/query
                const cmd = this._inputBuf;
                this._inputBuf = '';
                this._inputCursor = 0;
                if (cmd.trim()) await this.handleInput(cmd);
                this.render(); return;
            }
            // -- Line editing ------------------------------------------
            if (seq === '\x7f' || seq === '\b') {
                // Backspace: delete char before cursor
                if (this._inputCursor > 0) {
                    this._inputBuf = this._inputBuf.slice(0, this._inputCursor-1) + this._inputBuf.slice(this._inputCursor);
                    this._inputCursor--;
                }
                this.render(); return;
            }
            if (seq === '\x1b[3~') {
                // Delete: delete char at cursor
                this._inputBuf = this._inputBuf.slice(0, this._inputCursor) + this._inputBuf.slice(this._inputCursor+1);
                this.render(); return;
            }
            if (seq === '\x1b[D') {
                // Left arrow
                if (this._inputCursor > 0) this._inputCursor--;
                this.render(); return;
            }
            if (seq === '\x1b[C') {
                // Right arrow
                if (this._inputCursor < this._inputBuf.length) this._inputCursor++;
                this.render(); return;
            }
            if (seq === '\x1b[H' || seq === '\x01') {
                // Home / Ctrl-A
                this._inputCursor = 0;
                this.render(); return;
            }
            if (seq === '\x1b[F' || seq === '\x05') {
                // End / Ctrl-E
                this._inputCursor = this._inputBuf.length;
                this.render(); return;
            }
            if (seq === '\x0b') {
                // Ctrl-K: kill to end of line
                this._inputBuf = this._inputBuf.slice(0, this._inputCursor);
                this.render(); return;
            }
            if (seq === '\x15') {
                // Ctrl-U: kill to start of line
                this._inputBuf = this._inputBuf.slice(this._inputCursor);
                this._inputCursor = 0;
                this.render(); return;
            }
            if (seq === '\x17') {
                // Ctrl-W: delete word before cursor
                let i = this._inputCursor;
                while (i > 0 && this._inputBuf[i-1] === ' ') i--;
                while (i > 0 && this._inputBuf[i-1] !== ' ') i--;
                this._inputBuf = this._inputBuf.slice(0, i) + this._inputBuf.slice(this._inputCursor);
                this._inputCursor = i;
                this.render(); return;
            }
            if (seq === '\x1b[1;5D' || seq === '\x1bb') {
                // Ctrl-Left / Alt-b: back word
                let i = this._inputCursor;
                while (i > 0 && this._inputBuf[i-1] === ' ') i--;
                while (i > 0 && this._inputBuf[i-1] !== ' ') i--;
                this._inputCursor = i;
                this.render(); return;
            }
            if (seq === '\x1b[1;5C' || seq === '\x1bf') {
                // Ctrl-Right / Alt-f: forward word
                let i = this._inputCursor;
                while (i < this._inputBuf.length && this._inputBuf[i] === ' ') i++;
                while (i < this._inputBuf.length && this._inputBuf[i] !== ' ') i++;
                this._inputCursor = i;
                this.render(); return;
            }
            if (seq >= ' ') {
                // Insert char at cursor (any printable character, space and above)
                this._inputBuf = this._inputBuf.slice(0, this._inputCursor) + seq + this._inputBuf.slice(this._inputCursor);
                this._inputCursor++;
                this.render(); return;
            }
        };

        // Node raw mode may deliver escape sequences as single chunks OR split across calls.
        // We buffer until we have a complete sequence.
        const handleKey = async (chunk) => {
            escSeq += chunk;

            // Process all complete sequences from the buffer
            while (escSeq.length > 0) {
                // Not an escape -- dispatch char by char
                if (!escSeq.startsWith('\x1b')) {
                    await dispatchKey(escSeq[0]);
                    escSeq = escSeq.slice(1);
                    continue;
                }
                // Lone ESC -- wait briefly for more
                // NOTE(review): if nothing follows within 50ms the ESC is
                // silently discarded -- ESC itself is never dispatched as a key.
                if (escSeq === '\x1b') {
                    setTimeout(async () => {
                        if (escSeq === '\x1b') { escSeq = ''; }
                    }, 50);
                    return;
                }
                // CSI sequences: \x1b[ ... terminated by letter or ~
                if (escSeq.startsWith('\x1b[')) {
                    const rest = escSeq.slice(2);
                    const m = rest.match(/^([0-9;]*)([A-Za-z~])(.*)$/s);
                    if (m) {
                        const seq = '\x1b[' + m[1] + m[2];
                        escSeq = m[3];
                        await dispatchKey(seq);
                        continue;
                    }
                    // Incomplete -- wait for more input
                    if (escSeq.length < 8) return;
                    // Too long with no terminator -- discard
                    escSeq = ''; return;
                }
                // SS3 sequences: \x1bO + char
                if (escSeq.startsWith('\x1bO') && escSeq.length >= 3) {
                    await dispatchKey(escSeq.slice(0,3));
                    escSeq = escSeq.slice(3);
                    continue;
                }
                // Other \x1b sequences -- wait one more char
                if (escSeq.length < 3) return;
                escSeq = escSeq.slice(1);
            }
        };

        process.stdin.on('data', handleKey);

        // Keep process alive
        await new Promise(() => {});
    }
}

// -- Entry point -- only run TUI when executed directly, not when imported -----
// Entry point -- start the TUI only when this file is executed directly,
// never when it is imported (web.js imports shared logic from here).
const _argvPath = process.argv[1];
const _isMain = Boolean(_argvPath) &&
    (_argvPath === fileURLToPath(import.meta.url) ||
     _argvPath.endsWith('/query.js'));
if (_isMain) {
    new QueryTUI().run().catch((err) => {
        showCursor();
        const trace = err.stack ? '\n' + err.stack : '';
        logErr('Fatal: ' + err.message + trace);
        process.exit(1);
    });
}
RAGWEED-QRY-v1.0.102-20260319-000014-473


# =============================================================================
# STEP 10: Write scripts/web.js
# =============================================================================
ph "STEP 10: Writing scripts/web.js"

cat > "$SCRIPTS_DIR/web.js" << 'RAGWEED-WEB-v1.0.102-20260319-000014-473'
#!/usr/bin/env node
// VERSION: 1.0.102
// RAGWeed v10 -- Web server
'use strict';
import 'dotenv/config';
import http   from 'http';
import fs     from 'fs';
import path   from 'path';
import { URL } from 'url';
import { createRequire } from 'module';
import { fileURLToPath } from 'url';
const require   = createRequire(import.meta.url);
const __filename = fileURLToPath(import.meta.url);
const __dirname  = path.dirname(__filename);

// Inline cfg so web.js works even if query.js export list changes
// Read a string config value from the environment, with a default.
function cfg(key, def='') {
    const val = process.env[key];
    return val ?? def;
}
// Read an integer config value; missing, non-numeric, or zero values
// fall back to the default.
function cfgInt(key, def=0) {
    const raw = process.env[key] ?? String(def);
    const parsed = parseInt(raw, 10);
    return parsed || def;
}
// Global debug verbosity (0 = quiet), parsed once at module load.
const DEBUG_LEVEL = parseInt(process.env.DEBUG_LEVEL ?? '0', 10);
// Persistent web log path (logs/query.log under PROJECT_DIR). The logs
// directory is created on first load; null when that fails.
const _WEB_LOG_FILE = (() => {
    try {
        const fsMod = require('fs');
        const pathMod = require('path');
        const dir = pathMod.join(process.env.PROJECT_DIR || '.', 'logs');
        fsMod.mkdirSync(dir, { recursive: true });
        return pathMod.join(dir, 'query.log');
    } catch(_) { return null; }
})();
// Session log (same file query.js uses -- run.sh logs displays this)
const _SESSION_LOG = (() => {
    try {
        const fs2 = require('fs');
        const path2 = require('path');
        const logDir = path2.join(process.env.PROJECT_DIR || '.', 'logs');
        // Use UTC ISO string for filename -- consistent with log timestamps
        const ts = new Date().toISOString().replace(/[-:T]/g,'').slice(0,14);
        return path2.join(logDir, 'query-' + ts + '.log');
    } catch(_) { return null; }
})();

// Timestamp for log lines: UTC ISO-8601 with millisecond precision,
// e.g. "2026-03-19T00:00:14.473Z". slice(0,23) drops the trailing 'Z'
// which is then re-appended. (A previous `.replace('T','T')` here was a
// no-op and was removed; output is unchanged.)
function _ts() { return new Date().toISOString().slice(0,23) + 'Z'; }
// Emit one log line to stdout and append it to both log files when set.
// A failed append to the persistent log is reported on stderr; the
// per-session log is best-effort and fails silently.
function _writeLine(line) {
    const out = line + '\n';
    process.stdout.write(out);
    const fsMod = require('fs');
    if (_WEB_LOG_FILE) {
        try { fsMod.appendFileSync(_WEB_LOG_FILE, out); }
        catch(e) { process.stderr.write('log err: ' + e.message + '\n'); }
    }
    if (_SESSION_LOG) {
        try { fsMod.appendFileSync(_SESSION_LOG, out); }
        catch(e) {}
    }
}
// Set KEY=VALUE in process.env and persist it to the Config file,
// replacing an existing "KEY=..." line or appending a new one.
// Errors are logged via werr and otherwise swallowed (best-effort persist).
function envSet(key, value) {
    process.env[key] = value;
    try {
        const envFile = path.join(PROJECT_DIR, 'Config');
        let content = fs.existsSync(envFile) ? fs.readFileSync(envFile, 'utf8') : '';
        // Escape regex metacharacters so a key like "FOO.BAR" matches literally.
        const safeKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        const re = new RegExp(`^${safeKey}=.*$`, 'm');
        // Use a replacer FUNCTION: a plain replacement string would expand
        // special patterns ("$&", "$'", "$1", ...) appearing in the value
        // and silently corrupt the Config file.
        if (re.test(content)) { content = content.replace(re, () => `${key}=${value}`); }
        else { content += `\n${key}=${value}`; }
        fs.writeFileSync(envFile, content);
    } catch(e) { werr(`envSet ${key}: ${e.message}`); }
}
// Read a key from Config file directly - authoritative current value, not stale process.env
function cfgLive(key, def='') {
    try {
        const txt = fs.readFileSync(path.join(PROJECT_DIR, 'Config'), 'utf8');
        const m = txt.match(new RegExp('^' + key + '=(.*)$', 'm'));
        return (m && m[1].trim()) ? m[1].trim() : def;
    } catch(_) { return process.env[key] ?? def; }
}
// Leveled log helpers. Everything routes through _writeLine with a
// timestamp and an aligned channel tag; wdbg/wdbg2 honour DEBUG_LEVEL.
function wlog(msg) {
    _writeLine(`${_ts()} [WEB]      ${msg}`);
}
function winfo(msg) {
    _writeLine(`${_ts()} [WEB:INFO] ${msg}`);
}
function wwarn(msg) {
    _writeLine(`${_ts()} [WEB:WARN] ${msg}`);
}
// werr: optional Error argument appends its stack (preferred) or message.
function werr(msg, e) {
    let detail = '';
    if (e) {
        detail = e.stack ? '\n' + e.stack : ' | ' + (e.message || String(e));
    }
    _writeLine(`${_ts()} [WEB:ERR]  ${msg}${detail}`);
}
function wdbg(msg) {
    if (DEBUG_LEVEL >= 1) _writeLine(`${_ts()} [WEB:DBG1] ${msg}`);
}
function wdbg2(msg) {
    if (DEBUG_LEVEL >= 2) _writeLine(`${_ts()} [WEB:DBG2] ${msg}`);
}

// Import shared logic from query.js
import {
    loadCollection, getAllCollectionNames, parseActiveCollections, invalidateSegDirMap,
    queryCollections, retrieveNodes, getLLM, formatSources,
    historySaveEntry, historyUpdateEntry, historySaveIndex, historyLoadIndex, historyLoadEntry, historyEntryPath,
    synthesizeCited, annotateNodes, annotateAndFilter,
    loadCost, saveCost, costStr, logErr, dbg,
    VERSION
} from './query.js';
import os from 'os';
const HOSTNAME = os.hostname();

// Project root = parent of scripts/. Use the already-decoded __dirname
// (computed via fileURLToPath above) rather than `new URL(...).pathname`,
// which leaves percent-encoding in place (a space becomes "%20") and
// produces broken paths on Windows drive letters.
const PROJECT_DIR = path.resolve(__dirname, '..');
const SCRIPTS_DIR = path.join(PROJECT_DIR, 'scripts');
// HTTP listen port for the web UI.
const PORT = parseInt(cfg('WEB_PORT', '3000'), 10);

// Static assets, loaded once at module startup. index.html and app.js must
// exist or the server fails to start; lf.js is optional with a fallback.
const HTML_TMPL = fs.readFileSync(path.join(SCRIPTS_DIR, 'index.html'), 'utf8');
// Substitute build-time placeholders into the served page.
const HTML = HTML_TMPL.replace(/__VERSION__/g, VERSION).replace(/__HOSTNAME__/g, HOSTNAME);
const APP_JS_PATH = path.join(SCRIPTS_DIR, 'app.js');
let APP_JS = fs.readFileSync(APP_JS_PATH, 'utf8');
const FAVICON_SVG = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32"><rect width="32" height="32" rx="4" fill="#1a1a2e"/><rect x="3" y="10" width="4" height="16" rx="1" fill="#f5f0e8"/><rect x="8" y="10" width="4" height="16" rx="1" fill="#f5f0e8"/><rect x="13" y="10" width="4" height="16" rx="1" fill="#f5f0e8"/><rect x="18" y="10" width="4" height="16" rx="1" fill="#f5f0e8"/><rect x="23" y="10" width="4" height="16" rx="1" fill="#f5f0e8"/><rect x="6" y="10" width="3" height="10" rx="1" fill="#111"/><rect x="11" y="10" width="3" height="10" rx="1" fill="#111"/><rect x="21" y="10" width="3" height="10" rx="1" fill="#111"/><circle cx="24" cy="5" r="2" fill="#c8a84b"/><rect x="25.5" y="1" width="1.5" height="6" fill="#c8a84b"/><rect x="19" y="2" width="8" height="1.5" fill="#c8a84b"/></svg>';
const LF_JS_PATH = path.join(SCRIPTS_DIR, 'lf.js');
let LF_JS;
try {
  LF_JS = fs.readFileSync(LF_JS_PATH, 'utf8');
  console.log('[web] lf.js loaded ok, len='+LF_JS.length);
} catch(e) {
  console.error('[web] FAILED to load lf.js:', e.message);
  // Fallback: serve a script that just reports the load failure in the browser.
  // NOTE(review): only single quotes are stripped from the message; a double
  // quote or backslash in e.message would still break the fallback script.
  LF_JS = 'console.error("lf.js failed to load: ' + e.message.replace(/'/g,'') + '")';
}
// Byte length of app.js captured at startup.
// NOTE(review): not recomputed if APP_JS is reassigned later -- confirm callers.
const APP_JS_LEN = Buffer.byteLength(APP_JS);

// All live sessions keyed by session id (sid); entries are created lazily
// by getOrCreateSession() and hold per-browser state.
const sessions = {};
const _mutexFlags = new Map();  // simple per-session mutex flags


// Look up the session for `sid`, creating a fresh one on first sight.
// A new session starts unready: collections attach when the SSE client
// connects (or when the server-level preload finishes).
function getOrCreateSession(sid) {
    let sess = sessions[sid];
    if (!sess) {
        sess = {
            sid,
            collections: {},
            collNames:   [],
            provider:    cfg('LLM_PROVIDER', 'claude'),
            topK:        cfgInt('TOP_K', 64),
            useContext:  false,
            contextQids: new Set(),
            cost:        loadCost(),
            history:     [],
            navPos:      -1,
            currentQid:  null,
            sseClients:  [],
            ready:       false,
            stopped:     false,  // false=running, true=stopped/waiting, 'done'=terminate
            goGen:       0,      // increments each time Go is pressed
            _pipeline:   null,   // active pipeline state for Stop/Go/save
        };
        sessions[sid] = sess;
    }
    return sess;
}

// Block while sess.stopped === true, polling every 50 ms. Plain polling
// keeps this free of wake-up/notify timing races.
const _sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
async function waitIfStopped(sess) {
    while (sess.stopped === true) {
        await _sleep(50);
    }
}

// Initialise a connected session over SSE in three ordered phases:
//   1. 'llm_ready'     -- provider/model options (fast; enables selector)
//   2. 'history_ready' -- history index from disk (enables nav/History panel)
//   3. 'init'/'ready'  -- collections, served from the server-level preload
// NOTE(review): events here reach only SSE clients attached at call time --
// confirm the SSE endpoint calls this after registering the client.
async function _initSession(sess) {
    winfo(`_initSession START sid=${sess.sid}`);

    // ── PHASE 1: LLM options ─────────────────────────────────────────────────
    // Fetch Ollama models and current LLM label synchronously FIRST.
    // Sends 'llm_ready' so the client can enable the provider selector immediately.
    let ollamaModels = [];
    try {
        const oBase = cfg('OLLAMA_HOST', 'http://localhost:11434');
        // 2s timeout: an absent Ollama must not delay session startup
        const oResp = await fetch(`${oBase}/api/tags`, { signal: AbortSignal.timeout(2000) });
        const oData = await oResp.json();
        // Filter out embedding-only models - they cannot be used for chat.
        // Heuristic: model name contains 'embed', 'minilm', or ends with '-embed'.
        const _allModels = (oData.models || []).map(m => m.name);
        const _embedPattern = /embed|minilm/i;
        ollamaModels = _allModels.filter(m => !_embedPattern.test(m));
        const _filtered = _allModels.filter(m => _embedPattern.test(m));
        if (_filtered.length) wdbg2(`_initSession filtered embed-only models: ${_filtered.join(', ')}`);
        wdbg2(`_initSession ollama chat models: ${ollamaModels.join(', ') || '(none)'}`);
    } catch(_) { /* ollama not running */ }
    // Re-read provider from Config so a changed default is honoured on fresh connect
    const _liveProvider = cfgLive('LLM_PROVIDER', sess.provider);
    if (_liveProvider !== sess.provider) {
        wdbg2(`_initSession: syncing provider ${sess.provider} -> ${_liveProvider} from Config`);
        sess.provider = _liveProvider;
    }
    // getLLM failure degrades to a '?' label rather than aborting init
    const { label: llmLabel } = await getLLM(sess.provider).catch(() => ({ label: '?' }));
    // 'local' provider is surfaced to the client as "ollama:<model>"
    const _providerVal = sess.provider === 'local'
        ? 'ollama:' + cfgLive('LOCAL_LLM_MODEL', cfg('LOCAL_LLM_MODEL', ''))
        : sess.provider;
    await _sseAllAsync(sess, { type: 'llm_ready', provider: _providerVal, llm: llmLabel,
        ollamaModels, queryTimeoutS: cfgInt('QUERY_TIMEOUT_S', 300) });
    wdbg2(`_initSession llm_ready sent provider=${sess.provider}`);

    // ── PHASE 2: History ─────────────────────────────────────────────────────
    // Load history index from disk and send 'history_ready'.
    // Client enables nav buttons and History panel after this.
    try {
        sess.history = historyLoadIndex();
        if (sess.history.length > 0) sess.navPos = sess.history.length - 1;
        wdbg2(`_initSession history loaded: ${sess.history.length} entries`);
    } catch(e) {
        wlog(`_initSession ERROR loading history: ${e.message}`);
    }
    await _sseAllAsync(sess, { type: 'history_ready', total: sess.history.length,
        useContext: sess.useContext || false });
    wdbg2(`_initSession history_ready sent`);

    // ── PHASE 3: Collections ─────────────────────────────────────────────────
    // Collections are preloaded at server startup (_preloadCollections).
    // If already ready: assign immediately and fire 'ready' event.
    // If still loading: send init + current progress, then wait for completion.
    const _collNames = _serverCollNames.length ? _serverCollNames
        : (parseActiveCollections() || getAllCollectionNames());
    sess.collNames = _collNames;
    _sseAll(sess, { type: 'init', total: _collNames.length });
    await new Promise(r => setImmediate(r));

    if (_serverCollReady) {
        // Collections already loaded - attach and fire ready immediately
        sess.collections = { ..._serverCollections };
        const _loadedN = Object.keys(_serverCollections).length;
        wdbg2(`_initSession collections already ready (${_loadedN}/${_collNames.length}), skipping load`);
        // Send one status line so the progress bar shows complete
        _sseAll(sess, { type: 'status',
            text: 'Collections ready (' + _loadedN + '/' + _collNames.length + ')', loaded: _loadedN });
        sess.ready = true;
        wlog(`_initSession COMPLETE (cached): ${_loadedN}/${_collNames.length} loaded, failed=[${_serverCollFailed.join(',')}]`);
        await _sseAllAsync(sess, { type: 'ready', collections: _collNames,
            loaded: _loadedN, failed: _serverCollFailed,
            annotateDefault: cfg('ANNOTATE_DEFAULT','no').toLowerCase() === 'yes' });
    } else {
        // Still loading - send progress for whatever is already done,
        // then wait for _preloadCollections to broadcast the ready event.
        const _alreadyLoaded = Object.keys(_serverCollections);
        if (_alreadyLoaded.length > 0) {
            _sseAll(sess, { type: 'status',
                text: 'Loaded ' + _alreadyLoaded.length + '/' + _collNames.length
                    + ': ' + _alreadyLoaded[_alreadyLoaded.length - 1],
                loaded: _alreadyLoaded.length });
        }
        // sess.ready will be set true and ready event sent by _preloadCollections
        // when it finishes - no further action needed here.
        wdbg2(`_initSession Phase 3: waiting for _preloadCollections to finish (${_alreadyLoaded.length}/${_collNames.length} so far)`);
    }
}

// Broadcast one event object to every SSE client of a session.
// Clients whose write throws (disconnected sockets) are dropped.
function _sseAll(sess, obj) {
    const payload = 'data: ' + JSON.stringify(obj) + '\n\n';
    wdbg2('_sseAll type='+obj.type+' clients='+sess.sseClients.length+' bytes='+payload.length);
    const alive = [];
    for (const res of sess.sseClients) {
        try {
            res.write(payload);
            // Force flush on each write -- critical for SSE progress updates
            if (typeof res.flush === 'function') res.flush();
            else if (res.socket) res.socket.write('');
            alive.push(res);
        } catch(e) {
            wdbg2('_sseAll write error type='+obj.type+': '+e.message);
        }
    }
    sess.sseClients = alive;
}
// Async wrapper -- yields to event loop after sending SSE, allowing http to flush
async function _sseAllAsync(sess, obj) {
    _sseAll(sess, obj);
    await new Promise(r => setImmediate(r));
}

// -- HTML ---------------------------------------------------------------------


// -- Server-level collection preload ----------------------------------------
// Collections are loaded once at server startup and shared across all sessions.
// _serverCollections is populated by _preloadCollections() and reused by
// _initSession so no browser connection has to wait for disk loads.
const _serverCollections  = {};   // name -> collection object
const _serverCollFailed   = [];   // names that failed to load
let   _serverCollReady    = false;  // true once preload finished (even with failures)
let   _serverCollLoading  = false;  // guards against concurrent preload runs
let   _serverCollNames    = [];     // active collection names, in load order
// Listeners waiting for a specific collection to finish loading
const _collReadyListeners = {};   // name -> [resolve, ...]

// Load every active collection once at server startup and share the results
// across all sessions via _serverCollections. Broadcasts load progress to
// connected SSE sessions, warms up embed models for every dimensionality
// seen, then fires 'ready' for sessions still waiting in _initSession
// Phase 3. Idempotent: re-entry while loading or after completion is a no-op.
async function _preloadCollections() {
    if (_serverCollLoading || _serverCollReady) return;
    _serverCollLoading = true;
    let names;
    try {
        names = parseActiveCollections() || getAllCollectionNames();
    } catch(e) {
        // Can't even enumerate collections -- mark ready so sessions don't hang
        wlog(`_preloadCollections ERROR getting names: ${e.message}`);
        _serverCollReady = true; _serverCollLoading = false; return;
    }
    _serverCollNames = names;
    wlog(`_preloadCollections START: ${names.join(', ')}`);
    let loaded = 0;
    for (const name of names) {
        wdbg2(`_preloadCollections loading: ${name}`);
        try {
            const coll = await loadCollection(name);
            _serverCollections[name] = coll;
            loaded++;
            wdbg2(`_preloadCollections loaded ${loaded}/${names.length}: ${name}`);
        } catch(e) {
            wlog(`_preloadCollections ERROR '${name}': ${e.message}`);
            _serverCollFailed.push(name);
        }
        // Notify any sessions waiting for this collection
        if (_collReadyListeners[name]) {
            _collReadyListeners[name].forEach(fn => { try { fn(name); } catch(_) {} });
            delete _collReadyListeners[name];
        }
        // Broadcast progress to all connected SSE sessions
        for (const s of Object.values(sessions)) {
            if (s.sseClients.length > 0 && !s.ready) {
                _sseAll(s, { type: 'status',
                    text: (_serverCollFailed.includes(name) ? '[!] ' : 'Loaded ')
                        + loaded + '/' + names.length + ': ' + name, loaded });
            }
        }
    }
    _serverCollReady = true; _serverCollLoading = false;
    wlog(`_preloadCollections COMPLETE: ${loaded}/${names.length} loaded, failed=[${_serverCollFailed.join(',')}]`);

    // Warm up the embed model for every unique dimensionality found in loaded collections.
    // Runs after collections are known so we cover all dims, not just the configured default.
    // Fire-and-forget: warmup failures are logged and never block startup.
    (async () => {
        try {
            const _eb = process.env.EMBED_OLLAMA_HOST || process.env.OLLAMA_HOST || 'http://localhost:11434';
            const _cfgModel = process.env.EMBED_MODEL || 'nomic-embed-text';
            // Collect unique dims from all loaded collections
            const _dimModelMap = {
                384: 'all-minilm', 768: 'nomic-embed-text',
                1024: 'mxbai-embed-large', 1536: 'text-embedding-ada-002', 3072: 'text-embedding-3-large',
            };
            const _modelDimMap = {
                'nomic-embed-text': 768, 'mxbai-embed-large': 1024, 'all-minilm': 384,
                'text-embedding-ada-002': 1536, 'text-embedding-3-small': 1536, 'text-embedding-3-large': 3072,
            };
            const _seenDims = new Set();
            for (const coll of Object.values(_serverCollections)) {
                const d = coll.index?.dim || coll.meta?.dimensionality;
                if (d) _seenDims.add(d);
            }
            if (_seenDims.size === 0) _seenDims.add(null); // fallback: warm cfg model
            wlog(`Embed warmup: unique dims=[${[..._seenDims].join(',')}]`);
            for (const dim of _seenDims) {
                // Pick a model matching this dim; fall back to the configured model
                let warmModel = _cfgModel;
                if (dim) {
                    const cfgDim = Object.entries(_modelDimMap).find(([m]) => _cfgModel.toLowerCase().includes(m))?.[1];
                    if (cfgDim !== dim) warmModel = _dimModelMap[dim] || _cfgModel;
                }
                try {
                    const _wr = await fetch(`${_eb}/api/embeddings`, {
                        method: 'POST', headers: { 'Content-Type': 'application/json' },
                        body: JSON.stringify({ model: warmModel, prompt: 'warmup' }),
                    });
                    if (_wr.ok) { const _wd = await _wr.json(); wlog(`Embed warmup OK: model=${warmModel} dim=${_wd.embedding?.length||'?'}`); }
                    else wlog(`Embed warmup HTTP ${_wr.status} model=${warmModel} (non-fatal)`);
                } catch(e) { wlog(`Embed warmup skipped model=${warmModel}: ${e.message}`); }
            }
        } catch(e) { wlog(`Embed warmup error: ${e.message}`); }
    })();
    // All collections failed: surface it loudly (the original had an empty
    // conditional body here). The server still starts so the UI can report it.
    if (loaded === 0 && _serverCollFailed.length > 0) {
        wwarn(`_preloadCollections: ALL ${_serverCollFailed.length} collections failed to load`);
    }
    // Fire ready event for all sessions that are still in Phase 3
    for (const s of Object.values(sessions)) {
        if (!s.ready) {
            s.collections = { ..._serverCollections };
            s.collNames   = _serverCollNames;
            s.ready       = true;
            _sseAll(s, { type: 'ready', collections: _serverCollNames,
                loaded: Object.keys(_serverCollections).length,
                failed: _serverCollFailed });
        }
    }
}

// -- Request handler -----------------------------------------------------------
async function handleRequest(req, res) {
    // Central HTTP dispatcher: each route below matches on method + pathname,
    // responds, and returns. Unmatched requests fall through to later routes.
    wlog(`REQ ${req.method} ${req.url}`);
    // Optional password protection (HTTP Basic with empty username).
    // NOTE(review): '===' is not a constant-time comparison; if exposed beyond
    // localhost, consider crypto.timingSafeEqual -- confirm threat model.
    const webPass = cfg('WEB_PASSWORD', '');
    if (webPass) {
        const auth = req.headers['authorization'] || '';
        const b64  = Buffer.from(`:${webPass}`).toString('base64');
        if (auth !== `Basic ${b64}`) {
            res.writeHead(401, { 'WWW-Authenticate': 'Basic realm="RAGWeed"' });
            res.end('Unauthorized');
            return;
        }
    }

    // The base 'http://localhost' only exists so the relative req.url parses.
    const u    = new URL(req.url, `http://localhost`);
    const path_ = u.pathname;
    // _params(): GET reads URL searchParams, POST reads JSON body - one processing path
    // Never rejects: malformed POST JSON resolves to {}.
    const _params = () => new Promise(resolve => {
        if (req.method !== 'POST') {
            const p = {}; u.searchParams.forEach((v, k) => { p[k] = v; }); resolve(p);
        } else {
            let body = ''; req.on('data', d => body += d);
            req.on('end', () => { try { resolve(JSON.parse(body)); } catch(_) { resolve({}); } });
        }
    });

    // CORS for local dev
    res.setHeader('Access-Control-Allow-Origin', '*');

    // -- GET / -- serve HTML
    if (req.method === 'GET' && (path_ === '/' || path_ === '/index.html')) {
        wlog(`Serving HTML ${HTML.length} chars to ${req.socket.remoteAddress||'?'}`);
        res.writeHead(200, {
            'Content-Type':  'text/html; charset=utf-8',
            'Cache-Control': 'no-store, no-cache, must-revalidate, max-age=0',
            'Pragma':        'no-cache',
            'Expires':       '0',
        });
        console.log('[web] serving index.html len='+HTML.length);
        res.end(HTML);
        return;
    }

    // -- GET /app.js -- external script (avoids inline script CSP restrictions)
    if (req.method === 'GET' && (path_ === '/favicon.ico' || path_ === '/favicon.svg')) {
        res.writeHead(200, {'Content-Type':'image/svg+xml','Cache-Control':'max-age=86400'});
        return res.end(FAVICON_SVG);
    }
    if (req.method === 'GET' && path_ === '/lf.js') {
        console.log('[web] serving lf.js len=' + (LF_JS ? LF_JS.length : 'null'));
        res.writeHead(200, {'Content-Type':'application/javascript; charset=utf-8',
            'Cache-Control':'no-store,no-cache,must-revalidate,max-age=0'});
        return res.end(LF_JS);
    }
    if (req.method === 'GET' && path_ === '/app.js') {
        wdbg2(`Serving app.js ${APP_JS.length} chars to ${req.socket.remoteAddress||'?'}`);
        res.writeHead(200, {
            'Content-Type':  'application/javascript; charset=utf-8',
            'Cache-Control': 'no-store, no-cache, must-revalidate, max-age=0',
        });
        res.end(APP_JS);
        return;
    }

    // -- GET /api/ping
    if (req.method === 'GET' && path_ === '/api/ping') {
        wdbg2(`/api/ping from ${req.socket.remoteAddress||'?'}`);
        res.writeHead(200, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify({ ok: true, version: VERSION, debug: DEBUG_LEVEL }));
        return;
    }

    // -- POST /api/clientlog -- browser error reporting
    if (req.method === 'POST' && path_ === '/api/clientlog') {
        _params().then(p => {
            const level = p.level || 'LOG';
            const msg = p.msg || '';
            _writeLine(`${_ts()} [BROWSER:${level.padEnd(4)}] ${msg}`);
            res.writeHead(204); res.end();
        }).catch(_ => { res.writeHead(204); res.end(); });
        return;
    }

    // -- GET /api/events -- Server-Sent Events stream (one per browser tab).
    // This is also where sessions are created: the SSE client must be
    // registered before _initSession so load-progress events reach the browser.
    if (req.method === 'GET' && path_ === '/api/events') {
        const sid = u.searchParams.get('sid') || 'default';
        res.writeHead(200, {
            'Content-Type':  'text/event-stream',
            'Cache-Control': 'no-cache',
            'Connection':    'keep-alive',
            'X-Accel-Buffering': 'no',  // disable reverse-proxy buffering of the stream
        });
        res.flushHeaders();
        wlog(`SSE CONNECT sid=${sid}`);
        res.write('data: {"type":"connected"}\n\n');
        if (res.flush) res.flush();  // flush() may be absent; guard as the code does below
        wdbg(`SSE connected event sent to sid=${sid}`);

        // Register client FIRST so _initSession progress events reach it
        if (!sessions[sid]) {
            // Brand-new session: create with defaults from config, then start loading.
            sessions[sid] = {
                sid,
                collections: {},
                collNames:   [],
                provider:    cfg('LLM_PROVIDER', 'claude'),
                topK:        cfgInt('TOP_K', 64),
                cost:        loadCost(),
                history:     [],
                navPos:      -1,
                currentQid:  null,
                sseClients:  [res],
                ready:       false,
            };
            _initSession(sessions[sid]);  // async -- events now go to res
        } else {
            const sess = sessions[sid];
            const wasEmpty = sess.sseClients.length === 0;
            sess.sseClients.push(res);
            // If not yet initialised (session was pre-created by /api/history or /api/state
            // before the SSE connection arrived), start loading now
            if (!sess.ready && wasEmpty) {
                _initSession(sess);  // async
            } else if (sess.ready) {
                // Reconnect within same page load (network blip) - keep session provider as-is
                const _pvVal = sess.provider === 'local'
                    ? 'ollama:' + cfg('LOCAL_LLM_MODEL', '')
                    : sess.provider;
                // Replay the two events the client needs to rebuild its UI state.
                getLLM(sess.provider).catch(() => ({ label: sess.provider })).then(llmInfo => {
                    res.write('data: ' + JSON.stringify({
                        type: 'llm_ready', provider: _pvVal, llm: llmInfo.label,
                        ollamaModels: [], queryTimeoutS: cfgInt('QUERY_TIMEOUT_S', 300)
                    }) + '\n\n');
                    res.write('data: ' + JSON.stringify({
                        type: 'ready',
                        collections: sess.collNames,
                        loaded: sess.collNames.length
                    }) + '\n\n');
                    if (res.flush) res.flush();
                });
            } else {
                // Still loading -- send current progress
                res.write('data: ' + JSON.stringify({
                    type: 'init', total: sess.collNames.length || 0
                }) + '\n\n');
            }
        }

        // Drop this response object from the session's client list on disconnect.
        req.on('close', () => {
            if (sessions[sid]) {
                sessions[sid].sseClients = sessions[sid].sseClients.filter(r => r !== res);
            }
        });
        return;
    }

    // -- GET /api/history -- fetch one saved Q&A entry by position, or with
    // ?list=1 the whole index for client-side navigation.
    if (req.method === 'GET' && path_ === '/api/history') {
        const sid  = u.searchParams.get('sid') || 'default';
        const pos  = parseInt(u.searchParams.get('pos') || '-1', 10);  // -1 = latest entry
        // Called for its side effect too: pre-creates the session if absent.
        const sess = getOrCreateSession(sid);
        const idx   = historyLoadIndex();
        const total = idx.length;
        // ?list=1 returns full index for client-side navigation
        if (u.searchParams.get('list') === '1') {
            const index = idx.map((item, i) => {
                let q = (item.question || item.q || '').slice(0, 120);
                // Older index entries lack the question text -- backfill it
                // from the per-entry file on disk (best-effort, errors ignored).
                if (!q) {
                    try {
                        const ep = historyEntryPath(item.qid);
                        if (fs.existsSync(ep)) {
                            const e2 = JSON.parse(fs.readFileSync(ep, 'utf8'));
                            q = (e2.question || '').slice(0, 120);
                            if (q) idx[i].q = q;  // warm the in-memory index
                        }
                    } catch(_) {}
                }
                return { pos: i, qid: item.qid || i, ts: item.ts || '', q };
            });
            res.writeHead(200, { 'Content-Type': 'application/json' });
            res.end(JSON.stringify({ index, total }));
            return;
        }
        // Clamp requested position into [0, total-1]; -1 means "newest".
        const actualPos = pos < 0 ? total - 1 : Math.min(pos, total - 1);
        let entry = actualPos >= 0 ? historyLoadEntry(idx, actualPos) : null;
        // Enrich src_urls: prepend collection base URL from live config
        if (entry && entry.src_urls && entry.sources) {
            entry = { ...entry, src_urls: entry.src_urls.map((relPath, i) => {
                if (!relPath) return '';
                // Absolute URLs pass through untouched.
                if (relPath.startsWith('http://') || relPath.startsWith('https://')) return relPath;
                // Derive collection from source label e.g. "[a2e.co] file.html [80%]"
                const _colMatch = entry.sources[i] && entry.sources[i].match(/^\[([^\]]+)\]/);
                const _col = _colMatch ? _colMatch[1] : '';
                const _envKey = 'COLLECTION_URL_' + _col.replace(/[^A-Za-z0-9]/g, '_').toUpperCase();
                const _base = cfg(_envKey, 'https://all.net').replace(/\/+$/, '');
                return _base + '/' + relPath;
            })};
        }
        wdbg2('history pos='+actualPos+' of '+total+(entry?' qid='+entry.qid:''));
        res.writeHead(200, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify({ entry, pos: actualPos, total }));
        return;
    }

    // -- POST /api/history/delete
    if (req.method === 'POST' && path_ === '/api/history/delete') {
        _params().then(parsed => {
            // qid may arrive as number or string - normalise to number
            const qid = Number(parsed.qid);
                if (isNaN(qid)) {
                    res.writeHead(400, { 'Content-Type': 'application/json' });
                    res.end(JSON.stringify({ ok: false, error: 'bad qid' }));
                    return;
                }
                const idx = historyLoadIndex();
                // Find by numeric comparison
                const pos = idx.findIndex(e => Number(e.qid) === qid);
                if (pos < 0) {
                    winfo(`history/delete: qid=${qid} not found in index (len=${idx.length})`);
                    res.writeHead(404, { 'Content-Type': 'application/json' });
                    res.end(JSON.stringify({ ok: false, error: 'not found' }));
                    return;
                }
                // Delete the entry file
                const delPath = historyEntryPath(qid);
                try {
                    if (fs.existsSync(delPath)) fs.unlinkSync(delPath);
                } catch(fe) { winfo('history/delete file error: '+fe.message); }
                // Remove from index array
                idx.splice(pos, 1);
                // Save updated index (filenames/qids of surviving entries unchanged)
                historySaveIndex(idx);
                winfo(`history/delete qid=${qid} ok, ${idx.length} entries remain`);
                // Build response index - backfill q from entry files for old entries
                const newIndex = idx.map((item, i) => {
                    let q = (item.q || '').slice(0, 120);
                    if (!q) {
                        try {
                            const ep = historyEntryPath(item.qid);
                            if (fs.existsSync(ep)) {
                                const e2 = JSON.parse(fs.readFileSync(ep, 'utf8'));
                                q = (e2.question || '').slice(0, 120);
                                if (q) idx[i].q = q;
                            }
                        } catch(_) {}
                    }
                    return { pos: i, qid: Number(item.qid), ts: item.ts || '', q };
                });
                // Update active sessions
                for (const s of Object.values(sessions)) {
                    s.history = idx;
                    if (s.navPos >= idx.length) s.navPos = Math.max(0, idx.length - 1);
                    if (s.contextQids) {
                        const newSet = new Set();
                        for (const nq of s.contextQids) {
                            if (idx.some(e => Number(e.qid) === Number(nq))) newSet.add(nq);
                        }
                        s.contextQids = newSet;
                    }
                }
                res.writeHead(200, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify({ ok: true, index: newIndex, total: idx.length }));
        }).catch(e => {
            logErr('history/delete error: ' + e.message + '\n' + e.stack);
            res.writeHead(500, { 'Content-Type': 'application/json' });
            res.end(JSON.stringify({ ok: false, error: e.message }));
        }).catch(e => { werr('query error', e); res.writeHead(500); res.end(JSON.stringify({error:e.message})); });
        return;
    }


    // -- POST /api/retry -- retry a failed history entry (synthesis-only or full re-search)
    // mode='synthesis' re-runs only the LLM over saved raw_nodes; mode='full'
    // (or no saved nodes) re-runs retrieval too. collmode picks which
    // collections: 'as-was' (from the saved entry) or 'current' (browser state).
    if (req.method === 'POST' && path_ === '/api/retry') {
        _params().then(async parsed => {
            const { sid = 'default', qid, mode = 'auto', collmode = 'current', annotate = false, activeColls } = parsed;
            const qidNum = Number(qid);
            const sess = getOrCreateSession(sid);
            sess.stopped = false;
            if (!sess.ready) { res.writeHead(503); res.end(JSON.stringify({ error: 'Still loading' })); return; }
            const index = historyLoadIndex();
            const pos   = index.findIndex(e => Number(e.qid) === qidNum);
            if (pos < 0) { res.writeHead(404); res.end(JSON.stringify({ error: 'Entry not found' })); return; }
            const entry = historyLoadEntry(index, pos);
            if (!entry) { res.writeHead(404); res.end(JSON.stringify({ error: 'Entry file missing' })); return; }
            const hasNodes = entry.raw_nodes && entry.raw_nodes.length > 0;
            const useMode  = (mode === 'full' || !hasNodes) ? 'full' : 'synthesis';
            // Acknowledge immediately; progress and results stream over SSE.
            res.writeHead(200, { 'Content-Type': 'application/json' });
            res.end(JSON.stringify({ ok: true, mode: useMode }));
            let useProvider = sess.provider;
            let useModel = null;
            // 'ollama:<model>' is shorthand for provider='local' with an explicit model.
            if (useProvider && useProvider.startsWith('ollama:')) { useModel = useProvider.slice(7); useProvider = 'local'; }
            try {
                const llmInfo = await getLLM(useProvider, useModel);
                if (llmInfo.error) { _sseAll(sess, { type: 'error', text: llmInfo.label }); return; }
                sess.abortCtrl = new AbortController();
                if (useMode === 'synthesis') {
                    // Fast path: LLM synthesis only, from the entry's saved nodes.
                    _sseAll(sess, { type: 'status', text: 'Re-synthesizing from saved sources...' });
                    const { text, usage } = await synthesizeCited(
                        llmInfo, entry.question, entry.raw_nodes, sess.abortCtrl.signal,
                        [], entry.src_annotations || []
                    );
                    const { lines: srcLines, chunks: srcChunks, urls: srcUrls } = formatSources(entry.raw_nodes);
                    const cost = loadCost();
                    cost.in  += usage.input_tokens  || 0;
                    cost.out += usage.output_tokens || 0;
                    saveCost(cost.in, cost.out);
                    // Update the SAME history entry in place; raw_nodes cleared on success.
                    historyUpdateEntry(qidNum, {
                        answer: text, sources: srcLines, src_chunks: srcChunks, src_urls: srcUrls,
                        status: 'ok', raw_nodes: [],
                        meta: { ...entry.meta, provider: useProvider, model: llmInfo.model,
                                in_tokens: usage.input_tokens||0, out_tokens: usage.output_tokens||0,
                                retried_at: new Date().toISOString() },
                    });
                    _sseAll(sess, { type: 'cost', text: costStr(cost.in, cost.out, llmInfo.model||'', llmInfo.provider||'') });
                    _sseAll(sess, { type: 'answer', qid: qidNum });
                } else {
                    // as-was: use saved collections from entry meta
                    // current: use activeColls from request (browser state) or sess.activeColls
                    let _retryCols = sess.collections;
                    if (collmode === 'as-was') {
                        // Derive collection names from saved meta or infer from source labels
                        let _savedNames = entry.meta && Array.isArray(entry.meta.collections) ? entry.meta.collections : null;
                        if (!_savedNames && entry.sources && entry.sources.length) {
                            // Infer from source labels: "[collname] filename [pct%]"
                            const _inferredSet = new Set();
                            entry.sources.forEach(s => { const m = s.match(/^\[([^\]]+)\]/); if (m) _inferredSet.add(m[1]); });
                            _savedNames = [..._inferredSet];
                        }
                        if (_savedNames && _savedNames.length) {
                            _retryCols = {};
                            for (const name of _savedNames) { if (sess.collections[name]) _retryCols[name] = sess.collections[name]; }
                        }
                        // If still no collections resolved, warn -- don't silently use all
                        if (!Object.keys(_retryCols).length) {
                            _sseAll(sess, { type: 'error', text: 'Cannot retry as-was: original collections unknown. Use Retry with current settings instead.' });
                            return;
                        }
                    } else if (collmode === 'current') {
                        const _ac = activeColls || sess.activeColls || [];
                        if (_ac.length) {
                            _retryCols = {};
                            for (const name of _ac) { if (sess.collections[name]) _retryCols[name] = sess.collections[name]; }
                            if (!Object.keys(_retryCols).length) _retryCols = sess.collections;
                        }
                    }
                    const _retryLabel = collmode === 'as-was' ? 'Retrying as-was' : 'Retrying with current settings';
                    _sseAll(sess, { type: 'status', text: _retryLabel + '...' });
                    // as-was reuses the entry's saved annotate flag; current uses the request's.
                    const _retryAnnotate = collmode === 'as-was'
                        ? (entry.meta && entry.meta.annotate === true)
                        : (annotate === true || annotate === 'true');
                    let result;
                    if (_retryAnnotate) {
                        // Annotated path: retrieve, annotate each source, filter, then synthesize.
                        const _rRetrieved = await retrieveNodes(_retryCols, entry.question, llmInfo, sess.topK, sess.abortCtrl.signal, (phase) => _sseAll(sess, { type: 'status', text: phase }));
                        if (_rRetrieved.status !== 'ok' || !_rRetrieved.nodes.length) {
                            result = { text: 'No relevant content found.', nodes: [], usage: {}, status: _rRetrieved.status || 'no_results' };
                        } else {
                            const _rFmt = formatSources(_rRetrieved.nodes, []);
                            _sseAll(sess, { type: 'sources', lines: _rFmt.lines, chunks: _rFmt.chunks, urls: _rFmt.urls, isRetry: true });
                            // Annotation may use a different provider/model than synthesis.
                            const _rAnnotProv = (() => { const p = cfg('ANNOTATION_PROVIDER',''); return (p && p !== 'default') ? p : useProvider; })();
                            const _rAnnotLlm = await getLLM(_rAnnotProv, cfg('ANNOTATION_MODEL','') || null);
                            const _rAnnotConc = (_rAnnotProv === 'local')
                                ? (parseInt(cfg('ANNOTATION_LOCAL_CONCURRENCY', '1')) || 1)
                                : (parseInt(cfg('ANNOTATION_CONCURRENCY', '4')) || 4);
                            if (_rAnnotLlm.error) {
                                // Park state so /api/annot-choice can resume (skip or retry).
                                sess._annotPaused = { nodes: _rRetrieved.nodes, query: entry.question, llmInfo, ctxMessages: [], error: _rAnnotLlm.label };
                                _sseAll(sess, { type: 'annot_blocked', text: 'Annotation unavailable: ' + _rAnnotLlm.label });
                                return;
                            }
                            _sseAll(sess, { type: 'status', text: 'Annotating ' + _rRetrieved.nodes.length + ' sources...' });
                            const { annotations: _rAnnotations, filteredNodes: _rFiltered, filteredIndices: _rFilteredIdx, blocked: _rBlocked } =
                                await annotateAndFilter(entry.question, _rRetrieved.nodes, _rAnnotLlm, _rAnnotConc,
                                    (i, text, irrelevant) => _sseAll(sess, { type: 'annotation', index: i, text, irrelevant }),
                                    sess.abortCtrl?.signal, () => sess.stopped);
                            if (_rBlocked) {
                                sess._annotPaused = { nodes: _rRetrieved.nodes, query: entry.question, llmInfo, ctxMessages: [], error: 'All annotations failed' };
                                _sseAll(sess, { type: 'annot_blocked', text: 'All annotations failed. Check annotation provider settings.' });
                                return;
                            }
                            if (sess.abortCtrl?.signal?.aborted) {
                                winfo('retry stopped during annotation -- showing annotated sources, no synthesis');
                                return;
                            }
                            _sseAll(sess, { type: 'status', text: 'Generating response from ' + (_rFiltered.length || _rRetrieved.nodes.length) + ' sources...' });
                            // Synthesize from surviving nodes; fall back to all if filtering removed everything.
                            const _rSynth = _rFiltered.length ? _rFiltered : _rRetrieved.nodes;
                            const _rFiltAnnot = _rSynth.map(n => _rAnnotations[_rRetrieved.nodes.indexOf(n)]);
                            try {
                                const { text, usage } = await synthesizeCited(llmInfo, entry.question, _rSynth, sess.abortCtrl?.signal, [], _rFiltAnnot);
                                result = { text, nodes: _rRetrieved.nodes, usage, status: 'ok', annotations: _rAnnotations, filtered_indices: [..._rFilteredIdx] };
                            } catch(e) { result = { text: 'Error: '+e.message, nodes: _rRetrieved.nodes, usage: {}, status: 'llm_error', annotations: _rAnnotations, filtered_indices: [..._rFilteredIdx] }; }
                        }
                    } else {
                        result = await queryCollections(
                        _retryCols, entry.question, llmInfo, sess.topK,
                        sess.abortCtrl.signal,
                        (phase) => _sseAll(sess, { type: 'status', text: phase })
                    );}
                    // Log result for diagnosis
                winfo(`query result: status=${result.status} nodes=${result.nodes?.length||0} text_len=${result.text?.length||0}`);
                const _rAnnotations = result.annotations || [];
                    const _rFiltIdx = result.filtered_indices || [];
                    const { lines: srcLines, chunks: srcChunks, urls: srcUrls, annotations: srcAnnotations } = formatSources(result.nodes, _rAnnotations);
                    // Build seq_index_map for annotated retry results
                    const _rFiltSet = new Set(_rFiltIdx);
                    const _rSeqMap = [];
                    srcLines.forEach((_, i) => { if (!_rFiltSet.has(i)) _rSeqMap.push(i); });
                    const usage   = result.usage || {};
                    const cost    = loadCost();
                    cost.in  += usage.input_tokens  || 0;
                    cost.out += usage.output_tokens || 0;
                    saveCost(cost.in, cost.out);
                    const _rStatus   = result.status || 'ok';
                    // Keep raw nodes only on LLM failure so a later synthesis-only retry can reuse them.
                    const _rRawNodes = (_rStatus === 'llm_error') ? result.nodes : [];
                    // Full re-search saves a NEW history entry (contrast with the synthesis path above).
                    const newQid = historySaveEntry(entry.question, result.text, srcLines, srcChunks, srcUrls, {
                        provider: useProvider, model: llmInfo.model,
                        in_tokens: usage.input_tokens||0, out_tokens: usage.output_tokens||0,
                        collections: Object.keys(_retryCols),
                        annotate: _retryAnnotate,
                        filtered_indices: _rFiltIdx.length ? _rFiltIdx : undefined,
                        seq_index_map: _rSeqMap,
                    }, _rStatus, _rRawNodes, srcAnnotations);
                    sess.navPos = historyLoadIndex().length - 1;
                    _sseAll(sess, { type: 'cost', text: costStr(cost.in, cost.out, llmInfo.model||'', llmInfo.provider||'') });
                    _sseAll(sess, { type: 'answer', qid: newQid });
                }
            } catch(e) {
                // User-initiated aborts surface as 'aborted' messages -- not errors.
                if (e.message && (e.message.includes('aborted') || e.message.includes('Aborted'))) {
                    winfo('retry aborted by user');
                    _sseAll(sess, { type: 'status', text: 'Cancelled.' });
                } else {
                    werr('retry error', e);
                    _sseAll(sess, { type: 'error', text: e.message });
                }
            } finally { sess.abortCtrl = null; }
        }).catch(e => { werr('retry error', e); res.writeHead(500); res.end(JSON.stringify({error:e.message})); });
        return;
    }

    // -- POST /api/annot-choice -- resume after annotation block (skip or retry)
    // 'skip' synthesizes with empty annotations; 'retry' re-runs annotation first.
    if (req.method === 'POST' && path_ === '/api/annot-choice') {
        _params().then(async parsed => {
            const { sid = 'default', choice } = parsed; // choice: 'skip' | 'retry'
            const sess = getOrCreateSession(sid);
            const paused = sess._annotPaused;
            if (!paused) { res.writeHead(400); res.end(JSON.stringify({ error: 'No paused annotation' })); return; }
            sess._annotPaused = null;
            sess.stopped = false;
            // Acknowledge immediately; the rest streams over SSE.
            res.writeHead(200, { 'Content-Type': 'application/json' });
            res.end(JSON.stringify({ ok: true, choice }));
            const { nodes, query: pQuery, llmInfo: pLlm, ctxMessages: pCtx } = paused;
            // An annotation marks its node irrelevant when it is non-empty but very
            // short, or reduces to the word IRRELEVANT (ignoring case/punctuation).
            const _isIrrelevant = ann => {
                if (!ann) return false;
                const _t = ann.trim();
                if (_t.length < 10) return true;
                return _t.trim().replace(/[^a-zA-Z]/g, '').toUpperCase() === 'IRRELEVANT';
            };
            try {
                let annotations = new Array(nodes.length).fill('');
                if (choice === 'retry') {
                    const _rProv  = (() => { const p = cfg('ANNOTATION_PROVIDER',''); return (p && p !== 'default') ? p : (sess.provider||cfg('LLM_PROVIDER','claude')); })();
                    const _rModel = cfg('ANNOTATION_MODEL','') || null;
                    const _rConc  = parseInt(cfg('ANNOTATION_CONCURRENCY','4')) || 4;
                    const _rLlm   = await getLLM(_rProv, _rModel);
                    if (_rLlm.error) {
                        // Re-park the paused state so the user can choose again.
                        sess._annotPaused = { nodes, query: pQuery, llmInfo: pLlm, ctxMessages: pCtx, error: _rLlm.label };
                        _sseAll(sess, { type: 'annot_blocked', text: 'Annotation still unavailable: ' + _rLlm.label });
                        return;
                    }
                    _sseAll(sess, { type: 'status', text: 'Retrying annotation (' + nodes.length + ' sources)...' });
                    let _annotFailed = 0;
                    // Bounded-concurrency pool: at most _rConc annotateNodes calls in
                    // flight; resolves when the last active call drains.
                    await new Promise(resolve => {
                        let active = 0, idx = 0;
                        function next() {
                            while (active < _rConc && idx < nodes.length) {
                                const i = idx++; active++;
                                annotateNodes(pQuery, [nodes[i]], _rLlm, 1).then(arr => {
                                    const text = arr[0] || '';
                                    annotations[i] = text;
                                    const _trim = text.trim();
                                    const _irrelevant = _trim && (_trim.length < 10 || _trim.replace(/[^a-zA-Z]/g,'').toUpperCase() === 'IRRELEVANT');
                                    _sseAll(sess, { type: 'annotation', index: i, text, irrelevant: _irrelevant });
                                    active--; next();
                                }).catch(() => { _annotFailed++; active--; next(); });
                            }
                            if (active === 0) resolve();
                        }
                        next();
                    });
                    if (_annotFailed === nodes.length) {
                        sess._annotPaused = { nodes, query: pQuery, llmInfo: pLlm, ctxMessages: pCtx, error: 'All annotations failed again' };
                        _sseAll(sess, { type: 'annot_blocked', text: 'All annotations failed again. Fix provider settings or skip.' });
                        return;
                    }
                }
                // Filter + synthesize
                const _filteredIndices = new Set();
                nodes.forEach((n, i) => { if (_isIrrelevant(annotations[i])) _filteredIndices.add(i); });
                const _filteredNodes = nodes.filter((n, i) => !_filteredIndices.has(i));
                winfo('annot-choice ' + choice + ': ' + nodes.length + ' -> ' + _filteredNodes.length + ' nodes');
                _sseAll(sess, { type: 'status', text: 'Generating response from ' + (_filteredNodes.length || nodes.length) + ' sources...' });
                // Fall back to all nodes if filtering removed everything.
                const _synthNodes = _filteredNodes.length ? _filteredNodes : nodes;
                const _filtAnnotations = _synthNodes.map(n => annotations[nodes.indexOf(n)]);
                const { text, usage } = await synthesizeCited(pLlm, pQuery, _synthNodes, sess.abortCtrl?.signal, pCtx, _filtAnnotations);
                const _filtIdx = [..._filteredIndices];
                const { lines: srcLines, chunks: srcChunks, urls: srcUrls, annotations: srcAnnotations } = formatSources(nodes, annotations);
                // seq_index_map records surviving (unfiltered) source positions.
                const _filtSet2 = new Set(_filtIdx);
                const _seqIndexMap = [];
                srcLines.forEach((_, i) => { if (!_filtSet2.has(i)) _seqIndexMap.push(i); });
                const cost = loadCost();
                cost.in += usage.input_tokens||0; cost.out += usage.output_tokens||0;
                saveCost(cost.in, cost.out);
                const qid = historySaveEntry(pQuery, text, srcLines, srcChunks, srcUrls, {
                    provider: pLlm.provider||'', model: pLlm.model||'',
                    in_tokens: usage.input_tokens||0, out_tokens: usage.output_tokens||0,
                    filtered_indices: _filtIdx.length ? _filtIdx : undefined,
                    seq_index_map: _seqIndexMap,
                }, 'ok', [], srcAnnotations);
                sess.navPos = historyLoadIndex().length - 1;
                _sseAll(sess, { type: 'cost', text: costStr(cost.in, cost.out, pLlm.model||'', pLlm.provider||'') });
                _sseAll(sess, { type: 'answer', qid });
            } catch(e) {
                werr('annot-choice error', e);
                _sseAll(sess, { type: 'error', text: e.message });
            }
        }).catch(e => { werr('annot-choice', e); res.writeHead(500); res.end(JSON.stringify({error:e.message})); });
        return;
    }

    // -- POST /api/setprovider -- switch the session's LLM provider.
    // (The previous header comment incorrectly said /api/cancel.)
    if (req.method === 'POST' && path_ === '/api/setprovider') {
        // Atomically: check Config for key, if present set session provider
        _params().then(async parsed => {
            const provider = parsed.provider || '';
            const sid2 = parsed.sid || 'default';
            // Read Config fresh from disk so a just-edited key is honoured.
            const envTxt = (() => { try { return fs.readFileSync(path.join(PROJECT_DIR, 'Config'), 'utf8'); } catch(_) { return ''; } })();
            // Normalise ollama:modelname -> provider='local', save model
            let normProvider = provider;
            let ollamaModel = null;
            if (provider.startsWith('ollama:')) {
                ollamaModel = provider.slice(7);
                normProvider = 'local';
            }
            const keyMap = { claude:'ANTHROPIC_API_KEY', openai:'OPENAI_API_KEY', gemini:'GEMINI_API_KEY', google:'GOOGLE_API_KEY' };
            const keyName = keyMap[normProvider];
            let hasKey = !keyName; // providers with no key (local/ollama) always pass
            if (keyName) {
                const m = envTxt.match(new RegExp('^' + keyName + '=(.+)$', 'm'));
                hasKey = !!(m && m[1].trim());
            } else if (normProvider === 'local') {
                // Local provider requires a model: inline from the request or from Config.
                hasKey = !!(ollamaModel || envTxt.match(/^LOCAL_LLM_MODEL=(.+)$/m)?.[1]?.trim());
            }
            wdbg2(`setprovider provider=${normProvider} ollamaModel=${ollamaModel} keyName=${keyName} hasKey=${hasKey}`);
            if (hasKey) {
                const sess2 = getOrCreateSession(sid2);
                if (ollamaModel) {
                    // Persist the chosen model for this process and in the Config file.
                    process.env.LOCAL_LLM_MODEL = ollamaModel;
                    envSet('LOCAL_LLM_MODEL', ollamaModel);
                }
                sess2.provider = normProvider;
                envSet('LLM_PROVIDER', sess2.provider);
                const llmInfo = await getLLM(sess2.provider).catch(() => ({ label: provider }));
                winfo(`setprovider OK: provider=${provider} llm=${llmInfo.label}`);
                res.writeHead(200, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify({ ok: true, llm: llmInfo.label }));
            } else {
                // 200 with ok:false (not 4xx) so the client can prompt for the missing key.
                winfo(`setprovider BLOCKED: provider=${provider} missing=${keyName}`);
                res.writeHead(200, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify({ ok: false, missing: keyName || 'LOCAL_LLM_MODEL' }));
            }
        }).catch(e => {
            werr('setprovider error: ' + e.message);
            try { res.writeHead(500, {'Content-Type':'application/json'}); res.end(JSON.stringify({ok:false,error:e.message})); } catch(_){}
        });
        return;
    }

    // -- GET /api/state -- snapshot of session state for the UI: provider,
    // history position, context flag, available Ollama models, cost and LLM label.
    if (req.method === 'GET' && path_ === '/api/state') {
        const sid  = u.searchParams.get('sid') || 'default';
        const sess = getOrCreateSession(sid);
        const cost = loadCost();
        const { label } = await getLLM(sess.provider).catch(() => ({ label: '?' }));
        res.writeHead(200, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify({
            // Re-synthesize the 'ollama:model' form the client uses for local providers.
            provider:   sess.provider === 'local' ? 'ollama:' + cfg('LOCAL_LLM_MODEL', '') : sess.provider,
            navPos:     sess.navPos,
            useContext: sess.useContext || false,
            // Probe the local Ollama daemon for installed models; a 2s timeout
            // (or any error) degrades to an empty list rather than failing the request.
            ollamaModels: await (async () => {
                try {
                    const oBase = cfg('OLLAMA_HOST','http://localhost:11434');
                    const oR = await fetch(`${oBase}/api/tags`, { signal: AbortSignal.timeout(2000) });
                    const oD = await oR.json();
                    return (oD.models || []).map(m => m.name);
                } catch(_) { return []; }
            })(),
            cost:       costStr(cost.in, cost.out, cfg('ANTHROPIC_MODEL','claude-sonnet-4-6'), cfg('LLM_PROVIDER','claude')),
            llm:        label,
        }));
        return;
    }

    // -- POST /api/query -- main RAG pipeline. Acknowledges the request with an
    // immediate 200, then streams status/sources/annotations/answer via SSE.
    // Handles shell-style commands (?, !cost, !top N, ...) without a retrieval pass.
    if (req.method === 'POST' && path_ === '/api/query') {
        _params().then(async parsed => {
            const { sid='default', query, provider, collections: activeColls } = parsed;
            const sess = getOrCreateSession(sid);
            // New query -- clear any stale pipeline (already saved to history at Stop time)
            sess.stopped = false;
            sess._pipeline = null;

            // Commands bypass the "collections still loading" gate below.
            const isCommand = query && (query[0]==='!'||query==='?'||query[0]==='/'||query==='$');
            if (!sess.ready && !isCommand) { res.writeHead(503); res.end(JSON.stringify({ error: 'Still loading collections -- try again shortly' })); return; }
            if (!query)      { res.writeHead(400); res.end(JSON.stringify({ error: 'No query' })); return; }

            // Ack now; everything else arrives over the session's SSE channel.
            res.writeHead(200, { 'Content-Type': 'application/json' });
            res.end(JSON.stringify({ ok: true }));

            // Handle commands without doing a full RAG query
            if (isCommand) {
                let cmdOut = '';
                if (query === '?') {
                    const colls = sess.collNames.length ? sess.collNames.join(', ') : '(loading)';
                    const cost  = loadCost();
                    cmdOut = '**RAGWeed Web Help**\n\n'
                           + '**Commands:**\n'
                           + '`?` - show this help\n'
                           + '`!cost` - show token cost\n'
                           + '`!reset` - clear cost counter\n'
                           + '`!top N` - set top-k (currently ' + sess.topK + ')\n'
                           + '`!coll NAME` - toggle collection\n'
                           + '\n**Collections:** ' + colls + '\n'
                           + '**Provider:** ' + (sess.provider||'?') + '\n'
                           + '**Cost so far:** ' + costStr(cost.in, cost.out, cfg('ANTHROPIC_MODEL','claude-sonnet-4-6'));
                } else if (query === '!cost') {
                    const cost = loadCost();
                    cmdOut = '**Cost:** ' + costStr(cost.in, cost.out, cfg('ANTHROPIC_MODEL','claude-sonnet-4-6'))
                           + '  (in: ' + cost.in + ' out: ' + cost.out + ' tokens)';
                } else if (query === '!reset') {
                    saveCost(0, 0);
                    cmdOut = 'Cost counter reset.';
                } else if (query.startsWith('!top ')) {
                    const n = parseInt(query.slice(5));
                    if (n > 0) { sess.topK = n; cmdOut = 'top-k set to ' + n; }
                    else cmdOut = 'Usage: !top N (e.g. !top 32)';
                } else if (query.startsWith('!coll ')) {
                    cmdOut = 'Use the collection badges in the UI to toggle collections.';
                } else if (query === '$') {
                    // '$' duplicates !cost; '!$' duplicates !reset (legacy CLI aliases).
                    const cost = loadCost();
                    cmdOut = '**Cost:** ' + costStr(cost.in, cost.out, cfg('ANTHROPIC_MODEL','claude-sonnet-4-6'))
                           + '  (in: ' + cost.in + ' out: ' + cost.out + ' tokens)';
                } else if (query === '!$') {
                    saveCost(0, 0);
                    cmdOut = 'Cost counter reset.';
                } else if (query === '!colls') {
                    const _cnames = Object.keys(sess.collections);
                    cmdOut = '**Loaded collections (' + _cnames.length + '):**\n' + _cnames.join('\n');
                } else {
                    cmdOut = 'Unknown command: ' + query + '\nType ? for help.';
                }
                // Commands are still recorded in history so Back/Forward navigation sees them.
                const qid = historySaveEntry(query, cmdOut, [], [], { provider: 'system', model: 'cmd' });
                sess.navPos = historyLoadIndex().length - 1;
                _sseAll(sess, { type: 'answer', qid });
                return;
            }

            // Run query async, send result via SSE
            // Normalise 'ollama:modelname' -> provider='local', model extracted
            let useProvider = provider || sess.provider;
            let useModel = null;
            if (useProvider && useProvider.startsWith('ollama:')) {
                useModel = useProvider.slice(7);
                useProvider = 'local';
            }
            try {
                const llmInfo = await getLLM(useProvider, useModel);
                if (llmInfo.error) { _sseAll(sess, { type: 'error', text: llmInfo.label }); return; }

                sess.abortCtrl = new AbortController();
                const _querySignal = sess.abortCtrl.signal; // capture before it can be nulled
                // Filter to only the collections the client has active (badge state)
                let queryColls = sess.collections;
                if (activeColls && activeColls.length) {
                    queryColls = {};
                    for (const name of activeColls) {
                        if (sess.collections[name]) queryColls[name] = sess.collections[name];
                    }
                    if (!Object.keys(queryColls).length) queryColls = sess.collections; // fallback: all
                }
                const _nqc = Object.keys(queryColls).length;
                winfo(`query: activeColls=[${(activeColls||[]).join(',')}] queryColls=[${Object.keys(queryColls).join(',')}] (${_nqc})`);  // status emitted by queryCollections via onStatus
                // Build context from prior entries if enabled
                let ctxMessages = [];
                const _hasQids = sess.contextQids && sess.contextQids.size > 0;
                if (_hasQids || sess.useContext) {
                    // Explicitly-selected qids win; otherwise take the last 6 history entries.
                    const idx2 = historyLoadIndex();
                    const toLoad = _hasQids
                        ? idx2.filter(e => sess.contextQids.has(e.qid))
                        : idx2.slice(-6);
                    for (const ie of toLoad) {
                        const e = historyLoadEntry(idx2, idx2.indexOf(ie));
                        if (e && e.question && e.answer) {
                            ctxMessages.push({ role: 'user',      content: e.question });
                            ctxMessages.push({ role: 'assistant', content: e.answer });
                        }
                    }
                }
                const _annotate = parsed.annotate === true || parsed.annotate === 'true';
                let result;

                if (_annotate) {
                    // Annotate-first: retrieve -> SSE sources immediately -> annotate (SSE each) -> filter -> synthesize
                    sess._pipeline = { phase: 'search', query, annotate: _annotate, collections: Object.keys(queryColls), llmInfo, ctxMessages };
                    const _retrieved = await retrieveNodes(
                        queryColls, query, llmInfo, sess.topK,
                        _querySignal,
                        (phase) => _sseAll(sess, { type: 'status', text: phase }),
                        () => sess.stopped
                    );
                    if (_retrieved.status !== 'ok' || !_retrieved.nodes.length) {
                        result = { text: 'No relevant content found.', nodes: [], usage: {}, status: _retrieved.status || 'no_results' };
                    } else {
                        // Send source list immediately so browser renders them
                        const _preFmt = formatSources(_retrieved.nodes, []);
                        _sseAll(sess, { type: 'sources', lines: _preFmt.lines, chunks: _preFmt.chunks, urls: _preFmt.urls });
                        // Transition to annotation phase
                        sess._pipeline = { phase: 'annotation', query, annotate: _annotate, collections: Object.keys(queryColls), llmInfo, ctxMessages, nodes: _retrieved.nodes, annotations: new Array(_retrieved.nodes.length).fill('') };

                        // Annotation may use a different (often cheaper) provider/model
                        // than synthesis; 'default' means inherit the query provider.
                        const _annotProv  = (() => { const p = cfg('ANNOTATION_PROVIDER',''); return (p && p !== 'default') ? p : useProvider; })();
                        const _annotModel = cfg('ANNOTATION_MODEL', '') || null;
                        const _annotConc  = (_annotProv === 'local')
                            ? (parseInt(cfg('ANNOTATION_LOCAL_CONCURRENCY', '1')) || 1)
                            : (parseInt(cfg('ANNOTATION_CONCURRENCY', '4')) || 4);
                        const _annotLlm   = await getLLM(_annotProv, _annotModel);
                        if (_annotLlm.error) {
                            // Annotation provider unavailable -- pause and wait for user choice
                            winfo('annotation provider error: ' + _annotLlm.label);
                            sess._annotPaused = { nodes: _retrieved.nodes, query, llmInfo, ctxMessages, error: _annotLlm.label };
                            _sseAll(sess, { type: 'annot_blocked', text: 'Annotation unavailable: ' + _annotLlm.label });
                            return;
                        }
                        winfo('annotating ' + _retrieved.nodes.length + ' nodes with ' + _annotProv + '/' + _annotLlm.model);
                        _sseAll(sess, { type: 'status', text: 'Annotating ' + _retrieved.nodes.length + ' sources...' });
                        const { annotations: _annotations, filteredNodes: _filteredNodes, filteredIndices: _filteredIndices, blocked: _annotBlocked } =
                            await annotateAndFilter(query, _retrieved.nodes, _annotLlm, _annotConc,
                                (i, text, irrelevant) => {
                                    // Per-annotation callback: record in pipeline state and push to clients.
                                    if (sess._pipeline) sess._pipeline.annotations[i] = text;
                                    _sseAll(sess, { type: 'annotation', index: i, text, irrelevant });
                                },
                                sess.abortCtrl?.signal, () => sess.stopped);
                        if (_annotBlocked) {
                            sess._annotPaused = { nodes: _retrieved.nodes, query, llmInfo, ctxMessages, error: 'All annotation calls failed' };
                            _sseAll(sess, { type: 'annot_blocked', text: 'All annotations failed. Check annotation provider settings.' });
                            return;
                        }
                        // annotateAndFilter handles Stop/Go internally -- pauses when stopped,
                        // resumes when Go is pressed, returns only when all annotations complete
                        // or signal is aborted. Nothing to do here except check for abort.
                        winfo('annotation filter: ' + _retrieved.nodes.length + ' -> ' + _filteredNodes.length + ' nodes');
                        _sseAll(sess, { type: 'status', text: 'Generating response from ' + _filteredNodes.length + ' relevant sources...' });
                        if (!_filteredNodes.length) {
                            result = { text: 'No relevant content found in the retrieved sources.', nodes: _retrieved.nodes, usage: {}, status: 'no_results', annotations: _annotations, filtered_indices: [..._filteredIndices] };
                        } else {
                            sess._pipeline = { phase: 'synthesis', query, annotate: _annotate, collections: Object.keys(queryColls), llmInfo, ctxMessages, nodes: _retrieved.nodes, annotations: _annotations, filteredNodes: _filteredNodes, filteredIndices: _filteredIndices };
                            try {
                                // Map kept-node annotations back by original index for the synthesizer.
                                const _filtAnnotations = _filteredNodes.map(n => _annotations[_retrieved.nodes.indexOf(n)]);
                                const { text, usage } = await synthesizeCited(llmInfo, query, _filteredNodes, _querySignal, ctxMessages, _filtAnnotations);
                                result = { text, nodes: _retrieved.nodes, usage, status: 'ok', annotations: _annotations, filtered_indices: [..._filteredIndices] };
                            } catch(e) {
                                // Abort means Stop was pressed: park until Go, then retry once
                                // with a fresh AbortController.
                                if (e.name === 'AbortError' || (e.message && (e.message.includes('aborted') || e.message.includes('Aborted')))) {
                                    _sseAll(sess, { type: 'status', text: 'Generation paused -- press Go to resume' });
                                    await waitIfStopped(sess);
                                    _sseAll(sess, { type: 'status', text: 'Resuming generation...' });
                                    sess.abortCtrl = new AbortController();
                                    try {
                                        const _filtAnnotations2 = _filteredNodes.map(n => _annotations[_retrieved.nodes.indexOf(n)]);
                                        const { text: _rt, usage: _ru } = await synthesizeCited(llmInfo, query, _filteredNodes, sess.abortCtrl.signal, ctxMessages, _filtAnnotations2);
                                        result = { text: _rt, nodes: _retrieved.nodes, usage: _ru, status: 'ok', annotations: _annotations, filtered_indices: [..._filteredIndices] };
                                    } catch(e2) {
                                        const cause2 = e2.cause ? ` | cause: ${e2.cause.message||e2.cause}` : '';
                                        result = { text: `Error generating response: ${e2.message}${cause2}`, nodes: _retrieved.nodes, usage: {}, status: 'llm_error', annotations: _annotations, filtered_indices: [..._filteredIndices] };
                                    }
                                } else {
                                    const cause = e.cause ? ` | cause: ${e.cause.message||e.cause}` : '';
                                    result = { text: `Error generating response: ${e.message}${cause}`, nodes: _retrieved.nodes, usage: {}, status: 'llm_error', annotations: _annotations, filtered_indices: [..._filteredIndices] };
                                }
                            }
                        }
                    }
                } else {
                    // Standard path: retrieve -> SSE sources -> synthesize
                    sess._pipeline = { phase: 'search', query, annotate: _annotate, collections: Object.keys(queryColls), llmInfo, ctxMessages };
                    const _stdRetrieved = await retrieveNodes(
                        queryColls, query, llmInfo, sess.topK,
                        _querySignal,
                        (phase) => _sseAll(sess, { type: 'status', text: phase }),
                        () => sess.stopped
                    );
                    if (_stdRetrieved.status !== 'ok' || !_stdRetrieved.nodes.length) {
                        result = { text: 'No relevant content found.', nodes: [], usage: {}, status: _stdRetrieved.status || 'no_results' };
                    } else {
                        // SSE sources immediately so browser renders them while LLM generates
                        const _stdFmt = formatSources(_stdRetrieved.nodes, []);
                        _sseAll(sess, { type: 'sources', lines: _stdFmt.lines, chunks: _stdFmt.chunks, urls: _stdFmt.urls });
                        sess._pipeline = { phase: 'synthesis', query, annotate: _annotate, collections: Object.keys(queryColls), llmInfo, ctxMessages, nodes: _stdRetrieved.nodes, annotations: [] };
                        _sseAll(sess, { type: 'status', text: 'Generating LLM response...' });
                        try {
                            const { text, usage } = await synthesizeCited(llmInfo, query, _stdRetrieved.nodes, _querySignal, ctxMessages);
                            result = { text, nodes: _stdRetrieved.nodes, usage, status: 'ok' };
                        } catch(e) {
                            // Same Stop/Go park-and-retry dance as the annotate path above.
                            if (e.name === 'AbortError' || (e.message && (e.message.includes('aborted') || e.message.includes('Aborted')))) {
                                _sseAll(sess, { type: 'status', text: 'Generation paused -- press Go to resume' });
                                await waitIfStopped(sess);
                                _sseAll(sess, { type: 'status', text: 'Resuming generation...' });
                                sess.abortCtrl = new AbortController();
                                try {
                                    const { text: _srt, usage: _sru } = await synthesizeCited(llmInfo, query, _stdRetrieved.nodes, sess.abortCtrl.signal, ctxMessages);
                                    result = { text: _srt, nodes: _stdRetrieved.nodes, usage: _sru, status: 'ok' };
                                } catch(e2) {
                                    const cause2 = e2.cause ? ` | cause: ${e2.cause.message||e2.cause}` : '';
                                    result = { text: `Error generating response: ${e2.message}${cause2}`, nodes: _stdRetrieved.nodes, usage: {}, status: 'llm_error' };
                                }
                            } else {
                                const cause = e.cause ? ` | cause: ${e.cause.message||e.cause}` : '';
                                result = { text: `Error generating response: ${e.message}${cause}`, nodes: _stdRetrieved.nodes, usage: {}, status: 'llm_error' };
                            }
                        }
                    }
                }
                sess.abortCtrl = null;
                sess._pipeline = null;

                // Surface errors
                // NOTE(review): errors are detected by sniffing the result text prefix;
                // status === 'llm_error' would be the sturdier check -- confirm and unify.
                if (result.text && result.text.startsWith('Error generating response:')) {
                    const _errNodes = result.nodes || [];
                    const _errFmt = formatSources(_errNodes, []);
                    historySaveEntry(query, result.text, _errFmt.lines, _errFmt.chunks, _errFmt.urls, {
                        provider: useProvider, model: llmInfo.model, in_tokens: 0, out_tokens: 0,
                        collections: Object.keys(queryColls),
                        annotate: _annotate,
                    }, 'llm_error', _errNodes);
                    sess.navPos = historyLoadIndex().length - 1;
                    winfo('query returned error text: ' + result.text.slice(0, 120));
                    _sseAll(sess, { type: 'error', text: result.text, retryable: true });
                    return;
                }
                winfo(`query result: status=${result.status} nodes=${result.nodes?.length||0} text_len=${result.text?.length||0}`);
                const _filtIdx = result.filtered_indices || [];
                const { lines: srcLines, chunks: srcChunks, urls: srcUrls, annotations: srcAnnotations } = formatSources(result.nodes, result.annotations || []);
                // Always build seq_index_map -- maps sequential display numbers to original indices
                // For unfiltered queries this is identity map; for filtered it skips filtered indices
                const _filtSet2 = new Set(_filtIdx);
                const _seqIndexMap = [];
                srcLines.forEach((_, i) => { if (!_filtSet2.has(i)) _seqIndexMap.push(i); });
                const usage = result.usage || {};
                const cost  = loadCost();
                cost.in  += usage.input_tokens  || 0;
                cost.out += usage.output_tokens || 0;
                saveCost(cost.in, cost.out);

                // Raw nodes are only persisted on llm_error (so the entry can be retried).
                const _qStatus = result.status || 'ok';
                const _rawNodes = (_qStatus === 'llm_error') ? result.nodes : [];
                const qid = historySaveEntry(query, result.text, srcLines, srcChunks, srcUrls, {
                    provider: useProvider, model: llmInfo.model,
                    in_tokens: usage.input_tokens||0, out_tokens: usage.output_tokens||0,
                    filtered_indices: _filtIdx.length ? _filtIdx : undefined,
                    seq_index_map: _seqIndexMap,
                    collections: Object.keys(queryColls),
                    annotate: _annotate,
                }, _qStatus, _rawNodes, srcAnnotations);
                sess.navPos = historyLoadIndex().length - 1;
                _sseAll(sess, { type: 'cost', text: costStr(cost.in, cost.out, llmInfo.model||'', llmInfo.provider||'') });
                _sseAll(sess, { type: 'answer', qid });
            } catch(e) {
                // User-initiated aborts are expected; anything else is surfaced via SSE.
                if (e.message && (e.message.includes('aborted') || e.message.includes('Aborted'))) {
                    winfo('query aborted by user');
                } else {
                    werr('query error', e);
                    _sseAll(sess, { type: 'error', text: e.message });
                }
            }
            // NOTE(review): the log label in the .catch below says 'setprovider error' --
            // a copy-paste from the /api/setprovider route; it should say 'query error'.
        }).catch(e => { werr('setprovider error', e); res.writeHead(500); res.end(JSON.stringify({ok:false,error:e.message})); });
        return;
    }

    // -- POST /api/set -- persist a session setting
    if (req.method === 'POST' && path_ === '/api/set') {
        _params().then(p => {
            const { sid='default', key, value } = p;
                const sess = getOrCreateSession(sid);
                if (key === 'provider') {
                    // 'ollama:modelname' -> provider='local', model saved to config
                    if (value && value.startsWith('ollama:')) {
                        const oModel = value.slice(7);
                        process.env.LOCAL_LLM_MODEL = oModel;
                        envSet('LOCAL_LLM_MODEL', oModel);
                        sess.provider = 'local';
                    } else {
                        sess.provider = value;
                    }
                    envSet('LLM_PROVIDER', sess.provider);
                    winfo(`provider changed to ${sess.provider} for sid=${sid}`);
                    // Immediate interim label so client display updates without waiting
                    const _interimLabel = value.startsWith('ollama:')
                        ? 'Ollama: ' + value.slice(7)
                        : value === 'openai'  ? 'OpenAI: gpt-4o'
                        : value === 'claude'  ? 'Claude'
                        : value === 'gemini'  ? 'Gemini'
                        : value;
                    _sseAll(sess, { type: 'llm', text: _interimLabel });
                    // Then resolve accurate model name and broadcast again
                    getLLM(sess.provider).then(llmInfo => {
                        const cost = loadCost();
                        _sseAll(sess, { type: 'llm',  text: llmInfo.label });
                        _sseAll(sess, { type: 'cost', text: costStr(cost.in, cost.out, llmInfo.model||value) });
                    }).catch(e => werr('getLLM after provider change', e));
                }
                if (key === 'collections') {
                    sess.activeColls = typeof value === 'string' ? value.split(',').map(s=>s.trim()).filter(Boolean) : (Array.isArray(value) ? value : []);
                    winfo('activeColls=['+sess.activeColls.join(',')+'] sid='+sid);
                }
                if (key === 'useContext') {
                    sess.useContext = value === true || value === 'true';
                    winfo('useContext='+sess.useContext+' sid='+sid);
                }
                if (key === 'contextQids') {
                    sess.contextQids = new Set(Array.isArray(value) ? value.map(Number) : []);
                    wdbg2('contextQids=['+[...sess.contextQids].join(',')+'] sid='+sid);
                }
                if (key === 'topK') sess.topK = parseInt(value)||64;
                const _setLlmLabel = (key === 'provider') ? (
                    value && value.startsWith('ollama:') ? 'Ollama: ' + value.slice(7)
                    : value === 'openai' ? 'OpenAI: gpt-4o'
                    : value === 'claude' ? 'Claude'
                    : value === 'gemini' ? 'Gemini' : value
                ) : null;
                res.writeHead(200, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify({ ok: true, llm: _setLlmLabel }));
        }).catch(_ => { res.writeHead(400); res.end('{}'); });
        return;
    }

    // POST /api/cancel
    if (req.method === 'POST' && path_ === '/api/cancel') {
        _params().then(p => {
            const sid = p.sid || 'default';
            const s2 = sessions[sid];
            if (s2) {
                const phase = s2._pipeline?.phase || 'idle';
                winfo('stop requested sid='+sid+' phase='+phase);
                if (phase === 'search') {
                    // Pause search in place -- retrieveNodes will finish current collection then wait
                    s2.stopped = true;
                    _sseAll(s2, { type: 'stopped', phase });
                } else if (phase === 'annotation') {
                    // Set stopped -- annotateAndFilter will finish in-flight calls then call onStop to save, then pause
                    s2.stopped = true;
                    _sseAll(s2, { type: 'stopped', phase });
                } else if (phase === 'synthesis') {
                    // Abort LLM generation immediately -- pipeline will save + park
                    if (s2.abortCtrl) { s2.abortCtrl.abort(); s2.abortCtrl = null; }
                    s2.stopped = true;
                    _sseAll(s2, { type: 'stopped', phase });
                } else {
                    // idle or unknown -- just set stopped
                    s2.stopped = true;
                    _sseAll(s2, { type: 'stopped', phase });
                }
            }
            res.writeHead(200); res.end(JSON.stringify({ ok: true }));
        }).catch(_ => { res.writeHead(200); res.end(JSON.stringify({ ok: true })); });
        return;
    }
    // -- POST /api/go -- resume a stopped query
    if (req.method === 'POST' && path_ === '/api/go') {
        _params().then(p => {
            const sid = p.sid || 'default';
            const s2 = sessions[sid];
            if (s2) { s2.goGen = (s2.goGen || 0) + 1; s2.stopped = false; winfo('go: resumed sid='+sid); }
            res.writeHead(200); res.end(JSON.stringify({ ok: true }));
        }).catch(_ => { res.writeHead(200); res.end(JSON.stringify({ ok: true })); });
        return;
    }
    // -- GET /api/copies -- find all copies of a file by its source_md5,
    // scanning every collection's rag.sqlite3 metadata database directly
    // (the sqlite files live in the ChromaDB-layout directory tree).
    if (req.method === 'GET' && path_ === '/api/copies') {
        const _col = u.searchParams.get('collection') || '';
        let _rel = u.searchParams.get('rel_path') || '';
        if (!_col || !_rel) { res.writeHead(400); res.end(JSON.stringify({ error: 'collection and rel_path required' })); return; }
        // Strip collection prefix if present (client may send "CID/path/to/file")
        if (_rel.startsWith(_col + '/')) _rel = _rel.slice(_col.length + 1);
        try {
            const _chromaPath = path.resolve(PROJECT_DIR, cfg('CHROMA_PATH', './chromadb'));
            const Database = require('better-sqlite3');
            const { buildSegDirMap } = await import('./collections.js');
            // Map of collection name -> segment directory containing its sqlite DB.
            const _sdmap = buildSegDirMap(_chromaPath);

            // Step 1: find source_md5 -- look in the requesting collection's rag.sqlite3 first
            let _md5 = null;
            const _srcEntry = _sdmap[_col];
            if (_srcEntry) {
                const _ragPath = path.join(_srcEntry.segDir, 'rag.sqlite3');
                if (fs.existsSync(_ragPath)) {
                    try {
                        const _db = new Database(_ragPath, { readonly: true, fileMustExist: true });
                        // Self-join: m2 pins the row set to this rel_path,
                        // m1 pulls the corresponding source_md5 value.
                        const _row = _db.prepare(
                            "SELECT m1.string_value as md5 FROM embedding_metadata m1 " +
                            "JOIN embedding_metadata m2 ON m2.id=m1.id AND m2.key='source_rel_path' AND m2.string_value=? " +
                            "WHERE m1.key='source_md5' LIMIT 1"
                        ).get(_rel);
                        if (_row) _md5 = _row.md5;
                        _db.close();
                    } catch(_) {}
                }
            }
            // No md5 -> no way to match copies; return an empty list rather than erroring.
            if (!_md5) { res.writeHead(200); res.end(JSON.stringify({ copies: [] })); return; }

            // Step 2: scan ALL rag.sqlite3 files for that md5 -> collect (rel_path, collection)
            const _copies = [];
            for (const [name, entry] of Object.entries(_sdmap)) {
                const _ragPath = path.join(entry.segDir, 'rag.sqlite3');
                if (!fs.existsSync(_ragPath)) continue;
                try {
                    const _db = new Database(_ragPath, { readonly: true, fileMustExist: true });
                    const _rows = _db.prepare(
                        "SELECT DISTINCT m2.string_value as rel_path FROM embedding_metadata m1 " +
                        "JOIN embedding_metadata m2 ON m2.id=m1.id AND m2.key='source_rel_path' " +
                        "WHERE m1.key='source_md5' AND m1.string_value=?"
                    ).all(_md5);
                    _db.close();
                    for (const r of _rows) {
                        // Per-collection base URL from Config, e.g. COLLECTION_URL_MY_DOCS.
                        const _envKey = 'COLLECTION_URL_' + name.replace(/[^A-Za-z0-9]/g, '_').toUpperCase();
                        const _baseRaw = cfg(_envKey, 'https://all.net');
                        const _base = _baseRaw.endsWith('/') ? _baseRaw.slice(0, -1) : _baseRaw;
                        _copies.push({ collection: name, rel_path: r.rel_path, url: _base + '/' + r.rel_path });
                    }
                } catch(_) {}
            }
            res.writeHead(200, { 'Content-Type': 'application/json' });
            res.end(JSON.stringify({ md5: _md5, copies: _copies }));
        } catch(e) {
            werr('api/copies', e);
            res.writeHead(500); res.end(JSON.stringify({ error: e.message }));
        }
        return;
    }

    // -- POST /api/reload -- reset all module-level collection caches and kick
    // off a fresh async preload (called after ingest, or manually via UI button).
    if (req.method === 'POST' && path_ === '/api/reload') {
        wlog('/api/reload requested  --  resetting collection cache');
        try { invalidateSegDirMap(); } catch(_) {}
        // Update all active sessions with new env-derived values
        try {
            for (const sess of Object.values(sessions)) {
                sess.topK = cfgInt('TOP_K', 64);
            }
        } catch(_) {}
        // Empty the shared caches in place (other code holds references to
        // these objects/arrays, so they must be cleared, not reassigned).
        Object.keys(_serverCollections).forEach(k => delete _serverCollections[k]);
        _serverCollFailed.length = 0;
        _serverCollNames.length  = 0;
        _serverCollReady   = false;
        _serverCollLoading = false;
        // Fire-and-forget: respond immediately, preload continues in background.
        _preloadCollections().catch(e => wlog('reload _preloadCollections: ' + e.message));
        res.writeHead(200, {'Content-Type':'application/json'});
        res.end(JSON.stringify({ ok: true }));
        return;
    }
    res.writeHead(404); res.end('Not found');
}

// -- Main ----------------------------------------------------------------------
// Best-effort discovery of this host's LAN address: the first non-internal
// IPv4 address found on any interface. Falls back to '0.0.0.0' when nothing
// qualifies or os.networkInterfaces() fails.
function getLocalIP() {
    try {
        const os = require('os');
        const interfaces = os.networkInterfaces();
        for (const name of Object.keys(interfaces)) {
            const addrs = interfaces[name] || [];
            for (let i = 0; i < addrs.length; i++) {
                const addr = addrs[i];
                if (addr.family === 'IPv4' && !addr.internal) return addr.address;
            }
        }
    } catch (_) {}
    return '0.0.0.0';
}

// HTTP entry point: delegate every request to the async handler and make
// sure a rejected handler still sends a response instead of hanging the
// client (writeHead can itself throw if headers were already sent).
const server = http.createServer(function (req, res) {
    handleRequest(req, res).catch(function (e) {
        werr('handler error', e);
        try {
            res.writeHead(500);
            res.end('Internal error');
        } catch (_) {}
    });
});

// Startup banner: resolve bind address and effective configuration up front.
const localIP = getLocalIP();
// Explicit active-collections config wins; otherwise discover all on disk.
const activeColls = parseActiveCollections() || getAllCollectionNames();
const provider = cfg('LLM_PROVIDER', 'claude');

wlog(`==================================================`);
wlog(`RAGWeed v${VERSION} web server starting`);
winfo(`debug_level=${DEBUG_LEVEL}  port=${PORT}  pid=${process.pid}`);
winfo(`project_dir=${process.env.PROJECT_DIR || '(not set)'}`);
winfo(`log_file=${_WEB_LOG_FILE||"(none)"}`);
// NOTE(review): version/DEBUG_LEVEL/pid are logged again here after the two
// lines above -- confirm the duplication is intentional.
wlog(`RAGWeed v${VERSION}  DEBUG_LEVEL=${DEBUG_LEVEL}  pid=${process.pid}`);
wlog(`  provider   : ${provider}`);
wlog(`  collections: ${activeColls.join(', ') || '(auto-discover)'}`);
// NOTE(review): fallback default 8 here differs from the 64 used when
// /api/reload refreshes session topK -- confirm which default is canonical.
wlog(`  top-k      : ${cfgInt('TOP_K',8)}`);
wlog(`  local  ->  http://localhost:${PORT}`);
wlog(`  network->  http://${localIP}:${PORT}`);
wlog('Press Ctrl-C to stop');

// A synchronous uncaught exception is fatal; an unhandled rejection is only
// logged and the process keeps running.
process.on('uncaughtException',    e => { werr('UNCAUGHT EXCEPTION (fatal)', e); process.exit(1); });
process.on('unhandledRejection',   e => { werr('UNHANDLED REJECTION', e instanceof Error ? e : new Error(String(e))); });

// Bind on all interfaces. Startup work inside the callback is deliberately
// deferred with timers so the server answers HTTP before heavy preloading.
server.listen(PORT, '0.0.0.0', () => {
    // Record server start in the session log (best effort; failures ignored).
    if (_SESSION_LOG) try {
        require('fs').appendFileSync(_SESSION_LOG,
            '[START] RAGWeed v' + VERSION + ' (web) ' + new Date().toISOString() + '\n');
    } catch(_) {}
    wlog('Listening on port ' + PORT);
    winfo('debug_level=' + DEBUG_LEVEL + '  session_log=' + (_SESSION_LOG||'(none)'));
    // Kick off collection preload after a short yield - server responds to HTTP first
    // Diagnostic: log rag.sqlite3 collection count
    setTimeout(() => {
        try {
            // PROJECT_DIR env wins; fall back to this script's parent directory.
            const PROJECT_DIR_diag = process.env.PROJECT_DIR || path.dirname(__dirname);
            const chromaPath = path.resolve(PROJECT_DIR_diag, process.env.CHROMA_PATH || './chromadb');
            wlog(`[DIAG] chromaPath=${chromaPath}`);
            let ragCount = 0;
            try {
                // One count per collection subdirectory that holds a rag.sqlite3 store.
                for (const d of fs.readdirSync(chromaPath, { withFileTypes: true })) {
                    if (d.isDirectory() && fs.existsSync(path.join(chromaPath, d.name, 'rag.sqlite3'))) ragCount++;
                }
            } catch(_) {}
            wlog(`[DIAG] rag.sqlite3 files found: ${ragCount}`);
        } catch(e) { wlog('[DIAG] startup diag err: ' + e.message); }
    }, 50);
    // Preload starts after the diagnostic pass; errors are logged, not fatal.
    setTimeout(() => _preloadCollections().catch(e => wlog('_preloadCollections fatal: ' + e.message)), 100);
});
process.on('SIGINT', () => { wlog('Stopped.'); process.exit(0); });

// Watch Config for changes (webc saves trigger this)  --  auto-reload collections.
// Also poll every 5s as a safety net.
// On change: re-import KEY=value pairs into process.env, then perform the
// same cache-reset + preload sequence as the /api/reload endpoint.
(function _watchEnv() {
    const envPath = path.join(process.env.PROJECT_DIR || path.resolve(__dirname, '..'), 'Config');
    let lastMtime = 0;
    try { lastMtime = fs.statSync(envPath).mtimeMs; } catch(_) {}
    function _reloadFromEnv() {
        wlog('Config changed  --  reloading active collections');
        try {
            // Parse simple KEY=value lines; strip one surrounding quote per end.
            for (const line of fs.readFileSync(envPath,'utf8').split('\n')) {
                const m = line.match(/^([A-Z_][A-Z0-9_]*)=(.*)$/);
                if (m) process.env[m[1]] = m[2].replace(/^["']|["']$/g,'');
            }
        } catch(_) {}
        try { invalidateSegDirMap(); } catch(_) {}
        // Drop every cached collection and force a fresh async preload.
        Object.keys(_serverCollections).forEach(k => delete _serverCollections[k]);
        _serverCollFailed.length = 0;
        _serverCollNames.length  = 0;
        _serverCollReady   = false;
        _serverCollLoading = false;
        _preloadCollections().catch(e => wlog('env-watch reload: ' + e.message));
    }
    function _checkEnv() {
        try {
            const mtime = fs.statSync(envPath).mtimeMs;
            // Any mtime change (newer or older) triggers a reload.
            if (mtime !== lastMtime) { lastMtime = mtime; _reloadFromEnv(); }
        } catch(_) {}
    }
    // Poll as the safety net; fs.watch (when available) reacts faster but is
    // debounced 100ms and funneled through the same mtime check.
    setInterval(_checkEnv, 5000);
    try { fs.watch(envPath, () => setTimeout(_checkEnv, 100)); } catch(_) {}
})();
RAGWEED-WEB-v1.0.102-20260319-000014-473

cat > "$SCRIPTS_DIR/index.html" << 'RAGWEED-HTM-v1.0.102-20260319-000014-473'
<!DOCTYPE html>
<!-- RAGWEED_VERSION=1.0.102 -->
<html lang="en" data-version="__VERSION__">
<head><meta charset="UTF-8">
<link rel="icon" type="image/svg+xml" href="data:image/svg+xml,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20viewBox%3D%220%200%2032%2032%22%3E%3Crect%20width%3D%2232%22%20height%3D%2232%22%20rx%3D%224%22%20fill%3D%22%231a1a2e%22/%3E%3Crect%20x%3D%223%22%20y%3D%2210%22%20width%3D%224%22%20height%3D%2216%22%20rx%3D%221%22%20fill%3D%22%23f5f0e8%22/%3E%3Crect%20x%3D%228%22%20y%3D%2210%22%20width%3D%224%22%20height%3D%2216%22%20rx%3D%221%22%20fill%3D%22%23f5f0e8%22/%3E%3Crect%20x%3D%2213%22%20y%3D%2210%22%20width%3D%224%22%20height%3D%2216%22%20rx%3D%221%22%20fill%3D%22%23f5f0e8%22/%3E%3Crect%20x%3D%2218%22%20y%3D%2210%22%20width%3D%224%22%20height%3D%2216%22%20rx%3D%221%22%20fill%3D%22%23f5f0e8%22/%3E%3Crect%20x%3D%2223%22%20y%3D%2210%22%20width%3D%224%22%20height%3D%2216%22%20rx%3D%221%22%20fill%3D%22%23f5f0e8%22/%3E%3Crect%20x%3D%226%22%20y%3D%2210%22%20width%3D%223%22%20height%3D%2210%22%20rx%3D%221%22%20fill%3D%22%23111%22/%3E%3Crect%20x%3D%2211%22%20y%3D%2210%22%20width%3D%223%22%20height%3D%2210%22%20rx%3D%221%22%20fill%3D%22%23111%22/%3E%3Crect%20x%3D%2221%22%20y%3D%2210%22%20width%3D%223%22%20height%3D%2210%22%20rx%3D%221%22%20fill%3D%22%23111%22/%3E%3Ccircle%20cx%3D%2224%22%20cy%3D%225%22%20r%3D%222%22%20fill%3D%22%23c8a84b%22/%3E%3Crect%20x%3D%2225.5%22%20y%3D%221%22%20width%3D%221.5%22%20height%3D%226%22%20fill%3D%22%23c8a84b%22/%3E%3Crect%20x%3D%2219%22%20y%3D%222%22%20width%3D%228%22%20height%3D%221.5%22%20fill%3D%22%23c8a84b%22/%3E%3C/svg%3E">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta http-equiv="Cache-Control" content="no-cache,no-store,must-revalidate">
<title>RAGWeed v__VERSION__</title>
<style>*{box-sizing:border-box;margin:0;padding:0}body{font-family:Georgia,serif;background:#fff;color:#111;display:flex;flex-direction:column;height:100vh}#header{background:#fff;border-bottom:1px solid #111;padding:2px 12px;display:flex;align-items:center;gap:6px;flex-wrap:nowrap}#header h1{font-size:13px;font-weight:bold}#header-right{margin-left:6px;flex-shrink:0;display:flex;gap:6px;align-items:center;font-size:13px}#cost-display{padding:1px 6px;white-space:nowrap;border:1px solid currentColor}#llm-display{padding:1px 6px;white-space:nowrap;border:1px solid currentColor}#load-bar-wrap{height:3px;background:#111;opacity:0.3;display:none}#load-bar{height:3px;background:#111;width:0;transition:width .3s}#status-row{padding:2px 12px;min-height:18px;border-bottom:1px solid #111}#colls-row{padding:2px 12px;border-bottom:1px solid #111;display:flex;flex-wrap:wrap;gap:4px;align-items:center;font-size:13px}#ver-tag{white-space:nowrap;margin-right:6px}#colls{display:flex;flex-wrap:wrap;gap:4px;flex:1;min-width:0}.coll-badge{padding:1px 8px;border:1px solid #111;cursor:pointer;background:#fff;color:#111}.coll-badge.active{background:#111;color:#fff}.coll-badge.failed{background:#999;color:#fff;text-decoration:line-through;cursor:not-allowed;opacity:0.5}#history-nav{display:flex;align-items:center;gap:6px;padding:2px 12px;border-bottom:1px solid #111;font-size:13px}#history-nav button{padding:1px 8px;border:1px solid #111;background:#fff;cursor:pointer;font-family:Georgia,serif}#history-nav button:disabled{opacity:.4;cursor:default}#nav-label{opacity:0.7}#chat{flex:1;overflow-y:auto;padding:6px 12px}#chat-placeholder{font-style:italic;font-size:13px}.entry{margin-bottom:3px}.you{font-weight:bold;margin-bottom:3px}.bot{line-height:1.15;white-space:pre-wrap}.sources{margin-top:3px;line-height:1.15}.sources 
a{color:inherit;text-decoration:underline;cursor:pointer;display:inline-block;margin-right:8px;opacity:0.8}cite{color:inherit;text-decoration:underline;cursor:pointer;opacity:0.8}.meta-line{opacity:0.6;margin-top:3px}#input-area{border-top:2px solid #111;padding:4px 12px;background:#fff}#input-row{display:flex;gap:6px}#qi{flex:1;font-family:Georgia,serif;padding:4px 8px;border:1px solid #111;resize:none;min-height:28px;max-height:120px;overflow-y:auto}#send-btn{padding:4px 14px;background:#111;color:#fff;border:none;font-family:Georgia,serif;font-weight:bold;cursor:pointer}#send-btn:disabled{opacity:.4;cursor:default}#src-modal{display:none;position:fixed;inset:0;background:rgba(0,0,0,.5);z-index:100}#src-modal.open{display:flex;align-items:center;justify-content:center}#src-box{background:#fff;border:2px solid #111;padding:0;width:80%;max-height:80vh;display:flex;flex-direction:column;overflow:hidden}#src-text{line-height:1.15;white-space:pre-wrap;overflow-y:auto;padding:10px;flex:1}#src-text p,#src-text li{margin-bottom:1.5em}#src-close{padding:3px 12px;border:1px solid currentColor;cursor:pointer;font-family:Georgia,serif;font-size:13px}#help-panel{display:none;position:fixed;inset:0;background:rgba(0,0,0,.5);z-index:100}#help-panel.open{display:flex;align-items:center;justify-content:center}#help-box{background:#fff;border:2px solid #111;padding:0;width:80%;max-height:85vh;display:flex;flex-direction:column;overflow:hidden;font-size:13px}#help-box h2{margin:0}#help-close{padding:3px 12px;border:1px solid currentColor;cursor:pointer;font-family:Georgia,serif;white-space:nowrap}#diag{background:#ffff99;border:2px solid #c00;padding:4px 12px;font-family:monospace}.warn-box{border:1px solid;padding:4px 10px;margin:3px 0}select{font-family:Georgia,serif;padding:1px 4px;border:1px solid #111;background:#fff;min-width:100px}</style>
</head>
<body>
<div id="diag">Loading... (if stuck, check browser console)</div>
<div id="load-bar-wrap"><div id="load-bar"></div></div>
<div id="header">
  <h1>RAGWeed v__VERSION__</h1>
  <div id="header-right">
    <span id="cost-display"></span>
    <span id="llm-display"></span>
    <select id="provider-sel" disabled>
      <option value="claude">Claude</option>
      <option value="openai">OpenAI</option>
      <option value="gemini">Gemini</option>
      <option value="local">Ollama (local)</option>
    </select>
    <button id="help-btn">? Help</button>
    <button id="lf-btn" style="padding:1px 8px;border:1px solid #111;background:#fff;font-family:Georgia,serif;font-size:13px;cursor:pointer">&#9881; L&amp;F</button>
  </div>
</div>
<div id="colls-row"><span id="ver-tag">__HOSTNAME__</span><div id="colls"></div><button id="reload-colls-btn" title="Reload collections after ingest" onclick="reloadCollections()" style="margin-left:6px;padding:1px 6px;border:1px solid #111;background:#fff;cursor:pointer;font-size:13px">&#x27F3;</button></div>
<div id="status-row"><span id="status">Connecting...</span></div>
<div id="history-nav">
  <button id="prev-btn" disabled>&#9664; Prev</button>
  <span id="nav-label">--</span>
  <button id="next-btn" disabled>Next &#9654;</button>
  <button id="latest-btn" disabled>Latest</button>
  <button id="back-hist-btn" style="display:none;margin-left:8px">&#8592; History</button>
  <button id="hist-panel-btn" style="margin-left:4px" disabled>&#9776; History</button>
  <label style="margin-left:10px;cursor:pointer;font-size:13px"><input type="checkbox" id="ctx-toggle" style="cursor:pointer" disabled> Use context</label>
  <label style="margin-left:10px;cursor:pointer;font-size:13px"><input type="checkbox" id="annotate-toggle" style="cursor:pointer"> Annotate</label>
</div>
<div id="hist-modal" style="display:none;position:fixed;inset:0;background:rgba(0,0,0,.55);z-index:200"><div id="hist-box" style="border:2px solid #111;width:80%;max-height:82vh;margin:5vh auto;display:flex;flex-direction:column;overflow:hidden"><div style="display:flex;justify-content:space-between;align-items:center;padding:8px 14px;border-bottom:2px solid #111;flex-shrink:0"><b>Query History</b><button id="hist-close-btn" style="padding:2px 12px;border:2px solid #111;background:#fff;cursor:pointer">Close</button></div><div id="hist-list" style="overflow-y:auto;flex:1;font-size:13px;font-family:Georgia,serif"></div></div></div>
<div id="chat"><span id="chat-placeholder" style="font-style:italic;opacity:0.6">Loading...</span></div>
<div id="input-area"><div id="input-row">
  <textarea id="qi" rows="1" placeholder="Loading..." disabled></textarea>
  <button id="send-btn" disabled>Send</button>
  <button id="cancel-btn" class="btn" style="display:none;padding:4px 10px;font-weight:bold;cursor:pointer" title="Stop current query">Stop</button>
  <button id="go-btn" class="btnp" style="display:none;padding:4px 10px;font-weight:bold;cursor:pointer" title="Resume stopped query">Go</button>
</div></div>
<div id="src-modal"><div id="src-box">
  <div style="display:flex;justify-content:space-between;align-items:center;padding:10px 16px;border-bottom:2px solid #111;flex-shrink:0">
    <b id="src-title">Source</b><div style="display:flex;gap:6px;align-items:center"><a id="src-url-link" href="#" target="_blank" rel="noopener" style="display:none;font-size:13px;border:1px solid currentColor;padding:1px 8px;text-decoration:none">Open &#8599;</a><button id="src-copies-btn" class="btn" style="display:none;font-size:13px;padding:1px 8px">Copies</button><button id="src-html-btn" style="display:none;font-size:13px;border:1px solid currentColor;padding:1px 8px;background:#fff;color:#111;cursor:pointer;font-family:Georgia,serif">HTML</button><button id="src-safe-btn" style="display:none;font-size:13px;border:1px solid currentColor;padding:1px 8px;background:#fff;color:#111;cursor:pointer;font-family:Georgia,serif">Safer</button><button id="src-close">Close [Esc]</button></div>
  </div>
  <div id="src-copies-list" style="display:none;border-top:1px solid;padding:6px 12px;font-size:13px"></div>
  <div id="src-text"></div>
</div></div>
<div id="help-panel"><div id="help-box">  <div style="display:flex;justify-content:space-between;align-items:center;padding:10px 16px;border-bottom:2px solid #111;flex-shrink:0">    <b>RAGWeed Help</b>    <button id="help-close">Close [Esc]</button>  </div>  <div style="overflow-y:auto;padding:16px"><table style="border-collapse:collapse;width:100%;font-size:14px;margin-bottom:8px"><tr style="background:#f0f0f0"><th colspan="2" style="padding:5px 8px;text-align:left;border-bottom:2px solid #111">Queries</th></tr><tr><td style="padding:3px 8px"><b>two or more words</b></td><td>Send a RAG query to active collections</td></tr><tr><td style="padding:3px 8px"><b>N &nbsp;or&nbsp; #N</b></td><td>Jump to history entry N</td></tr><tr><td style="padding:3px 8px"><b>#N.M &nbsp;or&nbsp; N.M</b></td><td>Jump to entry N and open source reference M</td></tr><tr><td style="padding:3px 8px"><b>.M</b></td><td>Open source M of the current entry</td></tr><tr style="background:#f0f0f0"><th colspan="2" style="padding:5px 8px;text-align:left;border-bottom:2px solid #111">Keyboard</th></tr><tr><td style="padding:3px 8px"><kbd>Enter</kbd></td><td>Send query</td></tr><tr><td style="padding:3px 8px"><kbd>Shift+Enter</kbd></td><td>Insert newline in query box</td></tr><tr><td style="padding:3px 8px"><kbd>&uarr; / &darr; (empty box)</kbd></td><td>Navigate history prev / next</td></tr><tr style="background:#f0f0f0"><th colspan="2" style="padding:5px 8px;text-align:left;border-bottom:2px solid #111">Navigation</th></tr><tr><td style="padding:3px 8px"><b>Prev / Next</b></td><td>Step through query history</td></tr><tr><td style="padding:3px 8px"><b>Latest</b></td><td>Jump to most recent answer</td></tr><tr style="background:#f0f0f0"><th colspan="2" style="padding:5px 8px;text-align:left;border-bottom:2px solid #111">Collections</th></tr><tr><td style="padding:3px 8px"><b>Badges</b></td><td>Click to toggle a collection on/off for queries</td></tr><tr><td style="padding:3px 
8px"><b>list</b></td><td>List all collections with active status</td></tr><tr><td style="padding:3px 8px"><b>c NAME</b></td><td>Toggle named collection this session</td></tr><tr style="background:#f0f0f0"><th colspan="2" style="padding:5px 8px;text-align:left;border-bottom:2px solid #111">LLM Provider</th></tr><tr><td style="padding:3px 8px"><b>Provider selector</b></td><td>Switch Claude / OpenAI / Gemini / Ollama</td></tr><tr><td style="padding:3px 8px"><b>r N</b></td><td>Switch provider: 1=claude 2=openai 3=gemini 4=ollama</td></tr><tr style="background:#f0f0f0"><th colspan="2" style="padding:5px 8px;text-align:left;border-bottom:2px solid #111">Sources &amp; Citations</th></tr><tr><td style="padding:3px 8px"><b>[N] inline citations</b></td><td>Click to view source passage</td></tr><tr><td style="padding:3px 8px"><b>.N</b></td><td>Show source N of current entry</td></tr><tr><td style="padding:3px 8px"><b>!src</b></td><td>Toggle source list visibility</td></tr><tr style="background:#f0f0f0"><th colspan="2" style="padding:5px 8px;text-align:left;border-bottom:2px solid #111">Context &amp; Cost</th></tr><tr><td style="padding:3px 8px"><b>x</b></td><td>Toggle conversation context on/off</td></tr><tr><td style="padding:3px 8px"><b>!ctx clear</b></td><td>Clear context history</td></tr><tr><td style="padding:3px 8px"><b>$  or  !cost</b></td><td>Show token cost this session</td></tr><tr><td style="padding:3px 8px"><b>!$  or  !reset</b></td><td>Reset cost counter</td></tr><tr style="background:#f0f0f0"><th colspan="2" style="padding:5px 8px;text-align:left;border-bottom:2px solid #111">Commands</th></tr><tr><td style="padding:3px 8px"><b>!top N</b></td><td>Set top-k retrieval count</td></tr><tr><td style="padding:3px 8px"><b>!cfg</b></td><td>Open configuration menu (TUI)</td></tr><tr><td style="padding:3px 8px"><b>!stop</b></td><td>Stop in-progress query</td></tr><tr><td style="padding:3px 8px"><b>?</b></td><td>Show this help</td></tr></table>  </div></div></div></div>
<script src="/lf.js"></script><script src="/app.js"></script>
<script>
document.addEventListener('DOMContentLoaded',function(){var openLF=lfInit();document.getElementById('lf-btn').onclick=openLF;});
</script>
<div style="text-align:center;font-size:11px;padding:6px;border-top:1px solid currentColor;margin-top:8px">Copyright &copy; Fred Cohen, 2026 - ALL RIGHTS RESERVED - <a href="//all.net/Notices.html" target="_blank" style="color:inherit">Patents</a></div>
</body></html>
RAGWEED-HTM-v1.0.102-20260319-000014-473

cat > "$SCRIPTS_DIR/app.js" << 'RAGWEED-APP-v1.0.102-20260319-000014-473'
// VERSION: 1.0.102
// Log to the browser console and mirror the line to the server.
// The server POST is best-effort. The try/catch only covers synchronous
// failures (e.g. fetch missing); a network failure rejects the promise
// asynchronously, which the catch never sees. Without the .catch below,
// that rejection fires window.onunhandledrejection, whose handler calls
// clientLog again -- a feedback loop whenever the server is unreachable.
function clientLog(level,msg){
  console.log('[ragweed:'+level+']',msg);
  try{
    fetch('/api/clientlog',{method:'POST',headers:{'Content-Type':'application/json'},
      body:JSON.stringify({level:level,msg:msg})}).catch(function(){});
  }catch(e){}
}
// Global error hooks: surface failures in the status line and forward the
// details to the server via clientLog.
window.onerror = function (msg, src, line, col, err) {
  var summary = 'JS Error: ' + msg + ' line ' + line;
  try { document.getElementById('status').textContent = summary; } catch (e) {}
  var detail = summary + ' src=' + src;
  if (err) detail += ' stack=' + err.stack;
  clientLog('ERR', detail);
  return false;
};
window.onunhandledrejection = function (e) {
  var m = 'Unhandled promise: ' + (e.reason || e);
  try { document.getElementById('status').textContent = m; } catch (e2) {}
  clientLog('ERR', m);
};
(function(){
'use strict';
// Per-tab session id; server-side history and SSE state are keyed on this.
var SID=Math.random().toString(36).slice(2);
var _ver=document.documentElement.dataset.version||'?';
clientLog('INFO','start SID='+SID+' ver='+_ver);
document.getElementById('diag').textContent='v'+_ver+' loading...';

// UI state: history cursor/length, readiness flags, currently shown entry.
var navPos=-1,histLen=0,ready=false,streaming=false,currentEntry=null;
var totalColl=0,loadedColl=0;
var _suppressProviderChange=false;
// Query progress bookkeeping: start timestamp, 1s ticker, phase label.
var _queryStart=0,_queryTimer=null,_queryPhase='';
var _queryWatchdog=null;
// Watchdog ceiling in seconds; the server can override it via the
// llm_ready SSE message (queryTimeoutS).
var MAX_QUERY_S=300;
// Tear down the per-query ticker and watchdog. If a query was actually in
// flight (_queryStart set), briefly show its elapsed time, then fall back
// to "Ready" unless another query started in the meantime.
function _stopQueryTimer(){
  if(_queryTimer){
    clearInterval(_queryTimer);
    _queryTimer=null;
  }
  if(_queryWatchdog){
    clearTimeout(_queryWatchdog);
    _queryWatchdog=null;
  }
  var elapsedS=0;
  if(_queryStart>0){
    elapsedS=Math.round((Date.now()-_queryStart)/1000);
  }
  _queryStart=0;
  if(elapsedS>0){
    setStatus('Done in '+elapsedS+'s');
    setTimeout(function(){
      if(!streaming)setStatus('Ready');
    },3000);
  }
}

// Small DOM helpers.
function $(id){
  return document.getElementById(id);
}
// Set the status line text. The 'busy' flag is accepted but unused in the
// body; the only styling effect is clearing any background on the row.
function setStatus(t,busy){
  $('status').textContent=t;
  var statusRow=$('status-row');
  if(statusRow){
    statusRow.style.background='';
  }
}
function setCost(t){
  var el=$('cost-display');
  if(el)el.textContent=t;
}
function setLLM(t){
  var el=$('llm-display');
  if(el)el.textContent=t;
}

// Enable/disable the query box and Send button. The placeholder reads
// "Loading collections..." only while disabled AND the UI has never
// become ready; otherwise it invites a question.
function setInputEnabled(on){
  var box=$('qi');
  box.disabled=!on;
  $('send-btn').disabled=!on;
  if(on||ready){
    box.placeholder='Ask a question...';
  }else{
    box.placeholder='Loading collections...';
  }
}
// Show/hide the Stop button and, if present, a 'pause-btn'.
// NOTE(review): no element with id 'pause-btn' appears in the shipped
// index.html (only cancel-btn/go-btn) -- confirm whether it is vestigial.
function showCancelBtn(on){
  var display=on?'inline-block':'none';
  var stopBtn=$('cancel-btn');
  if(stopBtn)stopBtn.style.display=display;
  var pauseBtn=$('pause-btn');
  if(pauseBtn)pauseBtn.style.display=display;
}
// Enable/disable Prev/Next/Latest according to the history cursor.
function refreshNav(){
  var atStart=navPos<=0;
  var atEnd=navPos>=histLen-1;
  $('prev-btn').disabled=atStart;
  $('next-btn').disabled=atEnd;
  $('latest-btn').disabled=(histLen===0);
}
// Drive the thin load bar: n of t done, optional label in the diag strip.
// A non-positive total hides the bar; completion hides it after 1.2s.
function updateProgress(n,t,label){
  var wrap=$('load-bar-wrap');
  var bar=$('load-bar');
  var diag=$('diag');
  if(t<=0){
    wrap.style.display='none';
    return;
  }
  wrap.style.display='block';
  var pct=Math.round((n/t)*100);
  bar.style.width=pct+'%';
  if(label)diag.textContent=label;
  if(n>=t){
    setTimeout(function(){wrap.style.display='none';},1200);
  }
}

// Cached history summaries, refreshed from /api/history?list=1.
var histIndex=[];
// Fetch the history index; position the cursor on the newest entry (or
// show the empty-state message) and refresh the nav buttons either way.
function loadHistoryIndex(){
  clientLog('INFO','loadHistoryIndex');
  fetch('/api/history?sid='+SID+'&list=1')
    .then(function(r){return r.json();})
    .then(function(d){
      histIndex=d.index||[];
      // Prefer the server's total; fall back to the index length.
      histLen=typeof d.total==='number'?d.total:histIndex.length;
      clientLog('INFO','histLen='+histLen);
      if(histLen>0){
        // First load: jump to the most recent entry.
        if(navPos<0)navPos=histLen-1;
        loadEntry(navPos);
      } else{showEmpty('No history yet -- ask a question below');}
      refreshNav();
    })
    .catch(function(e){clientLog('WARN','loadHistoryIndex: '+e.message);});
}

// Fetch and render the history entry at 'pos', clamped to [0, histLen-1]
// (negative/undefined means "latest"). The server's echoed total/pos are
// trusted over local state; cb runs only after a successful render.
function loadEntry(pos,cb){
  if(pos<0||pos===undefined)pos=histLen>0?histLen-1:0;
  pos=Math.max(0,Math.min(pos,Math.max(0,histLen-1)));
  clientLog('INFO','loadEntry pos='+pos+'/'+histLen);
  fetch('/api/history?sid='+SID+'&pos='+pos)
    .then(function(r){return r.json();})
    .then(function(d){
      if(typeof d.total==='number')histLen=d.total;
      if(typeof d.pos==='number')navPos=d.pos;
      refreshNav();
      if(d.entry){renderEntry(d.entry);if(cb)cb();}
      else showEmpty('No entry at position '+pos);
    })
    .catch(function(e){clientLog('WARN','loadEntry: '+e.message);});
}

// Replace the chat area with the placeholder message and reset the nav label.
function showEmpty(msg){
  var placeholder=$('chat-placeholder');
  placeholder.style.display='block';
  placeholder.textContent=msg;
  var chatArea=$('chat');
  chatArea.innerHTML='';
  chatArea.appendChild(placeholder);
  $('nav-label').textContent='--';
}

// Render one history entry into #chat: question, answer HTML, the source
// list (with filtered-out items collapsed behind a toggle), a meta line,
// and retry buttons when the entry did not complete with status 'ok'.
function renderEntry(entry){
  currentEntry=entry;
  var chat=$('chat');chat.innerHTML='';
  var div=document.createElement('div');div.className='entry';
  var you=document.createElement('div');you.className='you';
  you.textContent=entry.question;div.appendChild(you);
  var bot=document.createElement('div');bot.className='bot';
  bot.innerHTML=renderAnswer(entry.answer||'');div.appendChild(bot);
  if(entry.sources&&entry.sources.length){
    var sd=document.createElement('div');sd.className='sources';
    // Citation prefix: entry number as shown in the nav ("N.").
    var qpfx=(navPos+1)+'.';
    sd.innerHTML='<b style="font-size:13px">Sources:</b><br>';
    // Indices the server filtered out, and the seq->original index map.
    var _filtSet=new Set((entry.meta&&entry.meta.filtered_indices)||[]);
    var _map2=(entry.meta&&entry.meta.seq_index_map)||null;
    var _seqNum=0;
    var _filteredItems=[];
    entry.sources.forEach(function(s,i){
      var annot=(entry.src_annotations&&entry.src_annotations[i])||'';
      var _isFiltered=_filtSet.has(i);
      if(_isFiltered){
        // Collected separately; rendered behind the "Show filtered" toggle.
        _filteredItems.push({s:s,i:i,annot:annot});
      } else {
        _seqNum++;
        // NOTE(review): _origIdx2 is computed but never used below -- confirm
        // whether showSource should receive it instead of the sequence number.
        var _origIdx2=_map2&&_map2[_seqNum-1]!==undefined?_map2[_seqNum-1]:i;
        var a=document.createElement('a');
        a.textContent='['+qpfx+_seqNum+'] '+s;
        // Closure captures the 0-based sequence position for showSource.
        a.onclick=(function(seq){return function(){showSource(seq);};}(_seqNum-1));
        a.setAttribute('data-src-idx',i);
        sd.appendChild(a);
        // Per-source annotation line; hidden until an annotation exists.
        var an=document.createElement('div');
        an.setAttribute('data-annot-idx',i);
        an.style.cssText='margin-left:2em;font-style:italic;line-height:1.3;margin-bottom:2px';
        if(annot){an.textContent=annot;}else{an.style.display='none';}
        sd.appendChild(an);
        sd.appendChild(document.createElement('br'));
      }
    });
    if(_filteredItems.length){
      // Collapsible section listing sources the server filtered out.
      var _ftoggle=document.createElement('button');
      _ftoggle.className='btn';_ftoggle.style.cssText='margin-top:4px;';
      _ftoggle.textContent='Show '+_filteredItems.length+' filtered';
      var _fsec=document.createElement('div');
      _fsec.style.display='none';
      _filteredItems.forEach(function(fi){
        var _fb=document.createElement('div');
        _fb.style.cssText='margin-left:1.5em;font-style:italic;font-size:12px;margin-top:2px';
        var _fa=document.createElement('span');
        _fa.textContent='[FILTERED OUT] '+fi.s;
        _fa.style.cssText='text-decoration:line-through';
        _fb.appendChild(_fa);
        if(fi.annot){
          var _fan=document.createElement('div');
          _fan.style.cssText='margin-left:1em;line-height:1.3';
          _fan.textContent=fi.annot;
          _fb.appendChild(_fan);
        }
        _fsec.appendChild(_fb);
      });
      _ftoggle.onclick=function(){
        var _vis=_fsec.style.display!=='none';
        _fsec.style.display=_vis?'none':'block';
        _ftoggle.textContent=_vis?'Show '+_filteredItems.length+' filtered':'Hide filtered';
      };
      sd.appendChild(_ftoggle);
      sd.appendChild(_fsec);
    }
    div.appendChild(sd);
    // Rendering a stored entry supersedes any in-flight annotation stream.
    window._pendingAnnotations=null;

  }
  // Meta line: entry number, timestamp, elapsed time, token counts, model.
  var meta=document.createElement('div');meta.className='meta-line';
  var m=entry.meta||{};
  meta.textContent='#'+(navPos+1)+'  '+(entry.ts||'')
    +(m.elapsed?'  '+m.elapsed+'s':'')
    +(m.in_tokens?'  in:'+m.in_tokens+' out:'+m.out_tokens:'')
    +(m.model?'  '+m.model:'');
  div.appendChild(meta);
  if(entry.status&&entry.status!=='ok'){
    // Failed/partial entry: warning box with two retry options.
    var _rb=document.createElement('div');
    _rb.className='warn-box';
    _rb.style.cssText='margin-top:4px;display:flex;align-items:center;gap:8px';
    var _rmsg={'no_results':'No relevant content found','retrieval_error':'Retrieval error','llm_error':'LLM generation failed'}[entry.status]||entry.status;
    var _rs=document.createElement('span');_rs.textContent='\u26a0 '+_rmsg;_rb.appendChild(_rs);
    // Button 1: llm_error retries synthesis only; otherwise full retry "as-was".
    var _rbtn=document.createElement('button');
    _rbtn.className='btn';
    _rbtn.textContent=entry.status==='llm_error'?'Retry synthesis':'Retry as-was';
    (function(qid,st){_rbtn.onclick=function(){_doRetry(qid,st==='llm_error'?'synthesis':'full','as-was');};})(entry.qid,entry.status);
    _rb.appendChild(_rbtn);
    // Button 2: always a full retry using the current UI settings.
    var _rbtn2=document.createElement('button');
    _rbtn2.className='btn';
    _rbtn2.textContent=entry.status==='llm_error'?'Re-search (full)':'Retry with current settings';
    (function(qid){_rbtn2.onclick=function(){_doRetry(qid,'full','current');};})(entry.qid);
    _rb.appendChild(_rbtn2);
    div.appendChild(_rb);
  }
  chat.appendChild(div);
  $('nav-label').textContent='Entry '+(navPos+1)+' of '+histLen;
  chat.scrollTop=0;
}


// Re-run a failed query. mode: 'synthesis' (reuse retrieval, regenerate) or
// 'full'; collmode: 'as-was' (server-stored settings) or 'current' (send
// the currently active collection badges and annotate toggle). Starts the
// same 1s ticker + watchdog used for normal queries; completion arrives
// over SSE, so the fetch response only reports immediate errors.
function _doRetry(qid,mode,collmode){
  collmode=collmode||'current';
  if(streaming){setStatus('Cannot retry while a query is in progress');return;}
  streaming=true;setInputEnabled(false);showCancelBtn(true);
  _queryStart=Date.now();
  _queryPhase=mode==='synthesis'?'Re-synthesizing...':(collmode==='as-was'?'Retrying as-was...':'Retrying with current settings...');
  setStatus(_queryPhase,true);
  if(_queryTimer)clearInterval(_queryTimer);
  // Ticker: append elapsed seconds to the phase label every second.
  _queryTimer=setInterval(function(){
    if(!streaming){clearInterval(_queryTimer);_queryTimer=null;return;}
    setStatus(_queryPhase+'  ('+Math.round((Date.now()-_queryStart)/1000)+'s)',true);
  },1000);
  if(_queryWatchdog)clearTimeout(_queryWatchdog);
  // Watchdog: give up and re-enable the UI after MAX_QUERY_S seconds.
  _queryWatchdog=setTimeout(function(){
    if(!streaming)return;
    _stopQueryTimer();showCancelBtn(false);streaming=false;setInputEnabled(ready);
    setStatus('Retry timed out -- please try again');
  },MAX_QUERY_S*1000);
  // Snapshot the active collection badges from the DOM.
  var _retryActive=[];
  document.querySelectorAll('.coll-badge.active').forEach(function(x){_retryActive.push(x.textContent);});
  var _retryAnnotate=$('annotate-toggle')&&$('annotate-toggle').checked;
  fetch('/api/retry',{method:'POST',headers:{'Content-Type':'application/json'},
    body:JSON.stringify({sid:SID,qid:qid,mode:mode,collmode:collmode,
      activeColls:collmode==='current'?_retryActive:undefined,
      annotate:collmode==='current'?_retryAnnotate:undefined})})
    .then(function(r){return r.json();})
    .then(function(rr){if(rr.error){_stopQueryTimer();showCancelBtn(false);streaming=false;setInputEnabled(ready);setStatus('Retry error: '+rr.error);}})
    .catch(function(err){_stopQueryTimer();showCancelBtn(false);streaming=false;setInputEnabled(ready);setStatus('Retry net error: '+err.message);});
}

// Convert an answer string to HTML: escape &/</>, apply minimal markdown
// (**bold**, _italic_), then turn [N] and [N, M, ...] citations into
// clickable <cite> elements prefixed with the current entry number.
function renderAnswer(text){
  var prefix=currentEntry?(navPos+1)+'.':'';
  function cite(n){
    return '<cite onclick="showSource('+(parseInt(n)-1)+')">['+prefix+n+']</cite>';
  }
  var html=text
    .replace(/&/g,'&amp;')
    .replace(/</g,'&lt;')
    .replace(/>/g,'&gt;')
    .replace(/[*][*]([^*]+)[*][*]/g,'<strong>$1</strong>')
    .replace(/_([^_]+)_/g,'<em>$1</em>');
  // Comma-grouped citations first, then lone ones.
  html=html.replace(/\[(\d+(?:,\s*\d+)+)\]/g,function(_,nums){
    return nums.split(',').map(function(n){return cite(n.trim());}).join('');
  });
  html=html.replace(/\[(\d+)\]/g,function(_,n){return cite(n);});
  return html;
}

// Server-sent events channel. On error, reconnects with linear backoff
// (3s per consecutive failure, capped at 30s); _sseRetry counts failures
// and resets to 0 on a successful open.
var evtSrc=null,_sseRetry=0,_sseTimer=null;
function connectSSE(){
  // Close any prior connection before opening a new one.
  if(evtSrc){try{evtSrc.close();}catch(e){}}
  clientLog('INFO','SSE connect attempt '+_sseRetry);
  evtSrc=new EventSource('/api/events?sid='+SID);
  evtSrc.onopen=function(){
    _sseRetry=0;
    clientLog('INFO','SSE OPEN');
    $('diag').textContent='v'+_ver+' connected -- loading collections...';
    setStatus('Connected -- loading collections...',true);
  };
  evtSrc.onerror=function(){
    // A reconnect is already scheduled -- don't stack timers.
    if(_sseTimer)return;
    var state=evtSrc.readyState;
    clientLog('WARN','SSE error state='+state+' retry='+_sseRetry);
    evtSrc.close();
    var delay=Math.min(3000*(1+_sseRetry),30000);
    _sseRetry++;
    setStatus('Server offline -- retrying in '+(delay/1000)+'s...',true);
    _sseTimer=setTimeout(function(){_sseTimer=null;connectSSE();},delay);
  };
  evtSrc.onmessage=_sseOnMessage;
}
connectSSE();
// Single dispatcher for every server-sent event. Messages are JSON with a
// `type` discriminator; the handlers below are independent `if` blocks (not
// else-if), ordered roughly by startup/query phase: connected -> llm_ready ->
// history_ready -> collection loading -> query streaming (sources /
// annotation / answer) -> cost/llm updates -> errors.
function _sseOnMessage(e){
  var d;
  try{d=JSON.parse(e.data);}catch(ex){clientLog('ERR','SSE JSON: '+e.data.slice(0,60));return;}
  clientLog('INFO','SSE type='+d.type+(d.text?' txt='+String(d.text).slice(0,50):''));

  // ── PHASE 1: LLM ready ───────────────────────────────────────────────────
  if(d.type==='llm_ready'){
    // Populate Ollama model options
    var _psel=$('provider-sel');
    var _oold=_psel?_psel.querySelector('option[value="ollama"]'):null;
    if(_oold)_oold.remove();
    if(d.ollamaModels&&d.ollamaModels.length){
      d.ollamaModels.forEach(function(m){
        if(!_psel)return;
        var _oo=document.createElement('option');
        _oo.value='ollama:'+m;_oo.textContent='Ollama: '+m;
        _psel.appendChild(_oo);
      });
    }
    // Restore saved provider selection  --  d.provider is the full selector value
    if(d.provider&&_psel){
      _suppressProviderChange=true;
      _psel.value=d.provider;
      // If the option doesn't exist yet (race), fall back to first ollama option
      if(_psel.value!==d.provider){
        var _fo=_psel.querySelector('option[value^="ollama:"]');
        if(_fo)_psel.value=_fo.value;
      }
      _suppressProviderChange=false;
    }
    if(d.llm)setLLM(d.llm);
    // Server may override the client-side query watchdog timeout.
    if(typeof d.queryTimeoutS==='number'&&d.queryTimeoutS>0)MAX_QUERY_S=d.queryTimeoutS;
    // Enable provider selector
    if(_psel)_psel.disabled=false;
    setStatus('LLM ready -- loading history...',true);
  }

  // ── PHASE 2: History ready ───────────────────────────────────────────────
  if(d.type==='history_ready'){
    if(typeof d.useContext==='boolean'){var _ct=$('ctx-toggle');if(_ct)_ct.checked=d.useContext;}
    loadHistoryIndex();
    // Enable nav, history controls, AND query input
    // User can query immediately; collections load in background
    var _hpb=$('hist-panel-btn');if(_hpb)_hpb.disabled=false;
    var _ctg=$('ctx-toggle');if(_ctg)_ctg.disabled=false;
    setInputEnabled(true);
    setStatus('History ready -- loading collections...',true);
  }

  // ── PHASE 3: Collections loading ─────────────────────────────────────────
  if(d.type==='connected'){setStatus('Connected -- loading LLM options...',true);}
  if(d.type==='init'){
    totalColl=d.total||0;loadedColl=0;
    updateProgress(0,totalColl,'v'+_ver+' loading 0/'+totalColl+'...');
    setStatus('Loading 0 of '+totalColl+' collections...',true);
  }
  // 'status' doubles as collection-load progress (has numeric d.loaded) and
  // free-text query-phase updates (no d.loaded).
  if(d.type==='status'){
    var isColl=(typeof d.loaded==='number');
    if(isColl){loadedColl=d.loaded;updateProgress(loadedColl,totalColl,'v'+_ver+' '+d.text);if(!streaming)setStatus(d.text,true);}
    else{_queryPhase=d.text;if(_queryTimer&&_queryStart>0)setStatus(d.text+' ('+Math.round((Date.now()-_queryStart)/1000)+'s)',true);else setStatus(d.text,true);}
  }
  // All collections loaded (some possibly failed): final startup state.
  if(d.type==='ready'){
    ready=true;loadedColl=d.loaded;totalColl=d.collections.length;
    var _at=$('annotate-toggle');if(_at&&d.annotateDefault!==undefined)_at.checked=!!d.annotateDefault;
    updateProgress(d.loaded,d.loaded,'');
    renderCollBadges(d.collections,d.failed||[]);
    var failNote=d.failed&&d.failed.length?' ('+d.failed.length+' failed: '+d.failed.join(', ')+')':'';
    setStatus(d.loaded+' of '+d.collections.length+' collections ready'+failNote);
    setTimeout(function(){if(!streaming)setStatus('Ready');},5000);
    // Enable query input -- final step
    setInputEnabled(true);refreshNav();
    $('diag').style.display='none';
  }
  // Server paused the pipeline (user Stop): surface the Go button to resume.
  if(d.type==='stopped'){
    _stopQueryTimer();
    // All phases pause in place -- show Go to resume
    var _gbx=$('go-btn'); if(_gbx) _gbx.style.display='';
    setStatus('Stopped at ' + (d.phase||'') + ' -- press Go to continue');
  }
  // Final answer arrived: tear down streaming UI and reload the history tail
  // so loadEntry renders the persisted entry (replacing the pending div).
  if(d.type==='answer'){
    var _gbz=$('go-btn'); if(_gbz) _gbz.style.display='none';
    _stopQueryTimer();showCancelBtn(false);
    streaming=false;setInputEnabled(true);
    // Remove pending entry div -- loadEntry will render the final entry properly
    if(window._pendingEntryDiv){window._pendingEntryDiv.remove();window._pendingEntryDiv=null;}
    window._pendingSources=null;window._pendingChunks=null;
    fetch('/api/history?sid='+SID+'&list=1').then(function(r){return r.json();})
    .then(function(d2){histIndex=d2.index||[];histLen=typeof d2.total==='number'?d2.total:histIndex.length;
      navPos=histLen-1;loadEntry(navPos);refreshNav();}).catch(function(){});}
  // Retrieval results: build (or refresh) the pending entry's source list.
  if(d.type==='sources'){
    window._pendingSources=d;
    var _chat=$('chat');
    if(_chat){
      // Clear chat only for fresh queries, not retries
      var _ph=$('chat-placeholder');if(_ph)_ph.style.display='none';
      if(!d.isRetry){var _chat2=$('chat');if(_chat2)_chat2.innerHTML='';}
      // Create pending entry div if needed
      if(!window._pendingEntryDiv){
        var _pd=document.createElement('div');_pd.className='entry';_pd.id='pending-entry';
        // Add question text
        var _pq=document.createElement('div');_pq.className='you';
        _pq.textContent=d.isRetry?(currentEntry&&currentEntry.question||''):lastQuery||'';
        _pd.appendChild(_pq);
        // Add status line
        var _ps=document.createElement('div');_ps.className='bot';
        _ps.style.cssText='font-style:italic';
        _ps.textContent='Annotating sources...';
        _ps.id='pending-status';
        _pd.appendChild(_ps);
        _chat.appendChild(_pd);_chat.scrollTop=_chat.scrollHeight;
        window._pendingEntryDiv=_pd;
      }
      // _pd now refers to the (possibly pre-existing) pending entry div.
      var _pd=window._pendingEntryDiv;
      // Render sources list
      var _psd=_pd.querySelector('.sources')||document.createElement('div');
      _psd.className='sources';_psd.innerHTML='<b style="font-size:13px">Sources:</b><br>';
      (d.lines||[]).forEach(function(s,i){
        var _pa=document.createElement('a');
        _pa.textContent='[?.'+(i+1)+'] '+s;
        _pa.setAttribute('data-src-idx',i);
        _psd.appendChild(_pa);
        // Hidden per-source annotation slot, filled by 'annotation' events.
        var _an=document.createElement('div');
        _an.setAttribute('data-annot-idx',i);
        _an.style.cssText='margin-left:2em;font-style:italic;line-height:1.3;margin-bottom:2px';
        _an.style.display='none';
        _psd.appendChild(_an);
        _psd.appendChild(document.createElement('br'));
      });
      window._pendingChunks=d.chunks;
      if(!_pd.querySelector('.sources'))_pd.appendChild(_psd);
      else _pd.replaceChild(_psd,_pd.querySelector('.sources'));
      _chat.scrollTop=_chat.scrollHeight;
    }
  }
  if(d.type==='annotation'){
    // Find annotation div -- either in pending entry or buffer for later
    var _found=false;
    if(window._pendingEntryDiv){
      var _an2=window._pendingEntryDiv.querySelector('[data-annot-idx="'+d.index+'"]');
      if(_an2){
        if(d.irrelevant){
          // Mark source as irrelevant -- strikethrough only
          var _src=window._pendingEntryDiv.querySelector('[data-src-idx="'+d.index+'"]');
          if(_src){_src.style.textDecoration='line-through';}
          _an2.style.display='none';
        } else if(d.text){
          _an2.textContent=d.text;_an2.style.display='';
        }
        _found=true;
      }
    }
    if(!_found){
      // Annotation arrived before its source div existed: buffer it by index.
      if(!window._pendingAnnotations)window._pendingAnnotations={};
      window._pendingAnnotations[d.index]={text:d.text,irrelevant:d.irrelevant};
    }
  }
  if(d.type==='annot_warn'){
    // Show annotation warning in pending entry if visible, else status bar
    setStatus('⚠ '+d.text);
    if(window._pendingEntryDiv){
      var _aw=document.createElement('div');
      _aw.className='warn-box';
      _aw.textContent='⚠ '+d.text;
      window._pendingEntryDiv.insertBefore(_aw,window._pendingEntryDiv.firstChild.nextSibling);
    }
  }
  // Annotation hard-blocked server-side: offer Retry / Skip choice banner.
  if(d.type==='annot_blocked'){
    setStatus('⚠ '+d.text);
    var _ab=document.getElementById('annot-blocked-banner');
    if(_ab)_ab.remove();
    var _abDiv=document.createElement('div');
    _abDiv.id='annot-blocked-banner';
    _abDiv.className='warn-box';
    _abDiv.style.cssText='margin:4px 0;display:flex;align-items:center;gap:8px;flex-wrap:wrap';
    var _abMsg=document.createElement('span');
    _abMsg.textContent='⚠ '+d.text;
    _abDiv.appendChild(_abMsg);
    var _abRetry=document.createElement('button');
    _abRetry.className='btn';
    _abRetry.textContent='Retry annotation';
    _abRetry.onclick=function(){
      _abDiv.remove();
      fetch('/api/annot-choice',{method:'POST',headers:{'Content-Type':'application/json'},
        body:JSON.stringify({sid:SID,choice:'retry'})});
    };
    var _abSkip=document.createElement('button');
    _abSkip.className='btn';
    _abSkip.textContent='Continue without annotation';
    _abSkip.onclick=function(){
      _abDiv.remove();
      fetch('/api/annot-choice',{method:'POST',headers:{'Content-Type':'application/json'},
        body:JSON.stringify({sid:SID,choice:'skip'})});
    };
    _abDiv.appendChild(_abRetry);
    _abDiv.appendChild(_abSkip);
    // Insert into pending entry if visible, else into chat
    var _target=window._pendingEntryDiv||$('chat');
    if(_target)_target.appendChild(_abDiv);
  }
  // Lightweight single-field updates.
  if(d.type==='cost')setCost(d.text);
  if(d.type==='llm')setLLM(d.text);
  if(d.type==='error'){
    _stopQueryTimer();showCancelBtn(false);
    streaming=false;setInputEnabled(ready);
    // Show error in both status bar and chat area
    setStatus('Error: '+d.text.slice(0,80));
    var _chat=$('chat');
    if(_chat){
      var _ed=document.createElement('div');_ed.className='entry';
      var _eq=document.createElement('div');_eq.className='you';
      // NOTE(review): both ternary arms are '' and _eq is never appended to
      // _ed -- this question div is dead code as written.
      _eq.textContent=currentEntry?'':'';
      var _eb=document.createElement('div');_eb.className='bot';
      _eb.style.color='#c00';
      _eb.textContent=d.text;
      _ed.appendChild(_eb);_chat.appendChild(_ed);
      _chat.scrollTop=_chat.scrollHeight;
    }
  }
};

// Ask the server to reload its collections, then re-establish the SSE stream
// after a short pause so the fresh load progress is streamed back to us.
function reloadCollections(){
  var reloadBtn=$('reload-colls-btn');
  if(reloadBtn)reloadBtn.disabled=true;
  setStatus('Reloading collections...',true);
  fetch('/api/reload',{method:'POST'})
    .then(function(r){return r.json();})
    .then(function(){
      // Give the server a moment to settle before reconnecting the stream.
      setTimeout(function(){
        if(evtSrc){evtSrc.close();evtSrc=null;}
        connectSSE();
        if(reloadBtn)reloadBtn.disabled=false;
      },800);
    })
    .catch(function(err){
      setStatus('Reload error: '+err.message);
      if(reloadBtn)reloadBtn.disabled=false;
    });
}
// localStorage key under which the active-collection selection is persisted.
var _COLL_SEL_KEY='ragweed-coll-sel';
// Persist the names of the currently-active collection badges.
// localStorage failures (private mode, quota) are deliberately ignored.
function _saveCollSel(){
  var host=$('colls');
  if(!host)return;
  var picked=[];
  var badges=host.querySelectorAll('.coll-badge.active');
  for(var i=0;i<badges.length;i++)picked.push(badges[i].textContent);
  try{localStorage.setItem(_COLL_SEL_KEY,JSON.stringify(picked));}catch(ignored){}
}
// Read the persisted badge selection; null when absent or unreadable.
function _loadCollSel(){
  var stored=null;
  try{
    var raw=localStorage.getItem(_COLL_SEL_KEY);
    stored=raw?JSON.parse(raw):null;
  }catch(e){
    stored=null;
  }
  return stored;
}
// Paint one clickable badge per collection name. Failed collections render
// inert; the rest restore their saved on/off state (default: on). Any toggle,
// and the initial render itself, pushes the active set to the server.
function renderCollBadges(names,failed){
  failed=failed||[];
  var host=$('colls');
  host.innerHTML='';
  var remembered=_loadCollSel();
  // Gather the active badge names and sync them to the server session.
  var pushActive=function(){
    var picked=[];
    host.querySelectorAll('.coll-badge.active').forEach(function(x){picked.push(x.textContent);});
    fetch('/api/set',{method:'POST',headers:{'Content-Type':'application/json'},
      body:JSON.stringify({sid:SID,key:'collections',value:picked.join(',')})});
  };
  names.forEach(function(name){
    var badge=document.createElement('span');
    var broken=failed.indexOf(name)>=0;
    // Restore saved selection if available; otherwise default to active.
    var on=broken?false:(remembered?remembered.indexOf(name)>=0:true);
    badge.className='coll-badge'+(broken?' failed':(on?' active':''));
    badge.textContent=name;
    badge.title=broken?'Failed to load':'Click to toggle';
    if(!broken){
      badge.onclick=function(){
        badge.classList.toggle('active');
        pushActive();
        _saveCollSel();
      };
    }
    host.appendChild(badge);
  });
  // Always sync active collection state to server on render.
  pushActive();
}

// Source-viewer state: _srcRaw = show plain text (vs rendered HTML),
// _srcIdx = chunk index currently shown (-1 = none), _srcDanger = render
// HTML without sanitization.
var _srcRaw=true;
var _srcIdx=-1;
// Handler-attribute names neutralised by _srcFixHtml (renamed on* -> no*).
// Fixed: 'onresent' -> 'onreset' and 'onpagfehide' -> 'onpagehide' (the typos
// left the real events un-neutralised), and duplicate 'onerror'/'onfocus'
// entries removed.
var _srcOnList=['onmousedown','onmouseup','onclick','ondblclick','onmousemove','onchange','onmouseover','onmouseout','onwheel','onabort','oncanplay','oncanplaythrough','oncuechange','ondurationchange','onemptied','onended','onerror','onloadeddata','onloadedmetadata','onloadstart','onpause','onplay','onplaying','onprogress','onratechange','onseeked','onseeking','onstalled','onsuspend','ontimeupdate','onvolumechange','onwaiting','ontoggle','ondrag','ondragend','ondragenter','ondragleave','ondragover','ondragstart','ondrop','onscroll','oncopy','onpaste','oncut','onbeforeprint','onafterprint','onbeforeunload','onfocus','onblur','oncontextmenu','oninvalid','onreset','onsearch','onselect','onsubmit','onkeydown','onkeypress','onkeyup','onhashchange','onload','onmessage','onoffline','ononline','onpagehide','onpageshow','onpopstate','onresize','onstorage','onunload','oninput'];
var _srcDanger=false;
// Best-effort neutralisation of active content before a source chunk is
// rendered via innerHTML in "Safer" mode: <style>/<script> openers are turned
// into a hidden <div>, known on* handler names are renamed to no* so they no
// longer bind, and bare <a> tags get target="_blank" rel="noopener".
// NOTE(review): this is a plain substring rewrite -- it also mangles matching
// text inside visible content, and any handler attribute not in _srcOnList
// slips through. Treat as display hardening, not a real HTML sanitizer.
function _srcFixHtml(s){
  var hide='<div style="display:none;">';
  var r=s.replaceAll(/<style/gi,hide);
  r=r.replaceAll(/<[/]style/gi,'</div style');
  r=r.replaceAll(/<script/gi,hide);
  r=r.replaceAll(/<[/]script/gi,'</div script');
  // Rename each known handler attribute name, e.g. onclick -> noclick.
  for(var i=0;i<_srcOnList.length;i++){var from=new RegExp(_srcOnList[i],'gi');r=r.replaceAll(from,'no'+_srcOnList[i].slice(2));}
  // Add target/rel only to <a> tags that don't already declare a target.
  r=r.replace(/(<a\s(?![^>]*\starget=)[^>]*)(>)/gi,'$1 target="_blank" rel="noopener"$2');
  return r;
}
// Render the currently selected source chunk into the modal body.
// Two modes: raw text (pre-wrap, heuristically reflowed) and HTML. HTML mode
// itself has two levels driven by _srcDanger: "Safer" (sanitized through
// _srcFixHtml) and "Danger" (the original markup, unsanitized).
function _srcRender(){
  if(_srcIdx<0||!currentEntry)return;
  var t=currentEntry.src_chunks[_srcIdx]||'';
  var el=$('src-text');
  var hasHtml=/<[a-zA-Z]/.test(t);
  var hbtn=$('src-html-btn');
  var sbtn=$('src-safe-btn');
  if(!hasHtml){_srcRaw=true;} // nothing to render as HTML -- force text mode
  if(hbtn){hbtn.style.display=hasHtml?'':'none';hbtn.className=_srcRaw?'btn':'btnp';}
  if(sbtn){sbtn.style.display=(!_srcRaw)?'':'none';sbtn.className='btnp';sbtn.textContent=_srcDanger?'Danger':'Safer';}
  if(_srcRaw){
    var td=t;
    // Heuristic reflow: join hard-wrapped lines that end mid-sentence,
    // then collapse runs of blank lines to a single paragraph break.
    if(/[ \t]\n/.test(td)){
      td=td.replace(/[ \t]+\n/g,'\n');
      td=td.replace(/([^.!?:])\n([^\n])/g,'$1 $2');
      td=td.replace(/\n{2,}/g,'\n\n');
    }
    el.style.whiteSpace='pre-wrap';el.textContent=td;
  }else{
    el.style.whiteSpace='normal';
    // Bug fix: both ternary arms previously called _srcFixHtml(t), making the
    // Danger/Safer toggle a no-op. Danger mode now shows the raw HTML.
    el.innerHTML=_srcDanger?t:_srcFixHtml(t);
  }
}
// Open the source modal for display index i (0-based). History entries may
// carry meta.seq_index_map translating the sequential citation number shown
// in the answer into the original chunk index used by src_chunks / sources /
// src_urls.
function showSource(i){
  // Resolve sequential display number to original chunk index via map if present
  var _map=currentEntry&&currentEntry.meta&&currentEntry.meta.seq_index_map;
  var _displayNum=i+1;  // sequential display number (1-based)
  var _origIdx=(_map&&_map[i]!==undefined)?_map[i]:i;
  if(!currentEntry||!currentEntry.src_chunks||!currentEntry.src_chunks[_origIdx])return;
  _srcIdx=_origIdx;
  var _t=currentEntry.src_chunks[_origIdx]||'';
  // Default to rendered-HTML mode when the chunk looks like markup.
  _srcRaw=!/<[a-zA-Z]/.test(_t);
  _srcDanger=false;
  var lbl=currentEntry.sources&&currentEntry.sources[_origIdx]?currentEntry.sources[_origIdx]:'Source';
  var _srcNum=(navPos+1)+'.'+_displayNum;
  var url=currentEntry.src_urls&&currentEntry.src_urls[_origIdx]?currentEntry.src_urls[_origIdx]:'';
  // Make title a direct link when URL present; hide the separate Open button
  var _stitle=$('src-title');
  if(url){
    // Only the filename part is clickable -- extract it from lbl e.g. "[a2e.co] file.pdf p.3 [80%]"
    // NOTE(review): assumes labels follow the "[collection] name [suffix]"
    // shape; when the regex misses, the whole label becomes the link text.
    var _lblMatch=lbl.match(/^(\[.*?\]\s*)(.+?)(\s*(p\.\d+)?\s*(\[\d+%\])?)$/);
    var _prefix=_lblMatch?_lblMatch[1]:'';
    var _fname=_lblMatch?_lblMatch[2]:lbl;
    var _suffix=_lblMatch?(_lblMatch[3]||''):'';
    _stitle.innerHTML='[#'+_srcNum+'] '+_prefix+'<a href="'+url+'" target="_blank" rel="noopener" style="color:inherit;text-decoration:underline">'+_fname+'</a>'+_suffix;
  } else {
    _stitle.textContent='[#'+_srcNum+'] '+lbl;
  }
  var alink=$('src-url-link');
  if(alink){alink.style.display='none';}
  // Show Copies button if we have a collection to look up
  var _copiesBtn=$('src-copies-btn');
  var _copiesList=$('src-copies-list');
  if(_copiesBtn){
    // Extract collection from label e.g. "[a2e.co] file.html [80%]"
    var _colM=lbl.match(/^\[([^\]]+)\]/);
    var _srcColl=_colM?_colM[1]:'';
    var _srcRel=currentEntry.src_urls&&currentEntry.src_urls[_origIdx]?currentEntry.src_urls[_origIdx]:'';
    // Strip base URL if already prepended (src_urls may have full URL)
    if(_srcRel.startsWith('http')){var _slashIdx=_srcRel.indexOf('/',8);_srcRel=_slashIdx>=0?_srcRel.slice(_slashIdx+1):_srcRel;}
    if(_srcColl&&_srcRel){
      _copiesBtn.style.display='';
      _copiesBtn.dataset.collection=_srcColl;
      _copiesBtn.dataset.relPath=_srcRel;
    } else {
      _copiesBtn.style.display='none';
    }
    // Reset any copies list left over from a previous source view.
    if(_copiesList){_copiesList.style.display='none';_copiesList.innerHTML='';}
  }
  _srcRender();
  $('src-modal').classList.add('open');
}
// Exposed globally: renderAnswer emits inline onclick="showSource(n)" handlers.
window.showSource=showSource;
// Modal plumbing for the source viewer and help panel.
function closeModal(id){document.getElementById(id).classList.remove('open');}
$('src-close').onclick=function(){closeModal('src-modal');};
var _copiesBtnEl=$('src-copies-btn');
// "Copies" button: list every known location of the current source document.
if(_copiesBtnEl)_copiesBtnEl.onclick=function(){
  var _coll=this.dataset.collection;
  var _rel=this.dataset.relPath;
  if(!_coll||!_rel)return;
  var _list=$('src-copies-list');
  if(!_list)return;
  _list.innerHTML='Loading copies...';
  _list.style.display='';
  fetch('/api/copies?collection='+encodeURIComponent(_coll)+'&rel_path='+encodeURIComponent(_rel)+'&sid='+SID)
    .then(function(r){return r.json();})
    .then(function(d){
      if(!d.copies||!d.copies.length){_list.innerHTML='No other copies found.';return;}
      if(d.copies.length===1){_list.innerHTML='No other copies found (this is the only known location).';return;}
      var html='<b>Known copies ('+d.copies.length+'):</b><br>';
      // NOTE(review): cp.url / cp.collection / cp.rel_path go into innerHTML
      // unescaped -- assumes the /api/copies payload is clean; verify server.
      d.copies.forEach(function(cp){
        html+='<a href="'+cp.url+'" target="_blank" rel="noopener" style="display:block;margin-top:3px;text-decoration:underline">['+cp.collection+'] '+cp.rel_path+'</a>';
      });
      _list.innerHTML=html;
    })
    .catch(function(e){_list.innerHTML='Error: '+e.message;});
};
// Raw-text vs rendered-HTML toggle; leaving HTML mode also resets Danger.
$('src-html-btn').onclick=function(){_srcRaw=!_srcRaw;if(_srcRaw)_srcDanger=false;_srcRender();};
$('src-safe-btn').onclick=function(){_srcDanger=!_srcDanger;_srcRender();};
// Clicking the backdrop (not the content) closes the modal / help panel.
$('src-modal').onclick=function(e){if(e.target===$('src-modal'))closeModal('src-modal');};
$('help-btn').onclick=function(){$('help-panel').classList.add('open');};
$('help-close').onclick=function(){closeModal('help-panel');};
$('help-panel').onclick=function(e){if(e.target===$('help-panel'))closeModal('help-panel');};

// Global key handling: Escape always closes an open modal; Space/Enter also
// close one, but only while a modal is open and focus is not in a form field.
document.addEventListener('keydown',function(e){
  var tagName=(e.target||{}).tagName||'';
  var typing=(tagName==='INPUT'||tagName==='TEXTAREA'||tagName==='SELECT');
  var srcOpen=$('src-modal').classList.contains('open');
  var helpOpen=$('help-panel').classList.contains('open');
  var closeKey=(e.key==='Escape')||((srcOpen||helpOpen)&&!typing&&(e.key===' '||e.key==='Enter'));
  if(!closeKey)return;
  if(srcOpen){e.preventDefault();closeModal('src-modal');return;}
  if(helpOpen){e.preventDefault();closeModal('help-panel');return;}
});

// History navigation buttons: step back, step forward, jump to newest entry.
$('prev-btn').onclick=function(){
  clientLog('INFO','prev navPos='+navPos+' histLen='+histLen);
  if(navPos<=0)return;
  navPos--;
  loadEntry(navPos);
};
$('next-btn').onclick=function(){
  clientLog('INFO','next navPos='+navPos+' histLen='+histLen);
  if(navPos>=histLen-1)return;
  navPos++;
  loadEntry(navPos);
};
$('latest-btn').onclick=function(){
  navPos=histLen-1;
  loadEntry(navPos);
};

// Submit the query box contents. Several local mini-syntaxes are intercepted
// before anything is sent: '?' opens help, '.N' opens source N of the current
// entry, '#N' / 'N.M' navigates history (optionally opening source M), and a
// leading '!' or '/' or a bare '$' marks a server-side command that skips the
// streaming UI. Real queries start the elapsed-time ticker and a watchdog
// that re-enables input if no answer/error ever arrives over SSE.
function sendQuery(){
  var q=$('qi').value.trim();
  if(!q||streaming)return;
  if(q==='?'){$('help-panel').classList.add('open');return;}
  var srcM=q.match(/^\.(\d+)$/);
  var navM=q.match(/^#?(\d+)(?:\.(\d+))?$/);
  if(srcM){$('qi').value='';$('qi').style.height='auto';window.showSource(parseInt(srcM[1])-1);return;}
  if(navM){
    $('qi').value='';$('qi').style.height='auto';
    var tq=parseInt(navM[1]),tr=navM[2]?parseInt(navM[2]):null;
    // Prefer matching by persistent qid; fall back to positional index.
    var pos=-1;
    for(var i=0;i<histIndex.length;i++){
      var hqid=(histIndex[i].qid!=null)?histIndex[i].qid:(i+1);
      if(hqid===tq){pos=i;break;}
    }
    if(pos<0&&tq>=1&&tq<=histLen)pos=tq-1;
    if(pos<0){setStatus('Entry '+tq+' not found');return;}
    navPos=pos;refreshNav();
    loadEntry(pos,function(){if(tr!==null)setTimeout(function(){window.showSource(tr-1);},200);});
    return;
  }
  // Commands bypass the two-word minimum and the streaming UI below.
  var isCmd=(q[0]==='!'||q[0]==='/'||q==='$');
  if(!isCmd&&q.split(/\s+/).filter(Boolean).length<2){
    setStatus('Enter at least 2 words to search');return;
  }
  if(!ready&&!isCmd){setStatus('Collections still loading -- results may be partial...',true);}
  // Record in the in-memory recall buffer (kept to the last 100 entries).
  _localHistory.push(q);if(_localHistory.length>100)_localHistory.shift();
  _localHistPos=-1;_qiHistPos=-1;
  $('qi').value='';$('qi').style.height='auto';
  if(isCmd){setStatus('Running command...',true);}
  else{
    streaming=true;setInputEnabled(false);
    showCancelBtn(true);
    _queryStart=Date.now();_queryPhase='Querying...';
    // Elapsed-seconds ticker shown next to the current query phase.
    if(_queryTimer)clearInterval(_queryTimer);
    _queryTimer=setInterval(function(){
      if(!streaming){clearInterval(_queryTimer);_queryTimer=null;return;}
      setStatus(_queryPhase+' ('+Math.round((Date.now()-_queryStart)/1000)+'s)',true);
    },1000);
    // Watchdog: re-enable input if SSE never delivers answer/error
    if(_queryWatchdog)clearTimeout(_queryWatchdog);
    _queryWatchdog=setTimeout(function(){
      if(!streaming)return;
      clientLog('WARN','query watchdog fired after '+MAX_QUERY_S+'s -- re-enabling input');
      _stopQueryTimer();showCancelBtn(false);streaming=false;setInputEnabled(ready);
      setStatus('Query timed out after '+MAX_QUERY_S+'s -- please try again');
    },MAX_QUERY_S*1000);
    setStatus('Querying...',true);
  }
  var provider=$('provider-sel').value;
  var active=[];
  document.querySelectorAll('.coll-badge.active').forEach(function(x){active.push(x.textContent);});
  var _doAnnotate=$('annotate-toggle')&&$('annotate-toggle').checked;
  // Remembered so the pending entry (sources SSE event) can echo the question.
  window.lastQuery=q;
  fetch('/api/query',{method:'POST',headers:{'Content-Type':'application/json'},
    body:JSON.stringify({sid:SID,query:q,provider:provider,collections:active,annotate:_doAnnotate})})
    .then(function(r){return r.json();})
    .then(function(qr){
      if(qr.error){setStatus('Error: '+qr.error);if(!isCmd){_stopQueryTimer();showCancelBtn(false);streaming=false;setInputEnabled(ready);}}
      else if(isCmd)setStatus('Ready');
    })
    .catch(function(err){setStatus('Net error: '+err.message);if(!isCmd){_stopQueryTimer();showCancelBtn(false);streaming=false;setInputEnabled(ready);}});
}
// Open the history modal, rebuilding its list first so it is current.
function openHistPanel(){
  var panel=$('hist-modal');
  if(!panel)return;
  renderHistList();
  panel.style.display='block';
}
// Hide the history modal (no-op when it is absent from the DOM).
function closeHistPanel(){
  var panel=$('hist-modal');
  if(panel)panel.style.display='none';
}
// qids of history entries the user ticked for inclusion in LLM context.
var _ctxQids=new Set();
// Push the current context-qid selection to the server (fire-and-forget).
function _syncCtxQids(){
  var payload=JSON.stringify({sid:SID,key:'contextQids',value:Array.from(_ctxQids)});
  fetch('/api/set',{method:'POST',headers:{'Content-Type':'application/json'},body:payload})
    .catch(function(){});
}
// Rebuild the history modal list: one row per entry with a context checkbox,
// position number, timestamp, clickable question text, and a Del button.
// Deleting adopts the server's returned index wholesale (qids are stable),
// prunes the context set, clamps navPos, and re-renders.
function renderHistList(){
  var ul=$('hist-list');
  if(!ul)return;
  ul.innerHTML='';
  if(!histIndex.length){ul.innerHTML='<p style="padding:12px;">No history yet.</p>';return;}
  var arr=histIndex.slice(); // ascending: first entry at top
  arr.forEach(function(h){
    var row=document.createElement('div');
    row.style.cssText='display:flex;align-items:center;gap:6px;padding:5px 10px;border-bottom:1px solid currentColor';
    var ts=h.ts?(h.ts.slice(0,16).replace('T',' ')):'?';
    var qtext=(h.q&&h.q.trim())?h.q:'(no text)';
    // ctx checkbox
    var cbx=document.createElement('input');
    cbx.type='checkbox';cbx.title='Include in LLM context';
    cbx.checked=_ctxQids.has(h.qid);
    cbx.style.cssText='flex:0 0 auto;cursor:pointer';
    cbx.onchange=function(){
      if(this.checked)_ctxQids.add(h.qid);else _ctxQids.delete(h.qid);
      _syncCtxQids();
    };
    // #N
    var qidSpan=document.createElement('span');
    qidSpan.style.cssText='flex:0 0 28px;text-align:right';
    qidSpan.textContent='#'+(h.pos+1);
    // timestamp
    var tsSpan=document.createElement('span');
    tsSpan.style.cssText='flex:0 0 112px';
    tsSpan.textContent=ts;
    // question text  --  clickable link
    var ql=document.createElement('span');
    ql.style.cssText='flex:1;cursor:pointer;text-decoration:underline;font-size:13px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap';
    ql.title=qtext;
    ql.textContent=qtext.slice(0,120);
    // Clicking a question closes the panel, jumps to the entry, and arms a
    // "back" button that reopens the panel at the same scroll position.
    ql.onclick=function(){
      var pos=-1;
      for(var i=0;i<histIndex.length;i++){if(histIndex[i].qid===h.qid){pos=i;break;}}
      if(pos<0)return;
      var savedScroll=ul.scrollTop;
      closeHistPanel();
      navPos=pos;refreshNav();
      loadEntry(pos,function(){
        var bb=$('back-hist-btn');
        if(bb){
          bb.style.display='inline-block';
          bb.onclick=function(){
            bb.style.display='none';
            openHistPanel();
            setTimeout(function(){var ul2=$('hist-list');if(ul2)ul2.scrollTop=savedScroll;},40);
          };
        }
      });
    };
    // Del button
    var del=document.createElement('button');
    del.textContent='Del';
    del.className='btn';del.style.cssText='flex:0 0 auto;';
    del.onclick=function(e){
      e.stopPropagation();
      if(!confirm('Delete entry #'+(h.pos+1)+'?'))return;
      fetch('/api/history/delete',{method:'POST',headers:{'Content-Type':'application/json'},
        body:JSON.stringify({qid:h.qid})})
        .then(function(r){return r.json();})
        .then(function(resp){
          if(!resp.ok){clientLog('ERR','delete failed: '+(resp.error||'?'));return;}
          // Adopt server's new index (surviving entries, qids unchanged)
          histIndex=resp.index||[];
          histLen=typeof resp.total==='number'?resp.total:histIndex.length;
          // Remove deleted qid from context set
          var newCtx=new Set();
          histIndex.forEach(function(e){if(_ctxQids.has(e.qid))newCtx.add(e.qid);});
          _ctxQids=newCtx;_syncCtxQids();
          // Clamp navPos
          if(navPos>=histLen)navPos=Math.max(0,histLen-1);
          refreshNav();renderHistList();
          if(histLen===0){showEmpty('No history yet -- ask a question below');}
          else{loadEntry(navPos);}
        }).catch(function(e){clientLog('ERR','delete fetch: '+e.message);});
    };
    row.appendChild(cbx);row.appendChild(qidSpan);row.appendChild(tsSpan);
    row.appendChild(ql);row.appendChild(del);
    ul.appendChild(row);
  });
}
// Top-bar wiring: history panel open/close, context toggle, Send, Cancel, Go.
var _histPanelBtn=$('hist-panel-btn');
if(_histPanelBtn)_histPanelBtn.onclick=function(){
  var bb=$('back-hist-btn');if(bb)bb.style.display='none';
  openHistPanel();
};
var _histCloseBtn=$('hist-close-btn');
if(_histCloseBtn)_histCloseBtn.onclick=closeHistPanel;
var _histModal=$('hist-modal');
// Clicking the modal backdrop (not its content) closes the panel.
if(_histModal)_histModal.onclick=function(e){if(e.target===this)closeHistPanel();};
var _ctxToggle=$('ctx-toggle');
if(_ctxToggle)_ctxToggle.onchange=function(){
  fetch('/api/set',{method:'POST',headers:{'Content-Type':'application/json'},
    body:JSON.stringify({sid:SID,key:'useContext',value:this.checked})}).catch(function(){});
};
$('send-btn').onclick=sendQuery;
// Cancel: tell the server to abort, then tear down the streaming UI locally.
// NOTE(review): a second 'click' listener is attached to #cancel-btn later in
// this file; this onclick runs first and clears `streaming`, so that later
// listener's streaming guard normally short-circuits it.
var _cb=$('cancel-btn');if(_cb)_cb.onclick=function(){
  fetch('/api/cancel',{method:'POST',headers:{'Content-Type':'application/json'},
    body:JSON.stringify({sid:SID})}).catch(function(){});
  _stopQueryTimer();
  showCancelBtn(false);
  streaming=false;setInputEnabled(ready);
  setStatus('Stopped');
};
// Go: resume a pipeline the server paused (see the 'stopped' SSE event).
var _gb=$('go-btn');if(_gb)_gb.onclick=function(){
  _gb.style.display='none';
  showCancelBtn(true);
  streaming=true; setInputEnabled(false);
  setStatus('Resuming...');
  fetch('/api/go',{method:'POST',headers:{'Content-Type':'application/json'},
    body:JSON.stringify({sid:SID})}).catch(function(){});
};
// Query-box key bindings: Enter sends; ArrowUp/ArrowDown recall earlier query
// text (first from the in-memory _localHistory, then the disk-persisted
// histIndex, finally restoring the unsent draft); PgUp/PgDn scroll the chat.
var _qiHistPos=-1; // position in query recall (-1 = not browsing)
var _qiDraft='';   // saved draft text while browsing
var _localHistory=[];  // in-memory recall: includes failed/cancelled queries not saved to disk
var _localHistPos=-1; // position within _localHistory (-1 = not browsing)
$('qi').addEventListener('keydown',function(e){
  if(e.key==='Enter'&&!e.shiftKey){e.preventDefault();_qiHistPos=-1;sendQuery();return;}
  // ↑  --  recall previous query text into textarea
  if(e.key==='ArrowUp'&&!e.shiftKey&&!e.ctrlKey&&!e.metaKey){
    var sel=this.selectionStart,atTop=(this.value.indexOf('\n')<0||sel===0);
    if(!atTop)return; // let cursor move up within multi-line text
    e.preventDefault();
    // First recall step: stash the unsent draft so ↓ can restore it later.
    if(_qiHistPos<0&&_localHistPos<0){_qiDraft=this.value;}
    // Check _localHistory first (most recent, includes failed queries)
    if(_localHistPos<0&&_localHistory.length>0){
      _localHistPos=_localHistory.length;
    }
    if(_localHistPos>0){
      _localHistPos--;
      this.value=_localHistory[_localHistPos]||'';
      this.setSelectionRange(0,0);
      return;
    }
    // Fall through to disk-persisted histIndex
    if(_qiHistPos<0)_qiHistPos=histLen;
    if(_qiHistPos>0){
      _qiHistPos--;
      var h=histIndex[_qiHistPos];
      if(h)this.value=h.q||'';
      this.setSelectionRange(0,0);
    }
    return;
  }
  // ↓  --  recall next query text (or restore draft)
  if(e.key==='ArrowDown'&&!e.shiftKey&&!e.ctrlKey&&!e.metaKey){
    var sel2=this.selectionStart,atBot=(this.value.indexOf('\n')<0||sel2===this.value.length);
    if(!atBot)return;
    if(_qiHistPos<0&&_localHistPos<0)return;
    e.preventDefault();
    // Move forward through localHistory first
    if(_localHistPos>=0){
      _localHistPos++;
      if(_localHistPos>=_localHistory.length){
        // Walked off the newest local entry: restore the draft (unless we
        // were also browsing the persisted history).
        _localHistPos=-1;
        if(_qiHistPos<0){this.value=_qiDraft;return;}
      } else {
        this.value=_localHistory[_localHistPos]||'';
        return;
      }
    }
    _qiHistPos++;
    if(_qiHistPos>=histLen){_qiHistPos=-1;this.value=_qiDraft;}
    else{var h2=histIndex[_qiHistPos];if(h2)this.value=h2.q||'';}
    this.setSelectionRange(this.value.length,this.value.length);
    return;
  }
  // PgUp  --  scroll chat output up
  if(e.key==='PageUp'){e.preventDefault();var c=$('chat');c.scrollTop-=c.clientHeight*0.85;return;}
  // PgDn  --  scroll chat output down
  if(e.key==='PageDown'){e.preventDefault();var c2=$('chat');c2.scrollTop+=c2.clientHeight*0.85;return;}
});
// Auto-grow the query textarea with its content, capped at 150px tall.
$('qi').addEventListener('input',function(){
  var box=this;
  box.style.height='auto'; // collapse first so scrollHeight reflects content
  var grown=Math.min(box.scrollHeight,150);
  box.style.height=grown+'px';
});

// NOTE(review): this is a SECOND click handler on #cancel-btn -- the onclick
// assigned earlier in this file fires first and clears `streaming`, so the
// `if(!streaming)return;` guard below normally makes this handler a no-op.
// Candidate for consolidation with the earlier handler.
var _cancelBtn=$('cancel-btn');
if(_cancelBtn)_cancelBtn.addEventListener('click',function(){
  if(!streaming)return;
  fetch('/api/cancel',{method:'POST',headers:{'Content-Type':'application/json'},
    body:JSON.stringify({sid:SID})});
  if(_queryTimer){clearInterval(_queryTimer);_queryTimer=null;}
  _queryStart=0;streaming=false;showCancelBtn(false);setInputEnabled(true);
  setStatus('Query cancelled');
  setTimeout(function(){if(!streaming)setStatus('Ready');},2000);
});
// Provider dropdown: one call asks the server to validate the required
// credentials and switch provider atomically; on failure the input stays
// disabled until the user fixes the Config in webc.
$('provider-sel').addEventListener('change',function(){
  if(_suppressProviderChange)return; // programmatic set, not a user action
  var chosen=this.value;
  clientLog('INFO','provider->'+chosen);
  fetch('/api/setprovider',{method:'POST',headers:{'Content-Type':'application/json'},
      body:JSON.stringify({provider:chosen,sid:SID})})
    .then(function(r){return r.json();})
    .then(function(resp){
      if(!resp.ok){
        setStatus('No '+resp.missing+' in Config  --  set it in webc first',false);
        setInputEnabled(false);
        return;
      }
      if(resp.llm)setLLM(resp.llm);
      if(!streaming){setInputEnabled(ready);setStatus('Ready');}
    }).catch(function(){});
});

// history loaded via history_ready SSE event
// Fallback state fetch for page-refresh timing; the llm_ready SSE event is
// the primary path.  Only populates controls SSE hasn't already enabled.
fetch('/api/state?sid='+SID)
  .then(function(r){return r.json();})
  .then(function(d){
    var _stpsel=$('provider-sel');
    // Replace the generic "ollama" option with one entry per installed model,
    // but only while the selector is still disabled (SSE hasn't fired).
    if(_stpsel&&_stpsel.disabled&&d.ollamaModels&&d.ollamaModels.length){
      var _stoold=_stpsel.querySelector('option[value="ollama"]');
      if(_stoold)_stoold.remove();
      d.ollamaModels.forEach(function(m){
        var _stoo=document.createElement('option');
        _stoo.value='ollama:'+m;_stoo.textContent='Ollama: '+m;
        _stpsel.appendChild(_stoo);
      });
    }
    if(d.provider&&_stpsel&&_stpsel.disabled){
      // Suppress the change handler while selecting programmatically.
      _suppressProviderChange=true;
      _stpsel.value=d.provider;
      _suppressProviderChange=false;
    }
    if(d.llm)setLLM(d.llm); // fixed: was called twice in this handler
    if(typeof d.useContext==='boolean'){var _ct=$('ctx-toggle');if(_ct)_ct.checked=d.useContext;}
    if(d.cost)setCost(d.cost);
    clientLog('INFO','state: provider='+d.provider+' llm='+d.llm);
  })
  .catch(function(e){clientLog('WARN','state: '+e.message);});


})();

RAGWEED-APP-v1.0.102-20260319-000014-473

cat > "$SCRIPTS_DIR/lf.js" << 'RAGWEED-LFJ-v1.0.102-20260319-000014-473'
// VERSION: 1.0.102
// RAGWeed - shared Look & Feel  (lf.js)
console.log('[lf.js] loading, lfInit will be defined on window');
// Call lfInit($) once on DOMContentLoaded; wire your L&F button to the returned fn.
(function(G){
'use strict';

// localStorage keys and factory defaults for the shared Look & Feel prefs.
var LF_KEY     = 'ragweed-lf';         // persisted preference object
var LF_CUST    = 'ragweed-lf-custom';  // user-added custom color swatches
var LF_DEFAULT = {font:'Arial,Helvetica,sans-serif', size:13, fg:'#111111', bg:'#ffffff', lh:1.15, ps:1.5};

// ── style injection ──────────────────────────────────────────────────────────
// Lazily locate-or-create the singleton <style> element used by lfApply.
var _styleEl = null;
function _getStyleEl(){
  if(_styleEl) return _styleEl;
  _styleEl = document.getElementById('ragweed-lf-style');
  if(!_styleEl){
    _styleEl = document.createElement('style');
    _styleEl.id = 'ragweed-lf-style';
    document.head.appendChild(_styleEl);
  }
  return _styleEl;
}

// Apply a preference object p {font,size,fg,bg,lh,ps} to the whole page:
// (a) expose the palette as CSS custom properties on <html>, then
// (b) rewrite the dedicated <style> element with !important overrides for
// every themed widget.  Missing/invalid fields fall back to LF_DEFAULT.
// NOTE(review): selector ids below appear to mirror web.js/webc.js markup --
// confirm against those files before renaming any id.
function lfApply(p){
  // Sanitize each field; parseInt/parseFloat NaN falls through to defaults.
  var ff=p.font||LF_DEFAULT.font, sz=parseInt(p.size)||LF_DEFAULT.size,
      fg=p.fg||LF_DEFAULT.fg,    bg=p.bg||LF_DEFAULT.bg,
      lh=parseFloat(p.lh)||LF_DEFAULT.lh, ps=parseFloat(p.ps)||LF_DEFAULT.ps;
  var r=document.documentElement;
  r.style.setProperty('--lf-ff',ff); r.style.setProperty('--lf-sz',sz+'px');
  r.style.setProperty('--lf-fg',fg); r.style.setProperty('--lf-bg',bg);
  // One generated stylesheet overrides fonts/colors everywhere.
  _getStyleEl().textContent =
    'html *{font-family:'+ff+' !important;font-size:'+sz+'px !important}'+
    'html body,#header,#status-row,#colls-row,#history-nav,#chat,#input-area,#hdr,#main,#left,#right,#fields,#sbar{'+
      'font-family:'+ff+' !important;font-size:'+sz+'px !important;color:'+fg+' !important;background:'+bg+' !important;line-height:'+lh+'}'+
    'html p,html li{margin-bottom:'+ps+'em}'+
    '#src-text p,#src-text li{margin-bottom:'+ps+'em}'+
    '.bot{line-height:'+lh+'}'+
    '.sources{line-height:'+lh+'}'+
    '#src-box,#help-box,#src-text,#hist-list,#hist-box{'+
      'font-family:'+ff+' !important;font-size:'+sz+'px !important;color:'+fg+' !important;background:'+bg+' !important}'+
    '#src-close,#help-close{background:'+fg+' !important;color:'+bg+' !important;border-color:'+fg+' !important}'+
    '#qi{background:'+bg+';color:'+fg+'}'+
    '.you,.bot,.meta-line,.sources,.entry,.sources a,cite,#src-copies-list a,#src-copies-list a:visited,#src-copies-list a:hover,#src-copies-list a:active{color:'+fg+' !important}'+
    '.fv,.fi,.sp-ta,.fl,.fl b,.hint,.sp-meta,.fr{'+
      'font-family:'+ff+' !important;font-size:'+sz+'px !important;'+
      'color:'+fg+' !important;background:'+bg+' !important;border-color:'+fg+' !important}'+
    '.si{color:'+fg+' !important;background:'+bg+' !important;border-color:'+fg+' !important}'+
    '.si.active{color:'+bg+' !important;background:'+fg+' !important}'+
    '.btn{color:'+fg+' !important;background:'+bg+' !important;border-color:'+fg+' !important}'+
    '.btnp{color:'+bg+' !important;background:'+fg+' !important}'+
    '.ci{color:'+fg+' !important;border-color:'+fg+' !important}'+
    '.ci.on{color:'+bg+' !important;background:'+fg+' !important}'+
    '#sbar,#stline{color:'+fg+' !important;background:'+bg+' !important}'+
    '#rtitle{color:'+fg+' !important;border-color:'+fg+' !important}'+
    '#hdr{background:'+fg+' !important;color:'+bg+' !important}'+
    '#hdr *{color:'+bg+' !important}'+
    '#hdr .lf-btn{border-color:'+bg+' !important;color:'+bg+' !important;background:transparent !important}'+
    '.fr{border-color:'+fg+' !important}'+
    '.fr.ed{background:'+bg+' !important}'+
    '.sub{color:'+fg+' !important}'+
    '.hint{color:'+fg+' !important}'+
    '.status-ok{color:'+fg+' !important;font-weight:bold}'+
    '.status-warn{color:'+fg+' !important}'+
    '.status-err{color:'+fg+' !important}'+
    '.warn-box{border-color:'+fg+' !important;color:'+fg+' !important;background:'+bg+' !important}'+

    '.coll-badge{background:'+bg+';color:'+fg+';border-color:'+fg+'}'+
    '.coll-badge.active{background:'+fg+';color:'+bg+'}'+
    '#send-btn{background:'+fg+' !important;color:'+bg+' !important}'+
    'select,button,input[type=range]{font-family:'+ff+' !important;font-size:'+sz+'px !important;'+
      'background:'+bg+' !important;color:'+fg+' !important;border-color:'+fg+' !important}'+
    '#lf-box,#lf-cp-box{'+
      'font-family:'+ff+' !important;font-size:'+sz+'px !important;'+
      'color:'+fg+' !important;background:'+bg+' !important;border-color:'+fg+' !important}'+
    '#lf-box b,#lf-box td,#lf-box th,#lf-box label{'+
      'font-family:'+ff+' !important;font-size:'+sz+'px !important;'+
      'color:'+fg+' !important;background:'+bg+' !important}'+
    '#lf-box select,#lf-box input[type=range]{'+
      'font-family:'+ff+' !important;font-size:'+sz+'px !important;'+
      'color:'+fg+' !important;background:'+bg+' !important;border-color:'+fg+' !important}'+
    '#lf-close,#lf-reset{background:'+bg+' !important;color:'+fg+' !important;border-color:'+fg+' !important}'+
    '#lf-apply{background:'+fg+' !important;color:'+bg+' !important;border-color:'+fg+' !important}'+
    '#lf-preview{border-color:'+fg+' !important}'+
    '#lf-cp-box b,#lf-cp-box td{color:'+fg+' !important;background:'+bg+' !important}'+
    '#lf-cp-cancel,#lf-cp-ok,#lf-cp-add{background:'+bg+' !important;color:'+fg+' !important;border-color:'+fg+' !important}'+
    '.tbl-hdr{border-bottom:2px solid '+fg+' !important;color:'+fg+' !important}'+
    '.tbl-row{border-bottom:1px solid '+fg+' !important}'+
    '.chip{color:'+fg+' !important;background:'+bg+' !important;border-color:'+fg+' !important}'+
    '.chip.active{color:'+bg+' !important;background:'+fg+' !important}';
}

// Load saved prefs from localStorage, applying them as a side effect;
// on any error (no storage, bad JSON) return a fresh copy of the defaults.
function lfLoad(){
  try{
    var raw=localStorage.getItem(LF_KEY);
    if(raw){
      var prefs=JSON.parse(raw);
      lfApply(prefs);
      return prefs;
    }
  }catch(e){}
  return Object.assign({},LF_DEFAULT);
}
// Persist prefs; storage failures (private mode, quota) are ignored.
function lfSave(p){
  try{ localStorage.setItem(LF_KEY,JSON.stringify(p)); }catch(e){}
}

// Custom color swatches, persisted under their own key.
function _loadCustom(){
  try{
    var raw=localStorage.getItem(LF_CUST);
    if(raw) return JSON.parse(raw);
  }catch(e){}
  return [];
}
function _saveCustom(a){
  try{ localStorage.setItem(LF_CUST,JSON.stringify(a)); }catch(e){}
}

// Module state: active preferences and saved custom swatches (loaded once).
var _prefs  = lfLoad();
var _custom = _loadCustom();

// ── color picker ─────────────────────────────────────────────────────────────
// Callback invoked by _cpPick with the chosen hex; set by _cpOpen / lfInit.
var _cpCb = null;

// Open the color-picker overlay; cb receives the chosen hex string.
// currentHex is accepted for interface compatibility but not used to
// preselect anything.
function _cpOpen(currentHex, cb){
  _cpCb = cb;
  document.getElementById('lf-cp-panel').style.display = 'flex';
  _cpRenderCustom();
}
// Hide the picker overlay.
function _cpClose(){
  document.getElementById('lf-cp-panel').style.display = 'none';
}
// Deliver the chosen color to the registered callback, then close.
function _cpPick(hex){
  if(_cpCb) _cpCb(hex);
  _cpClose();
}
// Rebuild the row of user-saved custom swatches inside the picker.
function _cpRenderCustom(){
  var row = document.getElementById('lf-cp-custom');
  if(!row) return;
  row.innerHTML = '';
  _custom.forEach(function(c){
    var swatch=document.createElement('span');
    swatch.style.cssText='display:inline-block;width:28px;height:28px;background:'+c+
      ';border:1px solid rgba(0,0,0,.3);cursor:pointer;margin:1px;vertical-align:middle';
    swatch.title=c;
    swatch.onclick=function(){ _cpPick(c); };
    row.appendChild(swatch);
  });
}

// ── panel HTML ────────────────────────────────────────────────────────────────
var _PANEL = '<div id="lf-panel" style="display:none;position:fixed;inset:0;background:rgba(0,0,0,.5);z-index:300;align-items:center;justify-content:center;padding:10px 2.5%;box-sizing:border-box">'+
'<div id="lf-box" style="border:2px solid #111;width:100%;max-height:100%;display:flex;flex-direction:column;overflow:hidden">'+
'<div style="display:flex;justify-content:space-between;align-items:center;padding:8px 14px;border-bottom:2px solid #111">'+
'<b>Look & Feel</b><button id="lf-close" style="padding:2px 12px;border:2px solid #111;cursor:pointer">Close [Esc]</button></div>'+
'<div style="padding:12px 16px;overflow-y:auto;flex:1"><table style="border-collapse:collapse;width:100%">'+
'<tr><td style="padding:4px 8px 4px 0;white-space:nowrap;width:1%"><b>Theme</b></td>'+
'<td style="width:99%"><select id="lf-theme" style="width:100%;padding:2px 4px;border:1px solid #111"><option value="">- Themes -</option><option value="#111111|#ffffff">Default</option><option value="#657b83|#fdf6e3">Solarized Light</option><option value="#839496|#002b36">Solarized Dark</option><option value="#f8f8f2|#272822">Monokai</option><option value="#f8f8f2|#282a36">Dracula</option><option value="#d8dee9|#2e3440">Nord</option><option value="#ebdbb2|#282828">Gruvbox Dark</option><option value="#3c3836|#fbf1c7">Gruvbox Light</option><option value="#abb2bf|#282c34">One Dark</option><option value="#24292e|#ffffff">GitHub Light</option><option value="#c9d1d9|#0d1117">GitHub Dark</option><option value="#4a3728|#f4e4c1">Parchment</option><option value="#3a2a1a|#f0e8d8">Sepia</option><option value="#2a2218|#fffff0">Ivory</option><option value="#e8e8e8|#2a2a3a">Slate</option><option value="#dddddd|#1c1c1c">Charcoal</option><option value="#00ff00|#000000">Terminal Green</option><option value="#ffb000|#0a0800">Terminal Amber</option><option value="#111111|#f5f0e8">Bauhaus</option><option value="#c8a84b|#1a1a2e">Art Deco</option><option value="#3d5a3e|#f0ede0">Impressionist</option><option value="#888888|#f8f8f8">Faded</option></select></td></tr>'+
'<tr><td style="padding:4px 8px 4px 0;white-space:nowrap;width:1%"><b>Font</b></td>'+
'<td style="width:99%"><select id="lf-font" style="width:100%;padding:2px 4px;border:1px solid #111"><option value="Arial,Helvetica,sans-serif">Arial</option><option value="Verdana,Geneva,sans-serif">Verdana</option><option value="Trebuchet MS,Helvetica,sans-serif">Trebuchet MS</option><option value="Gill Sans,Gill Sans MT,Calibri,sans-serif">Gill Sans</option><option value="Optima,Segoe,Candara,sans-serif">Optima</option><option value="Futura,Century Gothic,AppleGothic,sans-serif">Futura</option><option value="Tahoma,Geneva,sans-serif">Tahoma</option><option value="Georgia,Times New Roman,serif">Georgia</option><option value="Palatino,Palatino Linotype,Book Antiqua,serif">Palatino</option><option value="Times New Roman,Times,serif">Times New Roman</option><option value="Garamond,Garamond Premier Pro,serif">Garamond</option><option value="Didot,Bodoni MT,serif">Didot</option><option value="Baskerville,Baskerville Old Face,serif">Baskerville</option><option value="Courier New,Courier,monospace">Courier New</option><option value="Monaco,Consolas,Lucida Console,monospace">Monaco</option><option value="Lucida Console,Courier New,monospace">Lucida Console</option><option value="Candara,Calibri,Optima,sans-serif">Candara</option><option value="Calibri,Candara,Segoe UI,sans-serif">Calibri</option><option value="Segoe UI,Tahoma,Geneva,sans-serif">Segoe UI</option><option value="Brush Script MT,Brush Script Std,cursive">Brush Script</option><option value="Comic Sans MS,Comic Sans,cursive">Comic Sans</option><option value="Papyrus,fantasy">Papyrus</option><option value="Luminari,Papyrus,fantasy">Luminari</option><option value="Copperplate,Copperplate Gothic Light,fantasy">Copperplate</option><option value="Impact,Haettenschweiler,Arial Narrow Bold,sans-serif">Impact</option><option value="Rockwell,Courier Bold,Courier,serif">Rockwell</option><option value="American Typewriter,Courier New,monospace">American Typewriter</option></select></td></tr>'+
'<tr><td style="padding:4px 8px 4px 0;white-space:nowrap;width:1%"><b>Size</b></td>'+
'<td style="display:flex;width:99%;align-items:center;gap:8px">'+
'<input type="range" id="lf-size" min="9" max="56" step="1" style="flex:1">'+
'<span id="lf-size-val" style="min-width:36px;text-align:right">13px</span></td></tr>'+
'<tr><td style="padding:4px 8px 4px 0;white-space:nowrap;width:1%"><b>Line height</b></td>'+
'<td style="display:flex;width:99%;align-items:center;gap:8px">'+
'<input type="range" id="lf-lh" min="0.8" max="3.0" step="0.05" style="flex:1">'+
'<span id="lf-lh-val" style="min-width:36px;text-align:right">1.15</span></td></tr>'+
'<tr><td style="padding:4px 8px 4px 0;white-space:nowrap;width:1%"><b>Para spacing</b></td>'+
'<td style="display:flex;width:99%;align-items:center;gap:8px">'+
'<input type="range" id="lf-ps" min="0" max="4.0" step="0.05" style="flex:1">'+
'<span id="lf-ps-val" style="min-width:36px;text-align:right">1.5em</span></td></tr>'+
'<tr><td style="padding:4px 8px 4px 0;white-space:nowrap;width:1%"><b>Text color</b></td>'+
'<td style="display:flex;width:99%;align-items:center;gap:6px">'+
'<button id="lf-fg-btn" style="width:40px;height:26px;border:2px solid #111;cursor:pointer;padding:0;flex-shrink:0"></button>'+
'<select id="lf-fg-pre" style="flex:1;padding:2px 4px;border:1px solid #111"><option value="">-- color --</option><option value="#111111">Near black</option><option value="#000000">Black</option><option value="#ffffff">White</option><option value="#dddddd">Light grey</option><option value="#888888">Mid grey</option><option value="#333333">Dark grey</option><option value="#1a1a2e">Navy</option><option value="#0d47a1">Blue</option><option value="#1b5e20">Dark green</option><option value="#b71c1c">Dark red</option><option value="#4a3728">Brown</option><option value="#c8a84b">Gold</option><option value="#657b83">Solarized fg</option><option value="#839496">Solarized dark fg</option><option value="#ebdbb2">Gruvbox fg</option><option value="#d8dee9">Nord fg</option><option value="#abb2bf">One Dark fg</option></select>'+
'</td></tr>'+
'<tr><td style="padding:4px 8px 4px 0;white-space:nowrap;width:1%"><b>Background</b></td>'+
'<td style="display:flex;width:99%;align-items:center;gap:6px">'+
'<button id="lf-bg-btn" style="width:40px;height:26px;border:2px solid #111;cursor:pointer;padding:0;flex-shrink:0"></button>'+
'<select id="lf-bg-pre" style="flex:1;padding:2px 4px;border:1px solid #111"><option value="">-- color --</option><option value="#ffffff">White</option><option value="#000000">Black</option><option value="#111111">Near black</option><option value="#1c1c1c">Charcoal</option><option value="#2a2a3a">Slate</option><option value="#fdf6e3">Solarized light</option><option value="#002b36">Solarized dark</option><option value="#272822">Monokai</option><option value="#282a36">Dracula</option><option value="#2e3440">Nord</option><option value="#282828">Gruvbox dark</option><option value="#fbf1c7">Gruvbox light</option><option value="#282c34">One Dark</option><option value="#f4e4c1">Parchment</option><option value="#f0e8d8">Sepia</option><option value="#fffff0">Ivory</option><option value="#f5f0e8">Bauhaus</option><option value="#1a1a2e">Art Deco dark</option><option value="#f8f8f8">Faded</option></select>'+
'</td></tr>'+
'</table>'+
'<div id="lf-preview" style="margin-top:10px;padding:8px;border:1px solid #111">Preview: The quick brown fox jumps over the lazy dog.</div>'+
'<div style="display:flex;gap:8px;margin-top:10px">'+
'<button id="lf-apply" style="flex:1;padding:5px;border:2px solid #111;cursor:pointer">Apply & Close</button>'+
'<button id="lf-reset" style="flex:1;padding:5px;border:2px solid #111;cursor:pointer">Reset</button>'+
'</div></div></div></div>';

var _CP = '<div id="lf-cp-panel" style="display:none;position:fixed;inset:0;background:rgba(0,0,0,.4);z-index:400;align-items:center;justify-content:center">'+
'<div id="lf-cp-box" style="border:2px solid #111;padding:12px;width:90%;max-width:340px">'+
'<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:8px">'+
'<b>Choose a color</b>'+
'<button id="lf-cp-cancel" style="padding:2px 12px;border:1px solid #111;cursor:pointer">Cancel</button>'+
'</div>'+
'<div id="lf-cp-grid"><div style="white-space:nowrap;margin:1px 0"><span data-c="#bbdefb" style="display:inline-block;width:28px;height:28px;background:#bbdefb;border:2px solid #bbb;cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#64b5f6" style="display:inline-block;width:28px;height:28px;background:#64b5f6;border:2px solid #bbb;cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#1e88e5" style="display:inline-block;width:28px;height:28px;background:#1e88e5;border:2px solid #bbb;cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#1565c0" style="display:inline-block;width:28px;height:28px;background:#1565c0;border:2px solid #bbb;cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#0d47a1" style="display:inline-block;width:28px;height:28px;background:#0d47a1;border:2px solid #bbb;cursor:pointer;margin:1px;vertical-align:middle"></span></div><div style="white-space:nowrap;margin:1px 0"><span data-c="#c8e6c9" style="display:inline-block;width:28px;height:28px;background:#c8e6c9;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#66bb6a" style="display:inline-block;width:28px;height:28px;background:#66bb6a;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#43a047" style="display:inline-block;width:28px;height:28px;background:#43a047;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#2e7d32" style="display:inline-block;width:28px;height:28px;background:#2e7d32;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#1b5e20" style="display:inline-block;width:28px;height:28px;background:#1b5e20;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span></div><div style="white-space:nowrap;margin:1px 0"><span data-c="#fff9c4" 
style="display:inline-block;width:28px;height:28px;background:#fff9c4;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#fff176" style="display:inline-block;width:28px;height:28px;background:#fff176;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#fdd835" style="display:inline-block;width:28px;height:28px;background:#fdd835;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#f9a825" style="display:inline-block;width:28px;height:28px;background:#f9a825;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#f57f17" style="display:inline-block;width:28px;height:28px;background:#f57f17;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span></div><div style="white-space:nowrap;margin:1px 0"><span data-c="#ffe0b2" style="display:inline-block;width:28px;height:28px;background:#ffe0b2;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#ffa726" style="display:inline-block;width:28px;height:28px;background:#ffa726;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#fb8c00" style="display:inline-block;width:28px;height:28px;background:#fb8c00;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#e65100" style="display:inline-block;width:28px;height:28px;background:#e65100;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#bf360c" style="display:inline-block;width:28px;height:28px;background:#bf360c;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span></div><div style="white-space:nowrap;margin:1px 0"><span data-c="#ffcdd2" 
style="display:inline-block;width:28px;height:28px;background:#ffcdd2;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#ef9a9a" style="display:inline-block;width:28px;height:28px;background:#ef9a9a;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#e53935" style="display:inline-block;width:28px;height:28px;background:#e53935;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#b71c1c" style="display:inline-block;width:28px;height:28px;background:#b71c1c;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span><span data-c="#7f0000" style="display:inline-block;width:28px;height:28px;background:#7f0000;border:1px solid rgba(0,0,0,.15);cursor:pointer;margin:1px;vertical-align:middle"></span></div></div>'+
'<div style="margin-top:8px"><b>Custom</b></div>'+
'<div style="display:flex;align-items:center;gap:4px;margin-top:4px">'+
'<button id="lf-cp-add" style="width:28px;height:28px;border:1px solid #111;cursor:pointer;font-size:16px;padding:0" title="Add custom color">+</button>'+
'<button id="lf-cp-ok"  style="width:28px;height:28px;border:1px solid #111;cursor:pointer;font-size:16px;padding:0" title="Confirm">&#10003;</button>'+
'<div id="lf-cp-custom" style="display:inline-flex;flex-wrap:wrap;align-items:center"></div>'+
'</div>'+
'<input type="color" id="lf-cp-native" style="position:absolute;opacity:0;width:0;height:0">'+
'</div></div>';

// ── init ──────────────────────────────────────────────────────────────────────
// Build the L&F panel + color-picker DOM, wire every control, and return a
// zero-arg function that opens the panel (caller binds it to an L&F button).
// Call once after DOMContentLoaded.
function lfInit(){
  function ge(id){ return document.getElementById(id); }

  // Inject panels
  ['_PANEL','_CP'].forEach(function(k){
    var tmp=document.createElement('div');
    tmp.innerHTML = k==='_PANEL'?_PANEL:_CP;
    document.body.appendChild(tmp.firstChild);
  });

  // Use flex display for panels (set display:flex to show)
  function showPanel(id){ ge(id).style.display='flex'; }
  function hidePanel(id){ ge(id).style.display='none'; }

  // State
  // curFg/curBg are the *pending* (not yet applied) color choices;
  // cpTarget records which of the two the picker is currently editing.
  var curFg = _prefs.fg||LF_DEFAULT.fg;
  var curBg = _prefs.bg||LF_DEFAULT.bg;
  var cpTarget = 'fg';

  // Repaint the two color-swatch buttons: strip any prior background
  // declaration from cssText, then append the current choice.
  function updateBtns(){
    ge('lf-fg-btn').style.cssText = ge('lf-fg-btn').style.cssText.replace(/background[^;]*/,'') + ';background:'+curFg+' !important';
    ge('lf-bg-btn').style.cssText = ge('lf-bg-btn').style.cssText.replace(/background[^;]*/,'') + ';background:'+curBg+' !important';
  }

  // Restyle the preview line from the current control values via a
  // dedicated <style> element (created on first use).
  function syncPreview(){
    var ff = ge('lf-font').value;
    var sz = ge('lf-size').value;
    var el = ge('lf-preview-style');
    if(!el){ el=document.createElement('style'); el.id='lf-preview-style'; document.head.appendChild(el); }
    var lhv=ge('lf-lh')?ge('lf-lh').value:LF_DEFAULT.lh;
    var psv=ge('lf-ps')?ge('lf-ps').value:LF_DEFAULT.ps;
    el.textContent = '#lf-preview{font-family:'+ff+' !important;font-size:'+sz+'px !important;'+
      'color:'+curFg+' !important;background:'+curBg+' !important;line-height:'+lhv+'}'+
      '#lf-preview p,#lf-preview li{margin-bottom:'+psv+'em}';
  }

  // Push a full theme (font/size/colors/spacing) into the panel controls
  // and the pending state; does NOT persist or apply to the page.
  function applyTheme(font,size,fg,bg,lh,ps){
    var fs=ge('lf-font');
    for(var i=0;i<fs.options.length;i++){ if(fs.options[i].value===font){ fs.selectedIndex=i; break; } }
    ge('lf-size').value=size; ge('lf-size-val').textContent=size+'px';
    if(ge('lf-lh')){ ge('lf-lh').value=lh||LF_DEFAULT.lh; ge('lf-lh-val').textContent=ge('lf-lh').value; }
    if(ge('lf-ps')){ ge('lf-ps').value=ps||LF_DEFAULT.ps; ge('lf-ps-val').textContent=ge('lf-ps').value+'em'; }
    curFg=fg; curBg=bg; updateBtns(); syncPreview();
  }

  // Populate controls from prefs
  applyTheme(_prefs.font||LF_DEFAULT.font, _prefs.size||LF_DEFAULT.size, curFg, curBg, _prefs.lh||LF_DEFAULT.lh, _prefs.ps||LF_DEFAULT.ps);

  // Palette swatches
  var spans = ge('lf-cp-grid').querySelectorAll('span[data-c]');
  for(var i=0;i<spans.length;i++){
    (function(sp){
      sp.onclick=function(){ _cpPick(sp.getAttribute('data-c')); };
    })(spans[i]);
  }
  // Default pick handler; each _cpOpen call below replaces it with a
  // target-specific one, so this mainly covers the cpTarget fallback.
  _cpCb = function(hex){
    if(cpTarget==='fg'){ curFg=hex; } else { curBg=hex; }
    updateBtns(); syncPreview();
  };

  // + button opens native picker
  ge('lf-cp-add').onclick = function(){ ge('lf-cp-native').click(); };
  ge('lf-cp-native').oninput = function(){
    // Remember the custom color (most recent first, max 16), then pick it.
    var c=this.value;
    if(_custom.indexOf(c)<0){ _custom.unshift(c); if(_custom.length>16)_custom=_custom.slice(0,16); _saveCustom(_custom); }
    _cpCb(c); _cpRenderCustom(); hidePanel('lf-cp-panel');
  };
  ge('lf-cp-ok').onclick = function(){ hidePanel('lf-cp-panel'); };
  ge('lf-cp-cancel').onclick = function(){ hidePanel('lf-cp-panel'); };
  // Click on the dimmed backdrop (not the box) also dismisses the picker.
  ge('lf-cp-panel').onclick = function(e){ if(e.target===ge('lf-cp-panel')) hidePanel('lf-cp-panel'); };

  // Color buttons
  // NOTE(review): _cpOpen already shows the panel and renders the custom
  // row, so the trailing showPanel/_cpRenderCustom calls look redundant
  // (harmless) -- confirm before removing.
  ge('lf-fg-btn').onclick = function(){ cpTarget='fg'; _cpOpen(curFg,function(h){curFg=h;updateBtns();syncPreview();}); showPanel('lf-cp-panel'); _cpRenderCustom(); };
  ge('lf-bg-btn').onclick = function(){ cpTarget='bg'; _cpOpen(curBg,function(h){curBg=h;updateBtns();syncPreview();}); showPanel('lf-cp-panel'); _cpRenderCustom(); };
  // Preset dropdowns act as one-shot pickers: apply, then reset to blank.
  ge('lf-fg-pre').onchange = function(){ if(!this.value)return; curFg=this.value; updateBtns(); syncPreview(); this.value=''; };
  ge('lf-bg-pre').onchange = function(){ if(!this.value)return; curBg=this.value; updateBtns(); syncPreview(); this.value=''; };

  // Size slider
  ge('lf-size').oninput = function(){ ge('lf-size-val').textContent=this.value+'px'; syncPreview(); };
  ge('lf-lh').oninput = function(){ ge('lf-lh-val').textContent=this.value; syncPreview(); };
  ge('lf-ps').oninput = function(){ ge('lf-ps-val').textContent=this.value+'em'; syncPreview(); };

  // Font select
  ge('lf-font').onchange = syncPreview;

  // Theme select
  // Theme option values encode "fg|bg"; one-shot like the preset dropdowns.
  ge('lf-theme').onchange = function(){
    if(!this.value) return;
    var parts = this.value.split('|');
    curFg=parts[0]; curBg=parts[1];
    updateBtns(); syncPreview();
    this.value='';
  };

  // Panel close
  ge('lf-close').onclick  = function(){ hidePanel('lf-panel'); };
  ge('lf-panel').onclick  = function(e){ if(e.target===ge('lf-panel')) hidePanel('lf-panel'); };
  // Esc closes the topmost open overlay: picker first, then the panel.
  document.addEventListener('keydown',function(e){
    if(e.key==='Escape'){
      if(ge('lf-cp-panel').style.display==='flex') hidePanel('lf-cp-panel');
      else if(ge('lf-panel').style.display==='flex') hidePanel('lf-panel');
    }
  });

  // Apply / Reset
  // Apply commits the pending state: restyle page, persist, close.
  ge('lf-apply').onclick = function(){
    var p={font:ge('lf-font').value, size:parseInt(ge('lf-size').value), fg:curFg, bg:curBg,
      lh:parseFloat(ge('lf-lh').value), ps:parseFloat(ge('lf-ps').value)};
    lfApply(p); lfSave(p); _prefs=p; hidePanel('lf-panel');
  };
  // Reset restores factory defaults immediately (page + storage + controls).
  ge('lf-reset').onclick = function(){
    var p=Object.assign({},LF_DEFAULT);
    lfApply(p); lfSave(p); _prefs=p;
    applyTheme(p.font,p.size,p.fg,p.bg,p.lh,p.ps);
    ge('lf-theme').value='';
  };

  syncPreview();

  // Return the open-panel function - caller wires it to their L&F button
  return function(){ showPanel('lf-panel'); syncPreview(); };
}

// Expose
console.log('[lf.js] lfInit exposed on window');
G.lfInit  = lfInit;
G.lfApply = lfApply;
G.lfLoad  = lfLoad;

})(window);

RAGWEED-LFJ-v1.0.102-20260319-000014-473
ok "scripts/web.js written ($(wc -l < "$SCRIPTS_DIR/web.js") lines)"

# =============================================================================
# STEP 11: Write run.sh
# =============================================================================
ph "STEP 11: Writing run.sh"

# Only write run.sh if it is an installer-generated file (not locally customized).
# RAGWEED_VERSION=1.0.102
_skip_run_sh=0
if [ -f "$PROJECT_DIR/run.sh" ] && ! grep -q "RAGWEED_VERSION=" "$PROJECT_DIR/run.sh" 2>/dev/null; then
  ok "run.sh has local customizations (no RAGWEED_VERSION marker) -- skipping overwrite"
  _skip_run_sh=1
fi
if [ "$_skip_run_sh" = "0" ]; then
cat > "$PROJECT_DIR/run.sh" << 'RAGWEED-RUN-v1.0.102-20260319-000014-473'
#!/usr/bin/env bash
# RAGWeed v10  --  launcher
# Copyright (c) Fred Cohen, 2026 - ALL RIGHTS RESERVED - Patents: https://all.net/Notices.html
# Must be executed, not sourced
# (when sourced, BASH_SOURCE[0] != $0; the 'exec' calls below would otherwise
#  replace the user's interactive shell)
if [[ "${BASH_SOURCE[0]}" != "${0}" ]]; then
    echo "Error: run.sh must be executed, not sourced. Use: ./run.sh [command]" >&2
    return 1
fi

RAGWEED_VERSION="1.0.102"
PROJECT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ENV_FILE="$PROJECT_DIR/Config"

# Export everything Config defines (set -a) without leaking allexport mode.
set -a; [ -f "$ENV_FILE" ] && source "$ENV_FILE"; set +a
# Source project-local environment (NVM, etc.) -- no ~/.bashrc modification needed
[ -f "$PROJECT_DIR/local/env.sh" ] && source "$PROJECT_DIR/local/env.sh" 2>/dev/null
# Add RAGWeed tool paths -- whisper-cpp binary and libs live inside the project dir
export PATH="$PROJECT_DIR/whisper-cpp/build/bin:$PATH"
# fixed: this export was duplicated, prepending the lib paths twice
export LD_LIBRARY_PATH="$PROJECT_DIR/whisper-cpp/build/src:$PROJECT_DIR/whisper-cpp/build/ggml/src${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Hard requirement: the node runtime must be on PATH.
command -v node &>/dev/null || {
    echo "Error: node not found. Run: bash 010-ragweed-v10.sh" >&2; exit 1
}

# Children (query.js, web.js, ...) read these to locate the project and Config.
export PROJECT_DIR ENV_FILE

# First argument selects the subcommand; default is the interactive TUI.
cmd="${1:-tui}"
[ $# -gt 0 ] && shift

# Subcommand dispatcher. Arms that hand off to node use exec (replacing this
# shell); helper arms run inline and fall through to the end of the script.
case "$cmd" in
    web)
        PORT="${WEB_PORT:-3000}"
        # Best-effort LAN IP: the 'src' field of the route toward 1.1.1.1.
        _LAN_IP=$(ip route get 1.1.1.1 2>/dev/null | awk '{for(i=1;i<=NF;i++) if($i=="src") print $(i+1)}' | head -1)
        echo "RAGWeed v${RAGWEED_VERSION}  --  Web interface"
        echo "  http://localhost:${PORT}"
        [ -n "$_LAN_IP" ] && echo "  http://${_LAN_IP}:${PORT}"
        echo "  Press Ctrl-C to stop"
        echo ""
        exec node "$PROJECT_DIR/scripts/web.js" "$@"
        ;;
    query|tui)
        exec node "$PROJECT_DIR/scripts/query.js" "$@"
        ;;
    config)
        bash "$PROJECT_DIR/scripts/config.sh"
        ;;
    logs)
        # List the five newest per-session query logs, then dump the tail of
        # the most recent one (falling back to the legacy single query.log).
        echo "RAGWeed $RAGWEED_VERSION"
        echo "================================================================"
        _total=$(ls -1t "$PROJECT_DIR/logs/"query-*.log 2>/dev/null | wc -l | tr -d ' ')
        echo "  Session logs: $_total total  (showing last 5)"
        echo "================================================================"
        ls -1t "$PROJECT_DIR/logs/"query-*.log 2>/dev/null | head -5 | while read f; do
            echo "  $(basename "$f")  ($(wc -l < "$f") lines)"
        done
        echo ""
        _recent=$(ls -1t "$PROJECT_DIR/logs/"query-*.log 2>/dev/null | head -1)
        if [ -n "$_recent" ]; then
            echo "  --- Most recent: $(basename "$_recent") ---"
            tail -40 "$_recent"
        else
            tail -40 "$PROJECT_DIR/logs/query.log" 2>/dev/null || echo "  (no log yet)"
        fi
        ;;
    install|upgrade)
        exec bash "$PROJECT_DIR/010-ragweed-v10.sh"
        ;;
    ingest)
        mkdir -p "$PROJECT_DIR/logs"
        _CRASH="$PROJECT_DIR/logs/crash.log"
        # Rotate crash log on each run  --  crash.log always contains only THIS run
        if [ -f "$_CRASH" ]; then
            _ts=$(date +%Y%m%d_%H%M%S)
            mv "$_CRASH" "$PROJECT_DIR/logs/crash_${_ts}.log"
            # Keep only the five newest rotated crash logs.
            ls -t "$PROJECT_DIR/logs/crash_"*.log 2>/dev/null | tail -n +6 | xargs rm -f 2>/dev/null || true
        fi
        # Ensure Ollama is running  --  start it if not
        _OLLAMA_HOST="${OLLAMA_HOST:-http://localhost:11434}"
        if ! curl -sf "${_OLLAMA_HOST}/api/tags" -o /dev/null 2>/dev/null; then
            if command -v ollama &>/dev/null; then
                echo "  Starting Ollama..."
                ollama serve >/dev/null 2>&1 &
                _OLLAMA_PID=$!
                # Wait up to 15s for Ollama to become ready
                _waited=0
                while ! curl -sf "${_OLLAMA_HOST}/api/tags" -o /dev/null 2>/dev/null; do
                    sleep 1; _waited=$((_waited+1))
                    [ $_waited -ge 15 ] && echo "  [WARN] Ollama not responding after 15s  --  continuing anyway" && break
                done
            else
                echo "  [WARN] Ollama not found  --  run: bash 010-ragweed-v10.sh"
            fi
        fi
        # Wrapper that supplies a default --zip policy from Config (ZIP_POLICY,
        # with y/n/a shorthand) unless the caller already passed --zip.
        # NOTE(review): the substring grep also matches "--zip" appearing
        # inside a filename argument -- confirm acceptable.
        _run_ingest() {
            local zp="${ZIP_POLICY:-never}"
            case "$zp" in y) zp="always";; n) zp="never";; a) zp="ask";; esac
            if echo "$@" | grep -q -- "--zip"; then
                node "$PROJECT_DIR/scripts/ingest.js" "$@"
            else
                node "$PROJECT_DIR/scripts/ingest.js" --zip "$zp" "$@"
            fi
        }
        # Tee stderr into crash.log via process substitution while still
        # echoing it to the terminal. $? below is _run_ingest's exit status;
        # the tee process runs asynchronously and may flush slightly later.
        _run_ingest "$@" 2> >(while IFS= read -r line; do
            echo "$line" >&2
            echo "$line" >> "$_CRASH"
        done)
        _exit=$?
        # 130 = interrupted by Ctrl-C; not treated as a failure.
        [ $_exit -ne 0 ] && [ $_exit -ne 130 ] && \
            echo "  [WARN] ingest exited with code $_exit -- see ./run.sh logs"
        ;;
    webc)
        exec node "$PROJECT_DIR/scripts/webc.js" "$@"
        ;;
    annotate)
        exec node "$PROJECT_DIR/scripts/annotation_test_runner.cjs" "$@"
        ;;
    man)
        # Try local man, then groff rendering, then raw roff as last resort.
        man -l "$PROJECT_DIR/docs/ragweed.1" 2>/dev/null || \
        groff -man -Tascii "$PROJECT_DIR/docs/ragweed.1" 2>/dev/null | less || \
        cat "$PROJECT_DIR/docs/ragweed.1"
        ;;
    diagnose)
        echo "RAGWeed v$RAGWEED_VERSION -- diagnostics"
        echo ""
        echo "=== Node.js ==="
        node --version; npm --version
        echo ""
        echo "=== Ollama ==="
        curl -s "${OLLAMA_HOST:-http://localhost:11434}/api/tags" | node -e "const d=require('fs').readFileSync('/dev/stdin','utf8');try{const j=JSON.parse(d);console.log('  models: '+j.models.map(m=>m.name).join(', '));}catch(e){console.log('  (could not parse: '+d.slice(0,80)+')');};" 2>/dev/null || echo "  Ollama not reachable"
        echo ""
        echo "=== Collections ==="
        node "$PROJECT_DIR/scripts/collections.js" 2>/dev/null || echo "  (none found)"
        echo ""
        echo "=== Audio Transcription ==="
        # Prefer the project-built whisper.cpp CLI; fall back to Python whisper.
        if command -v whisper-cli &>/dev/null; then
            echo "  whisper-cli (C++): $(which whisper-cli)"
            _wmodel="${WHISPER_MODEL:-$PROJECT_DIR/whisper-cpp/models/ggml-base.en.bin}"
            [ -f "$_wmodel" ] && echo "  model: $_wmodel" || echo "  model: NOT FOUND (set WHISPER_MODEL= in Config)"
        elif command -v whisper &>/dev/null; then
            echo "  whisper (Python): $(which whisper)"
        else
            echo "  whisper: NOT INSTALLED  --  audio files will not be transcribed"
            echo "  (re-run installer to build whisper.cpp, or: ./run.sh install)"
        fi
        command -v ffmpeg &>/dev/null && echo "  ffmpeg: $(ffmpeg -version 2>&1 | head -1 | grep -oE '[0-9]+\.[0-9]+' | head -1)" || echo "  ffmpeg: NOT FOUND"
        echo ""
        echo "=== Config settings ==="
        [ -f "$PROJECT_DIR/Config" ] && cat "$PROJECT_DIR/Config" || echo "  (no Config)"
        ;;
    help)
        cat "$PROJECT_DIR/scripts/help.txt"
        ;;
    *)
        echo "RAGWeed v$RAGWEED_VERSION -- run ./run.sh help for help"
        ;;
esac

RAGWEED-RUN-v1.0.102-20260319-000014-473
chmod +x "$PROJECT_DIR/run.sh"
fi # end skip_run_sh
ok "run.sh written"



# =============================================================================
# STEP 11e: Write developer tools (bump.js, inject_preflight.js)
# =============================================================================
ph "STEP 11e: Writing developer tools"

# bump.js: version-bump tool; rewrites all version strings, heredoc markers,
# and the manifest table (line counts + MD5s) in this installer in place.
cat > "$PROJECT_DIR/bump.js" << 'RAGWEED-BMP-v1.0.102-20260319-000014-473'
#!/usr/bin/env node
// VERSION: 1.0.102
// =============================================================================
// bump.js  --  RAGWeed installer version bump tool
// Usage:  node bump.js [installer]
//         node bump.js 010-ragweed-v10.sh
//
// What it does:
//   1. Reads the installer
//   2. Increments the patch version (e.g. 1.0.102 -> 1.0.103)
//   3. Generates a new UTC timestamp
//   4. Replaces ALL version strings and heredoc markers consistently
//   5. Recomputes the manifest table (line counts + MD5s) for every heredoc
//   6. Writes the manifest table back into the installer
//   7. Saves the result (overwrites in place)
//
// Rules:
//   - JS only. No Python. No shell exec.
//   - Never modifies heredoc CONTENT  --  only markers and version strings
//   - Uses replacer functions for all string.replace() calls to avoid
//     JavaScript's special $ replacement patterns ($`, $&, $', $1 etc.)
//   - Aborts with a clear error if anything looks wrong
// =============================================================================

'use strict';
const fs     = require('fs');
const crypto = require('crypto');
// NOTE(review): 'path' appears unused anywhere in this file -- confirm and drop.
const path   = require('path');

// ---------------------------------------------------------------------------
// Args
// ---------------------------------------------------------------------------
const installerPath = process.argv[2] || '010-ragweed-v10.sh';
if (!fs.existsSync(installerPath)) {
    console.error(`Error: installer not found: ${installerPath}`);
    process.exit(1);
}

// ---------------------------------------------------------------------------
// Read
// ---------------------------------------------------------------------------
const src = fs.readFileSync(installerPath, 'utf8');

// ---------------------------------------------------------------------------
// Parse current version
// ---------------------------------------------------------------------------
const verMatch = src.match(/RAGWEED_INSTALL_VERSION="(\d+\.\d+\.\d+)"/);
if (!verMatch) {
    console.error('Error: RAGWEED_INSTALL_VERSION not found in installer');
    process.exit(1);
}
const oldVer = verMatch[1];
const parts  = oldVer.split('.');
// Only the patch component is bumped; major/minor are managed by hand.
parts[2]     = String(parseInt(parts[2], 10) + 1);
const newVer = parts.join('.');

// ---------------------------------------------------------------------------
// Generate new timestamp  YYYYMMDD-HHMMSS-mmm  (UTC)
// ---------------------------------------------------------------------------
const now = new Date();
const pad = (n, w) => String(n).padStart(w, '0');
const ts  = `${now.getUTCFullYear()}${pad(now.getUTCMonth()+1,2)}${pad(now.getUTCDate(),2)}`
          + `-${pad(now.getUTCHours(),2)}${pad(now.getUTCMinutes(),2)}${pad(now.getUTCSeconds(),2)}`
          + `-${pad(now.getUTCMilliseconds(),3)}`;

// Full tag used in heredoc markers and the manifest BEGIN/END lines.
const newVerTag = `v${newVer}-${ts}`;

console.log(`Bumping: ${oldVer} -> ${newVer}`);
console.log(`Tag:     ${newVerTag}`);

// ---------------------------------------------------------------------------
// Parse ALL heredoc blocks BEFORE any replacement
// We need the original content to compute correct MD5s
// ---------------------------------------------------------------------------
// Matches the opening line of a quoted heredoc write, capturing the
// destination path (with its $VAR prefix) and the full versioned marker.
const HEREDOC_RE = /^\s*cat > "(\$[A-Z_]+\/[^"]+)" << '(RAGWEED-[A-Z0-9]+-v[\d.]+-\d{8}-\d{6}-\d{3})'\s*$/;

// Flags for special-case files
// install_only: extracted and run during install, deleted before install finishes
// conditional:  only written if destination does not already exist
const SPECIAL = {
    'RAGWEED-S7B': { install_only: true },
    'RAGWEED-SYS': { conditional:  true },
};

const lines    = src.split('\n');
const heredocs = [];  // { prefix, oldMarker, dest, startLine, endLine, content, lineCount, md5 }

for (let i = 0; i < lines.length; i++) {
    const m = lines[i].match(HEREDOC_RE);
    if (!m) continue;
    const dest      = m[1];
    const oldMarker = m[2];
    // HEREDOC_RE guarantees the marker starts with "RAGWEED-XXX-", so this match cannot fail.
    const prefix    = oldMarker.match(/^(RAGWEED-[A-Z0-9]+)-/)[1];
    const startLine = i + 1;  // first line of content
    let   endLine   = startLine;
    // Scan forward to the closing marker (a bare line equal to the marker).
    while (endLine < lines.length && lines[endLine] !== oldMarker) endLine++;
    if (endLine >= lines.length) {
        console.error(`Error: closing marker not found for ${oldMarker} (opened at line ${i+1})`);
        process.exit(1);
    }
    const content   = lines.slice(startLine, endLine).join('\n');
    const lineCount = endLine - startLine;
    const md5       = crypto.createHash('md5').update(content).digest('hex');
    if (lineCount === 0) {
        console.error(`Error: heredoc for ${prefix} (${dest}) is empty`);
        process.exit(1);
    }
    heredocs.push({
        prefix, oldMarker, dest, startLine, endLine,
        content, lineCount, md5,
        ...( SPECIAL[prefix] || {} )
    });
    console.log(`  ${prefix.padEnd(12)} ${String(lineCount).padStart(5)} lines  ${md5.slice(0,16)}  ${dest}`);
}

if (heredocs.length === 0) {
    console.error('Error: no heredoc blocks found');
    process.exit(1);
}
console.log(`\nFound ${heredocs.length} heredoc files`);

// ---------------------------------------------------------------------------
// Render the manifest table that gets embedded into the installer.
// Output format:
//   RAGWEED_MANIFEST_BEGIN="<verTag>"
//   # PREFIX|DEST|LINES|MD5|FLAGS
//   ...
//   RAGWEED_MANIFEST_END="<verTag>"
// FLAGS is 'install_only', 'conditional', or empty.
// ---------------------------------------------------------------------------
function buildManifestTable(hdocs, verTag) {
    const table = [`RAGWEED_MANIFEST_BEGIN="${verTag}"`];
    for (const entry of hdocs) {
        let flags = '';
        if (entry.install_only) flags = 'install_only';
        else if (entry.conditional) flags = 'conditional';
        table.push('# ' + entry.prefix + '|' + entry.dest + '|'
                 + entry.lineCount + '|' + entry.md5 + '|' + flags);
    }
    table.push(`RAGWEED_MANIFEST_END="${verTag}"`);
    return table.join('\n');
}

const newManifest = buildManifestTable(heredocs, newVerTag);

// ---------------------------------------------------------------------------
// Line-by-line replacement  --  match known line patterns, replace whole line.
// Nothing else on the line is touched, so no variable/pattern corruption.
// ---------------------------------------------------------------------------

// Build oldMarker -> newMarker lookup
const markerMap = new Map();
for (const h of heredocs) {
    markerMap.set(h.oldMarker, h.prefix + '-' + newVerTag);
}

// Rules are checked in order; the first match wins and the rest are skipped.
const outLines = src.split('\n').map(ln => {
    const t = ln.trim();

    // 1. Heredoc marker lines (open: cat > ... << 'RAGWEED-X-vOLD'  or close: bare RAGWEED-X-vOLD)
    for (const [oldM, newM] of markerMap) {
        if (ln.includes(oldM)) return ln.split(oldM).join(newM);
    }

    // 2. RAGWEED_INSTALL_VERSION="OLD"
    if (t.startsWith('RAGWEED_INSTALL_VERSION='))
        return 'RAGWEED_INSTALL_VERSION="' + newVer + '"';

    // 3. RAGWEED_VERSION="OLD"  (inside run.sh)
    if (t.startsWith('RAGWEED_VERSION='))
        return 'RAGWEED_VERSION="' + newVer + '"';

    // 4. // VERSION: OLD  (comment at top of JS files)
    if (t.startsWith('// VERSION:'))
        return '// VERSION: ' + newVer;
    // 4b. const VERSION = 'OLD'  (inside JS files  --  kept for any that still use it)
    if (t.startsWith("const VERSION = '"))
        return ln.replace(/const VERSION = '[^']*'/, "const VERSION = '" + newVer + "'");
    // 4c. # RAGWEED_VERSION=OLD  (shell/bash files: run.sh, config.sh)
    if (t.startsWith('# RAGWEED_VERSION='))
        return '# RAGWEED_VERSION=' + newVer;
    // 4d. <!-- RAGWEED_VERSION=OLD -->  (HTML files: index.html)
    if (t.startsWith('<!-- RAGWEED_VERSION='))
        return '<!-- RAGWEED_VERSION=' + newVer + ' -->';
    // 4e. .\" RAGWEED_VERSION=OLD  (man page: ragweed.1)
    if (t.startsWith('.\\" RAGWEED_VERSION='))
        return '.\\" RAGWEED_VERSION=' + newVer;
    // 4f. // RAGWEED_VERSION=OLD  (JS/CJS files using RAGWEED_VERSION= variant)
    if (t.startsWith('// RAGWEED_VERSION='))
        return '// RAGWEED_VERSION=' + newVer;

    // 5. "version": "OLD"  (package.json)
    // NOTE(review): hardcodes two-space indent and trailing comma -- matches
    // the package.json this installer writes; fragile for other JSON files.
    if (t.startsWith('"version":'))
        return '  "version": "' + newVer + '",';

    // 6. MANIFEST lines  --  handled as a block below, leave unchanged here
    return ln;
});

let out = outLines.join('\n');
// 6. Replace manifest block (between RAGWEED_MANIFEST_BEGIN and RAGWEED_MANIFEST_END)
//    If no manifest block exists yet, insert one after RAGWEED_INSTALL_VERSION line
//    () => replacer avoids JS's special $-patterns in the replacement string.
const manifestBeginRe = /RAGWEED_MANIFEST_BEGIN="[^"]*"\n[\s\S]*?\nRAGWEED_MANIFEST_END="[^"]*"/;
if (manifestBeginRe.test(out)) {
    out = out.replace(manifestBeginRe, () => newManifest);
} else {
    // First time  --  insert after RAGWEED_INSTALL_VERSION line
    out = out.replace(
        /^(RAGWEED_INSTALL_VERSION="[^"]+")$/m,
        () => `RAGWEED_INSTALL_VERSION="${newVer}"\n${newManifest}`
    );
}

// ---------------------------------------------------------------------------
// Verify the output looks sane
// ---------------------------------------------------------------------------
const errors = [];

// Check version strings updated
if (!out.includes(`RAGWEED_INSTALL_VERSION="${newVer}"`))
    errors.push('RAGWEED_INSTALL_VERSION not updated');
if (!out.includes(`RAGWEED_MANIFEST_BEGIN="${newVerTag}"`))
    errors.push('Manifest BEGIN not present');
if (!out.includes(`RAGWEED_MANIFEST_END="${newVerTag}"`))
    errors.push('Manifest END not present');

// Check each new marker appears exactly twice (open + close)
for (const h of heredocs) {
    const newMarker = `${h.prefix}-${newVerTag}`;
    // Escape regex metacharacters in the marker before counting occurrences.
    const count = (out.match(new RegExp(newMarker.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g')) || []).length;
    if (count !== 2)
        errors.push(`${newMarker}: expected 2 occurrences, found ${count}`);
}

// Check no old version tag remains in bash body (skip heredoc content)
const oldTag = heredocs[0].oldMarker.match(/-(v\d+\.\d+\.\d+-\d{8}-\d{6}-\d{3})$/)[1];
{
    // Walk the output tracking whether we are inside a heredoc; only lines in
    // the bash body count as stale-tag hits. (Shadows the outer outLines.)
    const outLines = out.split('\n');
    let inHD = false, hdMarker = '';
    let oldTagHits = 0;
    for (const ln of outLines) {
        if (!inHD) {
            const m = ln.match(/^\s*cat > "[^"]+" << '(RAGWEED-[^']+)'/);
            if (m) { hdMarker = m[1]; inHD = true; continue; }
            if (ln.includes(oldTag)) oldTagHits++;
        } else {
            if (ln === hdMarker) { inHD = false; hdMarker = ''; }
        }
    }
    if (oldTagHits > 0)
        errors.push(`Old version tag ${oldTag} still appears ${oldTagHits} times in bash body`);
}

if (errors.length > 0) {
    console.error('\nVerification FAILED:');
    errors.forEach(e => console.error('  ✗  ' + e));
    process.exit(1);
}

// ---------------------------------------------------------------------------
// Write output
// ---------------------------------------------------------------------------
fs.writeFileSync(installerPath, out);

console.log('\nVerification passed:');
console.log(`  ✓  ${heredocs.length} heredoc markers updated`);
console.log(`  ✓  manifest table written (${heredocs.length} entries)`);
console.log(`  ✓  all version strings updated`);
console.log(`  ✓  no old version tag remaining`);
console.log(`\nInstaller saved: ${installerPath}`);
console.log(`New version:     ${newVer}`);
console.log(`Lines:           ${out.split('\n').length}`);
RAGWEED-BMP-v1.0.102-20260319-000014-473
chmod +x "$PROJECT_DIR/bump.js"
ok "bump.js written"

# inject_preflight.js: one-shot tool that inserts the PREFLIGHT A-D bash
# sections into this installer ahead of STEP 1.
cat > "$PROJECT_DIR/inject_preflight.js" << 'RAGWEED-IPF-v1.0.102-20260319-000014-473'
#!/usr/bin/env node
// VERSION: 1.0.102
// inject_preflight.js
// Injects four preflight steps into the installer immediately before STEP 1:
//   PREFLIGHT A: Backup existing installed files to OLD/<prev-version>/
//   PREFLIGHT B: Extract all heredoc files to their final locations
//   PREFLIGHT C: Manifest verification (non-empty, line count, MD5)
//   PREFLIGHT D: OS version check (Ubuntu >= 22.04.5)
//
// Usage: node inject_preflight.js [installer]
// Then:  node bump.js [installer]
//
// Rules:
//   - JS only. No Python.
//   - All string.replace() calls use () => replacement to avoid JS special
//     $ replacement patterns ($`, $&, $', $1 etc.)
//   - Aborts with a clear error if insertion point not found or already present.
'use strict';
const fs = require('fs');

const installerPath = process.argv[2] || '010-ragweed-v10.sh';
if (!fs.existsSync(installerPath)) {
    console.error(`Error: installer not found: ${installerPath}`);
    process.exit(1);
}

let src = fs.readFileSync(installerPath, 'utf8');

// Anchor: the literal two-line banner that opens STEP 1 in the installer.
const INSERT_BEFORE = '# =============================================================================\n# STEP 1: System Dependencies';
if (!src.includes(INSERT_BEFORE)) {
    console.error('Error: STEP 1 insertion point not found in installer');
    process.exit(1);
}
// Idempotence guard: refuse to inject a second copy of the preflight block.
if (src.includes('# PREFLIGHT A:')) {
    console.error('Error: preflight block already present  --  remove it before re-injecting');
    process.exit(1);
}

// ---------------------------------------------------------------------------
// Build the preflight bash block as an array of lines then join.
// No template literals with $-variables to avoid any risk of JS interpolation
// confusion when the string is later used as a replace() argument.
//
// ESCAPING NOTE: in JS single-quoted strings an unrecognized escape drops the
// backslash ('\d' === 'd', '\$' === '$'). Patterns that must reach bash with
// a literal backslash use '\\'; digit classes use [0-9] instead of \d so no
// backslash is needed at all. \x27 is a single quote.
// ---------------------------------------------------------------------------
const B = [
'# =============================================================================',
'# PREFLIGHT A: Backup existing installed files to OLD/<prev-version>/',
'# =============================================================================',
'ph "PREFLIGHT A: Backing up previous installation"',
'',
'_OLD_DIR="$PROJECT_DIR/OLD"',
'',
'# Extract version string from an installed file by type.',
'# Prints the version found, "template" for index.html, "none" for unversioned',
'# files, or empty string if the pattern is not found.',
'_extract_file_version() {',
'    local fpath="$1" ext="${1##*.}"',
'    case "$ext" in',
'        js|mjs)',
// FIX: was [\d.] which JS collapses to [d.] (never matches a version digit),
// so every versioned file was backed up as "unknown". [0-9.] needs no escape.
'            grep -oP "(?<=const VERSION = \x27)[0-9.]+(?=\x27)" "$fpath" 2>/dev/null | head -1 ;;',
'        sh)',
'            grep -oP "(?<=RAGWEED_VERSION=\")[0-9.]+(?=\")" "$fpath" 2>/dev/null | head -1 ||',
'            grep -oP "(?<=RAGWEED_INSTALL_VERSION=\")[0-9.]+(?=\")" "$fpath" 2>/dev/null | head -1 ;;',
'        json)',
'            grep -oP "(?<=\"version\": \")[0-9.]+(?=\")" "$fpath" 2>/dev/null | head -1 ;;',
'        html)',
'            # index.html uses __VERSION__ as a runtime template placeholder',
'            echo "template" ;;',
'        txt|1)',
'            # system_prompt.txt and man page carry no version string',
'            echo "none" ;;',
'        *)',
'            echo "" ;;',
'    esac',
'}',
'',
'# All files managed by this installer, relative to PROJECT_DIR',
'_MANAGED_FILES=(',
'    "run.sh"',
'    "package.json"',
'    "scripts/pickle.js"',
'    "scripts/collections.js"',
'    "scripts/query.js"',
'    "scripts/web.js"',
'    "scripts/index.html"',
'    "scripts/app.js"',
'    "scripts/lf.js"',
'    "scripts/webc.js"',
'    "scripts/ingest.js"',
'    "system_prompt.txt"',
'    "scripts/annotation_prompt.txt"',
'    "scripts/help.txt"',
'    "scripts/prompts.json"',
'    "scripts/annotation_test_runner.cjs"',
'    "docs/ragweed.1"',
')',
'',
'_backup_errors=0',
'_backup_count=0',
'_backup_version=""',
'',
'for _rel in "${_MANAGED_FILES[@]}"; do',
'    _fpath="$PROJECT_DIR/$_rel"',
'    [ -f "$_fpath" ] || continue   # fresh install  --  nothing to back up',
'',
'    _fver="$(_extract_file_version "$_fpath")"',
'    _dest_ver=""',
'',
'    case "$_fver" in',
'        "")',
'            warn "$_rel: no version string found  --  backing up to OLD/unknown/"',
'            _backup_errors=$((_backup_errors + 1))',
'            _dest_ver="unknown"',
'            ;;',
'        template)',
'            info "$_rel: template file  --  no version check"',
'            _dest_ver="${_backup_version:-unknown}"',
'            ;;',
'        none)',
'            info "$_rel: no version string expected"',
'            _dest_ver="${_backup_version:-unknown}"',
'            ;;',
'        *)',
'            _dest_ver="$_fver"',
'            if [ -z "$_backup_version" ]; then',
'                _backup_version="$_fver"',
'            elif [ "$_fver" != "$_backup_version" ]; then',
'                warn "$_rel: version mismatch  --  file says $_fver, expected $_backup_version"',
'                warn "  backing up to OLD/$_fver/ instead of OLD/$_backup_version/"',
'                _backup_errors=$((_backup_errors + 1))',
'            fi',
'            ;;',
'    esac',
'',
'    _backup_dest="$_OLD_DIR/$_dest_ver/$(dirname "$_rel")"',
'    mkdir -p "$_backup_dest"',
'    cp "$_fpath" "$_backup_dest/"',
'    _backup_count=$((_backup_count + 1))',
'done',
'',
'if [ $_backup_count -gt 0 ]; then',
'    ok "$_backup_count files backed up to OLD/${_backup_version:-unknown}/"',
'    if [ $_backup_errors -gt 0 ]; then',
'        warn "$_backup_errors file(s) had version issues  --  see warnings above  --  continuing"',
'    fi',
'else',
'    info "No previous installation found  --  fresh install"',
'fi',
'',
'# =============================================================================',
'# PREFLIGHT B: Extract all heredoc files to their final locations',
'# =============================================================================',
'ph "PREFLIGHT B: Extracting installer files"',
'',
'_INSTALLER_PATH="${BASH_SOURCE[0]}"',
'mkdir -p "$SCRIPTS_DIR" "$PROJECT_DIR/docs"',
'',
'# Extract a named heredoc block from this installer into a destination file.',
'# Finds the full marker (RAGWEED-XXX-vN.N.N-TIMESTAMP) by prefix, then uses',
'# awk to print only the lines between the opening and closing markers.',
'_do_extract() {',
'    local prefix="$1" dest="$2"',
'    local full_marker',
'    full_marker=$(grep -oP "(?<=<< \x27)(${prefix}-[^\x27]+)(?=\x27)" "$_INSTALLER_PATH" | head -1)',
'    if [ -z "$full_marker" ]; then',
'        err "Cannot find heredoc marker for $prefix in installer"',
'        return 1',
'    fi',
'    awk -v m="$full_marker" \'',
'        found && $0 == m { exit }',
'        found             { print }',
'        index($0, "\x27" m "\x27") { found=1 }',
'    \' "$_INSTALLER_PATH" > "$dest"',
'    if [ ! -s "$dest" ]; then',
'        err "Extracted file is empty: $dest"',
'        return 1',
'    fi',
'    ok "  $(basename "$dest")  ($(wc -l < "$dest") lines)"',
'    return 0',
'}',
'',
'_extract_errors=0',
'',
'_do_extract "RAGWEED-PKG" "$PROJECT_DIR/package.json"          || _extract_errors=$((_extract_errors+1))',
'_do_extract "RAGWEED-PCK" "$SCRIPTS_DIR/pickle.js"             || _extract_errors=$((_extract_errors+1))',
'_do_extract "RAGWEED-COL" "$SCRIPTS_DIR/collections.js"        || _extract_errors=$((_extract_errors+1))',
'_do_extract "RAGWEED-S7B" "$PROJECT_DIR/step7b-init.mjs"       || _extract_errors=$((_extract_errors+1))',
'_do_extract "RAGWEED-QRY" "$SCRIPTS_DIR/query.js"              || _extract_errors=$((_extract_errors+1))',
'_do_extract "RAGWEED-WEB" "$SCRIPTS_DIR/web.js"                || _extract_errors=$((_extract_errors+1))',
'_do_extract "RAGWEED-HTM" "$SCRIPTS_DIR/index.html"            || _extract_errors=$((_extract_errors+1))',
'_do_extract "RAGWEED-APP" "$SCRIPTS_DIR/app.js"                || _extract_errors=$((_extract_errors+1))',
'_do_extract "RAGWEED-LFJ" "$SCRIPTS_DIR/lf.js"                 || _extract_errors=$((_extract_errors+1))',
'_do_extract "RAGWEED-RUN" "$PROJECT_DIR/run.sh"                || _extract_errors=$((_extract_errors+1))',
'_do_extract "RAGWEED-WBC" "$SCRIPTS_DIR/webc.js"               || _extract_errors=$((_extract_errors+1))',
'_do_extract "RAGWEED-ING" "$SCRIPTS_DIR/ingest.js"             || _extract_errors=$((_extract_errors+1))',
'_do_extract "RAGWEED-HLP" "$SCRIPTS_DIR/help.txt"              || _extract_errors=$((_extract_errors+1))',
'# RAGWEED-SYS (system_prompt.txt) is conditional  --  handled in STEP 13',
'# RAGWEED-MAN (ragweed.1) extracted here if present',
'if grep -q "RAGWEED-MAN-" "$_INSTALLER_PATH" 2>/dev/null; then',
'    _do_extract "RAGWEED-MAN" "$PROJECT_DIR/docs/ragweed.1"  || _extract_errors=$((_extract_errors+1))',
'fi',
'',
'if [ $_extract_errors -gt 0 ]; then',
'    err "$_extract_errors file(s) failed to extract  --  installation cannot continue"',
'    err "Previous versions are preserved in OLD/${_backup_version:-unknown}/"',
'    exit 1',
'fi',
'ok "All files extracted"',
'',
'# =============================================================================',
'# PREFLIGHT C: Manifest verification',
'# =============================================================================',
'ph "PREFLIGHT C: Verifying installed files against manifest"',
'',
'_verify_errors=0',
'# Parse manifest entries from this installer',
'declare -A _mv_dest _mv_lines _mv_sha _mv_flags',
'',
'while IFS="|" read -r _pfx _dst _lc _sha _flags; do',
'    [[ "$_pfx" =~ ^RAGWEED- ]] || continue',
// FIX: was /\$PROJECT_DIR/... which JS collapsed to /$PROJECT_DIR/..., making
// bash expand the search pattern to the same value as the replacement (a
// no-op). '\\$' delivers a literal \$ to bash so the manifest's literal
// "$PROJECT_DIR" text is replaced with the expanded path.
'    _dst="${_dst/\\$PROJECT_DIR/$PROJECT_DIR}"',
'    _dst="${_dst/\\$SCRIPTS_DIR/$SCRIPTS_DIR}"',
'    _mv_dest["$_pfx"]="$_dst"',
'    _mv_lines["$_pfx"]="$_lc"',
'    _mv_sha["$_pfx"]="$_sha"',
'    _mv_flags["$_pfx"]="$_flags"',
'done < <(sed -n "/^RAGWEED_MANIFEST_BEGIN=/,/^RAGWEED_MANIFEST_END=/p" "$_INSTALLER_PATH")',
'',
'if [ ${#_mv_dest[@]} -eq 0 ]; then',
'    err "Manifest table not found or empty in installer  --  cannot verify"',
'    exit 1',
'fi',
'info "Manifest: ${#_mv_dest[@]} entries"',
'',
'for _pfx in "${!_mv_dest[@]}"; do',
'    _dst="${_mv_dest[$_pfx]}"',
'    _exp_lines="${_mv_lines[$_pfx]}"',
'    _exp_sha="${_mv_sha[$_pfx]}"',
'    _flags="${_mv_flags[$_pfx]}"',
'',
'    # install_only files are extracted and deleted during install  --  skip verify',
'    [ "$_flags" = "install_only" ] && continue',
'',
'    # conditional files may not exist yet on a fresh install',
'    if [ "$_flags" = "conditional" ] && [ ! -f "$_dst" ]; then',
'        info "$(basename "$_dst"): conditional  --  not yet written, skipping verify"',
'        continue',
'    fi',
'',
'    # 1. Non-empty check',
'    if [ ! -s "$_dst" ]; then',
'        err "$(basename "$_dst"): missing or empty after extraction"',
'        _verify_errors=$((_verify_errors + 1))',
'        continue',
'    fi',
'',
'    _ok=1',
'',
'    # 2. Line count check',
'    _got_lines=$(wc -l < "$_dst")',
'    if [ "$_got_lines" != "$_exp_lines" ]; then',
'        err "$(basename "$_dst"): line count mismatch  --  expected $_exp_lines, got $_got_lines"',
'        _verify_errors=$((_verify_errors + 1))',
'        _ok=0',
'    fi',
'',
'    # 3. MD5 check',
// FIX: was \$1 which JS collapsed to $1, so bash expanded the positional
// parameter inside the double-quoted awk program and the hash field was
// never isolated. '\\$1' delivers \$1 to bash, i.e. a literal $1 for awk.
'    _got_md5=$(md5sum "$_dst" 2>/dev/null | awk "{print \\$1}")',
'    if [ -n "$_got_md5" ] && [ "$_got_md5" != "$_exp_sha" ]; then',
'        err "$(basename "$_dst"): MD5 mismatch"',
'        err "  expected: $_exp_sha"',
'        err "  got:      $_got_md5"',
'        _verify_errors=$((_verify_errors + 1))',
'        _ok=0',
'    fi',
'',
'    [ $_ok -eq 1 ] && ok "  $(basename "$_dst")  verified ($_got_lines lines)"',
'done',
'',
'if [ $_verify_errors -gt 0 ]; then',
'    err "$_verify_errors file(s) failed manifest verification  --  installation cannot continue"',
'    err "Previous versions are preserved in OLD/${_backup_version:-unknown}/"',
'    exit 1',
'fi',
'ok "All files verified against manifest"',
'',
'# =============================================================================',
'# PREFLIGHT D: OS version check  --  Ubuntu >= 22.04.5 required',
'# =============================================================================',
'ph "PREFLIGHT D: OS version check"',
'',
'if [ ! -f /etc/os-release ]; then',
'    err "Cannot determine OS  --  /etc/os-release not found"',
'    err "RAGWeed requires Ubuntu 22.04.5 LTS or later."',
'    exit 1',
'fi',
'',
'_os_id=$(grep "^ID=" /etc/os-release | cut -d= -f2 | tr -d \'"\')',
'_os_ver=$(grep "^VERSION_ID=" /etc/os-release | cut -d= -f2 | tr -d \'"\')',
'',
'if [ "$_os_id" != "ubuntu" ]; then',
'    err "Unsupported OS: $_os_id $_os_ver"',
'    err "RAGWeed requires Ubuntu 22.04.5 LTS or later."',
'    err "Please upgrade your OS and re-run this installer."',
'    exit 1',
'fi',
'',
'_os_major=$(echo "$_os_ver" | cut -d. -f1)',
'_os_minor=$(echo "$_os_ver" | cut -d. -f2)',
'_os_patch=$(echo "$_os_ver" | cut -d. -f3)',
'_os_patch=${_os_patch:-0}',
'',
'_req_major=22; _req_minor=04; _req_patch=5',
'_os_ok=0',
'',
'if   [ "$_os_major" -gt "$_req_major" ]; then',
'    _os_ok=1',
'elif [ "$_os_major" -eq "$_req_major" ] && [ "$_os_minor" -gt "$_req_minor" ]; then',
'    _os_ok=1',
'elif [ "$_os_major" -eq "$_req_major" ] && [ "$_os_minor" -eq "$_req_minor" ] \\',
'  && [ "$_os_patch" -ge "$_req_patch" ]; then',
'    _os_ok=1',
'fi',
'',
'if [ $_os_ok -eq 0 ]; then',
'    err "Ubuntu $_os_ver is below the minimum required version 22.04.5 LTS"',
'    err "Please run:  sudo do-release-upgrade"',
'    err "Or download Ubuntu 22.04.5 LTS from https://ubuntu.com/download/server"',
'    exit 1',
'fi',
'',
'ok "Ubuntu $_os_ver"',
'',
];

// Joined bash text, with a trailing newline so it butts cleanly against STEP 1.
const PREFLIGHT = B.join('\n') + '\n';

// Insert before STEP 1 using () => to avoid JS $ special replacement patterns
src = src.replace(INSERT_BEFORE, () => PREFLIGHT + INSERT_BEFORE);

// Verify all four sections present
const checks = ['PREFLIGHT A:', 'PREFLIGHT B:', 'PREFLIGHT C:', 'PREFLIGHT D:'];
let failed = false;
for (const c of checks) {
    if (!src.includes('# ' + c)) {
        console.error(`Error: insertion failed  --  # ${c} not found after replace`);
        failed = true;
    }
}
if (failed) process.exit(1);

// All four banners confirmed -- safe to overwrite the installer in place.
fs.writeFileSync(installerPath, src);
console.log('Preflight block injected successfully');
console.log(`Lines: ${src.split('\n').length}`);
console.log(`Next: node bump.js ${installerPath}`);
RAGWEED-IPF-v1.0.102-20260319-000014-473

# STEP 11e: Writing patch.js
# NOTE(review): step label "11e" is reused from the developer-tools step above;
# confirm intended numbering.
ph "STEP 11e: Writing patch.js"
cat > "$PROJECT_DIR/patch.js" << 'RAGWEED-PCH-v1.0.102-20260319-000014-473'
#!/usr/bin/env node
// VERSION: 1.0.102
// patch.js -- safe string replacement tool for 010-ragweed-v10.sh
// Usage: node patch.js <patchfile.patch.js>
//
// A patch file exports: { file, patches: [ { old, new, count? } ] }
// count defaults to 1. Pass 0 to assert the string does NOT exist.
// Aborts on any mismatch -- never writes a partial result.
// Two-phase: every patch's occurrence count is verified first, and the
// target file is rewritten only if all of them match.

import fs from 'fs';
import path from 'path';

const patchFile = process.argv[2];
if (!patchFile) { console.error('Usage: node patch.js <patchfile.patch.js>'); process.exit(1); }

// The patch spec is itself an ES module; resolve relative paths against cwd.
const { default: spec } = await import(path.resolve(patchFile));
const { file, patches } = spec;

let text = fs.readFileSync(file, 'utf8');

// Phase 1: count occurrences of each 'old' string without modifying anything.
let allMatch = true;
patches.forEach((p, idx) => {
  const want = p.count ?? 1;
  const have = text.split(p.old).length - 1;
  if (have !== want) {
    console.error(`PATCH ${idx+1}: expected ${want} occurrence(s), found ${have}`);
    console.error(`  First 60 chars of old: ${JSON.stringify(p.old.slice(0, 60))}`);
    allMatch = false;
  }
});

if (!allMatch) { console.error('Aborted -- file not modified.'); process.exit(1); }

// Phase 2: apply every replacement (split/join avoids regex semantics),
// then write the result back in a single operation.
for (const p of patches) text = text.split(p.old).join(p.new);

fs.writeFileSync(file, text);
console.log(`Patched ${file} (${patches.length} replacement${patches.length !== 1 ? 's' : ''})`);
RAGWEED-PCH-v1.0.102-20260319-000014-473

# STEP 11d: Writing docs/ragweed.1 (man page)
# roff source for the ragweed(1) manual page (displayed via `./run.sh man`).
ph "STEP 11d: Writing docs/ragweed.1"
mkdir -p "$PROJECT_DIR/docs"
cat > "$PROJECT_DIR/docs/ragweed.1" << 'RAGWEED-MAN-v1.0.102-20260319-000014-473'
.\" RAGWEED_VERSION=1.0.102
.TH "RAGWEED" "1" "2026" "RAGWeed online collection search and analysis system" "User Commands"
.SH NAME
ragweed \- local RAG system for analysis and summarization of RAGWeed data collections
.SH SYNOPSIS
\fB./run.sh\fR \fIcommand\fR [\fIoptions\fR]
.SH DESCRIPTION
\fBragweed\fR is a local retrieval-augmented generation system for indexing and
querying large document corpora. All embedding is performed locally via Ollama.
Queries can use Anthropic, OpenAI, Gemini, or a local Ollama LLM.
Only retrieved chunks (not the corpus itself) travel to any cloud API per query.
.PP
The system is a single-file bash installer that builds a complete runtime under
\fB~/RW/\fR requiring no further installs. Pure JavaScript/Node.js  --  no Python
anywhere in the runtime. Uses SQLite and a binary HNSW vector index directly
(no ChromaDB server process). Each collection stores its vectors and metadata in its
own \fIrag.sqlite3\fR file alongside the HNSW binary index -- collections are fully
independent and can be added, removed, or synced individually.
.PP
Vector search uses HNSW graph traversal with on-demand disk reads -- no vectors
are kept in RAM. Only neighbor slot data is used for traversal; individual vectors
are read from disk as needed. Memory usage stays near-zero regardless of corpus size.
.PP
Collections are discovered dynamically: any subdirectory under SOURCE_DIR is a
queryable collection. Files are deduplicated by content so renamed copies are
not re-indexed. Active collections are configured via \fB./run.sh webc\fR.
.SH COMMANDS
.TP
\fBweb\fR
Start the browser-based query interface (default port 3000).
Open http://localhost:3000 in any browser.
.TP
\fBwebc\fR
Start the web configuration UI (default port 3001).
Use this to set API keys, manage active collections, tune ingest settings,
download Ollama models, and edit the system prompt.
.TP
\fBingest\fR
Ingest all collections from SOURCE_DIR. Skips files already indexed.
.TP
\fBingest \-c \fIname [name...]\fR
Ingest specific collections only.
.TP
\fBingest \-\-list\fR
Show collection status: chunk counts, file counts, dupe counts per collection.
.TP
\fBingest \-\-failed\fR (alias: \fB\-\-unindexed\fR)
List all files that have been attempted but not successfully indexed (chunks=0).
These will be retried automatically on the next ingest run.
Useful for auditing after a failed or interrupted ingest.
.TP
\fBingest \-\-zip \fIpolicy\fR
Override ZIP extraction policy for this run.
Policy values: \fBalways\fR, \fBnever\fR, \fBask\fR.
.TP
\fBquery\fR / \fBtui\fR
Open the TUI query interface. If query text is provided as an argument, runs in
one-shot mode: executes the query, prints the answer, and exits.
.RS
.TP
\fB\-c\fR \fIcollection\fR
Restrict the query to the named collection. Repeat for multiple collections.
If omitted, uses ACTIVE_COLLECTIONS from Config.
.RE
.TP
\fBannotate\fR
Run the annotation prompt test runner against local Ollama models.
.RS
.TP
\fB\-\-auto\fR
Test all RAM-fitting models, stopping at the first model that meets the F1 threshold.
.TP
\fB\-\-all\fR
Test all installed models regardless of available RAM or threshold.
.TP
\fB\-\-model\fR \fIname\fR
Test a specific named model only.
.TP
\fB\-\-threshold\fR \fIN\fR
Override the F1 threshold percentage (default: ANNOTATION_TEST_THRESHOLD in Config).
.TP
\fB\-\-verbose\fR
Show full model responses for each test case.
.RE
.TP
\fBconfig\fR
Open the TUI configuration menu (bash-based). Covers all Config settings interactively.
Also accessible as \fB!cfg\fR from within the query TUI.
.TP
\fBdiagnose\fR
Check Node.js, Ollama, collections, and Config settings.
.TP
\fBlogs\fR
Show recent session logs.
.TP
\fBinstall\fR / \fBupgrade\fR
Re-run the installer (preserves Config and existing index data).
.TP
\fBman\fR
Display this manual page.
.SH WEB INTERFACE (port 3000)
The web UI accepts natural language queries and displays cited answers.
All active collections are searched. The provider (Claude/OpenAI/Gemini/local)
is set in \fBConfig\fR or via the webc UI.
Citations are shown as \fB[N]\fR inline; click any citation to view the source passage.
.SH CONFIG UI  --  webc (port 3001)
.TP
\fBAI Services\fR
Set LLM provider and API keys. Button-group pickers for model selection.
.TP
\fBAI Downloads\fR
Download or remove Ollama embedding and LLM models.
.TP
\fBEmbeddings\fR
Set EMBED_MODEL and EMBED_OLLAMA_HOST.
.TP
\fBRetrieval\fR
Tune TOP_K, HNSW_EF, MIN_SCORE, MAX_TOKENS, CONTEXT_CHUNKS, ACTIVE_COLLECTIONS.
.TP
\fBPaths\fR
Set CHROMA_PATH and SOURCE_DIR.
.TP
\fBWeb\fR
Set WEB_PORT and WEB_PASSWORD.
.TP
\fBCollection URLs\fR
Map per-collection URL bases for source linking (path_map.json).
.TP
\fBIngest\fR
All ingest tuning knobs. \fBAdvise\fR button shows hardware-tuned recommendations.
.TP
\fBEmbed Models\fR
Shows which embed model each existing collection was built with.
.TP
\fBDebug\fR
Set DEBUG_LEVEL and edit system_prompt.txt inline.
.TP
\fBDownloadable Search\fR
Multi-provider web search (Claude/OpenAI/Gemini) to discover Ollama models.
Preset category chips: General, Coding, Reasoning, Vision, Uncensored,
Tiny (<4B), Embedding, Medical, Multilingual, Agents/Tools.
.SH CONFIGURATION
All settings are stored in \fIConfig\fR in the project directory.
Use \fB./run.sh webc\fR to edit interactively.
.PP
.TS
l l l.
Key	Default	Description
_
LLM_PROVIDER	claude	claude / openai / gemini / local
ANTHROPIC_API_KEY	(required)	Anthropic API key
ANTHROPIC_MODEL	claude-sonnet-4-6	Model for queries
OPENAI_API_KEY		OpenAI API key
OPENAI_MODEL	gpt-4o	OpenAI model
GEMINI_API_KEY		Google Gemini API key
GEMINI_MODEL	gemini-2.5-flash	Gemini model
LOCAL_LLM_MODEL		Ollama model name for local inference
ANNOTATION_MODEL		Ollama model for annotation (blank = use LOCAL_LLM_MODEL).
OLLAMA_HOST	http://localhost:11434	Ollama server URL
EMBED_MODEL	nomic-embed-text	Embedding model (Ollama)
TOP_K	64	Chunks retrieved per query
HNSW_EF	512	Graph search candidates (higher = better recall, slower)
MIN_SCORE	0	Minimum similarity score (0 = no cutoff)
MAX_TOKENS	4096	Max tokens for LLM context
CONTEXT_CHUNKS	64	Max chunks sent to LLM
ACTIVE_COLLECTIONS		Comma-separated active collection names
CHROMA_PATH	./chromadb	Vector store location
SOURCE_DIR	./source	Root directory for collections
WEB_PORT	3000	Web query UI port
WEBC_PORT	3001	Web config UI port
WEB_PASSWORD		Optional password for web UI
CHUNK_SIZE	512	Tokens per chunk (text/code)
CHUNK_SIZE_PDF	512	Tokens per chunk (PDF)
CHUNK_SIZE_AV	256	Tokens per chunk (audio/video)
CHUNK_OVERLAP_PCT	50	Overlap between chunks (percent)
INGEST_PROFILE	low	low / medium / high RAM preset
OCR_ENABLED	no	Enable tesseract OCR for image PDFs
WHISPER_ENABLED	yes	Enable audio/video transcription
ZIP_POLICY	never	ZIP extraction: always / never / ask
WHISPER_MODEL	(auto)	Path to ggml model file for whisper-cli
WHISPER_TIMEOUT_S	600	Max seconds per audio transcription
.TE
.PP
The system prompt sent with every query is stored in \fIsystem_prompt.txt\fR.
Edit it via the \fBDebug\fR section in webc. Changes take effect on the next query.
.SH PREPROCESSING PIPELINE
Each file passes through the following stages before embedding:
.IP 1. 4
\fBDeduplication\fR  --  Files already indexed are skipped (by content hash).
State stored in \fIdedup.sqlite3\fR. Files with zero chunks are retried on next ingest.
Use \fBingest --failed\fR to inspect files that repeatedly fail.
.IP 2. 4
\fBZIP extraction\fR  --  Archives extracted per ZIP_POLICY. Zip bomb detection via ratio threshold.
.IP 3. 4
\fBText extraction\fR  --  PDF via pdftotext + optional OCR. ODT/ODS/ODP via LibreOffice.
EPUB via zip/HTML extraction. RTF via unrtf. LaTeX via detex. Audio/video via whisper.cpp.
SVG via XML parse. Unknown types via strings fallback.
.IP 4. 4
\fBMagic-byte sniffing\fR  --  Extensionless files identified by first 64 bytes.
.IP 5. 4
\fBAudio transcription\fR  --  MP4/MP3/WAV/etc. transcribed via whisper-cli (whisper.cpp, C++)
if installed; skipped silently if not present.
.IP 6. 4
\fBChunking and embedding\fR  --  Text split into overlapping chunks of CHUNK_SIZE tokens,
embedded via Ollama and stored in SQLite + binary HNSW index.
Each new vector is inserted into the HNSW graph incrementally -- no offline rebuild required.
If a chunk exceeds the model context limit (Ollama HTTP 400), it is automatically
halved and the two embeddings averaged -- no data is lost.
.SH VECTOR INDEX
RAGWeed uses an HNSW (Hierarchical Navigable Small World) graph stored in
\fIdata_level0.bin\fR alongside each collection's SQLite database.
.PP
Each element contains: 32 neighbor IDs (128 bytes), a neighbor count prefix (4 bytes),
the float32 vector (dim*4 bytes), and an int64 label (8 bytes).
Total 3212 bytes per element for dim=768 (nomic-embed-text).
.PP
At query time, vectors are read from disk on demand via random seeks -- no vectors
are kept in RAM. The \fBHNSW_EF\fR parameter controls how many candidate nodes are
explored per query (higher = better recall, more disk reads, slower).
.PP
The HNSW graph is built incrementally during ingest. Existing flat collections
(built before HNSW support) can have graphs built offline using \fI010-build-graph.cjs\fR.
.SH FILES
.TP
\fIConfig\fR
Configuration file. Edit via \fB./run.sh webc\fR or manually. Permissions should be 600.
.TP
\fIsystem_prompt.txt\fR
System prompt sent with every query. Not overwritten on reinstall.
.TP
\fIchromadb/\fR
Vector store. Each collection occupies its own subdirectory containing:
\fIdata_level0.bin\fR (HNSW binary vectors and graph), \fIrag.sqlite3\fR (metadata),
\fIindex_meta.json\fR (dimensionality, element count, HNSW entry point).
Relocate via CHROMA_PATH.
.TP
\fIsource/\fR
Root for all collections. Each subdirectory is an independent collection.
.TP
\fIdedup.sqlite3\fR
Ingest tracking database. Records per-file MD5 hashes, chunk counts, and
capability results to avoid re-processing unchanged files.
.TP
\fIdata/path_map.json\fR
Per-collection URL base mappings for source linking in the web UI.
.TP
\fIlogs/ingest.log\fR
Full log from ingestion runs. Includes per-file dupe/skip/embed events.
.TP
\fIlogs/crash.log\fR
Stderr capture from the most recent ingest run.
.SH EXAMPLES
.PP
First-time setup:
.RS
.nf
./run.sh webc                     # configure API key and settings (port 3001)
mkdir source/book source/refs     # create collections
cp /path/to/corpus/* source/book/ # add files
./run.sh ingest                   # index everything
./run.sh web                      # start query UI (port 3000)
.fi
.RE
.PP
Ingest a single collection:
.RS
.nf
./run.sh ingest -c book
.fi
.RE
.PP
Check collection status:
.RS
.nf
./run.sh ingest --list
.fi
.RE
.PP
Check files that failed to index:
.RS
.nf
./run.sh ingest --failed
.fi
.RE
.PP
Re-run installer (preserves data):
.RS
.nf
./run.sh install
.fi
.RE
.SH COST
Anthropic Sonnet 4.6: $3.00/M input, $15.00/M output tokens.
A typical query costs $0.03\(en0.10. Only retrieved chunks travel to the API.
The full corpus stays local.
.SH SEE ALSO
.BR ollama (1),
.BR node (1),
.BR tesseract (1)
.SH AUTHORS
RAGWeed system documentation.
.SH COPYRIGHT
Copyright \(co Fred Cohen, 2026 - ALL RIGHTS RESERVED
.br
Patents: https://all.net/Notices.html
RAGWEED-MAN-v1.0.102-20260319-000014-473
# NOTE(review): these two lines finalize inject_preflight.js, which was
# written in an earlier step -- they appear here, after the patch.js and
# man-page heredocs, rather than with that step. Harmless, but confirm the
# ordering is intentional.
chmod +x "$PROJECT_DIR/inject_preflight.js"
ok "inject_preflight.js written"

# =============================================================================
# STEP 11b: Write scripts/webc.js (web config UI) -- REPLACED_MARKER
ph "STEP 11b: Writing scripts/webc.js"
cat > "$SCRIPTS_DIR/webc.js" << 'RAGWEED-WBC-v1.0.102-20260319-000014-473'
#!/usr/bin/env node
// VERSION: 1.0.102
const VERSION = '1.0.102';
// RAGWeed v10 -- Web Config UI (WebC)
// NOTE(review): the directive below is a no-op -- ES modules are always in
// strict mode, and a directive placed after other statements is not part of
// the directive prologue in any case.
'use strict';
import 'dotenv/config';
import http from 'http';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import { createRequire } from 'module';
import { getCollectionNames } from './collections.js';
import os from 'os';

// ES modules have no built-in __dirname; reconstruct it from import.meta.url.
const __dirname   = path.dirname(fileURLToPath(import.meta.url));
// lf.js source, read once at startup. Presumably embedded in a served page
// further down this file -- confirm at the usage site.
const LF_JS = fs.readFileSync(path.join(__dirname, 'lf.js'), 'utf8');
// Inline SVG markup used as the UI favicon.
const FAVICON_SVG = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32"><rect width="32" height="32" rx="4" fill="#1a1a2e"/><rect x="3" y="10" width="4" height="16" rx="1" fill="#f5f0e8"/><rect x="8" y="10" width="4" height="16" rx="1" fill="#f5f0e8"/><rect x="13" y="10" width="4" height="16" rx="1" fill="#f5f0e8"/><rect x="18" y="10" width="4" height="16" rx="1" fill="#f5f0e8"/><rect x="23" y="10" width="4" height="16" rx="1" fill="#f5f0e8"/><rect x="6" y="10" width="3" height="10" rx="1" fill="#111"/><rect x="11" y="10" width="3" height="10" rx="1" fill="#111"/><rect x="21" y="10" width="3" height="10" rx="1" fill="#111"/><circle cx="24" cy="5" r="2" fill="#c8a84b"/><rect x="25.5" y="1" width="1.5" height="6" fill="#c8a84b"/><rect x="19" y="2" width="8" height="1.5" fill="#c8a84b"/></svg>';
// Project layout: the Config file, the legacy prompt .txt files, and
// prompts.json all live under PROJECT_DIR (overridable via env).
const PROJECT_DIR = process.env.PROJECT_DIR || path.resolve(__dirname, '..');
const ENV_FILE    = path.join(PROJECT_DIR, 'Config');
const SYSPROMPT   = path.join(PROJECT_DIR, 'system_prompt.txt');
const ANNOTPROMPT = path.join(PROJECT_DIR, 'scripts', 'annotation_prompt.txt');
const PROMPTS_FILE = path.join(PROJECT_DIR, 'scripts', 'prompts.json');

// Built-in prompt defaults; values from prompts.json override these
// (see readPrompts). Only the local annotation prompt ships non-empty.
const DEFAULT_PROMPTS = {
  system_default: '',
  system_claude: '', system_openai: '', system_gemini: '', system_local: '',
  annotation_default: '',
  annotation_claude: '', annotation_openai: '', annotation_gemini: '',
  annotation_local: 'Does this excerpt relate to the query?\nIf yes: write one sentence explaining how.\nIf no: reply with only the word IRRELEVANT.\nNo other text. No explanation. No preamble.'
};

// Load the full prompt set: built-in defaults overlaid with prompts.json.
// If prompts.json is missing or unparseable, fall back to the legacy
// single-prompt .txt files for the two "default" slots.
function readPrompts() {
  const prompts = Object.assign({}, DEFAULT_PROMPTS);
  try {
    Object.assign(prompts, JSON.parse(fs.readFileSync(PROMPTS_FILE, 'utf8')));
  } catch(_) {
    try {
      const sys = fs.readFileSync(SYSPROMPT, 'utf8').trim();
      if (sys) prompts.system_default = sys;
    } catch(_) {}
    try {
      const ann = fs.readFileSync(ANNOTPROMPT, 'utf8').trim();
      if (ann) prompts.annotation_default = ann;
    } catch(_) {}
  }
  return prompts;
}
// Merge `obj` into the current prompt set and persist it as pretty JSON.
function writePrompts(obj) {
  const merged = Object.assign(readPrompts(), obj);
  fs.writeFileSync(PROMPTS_FILE, JSON.stringify(merged, null, 2), 'utf8');
}
// WebC listens on its own port, distinct from the query UI's WEB_PORT.
const PORT        = parseInt(process.env.WEBC_PORT || '3001', 10);
const HOSTNAME = os.hostname().replace(/\..*$/, ''); // short hostname only

// -- Config helpers ------------------------------------------------------------
// Raw text of the Config file; '' when it does not exist (yet).
function envRead() {
  let text = '';
  try { text = fs.readFileSync(ENV_FILE, 'utf8'); } catch(_) { /* no Config yet */ }
  return text;
}
// Look up `key` in Config; return `def` ('' when omitted) if absent.
// A single layer of surrounding quotes is stripped from the value.
function envGet(key, def) {
  const fallback = def === undefined ? '' : def;
  const match = envRead().match(new RegExp('^' + key + '=(.*)$', 'm'));
  if (!match) return fallback;
  return match[1].replace(/^["']|["']$/g, '');
}
// Set (or append) a key=value line in Config.
// The replacement is passed as a function so that `$`-sequences in `value`
// ("$&", "$'", "$1", ...) are written literally instead of being treated
// as String.replace() substitution patterns -- the same precaution this
// project already takes in inject_preflight.js.
// NOTE(review): `key` is interpolated into a RegExp unescaped; assumes keys
// are plain [A-Z0-9_] config names -- confirm no caller passes metachars.
function envSet(key, value) {
  let txt      = envRead();
  const line   = key + '=' + value;
  const re     = new RegExp('^' + key + '=.*$', 'm');
  txt = re.test(txt) ? txt.replace(re, () => line)
                     : txt + (txt.endsWith('\n') ? '' : '\n') + line + '\n';
  fs.writeFileSync(ENV_FILE, txt);
}
// Remove the key=value line (and its trailing newline) from Config.
function envDelete(key) {
  const pattern = new RegExp('^' + key + '=.*$\\n?', 'm');
  const cleaned = envRead().replace(pattern, '');
  fs.writeFileSync(ENV_FILE, cleaned);
}
// Render a secret for display: first four characters, an ellipsis, and the
// total length. Falsy/empty values render as "(not set)".
function masked(v) {
  if (!v) return '(not set)';
  const head = v.slice(0, 4);
  return `${head}...(${v.length} chars)`;
}
// Current system prompt text ('' when unset).
function sysPromptRead() {
  return readPrompts().system_default || '';
}
// Current annotation prompt text ('' when unset).
function annotPromptRead() {
  return readPrompts().annotation_default || '';
}
// Persist the annotation prompt; mirror it to the legacy .txt file so
// older consumers keep working.
function annotPromptWrite(txt) {
  writePrompts({ annotation_default: txt });
  try { fs.writeFileSync(ANNOTPROMPT, txt); } catch(_) {}
}
// Persist the system prompt; mirror it to the legacy .txt file so older
// consumers keep working.
function sysPromptWrite(txt) {
  writePrompts({ system_default: txt });
  try { fs.writeFileSync(SYSPROMPT, txt, 'utf8'); } catch(_) {}
}
// Human-readable line count of the system prompt, e.g. "12 lines".
function sysPromptLines() {
  const text = sysPromptRead();
  return text ? `${text.split('\n').length} lines` : '0 lines';
}

// -- Section definitions -----------------------------------------------------
// Declarative model of every settings section rendered by the config UI.
// Field flags: secret (masked in output), list (comma list), ollama (model
// picker), select/selectHints (button group + per-option help), hint (help
// text), def (default written when Config lacks the key). Sections carrying
// a leading-underscore flag (_aiDownloads, _embedModels, _prompts,
// _dlSearch, _debugOptimize) get special client-side rendering; see
// getConfig for how these flags are forwarded.
const SECTIONS = [
  { title: 'AI Services', fields: [
    { key:'LLM_PROVIDER', label:'Active provider', def:'claude',
      select:['claude','openai','gemini','local'],
      selectHints:{'claude':'Anthropic Claude. Requires ANTHROPIC_API_KEY.','openai':'OpenAI GPT. Requires OPENAI_API_KEY.','gemini':'Google Gemini. Requires GEMINI_API_KEY.','local':'Local Ollama model. No API key needed  --  runs on this machine.'},
      hint:'Sets the default LLM used for all queries. You can override per-query in the web UI.' },
    { key:'ANTHROPIC_API_KEY', label:'Claude API key',   def:'', secret:true },
    { key:'ANTHROPIC_MODEL',   label:'Claude model',     def:'claude-sonnet-4-6',
      select:['claude-opus-4-6','claude-sonnet-4-6','claude-haiku-4-5-20251001'],
      selectHints:{'claude-opus-4-6':'Most capable. Best for complex reasoning and synthesis.','claude-sonnet-4-6':'Balanced speed and quality. Best for everyday queries.','claude-haiku-4-5-20251001':'Fastest and cheapest. Best for simple lookups and high volume.'},
      hint:'Model used for all Claude queries.' },
    { key:'OPENAI_API_KEY',    label:'OpenAI API key',   def:'', secret:true },
    { key:'OPENAI_MODEL',      label:'OpenAI model',     def:'gpt-4o',
      select:['gpt-4o','gpt-4o-mini','gpt-4-turbo','o1-mini','o3-mini','gpt-4o-mini-search-preview','gpt-4o-search-preview'],
      selectHints:{'gpt-4o':'Flagship model. Fast, multimodal, strong reasoning.','gpt-4o-mini':'Lightweight and cheap. Good for simple tasks.','gpt-4-turbo':'Previous-gen flagship. Slower but proven.','o1-mini':'Compact reasoning model. Thinks before answering.','o3-mini':'Latest compact reasoning model. Best small-model reasoning.','gpt-4o-mini-search-preview':'gpt-4o-mini with live web search. Required for Downloadable Search.','gpt-4o-search-preview':'gpt-4o with live web search. Required for Downloadable Search.'},
      hint:'Model used for all OpenAI queries.' },
    { key:'GEMINI_API_KEY',    label:'Gemini API key',   def:'', secret:true },
    { key:'GEMINI_MODEL',      label:'Gemini model',     def:'gemini-2.5-flash',
      select:['gemini-2.5-pro','gemini-2.5-flash','gemini-2.0-flash','gemini-1.5-pro'],
      selectHints:{'gemini-2.5-pro':'Most capable Gemini. Best for complex reasoning and long context.','gemini-2.5-flash':'Fast and efficient. Best everyday balance for Gemini.','gemini-2.0-flash':'Previous-gen fast model. Reliable and well-tested.','gemini-1.5-pro':'Large context window (1M tokens). Good for long documents.'},
      hint:'Model used for all Gemini queries.' },
    { key:'LOCAL_LLM_MODEL',   label:'Local model name', def:'', ollama:true },
    { key:'OLLAMA_HOST',       label:'Ollama host',      def:'http://localhost:11434' }
  ]},
  // Rendered entirely client-side (model download manager) -- no fields here.
  { title: 'AI Downloads', _aiDownloads: true, fields: [] },
  { title: 'Embeddings', fields: [
    { key:'EMBED_MODEL',       label:'Embed model', def:'nomic-embed-text',
      hint:'nomic-embed-text / mxbai-embed-large / all-minilm' },
    { key:'EMBED_OLLAMA_HOST', label:'Embed host',  def:'http://localhost:11434' }
  ], _embedCatalog: true },
  { title: 'Retrieval', fields: [
    { key:'TOP_K',              label:'Top-K results',      def:'64' },
    { key:'HNSW_EF',            label:'Search depth (HNSW ef)', def:'512', hint:'Number of candidate nodes explored during graph search. Higher = better recall but slower. 512 is fast, 1024-2048 improves recall noticeably, 4096 for maximum recall.' },
    { key:'MIN_SCORE', label:'Min score (0=all)', def:'0', hint:'Filters on relative score (0=floor, 1=best match for this query). 0 returns all results. 0.10 drops the bottom 10% of results. 0.25 drops the bottom quarter. Works the same regardless of embedding model or collection  --  relative position is always meaningful.' },
    { key:'MAX_TOKENS',         label:'Max tokens',         def:'4096' },
    { key:'CONTEXT_CHUNKS',     label:'Context chunks',     def:'64' },
    { key:'ACTIVE_COLLECTIONS', label:'Active collections', def:'', list:true },
    { key:'QUERY_TIMEOUT_S',  label:'Query display timeout (s)', def:'300', hint:'Client-side countdown shown while waiting. Should be ≥ Ollama gen timeout when using local models  --  the server keeps generating until the Ollama timeout fires. Does not abort the server.' },
    { key:'OLLAMA_TIMEOUT_S', label:'Ollama gen timeout (s)', def:'360', hint:'Server-side abort for local Ollama generation  --  this actually kills the request. Set query display timeout ≥ this to avoid showing a timeout warning while the server is still generating.' }
  ]},
  { title: 'Annotation', fields: [
    { key:'ANNOTATE_DEFAULT', label:'Annotate by default', def:'no',
      select:['no','yes'],
      selectHints:{'no':'Annotate checkbox starts unchecked -- annotation must be enabled per query.','yes':'Annotate checkbox starts checked -- annotation runs on every query.'},
      hint:'Whether the Annotate checkbox is on or off when the page loads.' },
    { key:'ANNOTATION_PROVIDER', label:'Annotation provider', def:'default',
      select:['default','claude','openai','gemini','local'],
      selectHints:{'default':'Use same provider as the main query (recommended).','claude':'Anthropic Claude. Requires ANTHROPIC_API_KEY.','openai':'OpenAI GPT. Requires OPENAI_API_KEY.','gemini':'Google Gemini. Requires GEMINI_API_KEY.','local':'Local Ollama model. Good for high-volume annotation.'},
      hint:'LLM provider used to annotate sources. Default uses the same provider as the main query.' },
    { key:'ANNOTATION_MODEL', label:'Annotation model', def:'', ollama:true,
      hint:'Model for annotation. Leave blank to use the active model for the selected provider. For local provider, pick from available Ollama models.' },
    { key:'ANNOTATION_CONCURRENCY', label:'Annotation concurrency', def:'4',
      hint:'Number of parallel annotation calls for cloud providers (claude/openai/gemini).' },
    { key:'ANNOTATION_LOCAL_CONCURRENCY', label:'Local annotation concurrency', def:'1',
      hint:'Parallel annotation calls for local Ollama. Default 1 -- Ollama queues requests anyway so parallel calls add overhead. Increase only if running multiple GPUs.' },
    { key:'ANNOTATION_TIMEOUT_S', label:'Annotation timeout (s)', def:'120',
      hint:'Seconds to wait for each annotation call before giving up. Increase for slow local models.' },
  ]},
  { title: 'Paths', fields: [
    { key:'CHROMA_PATH', label:'ChromaDB path', def:'./chromadb' },
    { key:'SOURCE_DIR',  label:'Source dir',    def:'./source' }
  ]},
  { title: 'Web', fields: [
    { key:'WEB_PORT',     label:'Web port', def:'3000' },
    { key:'WEB_PASSWORD', label:'Password', def:'', secret:true,
      hint:'Leave blank to disable' }
  ]},
  // Fields are generated per-collection at request time -- see getConfig.
  { title: 'Collection URLs', colurls:true },
  { title: 'Ingest', fields: [
    { key:'INGEST_PROFILE',       label:'Profile',                  def:'low',
      select:['low','medium','high'],
      hint:'Preset for chunk sizes and timeouts. Click Advise to see what this machine recommends.' },
    { key:'OCR_ENABLED',          label:'OCR (scanned PDFs)',        def:'no',
      select:['no','yes'],
      hint:'Run Tesseract OCR on PDFs with no text layer. Requires poppler-utils and tesseract-ocr installed.' },
    { key:'WHISPER_ENABLED',      label:'Whisper transcription',     def:'yes',
      select:['yes','no'],
      hint:'Enable audio/video transcription via whisper-cli or Python whisper. Set no to skip all audio/video files.' },
    { key:'ZIP_POLICY',           label:'ZIP handling',              def:'never',
      select:['never','always','ask'],
      hint:'Whether to extract and ingest ZIP archives found in source.' },
    { key:'USER_SKIP_EXTS',       label:'Skip extensions',           def:'',
      hint:'Comma-separated list of file extensions to ignore, e.g. .log,.tmp,.bak  --  in addition to built-in skip list.' },
    { key:'CHUNK_SIZE',           label:'Chunk size (text/code)',   def:'512',
      hint:'Tokens per chunk for text, markdown, code files. Larger = more context per result but slower embeds.' },
    { key:'CHUNK_SIZE_PDF',       label:'Chunk size (PDF)',         def:'512',
      hint:'Tokens per chunk for PDF files.' },
    { key:'CHUNK_SIZE_AV',        label:'Chunk size (audio/video)', def:'256',
      hint:'Tokens per chunk for Whisper transcripts. Keep small  --  speech is repetitive and low-density.' },
    { key:'CHUNK_OVERLAP_PCT',    label:'Chunk overlap %',          def:'50',
      hint:'Percentage of chunk size to overlap between adjacent chunks (0-100). 50 = half-chunk overlap.' },
    { key:'EMBED_TIMEOUT_S',      label:'Embed timeout (s)',        def:'300',
      hint:'Seconds to wait per Ollama embed call. Increase on slow or low-RAM machines where model swaps to disk.' },
    { key:'EMBED_BACKOFF_S',      label:'Embed retry backoff (s)',  def:'10',
      hint:'Seconds to wait between embed retries on failure.' },
    { key:'WHISPER_TIMEOUT_S',    label:'Whisper timeout (s)',      def:'600',
      hint:'Seconds allowed per audio/video file for transcription. 15MB MP4 may need 10+ min on slow CPU.' },
    { key:'LIBREOFFICE_TIMEOUT_S',label:'LibreOffice timeout (s)',  def:'60',
      hint:'Seconds allowed for LibreOffice to convert .doc/.odt/.rtf files to text.' },
  ]},
  { title: 'Embed Models', fields: [], _embedModels: true },
{ title: 'Debug/Optimize', fields: [
    { key:'DEBUG_LEVEL', label:'Debug level', def:'0',
      hint:'0=off  1=errors  2=verbose' },
    { key:'ANNOTATION_TEST_THRESHOLD', label:'Annotation test threshold (%)', def:'95',
      hint:'F1 score threshold (0-100) for recommending a model in auto test mode.' },
    { key:'ANNOTATION_TEST_STOP_AT_THRESHOLD', label:'Stop at threshold', def:'yes',
      select:['yes','no'],
      hint:'Stop testing models once one meets the threshold. Set no to test all fitting models and see full ranking.' },
    { key:'ANNOTATION_TEST_FIT_ONLY', label:'Test fitting models only', def:'yes',
      select:['yes','no'],
      hint:'Only test models that fit in available RAM. Set no to test all installed models regardless of size.' }
  ], _debugOptimize: true },
  { title: 'Prompts', _prompts: true, fields: [] },
  { title: 'Downloadable Search', _dlSearch: true, fields: [] }
];

// Build the section/field model served to the client.
// Secrets are masked before leaving the server; special sections are
// reduced to { title, fields:[], <flag>:true } so the client can render
// them with dedicated widgets instead of the generic field list.
async function getConfig() {
  // Build Collection URLs fields dynamically from actual collection names
  const cols = await getCollections();
  return SECTIONS.map(sec => {
    if (sec.colurls) {
      const colUrls = {};
      cols.forEach(c => {
        // Collection name -> Config key, e.g. "my-col" -> COLLECTION_URL_MY_COL
        const key = 'COLLECTION_URL_' + c.replace(/[^A-Za-z0-9]/g, '_').toUpperCase();
        colUrls[c] = envGet(key, 'https://all.net/');
      });
      return { title: sec.title, fields: [], _colurls: true, cols, colUrls };
    }
    // Preserve special section flags (_embedModels, etc.) for the client
    if (sec._embedModels)  return { title: sec.title, fields: [], _embedModels: true };
    if (sec._aiDownloads)  return { title: sec.title, fields: [], _aiDownloads: true };
    if (sec._dlSearch)     return { title: sec.title, fields: [], _dlSearch: true };
    if (sec._prompts)      return { title: sec.title, fields: [], _prompts: true };
    // Debug/Optimize keeps its fields but forces the simple-field flags off.
    if (sec._debugOptimize) return { title: sec.title, fields: sec.fields.map(f => {
      const val = envGet(f.key, f.def);
      return { key:f.key, label:f.label, val, secret:false, list:false, sysprompt:false, annotprompt:false,
               ollama:false, select:f.select||null, selectHints:null, hint:f.hint||'', def:f.def };
    }), _debugOptimize: true };
    return {
      title: sec.title,
      fields: sec.fields.map(f => {
        // Prompt-backed fields read from prompts.json; secrets are masked.
        const val = f.annotprompt ? annotPromptRead()
                  : f.sysprompt ? sysPromptRead()
                  : f.secret   ? masked(envGet(f.key, ''))
                  :               envGet(f.key, f.def);
        return { key:f.key, label:f.label, val,
                 secret:!!f.secret, list:!!f.list, sysprompt:!!f.sysprompt, annotprompt:!!f.annotprompt,
                 ollama:!!f.ollama, select:f.select||null, selectHints:f.selectHints||null, hint:f.hint||'', def:f.def };
      })
    };
  });
}

// Names of the models installed on the configured Ollama server, sorted
// with the default (code-point) sort. Returns [] on any failure.
async function getOllamaModels() {
  try {
    const base = envGet('OLLAMA_HOST', 'http://localhost:11434').replace(/\/$/, '');
    const res  = await fetch(base + '/api/tags');
    if (!res.ok) return [];
    const body = await res.json();
    return (body.models || []).map(m => m.name).sort();
  } catch(_) { return []; }
}
// Installed Ollama models with size (MB) and modification time, sorted by
// name via localeCompare. Returns [] on any failure.
async function getOllamaModelsFull() {
  try {
    const base = envGet('OLLAMA_HOST', 'http://localhost:11434').replace(/\/$/, '');
    const res  = await fetch(base + '/api/tags');
    if (!res.ok) return [];
    const body = await res.json();
    const models = (body.models || []).map(m => ({
      name:     m.name,
      sizeMb:   m.size ? Math.round(m.size / 1024 / 1024) : null,
      modified: m.modified_at || null,
    }));
    models.sort((a, b) => a.name.localeCompare(b.name));
    return models;
  } catch(_) { return []; }
}

// Collection names for the current store.
// collections.js owns name resolution -- this only resolves the store dir.
async function getCollections() {
  const storeDir = path.resolve(PROJECT_DIR, envGet('CHROMA_PATH', './chromadb'));
  return getCollectionNames(storeDir);
}

// -- HTML (backtick template literals -- safe inside << 'WEBCEOF') ------------
function buildHtml() {
  return `<!DOCTYPE html>
<html lang="en"><head>
<meta charset="UTF-8">
<link rel="icon" type="image/svg+xml" href="data:image/svg+xml,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20viewBox%3D%220%200%2032%2032%22%3E%3Crect%20width%3D%2232%22%20height%3D%2232%22%20rx%3D%224%22%20fill%3D%22%231a1a2e%22/%3E%3Crect%20x%3D%223%22%20y%3D%2210%22%20width%3D%224%22%20height%3D%2216%22%20rx%3D%221%22%20fill%3D%22%23f5f0e8%22/%3E%3Crect%20x%3D%228%22%20y%3D%2210%22%20width%3D%224%22%20height%3D%2216%22%20rx%3D%221%22%20fill%3D%22%23f5f0e8%22/%3E%3Crect%20x%3D%2213%22%20y%3D%2210%22%20width%3D%224%22%20height%3D%2216%22%20rx%3D%221%22%20fill%3D%22%23f5f0e8%22/%3E%3Crect%20x%3D%2218%22%20y%3D%2210%22%20width%3D%224%22%20height%3D%2216%22%20rx%3D%221%22%20fill%3D%22%23f5f0e8%22/%3E%3Crect%20x%3D%2223%22%20y%3D%2210%22%20width%3D%224%22%20height%3D%2216%22%20rx%3D%221%22%20fill%3D%22%23f5f0e8%22/%3E%3Crect%20x%3D%226%22%20y%3D%2210%22%20width%3D%223%22%20height%3D%2210%22%20rx%3D%221%22%20fill%3D%22%23111%22/%3E%3Crect%20x%3D%2211%22%20y%3D%2210%22%20width%3D%223%22%20height%3D%2210%22%20rx%3D%221%22%20fill%3D%22%23111%22/%3E%3Crect%20x%3D%2221%22%20y%3D%2210%22%20width%3D%223%22%20height%3D%2210%22%20rx%3D%221%22%20fill%3D%22%23111%22/%3E%3Ccircle%20cx%3D%2224%22%20cy%3D%225%22%20r%3D%222%22%20fill%3D%22%23c8a84b%22/%3E%3Crect%20x%3D%2225.5%22%20y%3D%221%22%20width%3D%221.5%22%20height%3D%226%22%20fill%3D%22%23c8a84b%22/%3E%3Crect%20x%3D%2219%22%20y%3D%222%22%20width%3D%228%22%20height%3D%221.5%22%20fill%3D%22%23c8a84b%22/%3E%3C/svg%3E">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta http-equiv="Cache-Control" content="no-cache,no-store,must-revalidate">
<title>RAGWeed Config v`
    + VERSION + 
`</title>
<style>
*{box-sizing:border-box;margin:0;padding:0}
body{font-family:Georgia,serif;background:#fff;color:#111;
     display:flex;flex-direction:column;height:100vh}
#hdr{background:#111;color:#fff;padding:5px 14px;display:flex;
     align-items:center;gap:10px;flex-shrink:0}
#hdr h1{font-weight:bold}
#hdr .ver{font-size:13px;font-weight:normal}
#hdr .env{margin-left:auto;font-family:monospace;
          overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
#hdr .lf-btn{padding:1px 8px;border:1px solid #fff;background:transparent;
             color:#fff;font-family:Georgia,serif;font-size:13px;cursor:pointer}
#hdr .lf-btn:hover{background:rgba(255,255,255,.25)}
#main{display:flex;flex:1;overflow:hidden}
#left{width:170px;flex-shrink:0;border-right:2px solid #111;overflow-y:auto}
.si{padding:6px 12px;cursor:pointer;border-bottom:1px solid #111;font-size:13px}
.si:hover{background:#e8e8e8}
.si.active{background:#111;color:#fff;font-weight:bold}
#right{flex:1;overflow:hidden;padding:14px 18px;display:flex;flex-direction:column}
#rtitle{font-size:15px;font-weight:bold;border-bottom:2px solid #111;
        padding-bottom:5px;margin-bottom:12px;flex-shrink:0}
#fields{flex:1;overflow-y:auto;display:flex;flex-direction:column}
.fr{display:flex;align-items:flex-start;gap:8px;margin-bottom:8px;
    padding:6px 8px;border:1px solid #111;flex-shrink:0}
.fr.sp-row{flex:1;min-height:0;margin-bottom:0;align-items:stretch}
.fr:hover{border-color:#111}
.fr.ed{border-color:#111;background:#fffde7}
.fl{width:170px;flex-shrink:0;font-size:13px;padding-top:3px}
.fl b{display:block;font-size:13px;color:#111}
.fv{flex:1;font-family:monospace;font-size:13px;padding:3px 6px;
    border:1px solid #111;background:#fff;cursor:pointer;
    word-break:break-all;min-height:26px}
.fv:hover{border-color:#111}
.fv.changed{font-weight:bold;border-color:#c80}
.fi{flex:1;font-family:monospace;font-size:13px;padding:3px 6px;
    border:2px solid #111;outline:none}
.sp-mid{flex:1;min-height:0;display:flex;flex-direction:column}
.sp-ta{width:100%;font-family:monospace;font-size:13px;padding:6px 8px;
       border:2px solid #111;outline:none;resize:none;flex:1;min-height:0;
       background:#fff;color:#111;line-height:1.4}
.sp-ta.changed{border-color:#c80}
.sp-meta{font-size:13px;color:#111;margin-top:3px}
.hint{font-size:13px;color:#111;margin-top:2px}
.fa{display:flex;gap:6px;margin-top:4px}
.btn{padding:3px 10px;border:1px solid #111;background:#fff;cursor:pointer;
     font-family:Georgia,serif;font-size:13px}
.btn:hover{background:#e8e8e8}
.btnp{background:#111;color:#fff}
.btnp:hover{background:#333}
.ci{display:inline-flex;align-items:center;gap:3px;margin:2px;padding:2px 8px;
    border:1px solid #111;cursor:pointer;font-size:13px}
.ci.on{background:#111;color:#fff}
.ca{margin-top:5px;display:flex;gap:5px}
#sbar{padding:4px 14px;border-top:2px solid #111;background:#e8e8e8;
      font-size:13px;color:#111;flex-shrink:0;display:none;
      align-items:center;gap:8px}
#sbar.show{display:flex}
#stline{padding:3px 12px;font-size:13px;min-height:22px;
        border-top:1px solid #111;color:#111;flex-shrink:0}
#stline.ok{background:#e8f5e9;color:#1b5e20}
#stline.err{background:#ffebee;color:#b71c1c}
/* L&F panel styles owned by lf.js */
#advice-panel{border:2px solid #111;padding:14px;margin:10px 0;}
#advice-panel table{width:100%;border-collapse:collapse;font-size:13px}
#advice-panel th{text-align:left;padding:2px 8px;font-weight:normal}
#advice-panel td{padding:2px 8px}
#advice-panel .adv-footer{margin-top:8px}
#advice-panel .adv-presets{margin:8px 0}
#advice-panel .adv-hdr{margin-bottom:8px;font-size:13px}
#advice-panel .adv-close-row{text-align:right;margin-top:8px}
.sub{font-size:12px;font-style:italic}
.status-ok{font-weight:bold}
.status-warn{}
.status-err{}
.warn-box{border:1px solid;border-radius:4px;padding:8px 12px;margin-top:10px}
</style><script src="/lf.js"></script></head><body>
<div id="hdr">
  <h1>&#9881; RAGWeed Configuration</h1>
  <span class="ver">v`
    + VERSION + `&nbsp;&nbsp;` + HOSTNAME + 
`</span>
  <button class="lf-btn" id="lf-btn">&#9881; L&amp;F</button>
  <span class="env" id="envpath"></span>
</div>
<div id="main">
  <div id="left"></div>
  <div id="right">
    <div id="rtitle">Loading...</div>
    <div id="fields"></div>
  </div>
</div>
<div id="sbar">
  <span>Unsaved changes</span>
  <button class="btn btnp" id="savebtn">&#10003; Save All</button>
  <button class="btn" id="discbtn">Discard</button>
</div>
<div id="stline">Loading...</div>

<!-- L&F panel injected by lf.js -->

<script>
var cfg=[],cur=0,dirty={},editing=null;
function esc(s){var s2=String(s).replace(/&/g,'&amp;');s2=s2.replace(String.fromCharCode(60),'&lt;');s2=s2.replace(String.fromCharCode(62),'&gt;');return s2;}
// Show a status-line message; cls is 'ok', 'err', or '' for neutral.
// 'ok' messages auto-revert to "Ready" after 2.5s.
function st(msg,cls){
  var e=document.getElementById('stline');
  e.textContent=msg; e.className=cls||'';
  // Cancel any pending auto-revert so an old 'ok' timer cannot clobber a
  // newer message (e.g. an error shown one second after a save confirm).
  if(st._timer){clearTimeout(st._timer);st._timer=null;}
  if(cls==='ok'){
    st._timer=setTimeout(function(){e.textContent='Ready';e.className='';st._timer=null;},2500);
  }
}
// Stage an unsaved change for key k, then show or hide the Save All bar
// depending on whether any staged changes remain.
function mkd(k,v){
  dirty[k]=v;
  var bar=document.getElementById('sbar');
  bar.className=Object.keys(dirty).length>0?'show':'';
}
// Fetch the full config model from the server and render the whole UI.
// On failure the status line shows the error instead.
function load(){
  fetch('/api/config').then(function(r){return r.json();})
  .then(function(d){
    cfg=d.sections;
    document.getElementById('envpath').textContent=d.envFile;
    renderLeft(); selSec(cur);
    st('Ready');
  }).catch(function(e){st('Load error: '+e.message,'err');});
}
// Rebuild the left-hand section list; the current section is highlighted
// and clicking an entry selects it.
function renderLeft(){
  var box=document.getElementById('left');
  box.innerHTML='';
  for(var i=0;i<cfg.length;i++){
    (function(idx){
      var item=document.createElement('div');
      item.className=idx===cur?'si active':'si';
      item.textContent=cfg[idx].title;
      item.onclick=function(){selSec(idx);};
      box.appendChild(item);
    })(i);
  }
}
// Render the Collection-URLs management UI: one row per collection with a
// base-URL input (staged through mkd()/dirty like any other key), a rename
// form, a source-file list (fetched eagerly, shown on demand) and a
// "Save path map" action.
function renderColUrls(el, sec){
  if(!sec.cols||!sec.cols.length){
    var msg=document.createElement('div');
    msg.className='hint'; msg.style.padding='8px';
    msg.textContent='No collections found. Ingest data first.';
    el.appendChild(msg); return;
  }
  sec.cols.forEach(function(col){
    var colSources=[];
    // Env var key derived from the collection name (non-alphanumerics become _).
    var urlKey='COLLECTION_URL_'+col.replace(/[^A-Za-z0-9]/g,'_').toUpperCase();
    var row=document.createElement('div'); row.className='fr';
    // Left: collection name only
    var lbl=document.createElement('div'); lbl.className='fl';
    var nm=document.createElement('b'); nm.textContent=col; lbl.appendChild(nm);
    row.appendChild(lbl);
    // Right: URL, hint, sources, rename, buttons
    var mid=document.createElement('div'); mid.style.cssText='flex:1;display:flex;flex-direction:column;gap:6px';
    // URL row -- uses mkd() for standard dirty tracking + Save All
    var urlrow=document.createElement('div'); urlrow.style.cssText='display:flex;align-items:center;gap:6px';
    var ullbl=document.createElement('span'); ullbl.textContent='Base URL:'; ullbl.style.cssText='white-space:nowrap;min-width:70px';
    var ulinp=document.createElement('input'); ulinp.type='text'; ulinp.className='fi'; ulinp.style.flex='1';
    ulinp.value=dirty[urlKey]!==undefined?dirty[urlKey]:(sec.colUrls[col]||'');
    ulinp.placeholder='https://example.com/docs/';
    ulinp.oninput=function(){ mkd(urlKey, ulinp.value); };
    urlrow.appendChild(ullbl); urlrow.appendChild(ulinp); mid.appendChild(urlrow);
    var hint=document.createElement('div'); hint.className='hint';
    hint.textContent='Base URL prepended to source file path for clickable source links.';
    mid.appendChild(hint);
    // Sources list (hidden)
    var srcDiv=document.createElement('div');
    srcDiv.style.cssText='max-height:140px;overflow-y:auto;padding:4px 6px;font-size:12px;font-family:monospace;display:none;border:1px solid';
    mid.appendChild(srcDiv);
    // Rename row
    var renrow=document.createElement('div'); renrow.style.cssText='display:flex;align-items:center;gap:6px';
    var renlbl=document.createElement('span'); renlbl.textContent='Rename to:'; renlbl.style.cssText='white-space:nowrap;min-width:70px';
    var reninp=document.createElement('input'); reninp.type='text'; reninp.className='fi'; reninp.style.flex='1';
    reninp.placeholder='new collection name';
    var renbtn=document.createElement('button'); renbtn.className='btn'; renbtn.textContent='Rename';
    // IIFE pins this row's collection name and input for the click handler.
    renbtn.onclick=(function(colName,inp){return function(){
      var nname=inp.value.trim();
      if(!nname){st('Enter new name','err');return;}
      if(nname===colName){st('Same name -- nothing to do','ok');return;}
      if(!/^[a-zA-Z0-9_.-]+$/.test(nname)){st('Invalid name: letters, numbers, _ . - only','err');return;}
      renbtn.disabled=true; renbtn.textContent='Renaming...';
      fetch('/api/rename-collection',{method:'POST',headers:{'Content-Type':'application/json'},
        body:JSON.stringify({oldName:colName,newName:nname})})
      .then(function(r){return r.json();})
      .then(function(d){
        if(d.ok){st('Renamed '+colName+' -> '+nname+'. Reload page to see changes.','ok'); nm.textContent=nname; inp.value=''; renbtn.disabled=false; renbtn.textContent='Rename';}
        else{st('Rename error: '+(d.error||'unknown'),'err'); renbtn.disabled=false; renbtn.textContent='Rename';}
      }).catch(function(e){st('Rename error: '+e.message,'err'); renbtn.disabled=false; renbtn.textContent='Rename';});
    };})(col,reninp);
    renrow.appendChild(renlbl); renrow.appendChild(reninp); renrow.appendChild(renbtn); mid.appendChild(renrow);
    // Buttons
    var btnrow=document.createElement('div'); btnrow.style.cssText='display:flex;gap:6px;flex-wrap:wrap';
    var togbtn=document.createElement('button'); togbtn.className='btn'; togbtn.textContent='Show sources';
    var pathmapbtn=document.createElement('button'); pathmapbtn.className='btn'; pathmapbtn.textContent='Save path map';
    togbtn.onclick=function(){
      var vis=srcDiv.style.display==='none';
      srcDiv.style.display=vis?'block':'none';
      togbtn.textContent=vis?'Hide sources':'Show sources';
    };
    pathmapbtn.onclick=function(){
      // Prefer the staged (dirty) URL; strip trailing slashes before save.
      var baseUrl=(dirty[urlKey]!==undefined?dirty[urlKey]:ulinp.value).trim().replace(/[/]+$/,'');
      fetch('/api/savepathmap',{method:'POST',headers:{'Content-Type':'application/json'},
        body:JSON.stringify({col:col,baseUrl:baseUrl,sources:colSources})})
      .then(function(r){return r.json();})
      .then(function(d){if(d.ok){st('Path map saved for '+col,'ok');}else{st('Save error','err');}})
      .catch(function(){st('Save error','err');});
    };
    btnrow.appendChild(togbtn); btnrow.appendChild(pathmapbtn); mid.appendChild(btnrow);
    row.appendChild(mid);
    el.appendChild(row);
    // Load sources eagerly (colSources is also sent with "Save path map")
    fetch('/api/colsources?col='+encodeURIComponent(col))
    .then(function(r){return r.json();})
    .then(function(d){
      colSources=d.sources||[];
      srcDiv.innerHTML='';
      if(colSources.length){
        colSources.forEach(function(s){var r2=document.createElement('div');r2.textContent=s;srcDiv.appendChild(r2);});
      } else {
        var none=document.createElement('div');
        none.textContent='No source filenames found in metadata'; srcDiv.appendChild(none);
      }
    })
    .catch(function(){});
  });
}
// Toggle the hardware-advice panel above the field list. Fetches
// /api/profile, then builds: a header with detected hardware, preset
// buttons (low/medium/high), and a table comparing each ingest key's
// current value against the detected profile's recommendation, with a
// per-key Apply button. Clicking again (or Close) removes the panel.
function toggleAdvice(){
  var ap=document.getElementById('advice-panel');
  if(ap){ap.remove();return;}
  fetch('/api/profile').then(function(r){return r.json();}).then(function(d){
    var panel=document.createElement('div');
    panel.id='advice-panel';
    var hdr=document.createElement('div');
    hdr.className='adv-hdr';
    // Tag names are split ('<'+'/b>') so the embedded script text never
    // contains a literal close-tag sequence.
    hdr.innerHTML='<b>&#9881; Hardware Advice<'+'/b>'
      +' ('+esc(d.ramGb)+'GB RAM, '+d.cores+' cores, '+d.swapMb+'MB swap)'
      +'  --  detected: <b>'+esc(d.detectedProfile)+'<'+'/b> &nbsp; installed: <b>'+esc(d.installedProfile)+'<'+'/b>';
    panel.appendChild(hdr);
    var pdiv=document.createElement('div');
    pdiv.className='adv-presets';
    pdiv.textContent='Apply preset: ';
    ['low','medium','high'].forEach(function(p){
      var btn=document.createElement('button');
      btn.className='btn'+(p===d.installedProfile?' btnp':'');
      btn.textContent=p;
      btn.style.marginRight='6px';
      btn.onclick=function(){applyPreset(p);};
      pdiv.appendChild(btn);
    });
    panel.appendChild(pdiv);
    var tbl=document.createElement('table');
    var hrow=tbl.insertRow();
    ['Key','Current','Recommended ('+d.detectedProfile+')',''].forEach(function(h){
      var th=document.createElement('th');
      th.textContent=h; hrow.appendChild(th);
    });
    var rec=d.profiles[d.detectedProfile];
    d.ingestKeys.forEach(function(k){
      var row=tbl.insertRow();
      var match=(d.current[k]===rec[k]);
      [k, d.current[k], rec[k]].forEach(function(v,ci){
        var td=row.insertCell();
        // Orange marks a current value that differs from the recommendation.
        if(ci===1&&!match) td.style.color='#c60';
        if(ci===2) td.style.color='inherit';
        td.textContent=v||'';
      });
      var act=row.insertCell();
      if(match){
        act.textContent='✓';
      } else {
        var ab=document.createElement('button');
        ab.className='btn';
        ab.textContent='Apply';
        // IIFE pins k/rec[k] for this row's handler.
        (function(key,val){ab.onclick=function(){applyOneKey(key,val);};})(k,rec[k]);
        act.appendChild(ab);
      }
    });
    panel.appendChild(tbl);
    var foot=document.createElement('div');
    foot.className='adv-footer';
    foot.textContent='Orange = differs from recommendation. Changing profile does not delete existing index data.';
    // Low-RAM extra: recommend the small all-minilm embed model. The row
    // is inserted into tbl (already appended above) so it renders as the
    // table's last row even though foot was built first.
    if(d.ramGb<4&&d.embedModel!=='all-minilm'){
      var mrow=tbl.insertRow();
      var mtd0=mrow.insertCell(); mtd0.textContent='EMBED_MODEL';
      var mtd1=mrow.insertCell(); mtd1.textContent=d.embedModel||'nomic-embed-text'; mtd1.style.color='#c60';
      var mtd2=mrow.insertCell(); mtd2.textContent='all-minilm';
      var mtd3=mrow.insertCell();
      var mb=document.createElement('button'); mb.className='btn'; mb.textContent='Apply';
      mb.onclick=function(){
        mb.textContent='Pulling...';
        mb.disabled=true;
        // NOTE(review): _doPull and saveAll are defined elsewhere in this
        // page script -- pull first, then stage + save the key on success.
        _doPull('all-minilm', mb,
          function(){ applyOneKey('EMBED_MODEL','all-minilm'); saveAll(); mb.textContent='Done'; },
          function(e){ mb.textContent='Apply'; mb.disabled=false; alert('Pull failed: '+e); }
        );
      };
      mtd3.appendChild(mb);
      var mfoot=document.createElement('div'); mfoot.className='adv-footer'; mfoot.style.marginTop='4px';
      mfoot.textContent='all-minilm (46MB) embeds ~6x faster than nomic-embed-text (274MB) on low-RAM systems.';
      panel.appendChild(mfoot);
    }
    panel.appendChild(foot);
    var cldiv=document.createElement('div');
    cldiv.className='adv-close-row';
    var clbtn=document.createElement('button');
    clbtn.className='btn';
    clbtn.textContent='Close';
    clbtn.onclick=function(){panel.remove();};
    cldiv.appendChild(clbtn);
    panel.appendChild(cldiv);
    // Insert the panel just above the fields list.
    var el=document.getElementById('fields');
    el.parentNode.insertBefore(panel,el);
  }).catch(function(e){alert('Profile fetch failed: '+e.message);});
}
// Stage every key of the chosen hardware profile as a pending change,
// dismiss the advice panel, and prompt the user to save.
function applyPreset(profile){
  fetch('/api/profile').then(function(r){return r.json();}).then(function(d){
    var recs=d.profiles[profile];
    Object.keys(recs).forEach(function(key){applyOneKey(key,recs[key]);});
    var panel=document.getElementById('advice-panel');
    if(panel){panel.remove();}
    var msg=document.getElementById('save-msg');
    if(msg){msg.textContent='Profile ['+profile+'] applied  --  click Save to write to Config';msg.style.color='#ff9800';}
  });
}
// Stage one key/value change and re-render so the new value is visible
// immediately. Routes through mkd() so the unsaved-changes bar is kept
// in sync (previously dirty was written directly and the Save All bar
// could stay hidden after applying advice-panel recommendations).
function applyOneKey(key,val){
  mkd(key,val);
  renderRight();
}
function selSec(i){editing=null;cur=i;renderLeft();renderRight();}
// Render the right pane for the currently selected section. Sections with
// an underscore flag dispatch to a dedicated renderer; otherwise each
// field becomes a row whose editor widget depends on its type flags
// (annotprompt / sysprompt / select / list / ollama) or inline edit state.
function renderRight(){
  var sec=cfg[cur];
  var rt=document.getElementById('rtitle');
  rt.textContent=sec.title;
  // Remove per-section extras left over from a previous render.
  var oldBtn=document.getElementById('advise-btn');
  if(oldBtn) oldBtn.remove();
  var oldAp=document.getElementById('advice-panel'); if(oldAp) oldAp.remove();
  if(sec.title==='Ingest'){
    var advBtn=document.createElement('button');
    advBtn.id='advise-btn'; advBtn.className='btn';
    advBtn.textContent='⚙ Advise';
    advBtn.style.cssText='margin-left:12px;padding:2px 8px;vertical-align:middle;';
    advBtn.onclick=toggleAdvice;
    rt.appendChild(advBtn);
  }
  var el=document.getElementById('fields'); el.innerHTML='';
  if(sec._colurls){renderColUrls(el,sec);return;}
  if(sec._embedModels){renderEmbedModels(el);return;}
  if(sec._aiDownloads){renderAiDownloads(el);return;}
  if(sec._prompts){renderPrompts(el);return;}
  if(sec._dlSearch){renderDlSearch(el);return;}
  // Normal field rendering for all sections including _debugOptimize
  sec.fields.forEach(function(f){
    var row=document.createElement('div');
    row.className='fr'+(editing===f.key?' ed':'')+(f.sysprompt?' sp-row':'');
    var lbl=document.createElement('div'); lbl.className='fl';
    lbl.innerHTML='<b>'+esc(f.label)+'<'+'/b>';
    row.appendChild(lbl);
    var mid=document.createElement('div');
    mid.className=(f.sysprompt||f.annotprompt)?'sp-mid':''; mid.style.flex=(f.sysprompt||f.annotprompt)?'':'1';
    if(f.annotprompt){
      renderAnnotPrompt(mid, f);
    } else if(f.sysprompt){
      renderSysPrompt(mid, f);
    } else if(f.select){
      renderSelectPicker(mid,f);
    } else if(f.list){
      renderListPicker(mid,f);
    } else if(f.ollama){
      renderOllamaPicker(mid,f);
    } else if(editing===f.key){
      // Inline text editor: Enter or "Set" commits, Escape or "Cancel" aborts.
      var inp=document.createElement('input');
      inp.type=f.secret?'password':'text'; inp.className='fi';
      inp.value=dirty[f.key]!==undefined?dirty[f.key]:(f.secret?'':f.val);
      inp.placeholder=f.secret?'(blank = keep current)':'';
      inp.onkeydown=function(e){
        if(e.key==='Enter'){commitEdit(f,inp.value);}
        else if(e.key==='Escape'){cancelEdit();}
      };
      mid.appendChild(inp);
      if(f.hint){var h=document.createElement('div');h.className='hint';h.textContent=f.hint;mid.appendChild(h);}
      var fa=document.createElement('div'); fa.className='fa';
      var ok=document.createElement('button'); ok.className='btn btnp'; ok.textContent='Set';
      ok.onclick=function(){commitEdit(f,inp.value);};
      var ca=document.createElement('button'); ca.className='btn'; ca.textContent='Cancel';
      ca.onclick=cancelEdit;
      fa.appendChild(ok); fa.appendChild(ca); mid.appendChild(fa);
      setTimeout(function(){inp.focus();inp.select();},30);
    } else {
      // Read-only display; click to enter edit mode. Dirty values show
      // bold/orange; secrets only ever display '(changed)'.
      var vd=document.createElement('div');
      var dv=dirty[f.key]!==undefined?(f.secret?'(changed)':dirty[f.key]):f.val;
      vd.className='fv'+(dirty[f.key]!==undefined?' changed':'');
      vd.textContent=dv; vd.title='Click to edit';
      vd.onclick=function(){startEdit(f.key);};
      mid.appendChild(vd);
      // Set secrets get a delete button that removes the key immediately
      // via its own /api/save call (bypasses the dirty/Save All flow).
      if(f.secret&&f.val&&f.val!=='(not set)'){
        var dbtn=document.createElement('button');
        dbtn.className='btn';
        dbtn.textContent='Delete key';
        dbtn.style.cssText='margin-left:8px'; dbtn.className='status-err';
        dbtn.title='Remove this key from Config';
        dbtn.onclick=function(e){
          e.stopPropagation();
          if(!confirm('Delete '+f.label+' from Config?'))return;
          var changes={}; changes[f.key]=null;
          fetch('/api/save',{method:'POST',headers:{'Content-Type':'application/json'},
            body:JSON.stringify({changes:changes})})
          .then(function(r){return r.json();})
          .then(function(){ f.val=''; renderRight(); })
          .catch(function(e2){console.error('delete failed',e2);});
        };
        mid.appendChild(dbtn);
      }
      if(f.hint){var h2=document.createElement('div');h2.className='hint';h2.textContent=f.hint;mid.appendChild(h2);}
    }
    row.appendChild(mid); el.appendChild(row);
  });
  // Cross-field validation: warn if query display timeout < ollama gen timeout
  if(sec.title==='Retrieval'){
    // Effective value helper: staged change wins over the loaded value.
    var _fv=function(k,def){var f=(sec.fields||[]).find(function(x){return x.key===k;});return dirty[k]!==undefined?dirty[k]:(f?f.val:def);};
    var _qt=parseInt(_fv('QUERY_TIMEOUT_S','300'));
    var _ot=parseInt(_fv('OLLAMA_TIMEOUT_S','360'));
    if(!isNaN(_qt)&&!isNaN(_ot)&&_qt<_ot){
      var _warn=document.createElement('div');
      _warn.className='warn-box';
      _warn.innerHTML='⚠️ <b>Query display timeout ('+_qt+'s) is less than Ollama gen timeout ('+_ot+'s).<'+'/'+'b>'
        +' The client countdown will expire before Ollama finishes generating.'
        +' Increase query display timeout to at least '+_ot+'s to avoid a misleading timeout warning.';
      el.appendChild(_warn);
    }
  }
  // Append extra UI for special sections after normal field rendering
  if(sec._debugOptimize) renderDebugOptimize(el, sec);
}
// Text of the system prompt as last loaded/saved; the textarea is dirty
// only while its content differs from this baseline.
var _spOriginal='';
// Render the system-prompt textarea. Edits are staged into dirty[] (and
// flushed by Save All) or written immediately via "Save prompt now",
// which POSTs /api/sysprompt and resets the dirty baseline on success.
function renderSysPrompt(mid, f){
  var ta=document.createElement('textarea');
  ta.className='sp-ta'+(dirty[f.key]!==undefined?' changed':'');
  ta.value=dirty[f.key]!==undefined?dirty[f.key]:f.val;
  _spOriginal=f.val;
  ta.oninput=function(){
    if(ta.value!==_spOriginal){
      ta.className='sp-ta changed';
      dirty[f.key]=ta.value;
      document.getElementById('sbar').className='show';
    } else {
      // Back to the baseline: un-stage and hide the bar if nothing else is dirty.
      ta.className='sp-ta';
      delete dirty[f.key];
      document.getElementById('sbar').className=Object.keys(dirty).length?'show':'';
    }
  };
  mid.appendChild(ta);
  var meta=document.createElement('div'); meta.className='sp-meta';
  meta.textContent='Editing system_prompt.txt directly   --   changes saved with Save All';
  mid.appendChild(meta);
  var fa=document.createElement('div'); fa.className='fa';
  var sb=document.createElement('button'); sb.className='btn btnp';
  sb.textContent='Save prompt now';
  sb.onclick=function(){
    fetch('/api/sysprompt',{method:'POST',
      headers:{'Content-Type':'application/json'},
      body:JSON.stringify({text:ta.value})})
    .then(function(r){return r.json();})
    .then(function(d){
      if(d.ok){
        _spOriginal=ta.value; f.val=ta.value;
        ta.className='sp-ta';
        delete dirty[f.key];
        document.getElementById('sbar').className=Object.keys(dirty).length?'show':'';
        st('System prompt saved','ok');
      } else st('Error: '+d.error,'err');
    }).catch(function(e){st('Error: '+e.message,'err');});
  };
  mid.appendChild(fa); fa.appendChild(sb);
}
// Render the annotation-prompt editor. A blank stored prompt falls back
// to _DEFAULT in the UI, and a blank edit saves as _DEFAULT too, so
// annotation_prompt.txt never ends up empty. Supports staged save (dirty
// + Save All), immediate save via /api/annotprompt, and a Default reset.
function renderAnnotPrompt(mid, f){
  var _DEFAULT =
    'Judge relevance by conceptual and semantic content, not literal word matches -- ignore spelling variations, capitalisation differences, and phrasing differences. ' +
    'Write one concise sentence (3-5 lines maximum) explaining how this excerpt relates to the query. ' +
    'Only respond IRRELEVANT (one word, nothing more) if the excerpt has no meaningful connection to the subject matter of the query. ' +
    'If the excerpt contains one or more relevant quotes, include them verbatim in the sentence. ' +
    'Output only the sentence or the word IRRELEVANT, no preamble, no extra commentary.';
  var ta=document.createElement('textarea');
  // Show current value, or default if nothing configured
  var _stored=f.val||'';
  var _cur=dirty[f.key]!==undefined?dirty[f.key]:(_stored||_DEFAULT);
  ta.className='sp-ta'+(dirty[f.key]!==undefined?' changed':'');
  ta.value=_cur;
  var _orig=_stored||_DEFAULT;
  ta.oninput=function(){
    // Blank input is treated as "use the default".
    var v=ta.value.trim()||_DEFAULT;
    if(v!==_orig){ ta.className='sp-ta changed'; dirty[f.key]=v; document.getElementById('sbar').className='show'; }
    else { ta.className='sp-ta'; delete dirty[f.key]; document.getElementById('sbar').className=Object.keys(dirty).length?'show':''; }
  };
  mid.appendChild(ta);
  var meta=document.createElement('div'); meta.className='sp-meta';
  meta.textContent='Annotation instruction sent to LLM. Stored in annotation_prompt.txt. Changes saved with Save All.';
  mid.appendChild(meta);
  var fa=document.createElement('div'); fa.className='fa';
  var savnow=document.createElement('button'); savnow.className='btn btnp'; savnow.textContent='Save prompt now';
  savnow.onclick=function(){
    var v=ta.value.trim()||_DEFAULT;
    fetch('/api/annotprompt',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({text:v})})
    .then(function(r){return r.json();})
    .then(function(d){
      if(d.ok){_orig=v;f.val=v;ta.className='sp-ta';delete dirty[f.key];document.getElementById('sbar').className=Object.keys(dirty).length?'show':'';st('Annotation prompt saved','ok');}
      else st('Error: '+d.error,'err');
    }).catch(function(e){st('Error: '+e.message,'err');});
  };
  var defbtn=document.createElement('button'); defbtn.className='btn'; defbtn.textContent='Default';
  defbtn.onclick=function(){ ta.value=_DEFAULT; dirty[f.key]=_DEFAULT; ta.className='sp-ta changed'; document.getElementById('sbar').className='show'; };
  fa.appendChild(savnow); fa.appendChild(defbtn); mid.appendChild(fa);
}
// Render a button-group picker for an enumerated field. The active choice
// is highlighted; clicking stages the change via dirty[]. Also performs
// field-specific validation: for ANNOTATION_PROVIDER it warns when the
// selected provider's API key is missing, or (for 'local') when no
// Ollama models are installed.
function renderSelectPicker(mid,f){
  var cur=dirty[f.key]!==undefined?dirty[f.key]:f.val;
  var wrap=document.createElement('div');
  wrap.style.cssText='display:flex;gap:6px;flex-wrap:wrap;align-items:center';
  f.select.forEach(function(opt){
    var btn=document.createElement('button');
    btn.className='btn';
    btn.textContent=opt;
    if(opt===cur){
      btn.className='btnp'; btn.style.fontWeight='bold';
    }
    btn.onclick=function(){
      // Re-read the effective value: no-op when already selected.
      if(opt===(dirty[f.key]!==undefined?dirty[f.key]:f.val))return;
      dirty[f.key]=opt;
      document.getElementById('sbar').className='show';
      renderRight();
    };
    wrap.appendChild(btn);
  });
  // If current value not in list, show it as a dimmed label so it's not invisible
  if(cur && f.select.indexOf(cur)<0){
    var unlisted=document.createElement('span');
    unlisted.className='hint'; unlisted.style.cssText='margin-left:6px;font-style:italic';
    unlisted.textContent='current: '+cur+' (not in list  --  click a button to change)';
    wrap.appendChild(unlisted);
  }
  mid.appendChild(wrap);
  // Per-choice hint text, when the field provides one for the current value.
  if(f.selectHints && f.selectHints[cur]){
    var sh=document.createElement('div'); sh.className='hint'; sh.style.marginTop='3px';
    sh.textContent=f.selectHints[cur];
    mid.appendChild(sh);
  }
  if(f.hint){var h=document.createElement('div');h.className='hint';h.textContent=f.hint;mid.appendChild(h);}
  // Validate annotation provider: warn if required key missing
  if(f.key==='ANNOTATION_PROVIDER'&&cur&&cur!=='default'){
    var _keyMap={'claude':'ANTHROPIC_API_KEY','openai':'OPENAI_API_KEY','gemini':'GEMINI_API_KEY'};
    var _reqKey=_keyMap[cur];
    if(_reqKey){
      // Effective key value: staged change wins, else scan all sections for the field.
      var _keyVal=dirty[_reqKey]!==undefined?dirty[_reqKey]:(function(){for(var _s=0;_s<cfg.length;_s++){var _fs=cfg[_s].fields||[];for(var _fi=0;_fi<_fs.length;_fi++){if(_fs[_fi].key===_reqKey)return _fs[_fi].val||'';}}return '';})();
      if(!_keyVal||_keyVal==='(not set)'){
        var _warn=document.createElement('div');
        _warn.className='warn-box';
        _warn.textContent='⚠ '+_reqKey+' is not set  --  annotation will fail with this provider.';
        mid.appendChild(_warn);
      }
    } else if(cur==='local'){
      fetch('/api/ollamamodels').then(function(r){return r.json();}).then(function(d){
        if(!d.models||!d.models.length){
          var _warn2=document.createElement('div');
          _warn2.className='warn-box';
          _warn2.textContent='⚠ No Ollama models available  --  annotation will fail with local provider.';
          mid.appendChild(_warn2);
        }
      }).catch(function(){});
    }
  }
}
// Render a multi-select of collection names backed by /api/collections.
// The field value is a comma-separated list; clicking a chip toggles
// membership, and All/None buttons stage the full/empty list via mkd().
function renderListPicker(mid,f){
  mid.textContent='Loading...';
  fetch('/api/collections').then(function(r){return r.json();})
  .then(function(d){
    mid.innerHTML='';
    var cur_val=dirty[f.key]!==undefined?dirty[f.key]:f.val;
    // Split the comma-separated value into the active set, dropping blanks.
    var active=cur_val.split(',').map(function(s){return s.trim();}).filter(Boolean);
    var wrap=document.createElement('div');
    if(!d.collections.length){
      wrap.textContent='No collections found.'; mid.appendChild(wrap); return;
    }
    d.collections.forEach(function(c){
      var it=document.createElement('span');
      it.className='ci'+(active.indexOf(c)>=0?' on':'');
      it.textContent=c;
      it.onclick=function(){
        var idx=active.indexOf(c);
        if(idx>=0) active.splice(idx,1); else active.push(c);
        mkd(f.key,active.join(','));
        renderRight();
      };
      wrap.appendChild(it);
    });
    var ca=document.createElement('div'); ca.className='ca';
    var all=document.createElement('button'); all.className='btn'; all.textContent='All';
    all.onclick=function(){mkd(f.key,d.collections.join(','));renderRight();};
    var none=document.createElement('button'); none.className='btn'; none.textContent='None';
    none.onclick=function(){mkd(f.key,'');renderRight();};
    ca.appendChild(all); ca.appendChild(none); wrap.appendChild(ca); mid.appendChild(wrap);
  }).catch(function(){mid.textContent='(error loading collections)';});
}
// _catalogTable: fetch a model list from `fetchUrl` and render it as a
// Model/Params/Size/RAM req/Status/Action table appended to `el`.
// If `ramDiv` is supplied, its text is updated from d.ramAvailMb.
// Returns the <table> element (already appended; rows fill in async).
// NOTE(review): near-duplicate of _renderCatalogRows  --  keep the two in sync.
function _catalogTable(el, fetchUrl, ramDiv) {
  var tbl=document.createElement('table');
  tbl.style.cssText='width:100%;border-collapse:collapse;font-size:13px;margin-bottom:16px';
  var hr=tbl.createTHead().insertRow();
  ['Model','Params','Size','RAM req','Status','Action'].forEach(function(h){
    var th=document.createElement('th');
    th.style.cssText='text-align:left;padding:3px 8px;white-space:nowrap;font-size:13px'; th.className='tbl-hdr';
    th.textContent=h; hr.appendChild(th);
  });
  var tbody=tbl.createTBody();
  var lr=tbody.insertRow(); lr.insertCell().colSpan=6; lr.cells[0].textContent='Loading...';
  el.appendChild(tbl);
  fetch(fetchUrl).then(function(r){return r.json();})
  .then(function(d){
    if(ramDiv){var ram=d.ramAvailMb||0;ramDiv.textContent='Available RAM: '+(ram>=1024?(ram/1024).toFixed(1)+'GB':ram+'MB');}
    tbody.innerHTML='';
    (d.models||[]).forEach(function(m){
      var row=tbody.insertRow();
      // Status coloring precedence: installed > fits > tight > too large.
      var fitClass=m.installed?'status-ok':m.fits?'':m.tight?'status-warn':'status-err';
      var td0=row.insertCell(); td0.style.cssText='padding:4px 8px'; td0.className='tbl-row';
      var nm=document.createElement('b'); nm.textContent=m.name; td0.appendChild(nm);
      if(m.note){var nt=document.createElement('div');nt.className='sub';nt.textContent=m.note;td0.appendChild(nt);}
      var td1=row.insertCell(); td1.style.cssText='padding:4px 8px;white-space:nowrap'; td1.className='tbl-row';
      td1.textContent=m.params||m.dim||'';
      var td2=row.insertCell(); td2.style.cssText='padding:4px 8px;white-space:nowrap'; td2.className='tbl-row';
      // Guard missing sizeMb/ramMb (would otherwise render 'undefinedMB');
      // ' -- ' placeholder matches _renderCatalogRows.
      td2.textContent=m.sizeMb?(m.sizeMb>=1024?(m.sizeMb/1024).toFixed(1)+'GB':m.sizeMb+'MB'):' -- ';
      var td3=row.insertCell(); td3.style.cssText='padding:4px 8px;white-space:nowrap'; td3.className='tbl-row';
      td3.textContent=m.ramMb?(m.ramMb>=1024?(m.ramMb/1024).toFixed(1)+'GB':m.ramMb+'MB'):' -- ';
      var td4=row.insertCell(); td4.style.cssText='padding:4px 8px;white-space:nowrap'; td4.className='tbl-row '+fitClass;
      td4.textContent=m.installed?'✓ installed':(m.fits?'✓ fits':(m.tight?'~ tight':'✗ too large'));
      var td5=row.insertCell(); td5.style.cssText='padding:4px 8px;white-space:nowrap'; td5.className='tbl-row';
      if(!m.installed){
        var pb=document.createElement('button'); pb.className='btn'; pb.textContent='Pull';
        if(!m.fits&&!m.tight) pb.style.opacity='0.5';
        (function(model,btn,scell,acell){
          btn.onclick=function(){
            // Oversized models need explicit confirmation before pulling.
            if(!m.fits&&!m.tight&&!confirm('Model may exceed available RAM. Pull anyway?'))return;
            btn.textContent='Pulling...'; btn.disabled=true;
            _doPull(model, btn,
              function(){ scell.textContent='✓ installed'; scell.style.color='#080';
                acell.innerHTML='';
                var db=document.createElement('button');db.className='btn';db.textContent='Remove';
                _addDeleteBtn(db,model,scell,acell); acell.appendChild(db); },
              function(e){ btn.textContent='Pull'; btn.disabled=false; st('Pull failed: '+(e||'?'),'err'); }
            );
          };
        })(m.tag||m.name,pb,td4,td5);
        td5.appendChild(pb);
      } else {
        var db=document.createElement('button'); db.className='btn'; db.textContent='Remove';
        _addDeleteBtn(db,m.tag||m.name,td4,td5);
        td5.appendChild(db);
      }
    });
  }).catch(function(e){tbody.innerHTML='';var r=tbody.insertRow();r.insertCell().colSpan=6;r.cells[0].textContent='Error: '+e.message;});
  return tbl;
}
// renderAiDownloads: build the AI Downloads panel inside `el`.
// Shows a fetching indicator with an elapsed-seconds ticker, fetches
// /api/modelcatalog (aborted after 20s via AbortController), then renders an
// LLM table and an Embed table via _renderCatalogRows.
function renderAiDownloads(el){
  // Status bar: fetching indicator + elapsed timer
  var statusBar=document.createElement('div');
  statusBar.style.cssText='margin-bottom:10px;min-height:20px'; statusBar.className='sub';
  statusBar.textContent='Fetching list of available downloads...';
  el.appendChild(statusBar);
  var ramBar=document.createElement('div');
  ramBar.style.cssText='margin-bottom:12px'; ramBar.className='sub';
  ramBar.textContent='';
  el.appendChild(ramBar);
  // Elapsed timer  --  ticks every second until data arrives
  var _elapsed=0;
  var _timer=setInterval(function(){
    _elapsed++;
    statusBar.textContent='Fetching list of available downloads... ('+_elapsed+'s)';
  },1000);
  function _done(msg){ clearInterval(_timer); statusBar.textContent=msg||''; }
  // Fetch live catalog (20s timeout)
  var ctrl=new AbortController();
  var fetchTimeout=setTimeout(function(){ctrl.abort();},20000);
  fetch('/api/modelcatalog',{signal:ctrl.signal})
  .then(function(r){return r.json();})
  .then(function(d){
    clearTimeout(fetchTimeout);
    _done('');
    // Normalize once: error payloads may omit llm/embed entirely.
    // (Previously `d.llm.length` in the empty-check below threw on such payloads.)
    var llmModels=d.llm||[];
    var embedModels=d.embed||[];
    var ram=d.ramAvailMb||0;
    var ramStr=ram>=1024?(ram/1024).toFixed(1)+'GB':ram+'MB';
    ramBar.textContent='Available RAM: '+ramStr;
    // LLM section
    var h1=document.createElement('div');
    h1.style.cssText='font-weight:bold;font-size:14px;margin:0 0 4px';
    h1.textContent='Local LLM Models'; el.appendChild(h1);
    var s1=document.createElement('div');
    s1.style.cssText='margin-bottom:8px'; s1.className='sub';
    s1.textContent='Download Ollama LLM models. Set active model in AI Services → Local model name.';
    el.appendChild(s1);
    _renderCatalogRows(el, llmModels, 'llm');
    // Embed section
    var h2=document.createElement('div');
    h2.style.cssText='font-weight:bold;font-size:14px;margin:16px 0 4px;border-top:2px solid #111;padding-top:12px';
    h2.textContent='Embed Models'; el.appendChild(h2);
    var s2=document.createElement('div');
    s2.style.cssText='margin-bottom:8px'; s2.className='sub';
    s2.textContent='Download Ollama embed models. Set active model in Embeddings → Embed model.';
    el.appendChild(s2);
    _renderCatalogRows(el, embedModels, 'embed');
    if(!llmModels.length&&!embedModels.length){
      var msg=document.createElement('div');
      msg.style.cssText='margin-top:8px'; msg.className='status-err';
      msg.textContent=d.error?('Error: '+d.error):'No models found. Check Ollama host is reachable.';
      el.appendChild(msg);
    }
  })
  .catch(function(e){
    clearTimeout(fetchTimeout);
    _done('');
    var msg=document.createElement('div');
    msg.style.cssText='margin-top:8px'; msg.className='status-err';
    msg.textContent=e.name==='AbortError'?'Timed out fetching model list (20s). Check network.':'Error: '+e.message;
    el.appendChild(msg);
  });
}
// _renderCatalogRows: render `models` as a catalog table appended to `el`.
// `kind` selects the second column header: 'Dim' for embed models, else
// 'Params'. Installed models get a Remove button; others get a Pull button
// wired to _doPull with confirm() when the model may not fit in RAM.
function _renderCatalogRows(el, models, kind){
  var headers=kind==='embed'
    ?['Model','Dim','Size','RAM req','Status','Action']
    :['Model','Params','Size','RAM req','Status','Action'];
  var table=document.createElement('table');
  table.style.cssText='width:100%;border-collapse:collapse;font-size:13px;margin-bottom:16px';
  var headRow=table.createTHead().insertRow();
  headers.forEach(function(label){
    var th=document.createElement('th');
    th.style.cssText='text-align:left;padding:3px 8px;white-space:nowrap;font-size:13px';
    th.className='tbl-hdr';
    th.textContent=label;
    headRow.appendChild(th);
  });
  var body=table.createTBody();
  // Append one styled cell to `row` with the given inline CSS and class.
  function addCell(row, css, cls){
    var td=row.insertCell();
    td.style.cssText=css;
    td.className=cls;
    return td;
  }
  // Format a megabyte count for display; ' -- ' when absent/zero.
  function fmtMb(mb){
    return mb?(mb>=1024?(mb/1024).toFixed(1)+'GB':mb+'MB'):' -- ';
  }
  models.forEach(function(m){
    var row=body.insertRow();
    var fitClass=m.installed?'status-ok':m.fits?'':m.tight?'status-warn':'status-err';
    var nameCell=addCell(row,'padding:4px 8px','tbl-row');
    var bold=document.createElement('b');
    bold.textContent=m.name;
    nameCell.appendChild(bold);
    if(m.note){
      var note=document.createElement('div');
      note.className='sub';
      note.textContent=m.note;
      nameCell.appendChild(note);
    }
    addCell(row,'padding:4px 8px;white-space:nowrap','tbl-row').textContent=m.params||m.dim||'';
    addCell(row,'padding:4px 8px;white-space:nowrap','tbl-row').textContent=fmtMb(m.sizeMb);
    addCell(row,'padding:4px 8px;white-space:nowrap','tbl-row').textContent=fmtMb(m.ramMb);
    var statusCell=addCell(row,'padding:4px 8px;white-space:nowrap','tbl-row '+fitClass);
    statusCell.textContent=m.installed?'✓ installed':(m.fits?'✓ fits':(m.tight?'~ tight':'✗ too large'));
    var actionCell=addCell(row,'padding:4px 8px;white-space:nowrap','tbl-row');
    if(m.installed){
      var rmBtn=document.createElement('button');
      rmBtn.className='btn';
      rmBtn.textContent='Remove';
      _addDeleteBtn(rmBtn,m.tag||m.name,statusCell,actionCell);
      actionCell.appendChild(rmBtn);
    } else {
      var pullBtn=document.createElement('button');
      pullBtn.className='btn';
      pullBtn.textContent='Pull';
      if(!m.fits&&!m.tight) pullBtn.style.opacity='0.5';
      var modelTag=m.tag||m.name;
      pullBtn.onclick=function(){
        if(!m.fits&&!m.tight&&!confirm('Model may exceed available RAM. Pull anyway?'))return;
        pullBtn.textContent='Pulling...';
        pullBtn.disabled=true;
        _doPull(modelTag, pullBtn,
          function(){
            statusCell.textContent='✓ installed';
            statusCell.style.color='#080';
            actionCell.innerHTML='';
            var rb=document.createElement('button');
            rb.className='btn';
            rb.textContent='Remove';
            _addDeleteBtn(rb,modelTag,statusCell,actionCell);
            actionCell.appendChild(rb);
          },
          function(err){
            pullBtn.textContent='Pull';
            pullBtn.disabled=false;
            st('Pull failed: '+(err||'?'),'err');
          }
        );
      };
      actionCell.appendChild(pullBtn);
    }
  });
  el.appendChild(table);
}
// _doPull: stream pull progress from server, update btn text with %, call onSuccess/onFail when done
// POSTs {model} to /api/ollama-pull and consumes the response as an NDJSON
// progress stream. Each line is one JSON object: {error} -> onFail(error);
// {done, ok} or end-of-stream -> onSuccess(); {completed, total} -> percent
// shown on the button; otherwise {status} -> truncated status text shown.
function _doPull(model, btn, onSuccess, onFail) {
  btn.disabled = true;
  fetch('/api/ollama-pull', {method:'POST', headers:{'Content-Type':'application/json'},
    body: JSON.stringify({model: model})})
  .then(function(r) {
    var reader = r.body.getReader();
    var decoder = new TextDecoder();
    var buf = '';  // carries a partial NDJSON line across read() chunks
    function pump() {
      return reader.read().then(function(chunk) {
        // Stream ended without an explicit error -> treat as success.
        if (chunk.done) { onSuccess(); return; }
        buf += decoder.decode(chunk.value, {stream: true});
        // Split on LF (char 10); the last piece may be incomplete, keep it in buf.
        // NOTE(review): String.fromCharCode(10) apparently avoids a literal
        // backslash-n in this embedded source  --  TODO confirm before changing.
        var lines = buf.split(String.fromCharCode(10)); buf = lines.pop();
        for (var i = 0; i < lines.length; i++) {
          if (!lines[i].trim()) continue;
          try {
            var obj = JSON.parse(lines[i]);
            if (obj.error) { btn.disabled = false; onFail(obj.error); return; }
            if (obj.done && obj.ok) { onSuccess(); return; }
            if (obj.total > 0) {
              var pct = Math.round(obj.completed / obj.total * 100);
              btn.textContent = pct + '%';
            } else if (obj.status) {
              // Show short status: 'pulling manifest' -> 'manifest', 'downloading ...' -> 'loading...'
              btn.textContent = obj.status.length > 12 ? obj.status.slice(0,12)+'...' : obj.status;
            }
          } catch(_) {}  // ignore malformed progress lines; keep streaming
        }
        return pump();
      });
    }
    return pump();
  })
  // Network/stream failure: re-enable the button and report the message.
  .catch(function(e) { btn.disabled = false; onFail(e.message); });
}
// _addDeleteBtn: wire `btn` as a Remove button for `model`. On click it
// confirms, POSTs /api/ollama-delete, and on success resets the status cell,
// drops a placeholder Pull button into the action cell, and calls
// renderRight() to rebuild the section with a fully wired handler.
function _addDeleteBtn(btn,model,statusCell,actCell){
  btn.onclick=function(){
    var proceed=confirm('Remove '+model+' from Ollama? This cannot be undone.');
    if(!proceed) return;
    btn.textContent='Removing...';
    btn.disabled=true;
    var opts={method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({model:model})};
    fetch('/api/ollama-delete',opts)
      .then(function(resp){ return resp.json(); })
      .then(function(out){
        if(!out.ok){
          btn.textContent='Remove';
          btn.disabled=false;
          st('Remove failed: '+(out.error||'?'),'err');
          return;
        }
        statusCell.textContent='✓ fits';
        statusCell.style.color='#333';
        actCell.innerHTML='';
        var pullBtn=document.createElement('button');
        pullBtn.className='btn';
        pullBtn.textContent='Pull';
        actCell.appendChild(pullBtn);
        // Reload the section to restore full pull handler
        renderRight();
      })
      .catch(function(err){
        btn.textContent='Remove';
        btn.disabled=false;
        st('Error: '+err.message,'err');
      });
  };
}
// renderPrompts: build the Prompts config panel inside `el`.
// Fetches current prompts from /api/prompts (keys look like
// '<section>_<provider>', e.g. 'annotation_local'), renders one editable
// section per SECTIONS entry with per-provider tabs, a Save button that
// POSTs only edited keys, and a streaming test runner for the local
// annotation prompt.
function renderPrompts(el){
  var PROVIDERS=['default','claude','openai','gemini','local'];
  var PROV_LABELS={default:'Default (all providers)',claude:'Claude',openai:'OpenAI',gemini:'Gemini',local:'Local (Ollama)'};
  var SECTIONS=[
    {key:'system',   label:'Synthesis prompt',    hint:'Sent as system prompt with every query. Default used if provider-specific is empty.'},
    {key:'annotation',label:'Annotation prompt',  hint:'Instruction sent to LLM for each source annotation. Default used if provider-specific is empty.'}
  ];
  // Unsaved edits: '<sectionKey>_<provider>' -> text. Only these keys are POSTed.
  var dirty={};

  // Build one prompt section: header, hint, provider tabs, shared textarea.
  // Returns {el, init}; init() selects the 'default' provider tab.
  function makeSection(sec){
    var wrap=document.createElement('div');
    wrap.style.cssText='margin-bottom:24px;border-bottom:1px solid #ccc;padding-bottom:16px';
    var hdr=document.createElement('div');
    hdr.style.cssText='font-weight:bold;font-size:15px;margin-bottom:4px';
    hdr.textContent=sec.label;
    var hint=document.createElement('div');
    hint.className='hint'; hint.style.marginBottom='8px';
    hint.textContent=sec.hint;
    wrap.appendChild(hdr); wrap.appendChild(hint);

    // Provider tabs
    var tabs=document.createElement('div');
    tabs.style.cssText='display:flex;flex-wrap:wrap;gap:4px;margin-bottom:8px';
    var textarea=document.createElement('textarea');
    textarea.className='fi'; textarea.rows=8;
    textarea.style.cssText='width:100%;font-family:monospace;font-size:12px;resize:vertical';
    var status=document.createElement('div');
    status.className='hint'; status.style.marginTop='4px';

    var activeProvider='default';
    // Switch the textarea to provider p's prompt; dirty edits win over saved.
    function selectProvider(p){
      activeProvider=p;
      tabs.querySelectorAll('button').forEach(function(b){ b.className='btn'+(b._p===p?' active':''); });
      var k=sec.key+'_'+p;
      textarea.value=dirty[k]!==undefined ? dirty[k] : (window._prompts[k]||'');
      if(p==='default') status.textContent='Used when no provider-specific prompt is set.';
      else status.textContent='Leave empty to use the default prompt.';
    }
    PROVIDERS.forEach(function(p){
      var btn=document.createElement('button'); btn.className='btn'; btn._p=p;
      btn.textContent=PROV_LABELS[p];
      btn.onclick=function(){ selectProvider(p); };
      tabs.appendChild(btn);
    });
    // Every keystroke marks the active provider's key dirty.
    textarea.oninput=function(){
      var k=sec.key+'_'+activeProvider;
      dirty[k]=textarea.value;
    };
    wrap.appendChild(tabs); wrap.appendChild(textarea); wrap.appendChild(status);
    wrap.addEventListener('_refresh', function(){ selectProvider(activeProvider); });
    return {el:wrap, init:function(){ selectProvider('default'); }};
  }

  // Load prompts then render
  var loading=document.createElement('div'); loading.className='hint'; loading.textContent='Loading...';
  el.appendChild(loading);
  fetch('/api/prompts').then(function(r){return r.json();}).then(function(p){
    loading.remove();
    window._prompts=p;
    var secs=[];
    SECTIONS.forEach(function(sec){ var s=makeSection(sec); el.appendChild(s.el); s.init(); secs.push(s); });

    // Save button
    var btnrow=document.createElement('div'); btnrow.style.marginTop='12px';
    var saveBtn=document.createElement('button'); saveBtn.className='btnp'; saveBtn.textContent='Save Prompts';
    var saveStatus=document.createElement('span'); saveStatus.className='hint'; saveStatus.style.marginLeft='10px';
    saveBtn.onclick=function(){
      saveBtn.disabled=true; saveBtn.textContent='Saving...';
      fetch('/api/prompts',{method:'POST',headers:{'Content-Type':'application/json'},
        body:JSON.stringify(dirty)})
      .then(function(r){return r.json();})
      .then(function(d){
        saveBtn.disabled=false; saveBtn.textContent='Save Prompts';
        if(d.ok){
          // Fold saved edits into the cached prompts, then clear dirty state.
          Object.assign(window._prompts,dirty);
          for(var k in dirty) delete dirty[k];
          saveStatus.textContent='Saved.';
          saveStatus.style.color='#080';
          setTimeout(function(){saveStatus.textContent='';},2000);
        } else { saveStatus.textContent='Error: '+(d.error||'?'); saveStatus.style.color='#c00'; }
      }).catch(function(e){ saveBtn.disabled=false; saveBtn.textContent='Save Prompts'; saveStatus.textContent='Error: '+e.message; saveStatus.style.color='#c00'; });
    };
    btnrow.appendChild(saveBtn); btnrow.appendChild(saveStatus);
    el.appendChild(btnrow);

    // ── Test Prompts section ──────────────────────────────────────────────
    var testHdr=document.createElement('div');
    testHdr.style.cssText='font-weight:bold;font-size:15px;margin:24px 0 4px;border-top:1px solid #ccc;padding-top:16px';
    testHdr.textContent='Test Local Annotation Prompt';
    var testHint=document.createElement('div'); testHint.className='hint'; testHint.style.marginBottom='8px';
    testHint.textContent='Runs the current Local annotation prompt against a fixed test set and diagnoses failure modes. Uses ANNOTATION_MODEL from Config.';
    el.appendChild(testHdr); el.appendChild(testHint);

    var testBtnRow=document.createElement('div'); testBtnRow.style.cssText='display:flex;align-items:center;gap:10px;margin-bottom:8px';
    var testBtn=document.createElement('button'); testBtn.className='btnp'; testBtn.textContent='Run Tests';
    var testProgress=document.createElement('span'); testProgress.className='hint';
    testBtnRow.appendChild(testBtn); testBtnRow.appendChild(testProgress);
    el.appendChild(testBtnRow);

    var testResults=document.createElement('div');
    testResults.style.cssText='font-family:monospace;font-size:12px;background:#f8f8f8;border:1px solid #ccc;padding:8px;max-height:400px;overflow-y:auto;display:none';
    el.appendChild(testResults);

    testBtn.onclick=function(){
      testBtn.disabled=true; testBtn.textContent='Running...';
      testResults.style.display='block'; testResults.innerHTML='';
      testProgress.textContent='';

      // Append one line to the scrolling results log, optionally colored.
      function log(line, color){
        var d=document.createElement('div');
        if(color) d.style.color=color;
        d.textContent=line;
        testResults.appendChild(d);
        testResults.scrollTop=testResults.scrollHeight;
      }

      // Stream test results via /api/test-annotation
      // NDJSON stream of {type:'result'|'summary'|'error', ...} objects.
      var currentPrompt = dirty['annotation_local'] !== undefined ? dirty['annotation_local'] : (window._prompts && window._prompts['annotation_local'] || '');
      fetch('/api/test-annotation', {
        method:'POST',
        headers:{'Content-Type':'application/json'},
        body: JSON.stringify({ prompt: currentPrompt })
      }).then(function(r){
        var reader=r.body.getReader();
        var decoder=new TextDecoder();
        var buf='';
        function pump(){
          return reader.read().then(function(chunk){
            if(chunk.done){
              testBtn.disabled=false; testBtn.textContent='Run Tests';
              return;
            }
            buf+=decoder.decode(chunk.value,{stream:true});
            // Split on LF; last piece may be a partial line, keep in buf.
            var lines=buf.split(String.fromCharCode(10)); buf=lines.pop();
            for(var i=0;i<lines.length;i++){
              if(!lines[i].trim()) continue;
              try {
                var obj=JSON.parse(lines[i]);
                if(obj.type==='result'){
                  var icon=obj.pass?'✓':'✗';
                  var color=obj.pass?'#060':obj.actual==='ERROR'?'#c00':'#c60';
                  log(icon+' '+obj.id.padEnd(12)+' '+obj.category.padEnd(22)+' '+obj.ms+'ms'+(obj.pass?'':' ['+obj.expected+'->'+obj.actual+']'), color);
                  if(!obj.pass) log('  '+obj.rawResponse.slice(0,100),'#666');
                  testProgress.textContent=obj.n+'/'+obj.total;
                } else if(obj.type==='summary'){
                  log('─'.repeat(60));
                  log(obj.passed+'/'+obj.total+' passed  TP='+obj.tp+' TN='+obj.tn+' FP='+obj.fp+' FN='+obj.fn, obj.passed===obj.total?'#060':'#c60');
                  log('Precision='+Math.round(obj.precision*100)+'%  Recall='+Math.round(obj.recall*100)+'%  F1='+Math.round(obj.f1*100)+'%');
                  if(obj.diagnosis&&obj.diagnosis.length){
                    log('Failure modes:','#c00');
                    obj.diagnosis.forEach(function(d){ log('  ['+d.mode+'] x'+d.count+': '+d.desc,'#c00'); });
                  } else { log('No failure modes detected.','#060'); }
                  testBtn.disabled=false; testBtn.textContent='Run Tests';
                } else if(obj.type==='error'){
                  log('ERROR: '+obj.message,'#c00');
                  testBtn.disabled=false; testBtn.textContent='Run Tests';
                }
              } catch(_){}
            }
            return pump();
          });
        }
        return pump();
      }).catch(function(e){
        log('Error: '+e.message,'#c00');
        testBtn.disabled=false; testBtn.textContent='Run Tests';
      });
    };
  }).catch(function(e){
    loading.textContent='Error loading prompts: '+e.message;
  });
}
// renderDebugOptimize: append a log-file browser below the section's normal
// config fields. `sec` is accepted for signature parity with other section
// renderers but is not used here. Each log gets a button that opens the raw
// file (via /api/lograw) in a new tab, plus a human-readable size label.
function renderDebugOptimize(el, sec) {
  // Fields render via normal path (called before this in renderRight)
  // Just append the log viewer section
  var logHdr=document.createElement('div');
  logHdr.style.cssText='font-weight:bold;font-size:15px;margin:20px 0 6px;border-top:1px solid #ccc;padding-top:14px';
  logHdr.textContent='Log Files';
  el.appendChild(logHdr);

  var logList=document.createElement('div'); logList.style.marginBottom='8px';
  el.appendChild(logList);

  // Replace logList content with a single .hint span holding `msg`.
  // Built with textContent (not innerHTML concatenation) so server error
  // text cannot inject markup into the page.
  function setHint(msg){
    logList.innerHTML='';
    var s=document.createElement('span');
    s.className='hint';
    s.textContent=msg;
    logList.appendChild(s);
  }

  var loadLogs = function() {
    setHint('Loading...');
    fetch('/api/logs').then(function(r){return r.json();}).then(function(d){
      logList.innerHTML='';
      if(!d.files||!d.files.length){ setHint('No log files found.'); return; }
      d.files.forEach(function(f){
        var row=document.createElement('div'); row.style.cssText='display:flex;align-items:center;gap:8px;margin:2px 0';
        var btn=document.createElement('button'); btn.className='btn'; btn.textContent=f.name;
        btn.style.cssText='font-family:monospace;font-size:11px;text-align:left';
        var sz=document.createElement('span'); sz.className='hint';
        // Size display: MB above 1MiB, KB above 1KiB, else raw bytes.
        sz.textContent=(f.size>1024*1024?(f.size/1024/1024).toFixed(1)+'MB':f.size>1024?(f.size/1024).toFixed(0)+'KB':f.size+'B');
        btn.onclick=function(){
          window.open('/api/lograw?name='+encodeURIComponent(f.name),'_blank');
        };
        row.appendChild(btn); row.appendChild(sz); logList.appendChild(row);
      });
    }).catch(function(e){ setHint('Error: '+e.message); });
  };
  loadLogs();
}
// renderDlSearch: build the "search for downloadable models" panel in `el`.
// UI: provider+model selectors (pre-filled from /api/config), multi-select
// preset category chips, a free-text search box, and a merged result table.
// Each active chip (plus any manual query) becomes one /api/dlsearch request
// per provider; results are merged by base model name and ranked by how many
// categories matched.
function renderDlSearch(el){
  // ── Known models per provider (label = display, models = selectable list) ─
  var PROV_MODELS={
    claude: ['claude-opus-4-6','claude-sonnet-4-6','claude-haiku-4-5-20251001'],
    openai: ['gpt-4o-mini-search-preview','gpt-4o-search-preview'],
    gemini: ['gemini-2.5-flash','gemini-2.5-pro','gemini-2.0-flash','gemini-1.5-pro'],
  };
  var PROV_LABELS={auto:'Auto',claude:'Claude',openai:'OpenAI',gemini:'Gemini',all:'All providers'};
  var PROV_NOTES={
    claude:'All Claude models support web search.',
    openai:'Only search-preview models support web search. Standard gpt-4o/mini do not.',
    gemini:'All Gemini models support Google Search grounding.',
  };
  var PRESETS=[
    {label:'General',     q:'llama mistral qwen gemma general purpose'},
    {label:'Coding',      q:'coding code qwen2.5-coder deepseek-coder starcoder codellama'},
    {label:'Reasoning',   q:'reasoning thinking deepseek-r1 qwq phi4-reasoning openthinker'},
    {label:'Vision',      q:'vision multimodal llava llama3.2-vision qwen2.5vl minicpm'},
    {label:'Uncensored',  q:'uncensored dolphin wizard llama2-uncensored dolphin-mixtral'},
    {label:'Tiny (<4B)',  q:'tiny small smollm tinyllama phi gemma3 1b 2b 3b efficient'},
    {label:'Embedding',   q:'embedding nomic mxbai bge snowflake-arctic all-minilm'},
    {label:'Medical',     q:'medical clinical health biology biomedical'},
    {label:'Multilingual',q:'multilingual aya command-r multilingual cross-lingual'},
    {label:'Agents/Tools',q:'tools function calling agents hermes qwen3 mistral-small'},
  ];
  var activeChips={};  // label -> {preset, chip} for toggled-on categories
  // ── Intro ─────────────────────────────────────────────────────────────────
  var intro=document.createElement('div');
  intro.className='hint'; intro.style.marginBottom='10px';
  intro.textContent='Select one or more categories, choose a provider and model, then click Search. Results ranked by how many selected categories each model matches.';
  el.appendChild(intro);
  // ── Provider + model row ──────────────────────────────────────────────────
  var provrow=document.createElement('div');
  provrow.className='fr'; provrow.style.cssText='align-items:center;margin-bottom:8px;flex-wrap:wrap;gap:6px';
  var provlbl=document.createElement('span'); provlbl.className='hint';
  provlbl.style.whiteSpace='nowrap'; provlbl.textContent='Provider:';
  provrow.appendChild(provlbl);
  var provSel=document.createElement('select'); provSel.className='fi';
  provSel.style.cssText='width:auto;min-width:140px;flex:0 0 auto';
  ['auto','claude','openai','gemini','all'].forEach(function(v){
    var o=document.createElement('option'); o.value=v;
    o.textContent=PROV_LABELS[v]; provSel.appendChild(o);
  });
  provrow.appendChild(provSel);
  var modlbl=document.createElement('span'); modlbl.className='hint';
  modlbl.style.whiteSpace='nowrap'; modlbl.textContent='Model:';
  provrow.appendChild(modlbl);
  var modelSel=document.createElement('select'); modelSel.className='fi';
  modelSel.style.cssText='width:auto;min-width:200px;flex:0 0 auto';
  provrow.appendChild(modelSel);
  var modNote=document.createElement('span'); modNote.className='hint'; modNote.style.cssText='font-size:11px;flex-basis:100%;margin-top:2px';
  provrow.appendChild(modNote);
  el.appendChild(provrow);
  // Populate model dropdown based on selected provider
  function refreshModelSel(prov, configuredModel){
    modelSel.innerHTML=''; modNote.textContent='';
    var models=PROV_MODELS[prov]||[];
    if(!models.length||prov==='auto'||prov==='all'){
      var o=document.createElement('option'); o.value='';
      o.textContent=prov==='all'?'(each uses its configured model)':'(uses configured model)';
      modelSel.appendChild(o); modelSel.disabled=true; return;
    }
    modelSel.disabled=false;
    if(PROV_NOTES[prov]) modNote.textContent=PROV_NOTES[prov];
    // Fix: was `list=...` (implicit global); keep it local to this call.
    var list=models.slice();
    // Add configured model to list if it's not already there
    if(configuredModel && list.indexOf(configuredModel)<0) list.unshift(configuredModel);
    list.forEach(function(m){
      var o=document.createElement('option'); o.value=m; o.textContent=m;
      // Default: prefer configured model, else first in list
      if(m===(configuredModel||models[0])) o.selected=true;
      modelSel.appendChild(o);
    });
  }
  // Fetch current configured values then init
  fetch('/api/config').then(function(r){return r.json();}).then(function(cfg){
    var env={};
    (cfg.sections||[]).forEach(function(s){
      (s.fields||[]).forEach(function(f){ if(f.key&&f.val!==undefined) env[f.key]=f.val; });
    });
    // Pre-select the configured provider
    var confProv=env['LLM_PROVIDER']||'auto';
    provSel.value=confProv in PROV_LABELS ? confProv : 'auto';
    var confModel=env['ANTHROPIC_MODEL']||env['OPENAI_MODEL']||env['GEMINI_MODEL']||'';
    function getConfModel(p){
      if(p==='claude') return env['ANTHROPIC_MODEL']||'';
      if(p==='openai') return env['OPENAI_MODEL']||'';
      if(p==='gemini') return env['GEMINI_MODEL']||'';
      return '';
    }
    refreshModelSel(provSel.value, getConfModel(provSel.value));
    provSel.onchange=function(){ refreshModelSel(provSel.value, getConfModel(provSel.value)); };
  }).catch(function(){
    // Config unavailable: fall back to defaults with no configured model.
    refreshModelSel('auto',''); provSel.onchange=function(){ refreshModelSel(provSel.value,''); };
  });
  // ── Preset chips (multi-select toggle) ───────────────────────────────────
  var chiprow=document.createElement('div');
  chiprow.style.cssText='display:flex;flex-wrap:wrap;gap:6px;margin-bottom:8px';
  PRESETS.forEach(function(preset){
    var chip=document.createElement('button'); chip.className='chip';
    chip.style.cssText='padding:3px 10px;border:1px solid;border-radius:12px;cursor:pointer;font-size:12px';
    chip.textContent=preset.label; chip._preset=preset;
    chip.onclick=function(){
      if(activeChips[preset.label]){
        delete activeChips[preset.label]; chip.classList.remove('active');
      } else {
        activeChips[preset.label]={preset:preset,chip:chip}; chip.classList.add('active');
      }
      syncInput();
    };
    chiprow.appendChild(chip);
  });
  el.appendChild(chiprow);
  // ── Search bar ────────────────────────────────────────────────────────────
  var srow=document.createElement('div');
  srow.className='fr'; srow.style.alignItems='center';
  var sinput=document.createElement('input'); sinput.className='fi';
  sinput.placeholder='or type any query  --  model name, use case, topic...';
  sinput.setAttribute('autocomplete','off');
  var sbtn=document.createElement('button'); sbtn.className='btnp';
  sbtn.textContent='Search';
  srow.appendChild(sinput); srow.appendChild(sbtn);
  el.appendChild(srow);
  // ── Results container ─────────────────────────────────────────────────────
  var results=document.createElement('div');
  el.appendChild(results);
  // ── Search handler ────────────────────────────────────────────────────────
  // Mirror active chip labels into the input; _fromChips flags a programmatic
  // change so oninput can tell it apart from manual typing.
  function syncInput(){ var labels=Object.keys(activeChips); sinput._fromChips=(labels.length>0); sinput.value=labels.join(', '); }
  function doSearch(){
    var manualQ=sinput.value.trim();
    var chipLabels=Object.keys(activeChips);
    if(!manualQ && !chipLabels.length) return;
    var prov=provSel.value;
    var model=(modelSel.disabled||!modelSel.value)?'':modelSel.value;
    // Build list of queries: one per active chip + manual if set
    var queries=chipLabels.map(function(lbl){ return {label:lbl, q:activeChips[lbl].preset?activeChips[lbl].preset.q:activeChips[lbl].q}; });
    if(manualQ) queries.push({label:manualQ, q:manualQ});
    results.innerHTML='';
    var msg=document.createElement('div'); msg.className='hint'; msg.style.marginTop='8px';
    msg.textContent='Searching '+queries.length+' quer'+(queries.length===1?'y':'ies')+'...';
    results.appendChild(msg);
    sbtn.disabled=true; sbtn.textContent='Searching...';
    function makeUrl(q, p){
      var u='/api/dlsearch?q='+encodeURIComponent(q);
      if(p && p!=='auto' && p!=='all') u+='&provider='+p;
      if(model) u+='&model='+encodeURIComponent(model);
      return u;
    }
    function fetchAll(providerList){
      // For each query × each provider: fire all, return flat array of {label, models, ramAvailMb}
      var reqs=[];
      queries.forEach(function(qobj){
        providerList.forEach(function(p){
          reqs.push(
            fetch(makeUrl(qobj.q, p))
            .then(function(r){return r.json();})
            .then(function(d){ return {label:qobj.label, models:d.models||[], ramAvailMb:d.ramAvailMb||4000, provider:d.provider||p}; })
            .catch(function(){ return {label:qobj.label, models:[], ramAvailMb:4000}; })
          );
        });
      });
      return Promise.all(reqs);
    }
    var providerList = prov==='all' ? ['claude','openai','gemini'] : [prov];
    fetchAll(providerList).then(function(buckets){
      sbtn.disabled=false; sbtn.textContent='Search';
      results.innerHTML='';
      // ── Merge: map base-model-name -> {model, matchedLabels set} ──────────
      var byBase={};
      buckets.forEach(function(bucket){
        bucket.models.forEach(function(m){
          var base=m.name.replace(/:.*$/,'');
          if(!byBase[base]){ byBase[base]={m:m, labels:[], seen:{}}; }
          if(!byBase[base].seen[bucket.label]){
            byBase[base].labels.push(bucket.label);
            byBase[base].seen[bucket.label]=true;
          }
          // Prefer richer data (sizeMb, ramMb, params)
          if(!byBase[base].m.sizeMb && m.sizeMb) byBase[base].m=Object.assign({},byBase[base].m,{sizeMb:m.sizeMb,ramMb:m.ramMb});
          if(!byBase[base].m.params && m.params) byBase[base].m.params=m.params;
        });
      });
      var merged=Object.values(byBase);
      // Sort: most matched labels first, then alpha
      merged.sort(function(a,b){
        if(b.labels.length!==a.labels.length) return b.labels.length-a.labels.length;
        return a.m.name.localeCompare(b.m.name);
      });
      if(!merged.length){
        var nm=document.createElement('div'); nm.className='hint'; nm.style.marginTop='8px';
        nm.textContent='No models found. Try different categories or a different provider.';
        results.appendChild(nm); return;
      }
      var usedProv=prov==='all'?'all providers':(PROV_LABELS[prov]||prov)+(model?' / '+model:'');
      var countDiv=document.createElement('div'); countDiv.className='hint'; countDiv.style.margin='8px 0';
      countDiv.textContent=merged.length+' result'+(merged.length!==1?'s':'')+' via '+usedProv;
      results.appendChild(countDiv);
      // Render with match badges injected into name cell
      _renderCatalogRowsTagged(results, merged, queries.length);
    });
  }
  sbtn.onclick=doSearch;
  sinput.onkeydown=function(e){ if(e.key==='Enter') doSearch(); };
  // Manual typing clears all chip selections; programmatic syncInput does not.
  sinput.oninput=function(){ if(!sinput._fromChips){ Object.keys(activeChips).forEach(function(lbl){ activeChips[lbl].chip.classList.remove('active'); }); for(var k in activeChips) delete activeChips[k]; } sinput._fromChips=false; };
}
// Variant of _renderCatalogRows that shows match-label badges per model
// el: container element the table is appended to.
// mergedItems: array of {m: model record, labels: matched category labels}.
// totalQueries: number of category searches that ran; label badges are only
// rendered when more than one category was searched.
function _renderCatalogRowsTagged(el, mergedItems, totalQueries){
  var cols=['Model','Params','Size','RAM req','Status','Action'];
  var tbl=document.createElement('table');
  tbl.style.cssText='width:100%;border-collapse:collapse;font-size:13px;margin-bottom:16px';
  var hr=tbl.createTHead().insertRow();
  cols.forEach(function(h){
    var th=document.createElement('th');
    th.style.cssText='text-align:left;padding:3px 8px;white-space:nowrap;font-size:13px';
    th.className='tbl-hdr'; th.textContent=h; hr.appendChild(th);
  });
  var tbody=tbl.createTBody();
  mergedItems.forEach(function(item){
    var m=item.m; var labels=item.labels;
    var row=tbody.insertRow();
    // Status cell styling precedence: installed > fits (no class) > tight > too large.
    var fitClass=m.installed?'status-ok':m.fits?'':m.tight?'status-warn':'status-err';
    var td0=row.insertCell(); td0.style.cssText='padding:4px 8px'; td0.className='tbl-row';
    var nm=document.createElement('b'); nm.textContent=m.name; td0.appendChild(nm);
    // Match badges: one chip per category label this model matched.
    if(totalQueries>1){
      var brow=document.createElement('div'); brow.style.cssText='display:flex;flex-wrap:wrap;gap:3px;margin-top:2px';
      labels.forEach(function(lbl){
        var badge=document.createElement('span'); badge.className='chip active';
        badge.style.cssText='padding:1px 6px;border:1px solid;border-radius:8px;font-size:11px';
        badge.textContent=lbl; brow.appendChild(badge);
      });
      td0.appendChild(brow);
    }
    if(m.note){var nt=document.createElement('div');nt.className='sub';nt.textContent=m.note;td0.appendChild(nt);}
    var td1=row.insertCell(); td1.style.cssText='padding:4px 8px;white-space:nowrap'; td1.className='tbl-row';
    td1.textContent=m.params||'';
    // Size / RAM columns: >=1GB shown in GB with one decimal, else MB;
    // ' -- ' when the catalog entry has no figure.
    var td2=row.insertCell(); td2.style.cssText='padding:4px 8px;white-space:nowrap'; td2.className='tbl-row';
    td2.textContent=m.sizeMb?(m.sizeMb>=1024?(m.sizeMb/1024).toFixed(1)+'GB':m.sizeMb+'MB'):' -- ';
    var td3=row.insertCell(); td3.style.cssText='padding:4px 8px;white-space:nowrap'; td3.className='tbl-row';
    td3.textContent=m.ramMb?(m.ramMb>=1024?(m.ramMb/1024).toFixed(1)+'GB':m.ramMb+'MB'):' -- ';
    var td4=row.insertCell(); td4.style.cssText='padding:4px 8px;white-space:nowrap'; td4.className='tbl-row '+fitClass;
    td4.textContent=m.installed?'✓ installed':(m.fits?'✓ fits':(m.tight?'~ tight':'✗ too large'));
    var td5=row.insertCell(); td5.style.cssText='padding:4px 8px;white-space:nowrap'; td5.className='tbl-row';
    if(!m.installed){
      var pb=document.createElement('button'); pb.className='btn'; pb.textContent='Pull';
      if(!m.fits&&!m.tight) pb.style.opacity='0.5';
      // IIFE pins this row's model tag, button, and status/action cells for
      // the async pull callbacks; on success the Action cell is replaced
      // with a Remove button. (`m` inside onclick is the same forEach-scoped
      // record, so the confirm() check also refers to this row.)
      (function(model,btn,scell,acell){
        btn.onclick=function(){
          if(!m.fits&&!m.tight&&!confirm('Model may exceed available RAM. Pull anyway?'))return;
          btn.textContent='Pulling...'; btn.disabled=true;
          _doPull(model, btn,
            function(){ scell.textContent='✓ installed'; scell.className='tbl-row status-ok';
              acell.innerHTML='';
              var db=document.createElement('button');db.className='btn';db.textContent='Remove';
              _addDeleteBtn(db,model,scell,acell); acell.appendChild(db); },
            function(){ btn.textContent='Pull'; btn.disabled=false; }
          );
        };
      })(m.tag||m.name,pb,td4,td5);
      td5.appendChild(pb);
    } else {
      var db=document.createElement('button'); db.className='btn'; db.textContent='Remove';
      _addDeleteBtn(db,m.tag||m.name,td4,td5); td5.appendChild(db);
    }
  });
  el.appendChild(tbl);
}
// Renders the "Embed Models used by your collections" panel into `el`.
// Fetches /api/embedmodels and shows, per model, an availability mark,
// the embedding dimension, the collections using it, and a "Pull now"
// button for models missing from Ollama. Fields read from the response:
// {models:[{model, dim, collections, available}]}.
function renderEmbedModels(el){
  var hdr=document.createElement('div');
  hdr.style.cssText='font-weight:bold;margin-bottom:10px;border-bottom:2px solid #111;padding-bottom:4px';
  hdr.textContent='Embed Models used by your collections';
  el.appendChild(hdr);
  var loading=document.createElement('div');
  loading.style.cssText='margin-top:8px';
  loading.textContent='Scanning collections...';
  el.appendChild(loading);
  fetch('/api/embedmodels').then(function(r){return r.json();})
  .then(function(d){
    loading.remove();
    if(!d.models||!d.models.length){
      var none=document.createElement('div');
      none.style.cssText='font-style:italic;margin-top:8px';
      none.textContent='No collections indexed yet.';
      el.appendChild(none); return;
    }
    d.models.forEach(function(m){
      var row=document.createElement('div');
      row.style.cssText='display:flex;align-items:baseline;gap:10px;padding:8px 0;border-bottom:1px solid #111';
      // Green check when the model is present in Ollama, red cross otherwise.
      var status=document.createElement('span');
      status.style.cssText='flex:0 0 auto;font-weight:bold';
      status.textContent=m.available?'✓':'✗';
      status.style.color=m.available?'#080':'#c00';
      var info=document.createElement('div');
      info.style.cssText='flex:1';
      var mname=document.createElement('div');
      mname.style.cssText='font-weight:bold';
      mname.textContent=m.model+' (dim='+m.dim+')';
      var colls=document.createElement('div');
      colls.style.cssText='margin-top:2px';
      colls.textContent='Used by: '+m.collections.join(', ');
      info.appendChild(mname); info.appendChild(colls);
      row.appendChild(status); row.appendChild(info);
      if(!m.available){
        var pb=document.createElement('button');
        pb.className='btn'; pb.textContent='Pull now';
        pb.style.cssText='flex:0 0 auto';
        // IIFE pins this row's model name, button, and status element for
        // the async pull callbacks.
        (function(model,btn,stEl){
          btn.onclick=function(){
            btn.textContent='Pulling...'; btn.disabled=true;
            _doPull(model, btn,
              function(){ btn.textContent='Done'; stEl.textContent='✓'; stEl.style.color='#080'; },
              function(e){ btn.textContent='Pull now'; btn.disabled=false; alert('Pull failed: '+e); }
            );
          };
        })(m.model,pb,status);
        row.appendChild(pb);
      }
      el.appendChild(row);
    });
    var foot=document.createElement('div');
    foot.style.cssText='margin-top:12px;font-style:italic';
    foot.textContent='All models used by your collections must remain available in Ollama. The installer pulls them automatically.';
    el.appendChild(foot);
  }).catch(function(e){loading.textContent='Error: '+e.message;});
}
// Renders a clickable chip-picker of installed Ollama models for config
// field `f` inside container `mid`. Clicking a model selects it; clicking
// the current selection deselects back to '(none)'. Falls back to a
// free-text input when Ollama is unreachable or has no models. The
// current value prefers an unsaved dirty edit over the saved field value.
function renderOllamaPicker(mid,f){
  var cur_val=dirty[f.key]!==undefined?dirty[f.key]:f.val;
  var wrap=document.createElement('div');
  var loading=document.createElement('div'); loading.textContent='Loading Ollama models...';
  wrap.appendChild(loading);
  mid.appendChild(wrap);
  fetch('/api/ollamamodels').then(function(r){return r.json();})
  .then(function(d){
    wrap.innerHTML='';
    if(!d.models||!d.models.length){
      // Ollama reachable but empty list: degrade to manual text entry.
      var inp=document.createElement('input');
      inp.type='text'; inp.className='fi';
      inp.value=cur_val==='(none)'?'':cur_val;
      inp.placeholder='Ollama not reachable  --  type model name';
      inp.oninput=function(){mkd(f.key,inp.value||'(none)');};
      wrap.appendChild(inp);
      var note=document.createElement('div'); note.className='sub';
      note.textContent='Start Ollama and reload to pick from list';
      wrap.appendChild(note);
      return;
    }
    d.models.forEach(function(m){
      var it=document.createElement('span');
      it.className='ci'+(m===cur_val?' on':'');
      it.textContent=m;
      it.onclick=function(){
        // Toggle: clicking the selected chip clears the selection.
        var was=m===cur_val;
        cur_val=was?'(none)':m;
        mkd(f.key,cur_val);
        wrap.querySelectorAll('.ci').forEach(function(x){
          x.className='ci'+(x.textContent===cur_val?' on':'');
        });
      };
      wrap.appendChild(it);
    });
    // Configured model not in the installed list: show it as a marked chip
    // at the front so the user sees it is set but missing.
    if(cur_val&&cur_val!=='(none)'&&d.models.indexOf(cur_val)<0){
      var it2=document.createElement('span');
      it2.className='ci on';
      it2.textContent=cur_val+' (not found)';
      wrap.insertBefore(it2, wrap.firstChild);
    }
    var none2=document.createElement('button'); none2.className='btn'; none2.textContent='None';
    none2.style.marginTop='4px';
    none2.onclick=function(){
      cur_val='(none)'; mkd(f.key,'(none)');
      wrap.querySelectorAll('.ci').forEach(function(x){x.className='ci';});
    };
    var ca=document.createElement('div'); ca.className='ca';
    ca.appendChild(none2); wrap.appendChild(ca);
  }).catch(function(){
    // Fetch failed entirely (Ollama down): manual text entry fallback.
    wrap.innerHTML='';
    var inp=document.createElement('input');
    inp.type='text'; inp.className='fi';
    inp.value=cur_val==='(none)'?'':cur_val;
    inp.placeholder='Type model name';
    inp.oninput=function(){mkd(f.key,inp.value||'(none)');};
    wrap.appendChild(inp);
  });
}
// Put the right-hand pane into edit mode for the field keyed by `k`.
function startEdit(k){
  editing = k;
  renderRight();
}
// Leave edit mode without committing and redraw the right-hand pane.
function cancelEdit(){
  editing = null;
  renderRight();
}
// Commit an edited field value: mark it dirty via mkd(), update the
// displayed value (masked for secrets), and leave edit mode. An empty
// value for a secret field means "no change" and just cancels the edit.
function commitEdit(f,val){
  if(f.secret && !val){
    cancelEdit();
    return;
  }
  mkd(f.key, val);
  f.val = f.secret ? msk(val) : val;
  editing = null;
  renderRight();
}
// Mask a secret for display: values longer than 8 chars keep an 8-char
// prefix (so the user can recognize which key is set) followed by '...';
// anything shorter collapses to '***'. Guards null/undefined so callers
// never hit a TypeError on a missing value ('' already yielded '***').
function msk(v){
  if(v==null) return '***';
  return v.length>8?v.slice(0,8)+'...':'***';
}
// Persist all pending configuration changes. Fires up to three parallel
// requests: /api/save for Config key/values, /api/sysprompt and
// /api/annotprompt for prompt texts (each only when edited). On success,
// clears the dirty set and reloads; on the first error, reports it and
// leaves the dirty state intact so the user can retry.
function saveAll(){
  var spVal=dirty['system_prompt'];
  var apVal=dirty['annotation_prompt'];
  // Write ALL field values (not just dirty) so Config is always a complete snapshot.
  // Dirty values take priority over current field values.
  var envChanges={};
  cfg.forEach(function(s){
    (s.fields||[]).forEach(function(f){
      // Prompts go to their own endpoints; secrets are only sent when dirty
      // (their displayed f.val is masked and must never be written back).
      if(f.key&&f.key!=='system_prompt'&&f.key!=='annotation_prompt'&&!f.secret){
        var v=dirty[f.key]!==undefined?dirty[f.key]:f.val;
        if(v!==undefined&&v!==null) envChanges[f.key]=v;
      }
    });
  });
  // Include any dirty keys not in fields (e.g. secret keys that were changed)
  Object.keys(dirty).forEach(function(k){
    if(k!=='system_prompt'&&k!=='annotation_prompt') envChanges[k]=dirty[k];
  });
  var tasks=[];
  if(Object.keys(envChanges).length){
    tasks.push(
      fetch('/api/save',{method:'POST',headers:{'Content-Type':'application/json'},
        body:JSON.stringify({changes:envChanges})})
      .then(function(r){return r.json();})
    );
  }
  if(spVal!==undefined){
    tasks.push(
      fetch('/api/sysprompt',{method:'POST',headers:{'Content-Type':'application/json'},
        body:JSON.stringify({text:spVal})})
      .then(function(r){return r.json();})
    );
  }
  if(apVal!==undefined){
    tasks.push(
      fetch('/api/annotprompt',{method:'POST',headers:{'Content-Type':'application/json'},
        body:JSON.stringify({text:apVal})})
      .then(function(r){return r.json();})
    );
  }
  if(!tasks.length) return;
  Promise.all(tasks).then(function(results){
    // Each endpoint answers {ok:true} or {ok:false,error}; surface the first failure.
    var errs=results.filter(function(d){return !d.ok;});
    if(errs.length){st('Error: '+(errs[0].error||'?'),'err');return;}
    // If any saved key is a secret field, remind the user to restart the
    // service so the new API key takes effect.
    var _hadSecret=Object.keys(envChanges).some(function(k){
      var f=null;
      cfg.forEach(function(s){s.fields&&s.fields.forEach(function(ff){if(ff.key===k)f=ff;});});
      return f&&f.secret;
    });
    dirty={};
    document.getElementById('sbar').className='';
    st(_hadSecret?'Saved  --  restart the RAG service for API key changes to take effect':'Saved','ok');
    load();
  }).catch(function(e){st('Error: '+e.message,'err');});
}
// Wire up the Save/Discard buttons and perform the initial data load.
document.getElementById('savebtn').onclick=saveAll;
document.getElementById('discbtn').onclick=function(){
  // Discard: drop pending edits, clear the save-bar highlight, re-fetch state.
  dirty={}; document.getElementById('sbar').className='';
  load();
};
load();
// LF panel: initialized once the DOM is ready; its button opens the panel.
document.addEventListener('DOMContentLoaded',function(){var openLF=lfInit();document.getElementById('lf-btn').onclick=openLF;});
</script>
<div style="text-align:center;font-size:11px;padding:6px;border-top:1px solid currentColor;margin-top:8px">Copyright &copy; Fred Cohen, 2026 - ALL RIGHTS RESERVED - <a href="//all.net/Notices.html" target="_blank" style="color:inherit">Patents</a></div>
</body></html>`;
}

const server = http.createServer(async function(req, res) {
  try {
  const url = new URL(req.url, 'http://localhost');

  // GET /            -- serve the single-page admin UI (rebuilt per request).
  if (req.method === 'GET' && url.pathname === '/') {
    res.writeHead(200, {'Content-Type':'text/html; charset=utf-8'});
    res.end(buildHtml()); return;
  }
  // GET /lf.js       -- client-side script served from an in-memory constant.
  if (req.method === 'GET' && url.pathname === '/lf.js') {
    res.writeHead(200, {'Content-Type':'application/javascript'});
    res.end(LF_JS); return;
  }
  // GET /favicon.ico -- inline SVG icon.
  if (req.method === 'GET' && url.pathname === '/favicon.ico') {
    res.writeHead(200, {'Content-Type':'image/svg+xml'});
    res.end(FAVICON_SVG); return;
  }
  // GET /api/config  -- config sections plus the Config file path.
  if (req.method === 'GET' && url.pathname === '/api/config') {
    const sections = await getConfig();
    res.writeHead(200, {'Content-Type':'application/json'});
    res.end(JSON.stringify({ sections, envFile:ENV_FILE })); return;
  }
  // GET /api/ollamamodels -- names of models installed in Ollama.
  if (req.method === 'GET' && url.pathname === '/api/ollamamodels') {
    const models = await getOllamaModels();
    res.writeHead(200, {'Content-Type':'application/json'});
    res.end(JSON.stringify({ models })); return;
  }
  // GET /api/collections -- list of indexed collections.
  if (req.method === 'GET' && url.pathname === '/api/collections') {
    const cols = await getCollections();
    res.writeHead(200, {'Content-Type':'application/json'});
    res.end(JSON.stringify({ collections:cols })); return;
  }
  if (req.method === 'GET' && url.pathname === '/api/profile') {
    // Returns hardware specs + per-profile recommendations + current Config values
    const { execSync } = await import('child_process');
    // Conservative defaults used when /proc/meminfo or nproc is unavailable
    // (e.g. non-Linux hosts).
    let ramKb = 4000000, swapKb = 0, cores = 2;
    try {
      const mem = fs.readFileSync('/proc/meminfo','utf8');
      const mt = mem.match(/MemTotal:\s+(\d+)/);  if (mt) ramKb  = parseInt(mt[1]);
      const sw = mem.match(/SwapTotal:\s+(\d+)/); if (sw) swapKb = parseInt(sw[1]);
    } catch(_) {}
    try { cores = parseInt(execSync('nproc 2>/dev/null || echo 2').toString().trim()); } catch(_) {}
    const ramGb   = ramKb / 1024 / 1024;
    // Profile thresholds: <4GB low, <8GB medium, otherwise high.
    const detectedProfile = ramGb < 4 ? 'low' : ramGb < 8 ? 'medium' : 'high';
    // Recommended ingest settings per profile; keys double as Config keys.
    const PROFILES = {
      low:    { INGEST_PROFILE:'low',    CHUNK_SIZE:'512',  CHUNK_SIZE_PDF:'512',  CHUNK_SIZE_AV:'256', CHUNK_OVERLAP_PCT:'50',  EMBED_TIMEOUT_S:'300', EMBED_BACKOFF_S:'10', WHISPER_TIMEOUT_S:'600' },
      medium: { INGEST_PROFILE:'medium', CHUNK_SIZE:'1024', CHUNK_SIZE_PDF:'768',  CHUNK_SIZE_AV:'384', CHUNK_OVERLAP_PCT:'50', EMBED_TIMEOUT_S:'180', EMBED_BACKOFF_S:'5',  WHISPER_TIMEOUT_S:'600' },
      high:   { INGEST_PROFILE:'high',   CHUNK_SIZE:'2048', CHUNK_SIZE_PDF:'1024', CHUNK_SIZE_AV:'512', CHUNK_OVERLAP_PCT:'50', EMBED_TIMEOUT_S:'120', EMBED_BACKOFF_S:'2',  WHISPER_TIMEOUT_S:'600' },
    };
    const ingestKeys = Object.keys(PROFILES.low);
    // `current` reflects what Config holds now, falling back to the
    // detected profile's recommendation for unset keys.
    const current = {};
    const installedProfile = envGet('INGEST_PROFILE', detectedProfile);
    for (const k of ingestKeys) current[k] = envGet(k, PROFILES[detectedProfile][k]);
    const embedModel = envGet('EMBED_MODEL', 'nomic-embed-text');
    res.writeHead(200, {'Content-Type':'application/json'});
    res.end(JSON.stringify({
      ramGb: ramGb.toFixed(1), swapMb: Math.round(swapKb/1024), cores,
      detectedProfile, installedProfile, profiles: PROFILES, current, ingestKeys, embedModel
    }));
    return;
  }
  // GET /api/colsources?col=NAME -- distinct source file names indexed in a
  // collection, read straight from its rag.sqlite3 (read-only). A missing
  // or unreadable database yields an empty list rather than an error.
  if (req.method === 'GET' && url.pathname === '/api/colsources') {
    const colName = url.searchParams.get('col') || '';
    try {
      const chromaDir = path.resolve(PROJECT_DIR, envGet('CHROMA_PATH', './chromadb'));
      // Map collection name -> segment directory holding its sqlite files.
      const { buildSegDirMap } = await import('./collections.js');
      const sdmap = buildSegDirMap(chromaDir);
      const entry = sdmap[colName];
      if (!entry) { res.writeHead(404); res.end(JSON.stringify({error:'not found'})); return; }
      const ragPath = path.join(entry.segDir, 'rag.sqlite3');
      // better-sqlite3 is CommonJS; load it via createRequire from this ESM context.
      const { createRequire } = await import('module');
      const require2 = createRequire(import.meta.url);
      const Database = require2('better-sqlite3');
      let sources = [];
      if (fs.existsSync(ragPath)) {
        try {
          const db = new Database(ragPath, { readonly: true, fileMustExist: true });
          const rows = db.prepare(
            "SELECT DISTINCT string_value as fn FROM embedding_metadata WHERE key='source_file_name' ORDER BY string_value"
          ).all();
          sources = rows.map(r => r.fn).filter(Boolean);
          db.close();
        } catch(e) { sources = []; }
      }
      res.writeHead(200, {'Content-Type':'application/json'});
      res.end(JSON.stringify({ col: colName, sources }));
    } catch(e) {
      res.writeHead(500); res.end(JSON.stringify({error:e.message}));
    }
    return;
  }
  // POST /api/rename-collection {oldName,newName} -- rename a collection
  // across every place its name appears, in this order: sqlite tables,
  // index_meta.json, the chromadb symlink/dir, the source dir, dedup.json,
  // and ACTIVE_COLLECTIONS in Config; finally the query server is told to
  // reload. Steps after the sqlite rename are best-effort per-location
  // (each guarded by existsSync); a thrown error mid-way returns 500 but
  // does not roll back earlier steps.
  if (req.method === 'POST' && url.pathname === '/api/rename-collection') {
    let body = ''; req.on('data', c => { body += c; });
    req.on('end', async function() {
      try {
        const { oldName, newName } = JSON.parse(body);
        if (!oldName || !newName) { res.writeHead(400); res.end(JSON.stringify({ok:false,error:'oldName and newName required'})); return; }
        // Restrict names to filesystem- and Config-safe characters.
        if (!/^[a-zA-Z0-9_.-]+$/.test(newName)) { res.writeHead(400); res.end(JSON.stringify({ok:false,error:'Invalid name'})); return; }
        const chromaDir = path.resolve(PROJECT_DIR, envGet('CHROMA_PATH', './chromadb'));
        const { createRequire } = await import('module');
        const require2 = createRequire(import.meta.url);
        const Database = require2('better-sqlite3');
        // Find the rag.sqlite3 for this collection
        const { buildSegDirMap } = await import('./collections.js');
        const sdmap = buildSegDirMap(chromaDir);
        const entry = sdmap[oldName];
        if (!entry) { res.writeHead(404); res.end(JSON.stringify({ok:false,error:'Collection not found: '+oldName})); return; }
        if (sdmap[newName]) { res.writeHead(409); res.end(JSON.stringify({ok:false,error:'Collection already exists: '+newName})); return; }
        const ragPath = path.join(entry.segDir, 'rag.sqlite3');
        if (!fs.existsSync(ragPath)) { res.writeHead(404); res.end(JSON.stringify({ok:false,error:'rag.sqlite3 not found for: '+oldName})); return; }
        // Rename inside the database: the collections row and the segment
        // topic URI that embeds the collection name.
        const db = new Database(ragPath);
        db.pragma('journal_mode = WAL');
        db.prepare('UPDATE collections SET name=? WHERE name=?').run(newName, oldName);
        db.prepare('UPDATE segments SET topic=? WHERE topic=?').run('persistent://'+newName, 'persistent://'+oldName);
        db.close();
        // Update index_meta.json name field so query.js picks up the new name on reload
        const metaPath = path.join(entry.segDir, 'index_meta.json');
        if (fs.existsSync(metaPath)) {
          try {
            const meta = JSON.parse(fs.readFileSync(metaPath, 'utf8'));
            meta.name = newName;
            fs.writeFileSync(metaPath, JSON.stringify(meta));
          } catch(_) {}
        }
        // Rename chromadb symlink or directory (only if it exists and matches old name)
        const oldLink = path.join(chromaDir, oldName);
        const newLink = path.join(chromaDir, newName);
        if (fs.existsSync(oldLink)) {
          if (fs.existsSync(newLink)) throw new Error('chromadb/' + newName + ' already exists');
          const lstat = fs.lstatSync(oldLink);
          if (lstat.isSymbolicLink()) {
            // Recreate the symlink under the new name, preserving its target.
            const target = fs.readlinkSync(oldLink);
            fs.symlinkSync(target, newLink);
            fs.unlinkSync(oldLink);
          } else {
            fs.renameSync(oldLink, newLink);
          }
        }
        // Rename source directory only if it exists
        const srcDir = path.resolve(PROJECT_DIR, envGet('SOURCE_DIR', './source'));
        const oldSrc = path.join(srcDir, oldName);
        const newSrc = path.join(srcDir, newName);
        if (fs.existsSync(oldSrc)) {
          if (fs.existsSync(newSrc)) throw new Error('source/' + newName + ' already exists');
          fs.renameSync(oldSrc, newSrc);
        }
        // Update dedup.json
        const dedupPath = path.join(PROJECT_DIR, 'dedup.json');
        if (fs.existsSync(dedupPath)) {
          const dedup = JSON.parse(fs.readFileSync(dedupPath, 'utf8'));
          let changed = 0;
          // NOTE: this loop's `entry` shadows the outer segdir `entry`.
          for (const entry of Object.values(dedup)) { if (entry.collection === oldName) { entry.collection = newName; changed++; } }
          if (changed) fs.writeFileSync(dedupPath, JSON.stringify(dedup));
        }
        // Update ACTIVE_COLLECTIONS in Config if old name present
        const envPath = path.join(PROJECT_DIR, 'Config');
        if (fs.existsSync(envPath)) {
          let envText = fs.readFileSync(envPath, 'utf8');
          const acMatch = envText.match(/^ACTIVE_COLLECTIONS=(.*)$/m);
          if (acMatch) {
            const cols = acMatch[1].split(',').map(s => s.trim());
            const idx = cols.indexOf(oldName);
            if (idx >= 0) {
              cols[idx] = newName;
              envText = envText.replace(/^ACTIVE_COLLECTIONS=.*$/m, 'ACTIVE_COLLECTIONS='+cols.join(','));
              fs.writeFileSync(envPath, envText);
            }
          }
        }
        res.writeHead(200, {'Content-Type':'application/json'});
        res.end(JSON.stringify({ok:true}));
        // Notify query server to reload collections (fire and forget)
        try {
          const _webPort = parseInt(envGet('WEB_PORT','3000'));
          const _http = await import('http');
          const _reloadReq = _http.default.request({hostname:'127.0.0.1',port:_webPort,path:'/api/reload',method:'POST'}, r => r.resume());
          _reloadReq.on('error', ()=>{});
          _reloadReq.end();
        } catch(_) {}
      } catch(e) { res.writeHead(500); res.end(JSON.stringify({ok:false,error:e.message})); }
    }); return;
  }
  // POST /api/savepathmap {col,baseUrl,sources} -- merge source-file name
  // mappings for a collection into data/path_map.json and optionally store
  // the collection's base URL in Config. A malformed body (e.g. missing
  // `sources` array) surfaces as a 500 from the catch below.
  if (req.method === 'POST' && url.pathname === '/api/savepathmap') {
    let body = '';
    req.on('data', c => { body += c; });
    req.on('end', function() {
      try {
        const { col, baseUrl, sources } = JSON.parse(body);
        const dataDir = path.join(PROJECT_DIR, 'data');
        if (!fs.existsSync(dataDir)) fs.mkdirSync(dataDir, { recursive: true });
        const pmFile = path.join(dataDir, 'path_map.json');
        // Merge into any existing map; an unreadable file starts fresh.
        let pm = {};
        try { if (fs.existsSync(pmFile)) pm = JSON.parse(fs.readFileSync(pmFile, 'utf8')); } catch(_) {}
        sources.forEach(function(fname) {
          // Collection-qualified key always wins; bare-name key is only
          // added when not already mapped (first collection claims it).
          pm[col + '/' + fname] = [fname];
          if (!pm[fname]) pm[fname] = [fname];
        });
        fs.writeFileSync(pmFile, JSON.stringify(pm, null, 2));
        // Config key derived from the collection name, e.g. COLLECTION_URL_MY_DOCS.
        const key = 'COLLECTION_URL_' + col.replace(/[^A-Za-z0-9]/g, '_').toUpperCase();
        if (baseUrl) envSet(key, baseUrl);
        res.writeHead(200, {'Content-Type':'application/json'});
        res.end(JSON.stringify({ ok: true, entries: sources.length }));
      } catch(e) {
        res.writeHead(500); res.end(JSON.stringify({ error: e.message }));
      }
    });
    return;
  }
  if (url.pathname === '/api/save') {
    let body = '';
    req.on('data', c => { body += c; });
    req.on('end', function() {
      try {
        const changes = JSON.parse(body).changes || {};
        for (const k of Object.keys(changes)) {
          if (k === 'system_prompt') continue;
          envSet(k, changes[k]);
        }
        validateActiveCollections();
        res.writeHead(200, {'Content-Type':'application/json'});
        res.end(JSON.stringify({ ok:true }));
      } catch(e) {
        res.writeHead(200, {'Content-Type':'application/json'});
        res.end(JSON.stringify({ ok:false, error:e.message }));
      }
    }); return;
  }
  // POST /api/test-annotation {prompt?} -- stream annotation test results as
  // NDJSON. Headers are written before the body is parsed, so even setup
  // errors are delivered as in-stream {type:'error'} lines rather than an
  // HTTP error status. The prompt override (if non-blank) takes precedence
  // over the stored local annotation prompt.
  if (req.method === 'POST' && url.pathname === '/api/test-annotation') {
    let body = '';
    req.on('data', c => { body += c; });
    req.on('end', async function() {
      res.writeHead(200, { 'Content-Type': 'application/x-ndjson', 'Transfer-Encoding': 'chunked', 'X-Accel-Buffering': 'no' });
      try {
        const { prompt: promptOverride } = JSON.parse(body || '{}');
        const runner = require(path.join(SCRIPTS_DIR, 'annotation_test_runner.cjs'));
        const p = readPrompts();
        const prompt = (promptOverride && promptOverride.trim()) ? promptOverride.trim() : (p.annotation_local || '');
        // Model preference: ANNOTATION_MODEL, falling back to LOCAL_LLM_MODEL.
        const model = envGet('ANNOTATION_MODEL','') || envGet('LOCAL_LLM_MODEL','');
        const ollamaHost = envGet('OLLAMA_HOST','http://localhost:11434');
        if (!model) { res.write(JSON.stringify({type:'error',message:'No model configured. Set ANNOTATION_MODEL in Config.'})+'\n'); res.end(); return; }
        if (!prompt) { res.write(JSON.stringify({type:'error',message:'No local annotation prompt set. Edit it in the Prompts tab.'})+'\n'); res.end(); return; }
        await runner.run({
          model, prompt, ollamaHost,
          // One NDJSON line per test case as results arrive.
          onResult: (r, n, total) => {
            res.write(JSON.stringify({type:'result', ...r, n, total})+'\n');
          },
          // Final summary line closes the stream.
          onDone: (summary) => {
            res.write(JSON.stringify({type:'summary', ...summary})+'\n');
            res.end();
          },
          // Per-case failure is reported as a failing result; stream stays open.
          onError: (e, tc) => {
            res.write(JSON.stringify({type:'result', id:tc.id, category:tc.category, expected:tc.expected, actual:'ERROR', pass:false, rawResponse:e.message, ms:0})+'\n');
          }
        });
      } catch(e) {
        try { res.write(JSON.stringify({type:'error',message:e.message})+'\n'); res.end(); } catch(_) {}
      }
    }); return;
  }
  if (req.method === 'GET' && url.pathname === '/api/logs') {
    try {
      const logsDir = path.join(PROJECT_DIR, 'logs');
      const name = url.searchParams.get('name');
      if (name) {
        // Serve a specific log file -- sanitize name to prevent traversal
        const safe = path.basename(name);
        const fp = path.join(logsDir, safe);
        if (!fp.startsWith(logsDir) || !fs.existsSync(fp)) {
          res.writeHead(404); res.end('Not found'); return;
        }
        const content = fs.readFileSync(fp, 'utf8');
        res.writeHead(200, {'Content-Type':'application/json'});
        res.end(JSON.stringify({ name: safe, content }));
      } else {
        // List log files sorted newest first
        const files = fs.existsSync(logsDir)
          ? fs.readdirSync(logsDir).filter(f => f.endsWith('.log'))
              .map(f => { const st = fs.statSync(path.join(logsDir, f)); return { name: f, size: st.size, mtime: st.mtimeMs }; })
              .sort((a,b) => b.mtime - a.mtime)
          : [];
        res.writeHead(200, {'Content-Type':'application/json'});
        res.end(JSON.stringify({ files }));
      }
    } catch(e) {
      res.writeHead(500, {'Content-Type':'application/json'});
      res.end(JSON.stringify({ error: e.message }));
    }
    return;
  }
  // GET /api/lograw?name=FILE -- return a log file's raw content as
  // text/plain (used for direct viewing/downloading, vs the JSON wrapper
  // of /api/logs). Same basename + prefix sanitization against traversal.
  if (req.method === 'GET' && url.pathname === '/api/lograw') {
    try {
      const logsDir = path.join(PROJECT_DIR, 'logs');
      const name = url.searchParams.get('name');
      if (!name) { res.writeHead(400); res.end('name required'); return; }
      const safe = path.basename(name);
      const fp = path.join(logsDir, safe);
      if (!fp.startsWith(logsDir + path.sep) && fp !== logsDir) { res.writeHead(400); res.end('bad name'); return; }
      if (!fs.existsSync(fp)) { res.writeHead(404); res.end('Not found'); return; }
      const content = fs.readFileSync(fp, 'utf8');
      res.writeHead(200, {'Content-Type':'text/plain; charset=utf-8'});
      res.end(content);
    } catch(e) {
      res.writeHead(500); res.end('Error: ' + e.message);
    }
    return;
  }
  if (req.method === 'GET' && url.pathname === '/api/prompts') {
    res.writeHead(200, {'Content-Type':'application/json'});
    res.end(JSON.stringify(readPrompts()));
    return;
  }
  if (req.method === 'POST' && url.pathname === '/api/prompts') {
    let body = '';
    req.on('data', c => { body += c; });
    req.on('end', function() {
      try {
        const obj = JSON.parse(body);
        writePrompts(obj);
        res.writeHead(200, {'Content-Type':'application/json'});
        res.end(JSON.stringify({ ok: true }));
      } catch(e) {
        res.writeHead(200, {'Content-Type':'application/json'});
        res.end(JSON.stringify({ ok: false, error: e.message }));
      }
    }); return;
  }
  if (req.method === 'POST' && url.pathname === '/api/sysprompt') {
    let body = '';
    req.on('data', c => { body += c; });
    req.on('end', function() {
      try {
        const { text } = JSON.parse(body);
        sysPromptWrite(text);
        res.writeHead(200, {'Content-Type':'application/json'});
        res.end(JSON.stringify({ ok:true }));
      } catch(e) {
        res.writeHead(200, {'Content-Type':'application/json'});
        res.end(JSON.stringify({ ok:false, error:e.message }));
      }
    }); return;
  }
  if (req.method === 'POST' && url.pathname === '/api/annotprompt') {
    let body = '';
    req.on('data', c => { body += c; });
    req.on('end', function() {
      try {
        const { text } = JSON.parse(body);
        annotPromptWrite(text);
        res.writeHead(200, {'Content-Type':'application/json'});
        res.end(JSON.stringify({ ok:true }));
      } catch(e) {
        res.writeHead(200, {'Content-Type':'application/json'});
        res.end(JSON.stringify({ ok:false, error:e.message }));
      }
    }); return;
  }
  if (req.method === 'GET' && url.pathname === '/api/modelcatalog') {
    // Fetch live model list from Ollama registry + cross-reference with installed models.
    // Falls back to curated static list if network is unavailable.
    try {
      let ramMb = 4000;
      try {
        const mem = fs.readFileSync('/proc/meminfo','utf8');
        const mt = mem.match(/MemAvailable:\s+(\d+)/);
        if (mt) ramMb = Math.floor(parseInt(mt[1]) / 1024);
      } catch(_) {}
      const installed = await getOllamaModelsFull();
      const installedMap = {};
      for (const m of installed) {
        installedMap[m.name] = m;                                  // exact: llama3.2:3b
        installedMap[m.name.replace(/:latest$/,'')] = m;           // strip :latest
        installedMap[m.name.replace(/:.*$/,'')] = m;               // base name: llama3.2
      }
      function _annotate(m) {
        const key   = (m.tag||m.name).replace(/:latest$/,'');      // catalog key
        const base  = (m.tag||m.name).replace(/:.*$/,'');          // base name
        const inst  = installedMap[key] || installedMap[base] || installedMap[m.name] || null;
        return { ...m, installed:!!inst, fits:m.ramMb?m.ramMb<=ramMb:true,
          tight:m.ramMb?(m.ramMb>ramMb&&m.ramMb<=Math.floor(ramMb*1.25)):false, ramAvailMb:ramMb };
      }
      // Try to fetch live list from Ollama search API (actual working endpoint)
      let llmLive = [], embedLive = [];
      try {
        const ctrl = new AbortController();
        const tid = setTimeout(() => ctrl.abort(), 15000);
        // ollama.com/search returns JSON with {models:[{name,description,pulls,tags:[{name,size}]}]}
        const r = await fetch('https://ollama.com/search?q=&sort=featured&limit=200',
          { signal: ctrl.signal, headers: { 'Accept': 'application/json' } });
        clearTimeout(tid);
        if (r.ok) {
          const ct = r.headers.get('content-type') || '';
          if (ct.includes('application/json')) {
            const data = await r.json();
            const models = Array.isArray(data) ? data : (data.models || data.results || []);
            const EMBED_NAMES = /embed|e5|bge|nomic|minilm|arctic-embed|snowflake|gte|mxbai/i;
            for (const m of models) {
              const name = (m.name || m.model || '').trim();
              if (!name) continue;
              const desc = (m.description || m.desc || '').slice(0, 120);
              let sizeMb = null, ramMb = null;
              const tags = m.tags || [];
              const lt = tags.find(t => t.name === 'latest') || tags[0];
              if (lt && lt.size) { sizeMb = Math.round(lt.size / 1024 / 1024); ramMb = Math.round(sizeMb * 1.6); }
              const entry = { name, tag: name + ':latest', sizeMb, ramMb, params: null, note: desc || undefined };
              if (EMBED_NAMES.test(name)) embedLive.push(entry);
              else llmLive.push(entry);
            }
          }
        }
      } catch(_) {} // network unavailable  --  fall through to static list
      // Static fallback catalog (used if live fetch fails or as supplement)
      const LLM_STATIC = [
        { name:'tinyllama',        tag:'tinyllama:latest',        sizeMb:638,   ramMb:1200,  params:'1.1B', note:'Extremely fast, minimal RAM.' },
        { name:'phi3',             tag:'phi3:latest',             sizeMb:2200,  ramMb:3500,  params:'3.8B', note:'Microsoft Phi-3. Strong for its size.' },
        { name:'phi3.5',           tag:'phi3.5:latest',           sizeMb:2200,  ramMb:3500,  params:'3.8B', note:'Phi-3.5  --  improved reasoning over Phi-3.' },
        { name:'llama3.2',         tag:'llama3.2:latest',         sizeMb:2000,  ramMb:3000,  params:'3B',   note:'Meta Llama 3.2 3B. Fast, capable.' },
        { name:'llama3.2:1b',      tag:'llama3.2:1b',             sizeMb:1300,  ramMb:2000,  params:'1B',   note:'Llama 3.2 1B. Minimal RAM, very fast.' },
        { name:'llama3.1',         tag:'llama3.1:latest',         sizeMb:4700,  ramMb:6000,  params:'8B',   note:'Meta Llama 3.1 8B. Solid general-purpose.' },
        { name:'mistral',          tag:'mistral:latest',          sizeMb:4100,  ramMb:5500,  params:'7B',   note:'Mistral 7B. Fast, good for RAG.' },
        { name:'mistral-nemo',     tag:'mistral-nemo:latest',     sizeMb:7100,  ramMb:10000, params:'12B',  note:'Mistral Nemo 12B. Strong instruction following.' },
        { name:'gemma2',           tag:'gemma2:latest',           sizeMb:5500,  ramMb:7000,  params:'9B',   note:'Google Gemma 2 9B. Excellent reasoning.' },
        { name:'gemma2:2b',        tag:'gemma2:2b',               sizeMb:1600,  ramMb:2500,  params:'2B',   note:'Gemma 2 2B. Very small, surprisingly capable.' },
        { name:'qwen2.5',          tag:'qwen2.5:latest',          sizeMb:4700,  ramMb:6000,  params:'7B',   note:'Alibaba Qwen 2.5 7B. Strong multilingual.' },
        { name:'qwen2.5:14b',      tag:'qwen2.5:14b',             sizeMb:9000,  ramMb:12000, params:'14B',  note:'Qwen 2.5 14B. Better reasoning, needs 12GB.' },
        { name:'deepseek-r1',      tag:'deepseek-r1:latest',      sizeMb:4700,  ramMb:6000,  params:'7B',   note:'DeepSeek R1 7B. Strong chain-of-thought reasoning.' },
        { name:'deepseek-r1:14b',  tag:'deepseek-r1:14b',         sizeMb:9000,  ramMb:12000, params:'14B',  note:'DeepSeek R1 14B. Excellent reasoning, needs 12GB.' },
      ];
      const EMBED_STATIC = [
        { name:'all-minilm',             tag:'all-minilm:latest',             sizeMb:46,   ramMb:200,  dim:'384',  note:'Fastest, lowest RAM. Good for low-spec machines.' },
        { name:'nomic-embed-text',       tag:'nomic-embed-text:latest',       sizeMb:274,  ramMb:500,  dim:'768',  note:'Best general-purpose embed model. Recommended default.' },
        { name:'nomic-embed-text-v1.5',  tag:'nomic-embed-text-v1.5:latest',  sizeMb:274,  ramMb:500,  dim:'768',  note:'Nomic v1.5 with Matryoshka support.' },
        { name:'mxbai-embed-large',      tag:'mxbai-embed-large:latest',      sizeMb:669,  ramMb:1200, dim:'1024', note:'Higher accuracy, larger index. Good on 8GB+ RAM.' },
        { name:'snowflake-arctic-embed', tag:'snowflake-arctic-embed:latest', sizeMb:335,  ramMb:700,  dim:'1024', note:'Strong retrieval accuracy, moderate size.' },
        { name:'bge-m3',                 tag:'bge-m3:latest',                 sizeMb:1200, ramMb:2000, dim:'1024', note:'Multilingual, high accuracy. Needs 4GB+ RAM.' },
        { name:'bge-large',              tag:'bge-large:latest',              sizeMb:670,  ramMb:1200, dim:'1024', note:'Strong English accuracy.' },
      ];
      // Build base lists: live if available, otherwise static
      const EMBED_NAMES2 = /embed|e5|bge|nomic|minilm|arctic-embed|snowflake|gte|mxbai/i;
      const baseLlm   = llmLive.length   ? llmLive   : LLM_STATIC;
      const baseEmbed = embedLive.length ? embedLive : EMBED_STATIC;
      // Add any installed models not already in the catalog
      const catalogKeys = new Set([...baseLlm, ...baseEmbed].map(m => m.name.replace(/:.*$/,'')));
      for (const inst of installed) {
        const base = inst.name.replace(/:.*$/,'');
        if (!catalogKeys.has(base) && !catalogKeys.has(inst.name)) {
          const entry = { name: inst.name, tag: inst.name, sizeMb: inst.sizeMb||null,
            ramMb: inst.sizeMb ? Math.round(inst.sizeMb*1.6) : null, params: null,
            note: 'Installed locally' };
          if (EMBED_NAMES2.test(inst.name)) baseEmbed.push(entry);
          else baseLlm.push(entry);
          catalogKeys.add(base);
        }
      }
      const llm   = baseLlm.map(_annotate);
      const embed = baseEmbed.map(_annotate);
      res.writeHead(200, {'Content-Type':'application/json'});
      res.end(JSON.stringify({ llm, embed, ramAvailMb: ramMb, live: llmLive.length > 0 }));
    } catch(e) {
      res.writeHead(200, {'Content-Type':'application/json'});
      res.end(JSON.stringify({ llm:[], embed:[], error:e.message }));
    }
    return;
  }
  if (req.method === 'GET' && url.pathname === '/api/dlsearch') {
    // Model-catalog search: asks a cloud LLM (with its web-search tool) to
    // search ollama.com/library for models matching ?q=, then parses the
    // pipe-separated reply into JSON entries annotated with installed/fits/tight.
    const q = (url.searchParams.get('q') || '').trim();
    if (!q) { res.writeHead(200,{'Content-Type':'application/json'}); res.end(JSON.stringify({models:[]})); return; }
    // ── Resolve provider: use configured default, skip local (no web search), fallback chain ──
    // Returns { provider, key } for the first provider in the chain with a
    // configured API key, or null when none is configured.
    async function resolveSearchProvider() {
      const forceProv  = url.searchParams.get('provider') || '';
      const pref = forceProv && forceProv !== 'auto' ? forceProv : envGet('LLM_PROVIDER','claude');
      // local/ollama cannot web-search -- substitute the full cloud chain;
      // otherwise try the preferred provider first, then the remaining two.
      const chain = (pref === 'local' || pref === 'ollama')
        ? ['claude','openai','gemini']
        : [pref, ...['claude','openai','gemini'].filter(p=>p!==pref)];
      for (const p of chain) {
        if (p === 'claude' && envGet('ANTHROPIC_API_KEY',''))
          return { provider:'claude', key: envGet('ANTHROPIC_API_KEY','') };
        if (p === 'openai' && envGet('OPENAI_API_KEY',''))
          return { provider:'openai', key: envGet('OPENAI_API_KEY','') };
        if (p === 'gemini' && (envGet('GEMINI_API_KEY','') || envGet('GOOGLE_API_KEY','')))
          return { provider:'gemini', key: envGet('GEMINI_API_KEY','') || envGet('GOOGLE_API_KEY','') };
      }
      return null;
    }
    const forceModel = url.searchParams.get('model') || '';
    const provInfo = await resolveSearchProvider();
    if (!provInfo) {
      res.writeHead(200,{'Content-Type':'application/json'});
      res.end(JSON.stringify({error:'No API key found. Set ANTHROPIC_API_KEY, OPENAI_API_KEY, or GEMINI_API_KEY in AI Services.'}));
      return;
    }
    try {
      // Available RAM (Linux /proc/meminfo) drives the fits/tight annotations
      // below; defaults to 4000 MB when /proc is unreadable (non-Linux).
      let ramMb = 4000;
      try {
        const mem = fs.readFileSync('/proc/meminfo','utf8');
        const mt = mem.match(/MemAvailable:\s+(\d+)/);
        if (mt) ramMb = Math.floor(parseInt(mt[1]) / 1024);
      } catch(_) {}
      // Index installed models under three name forms so any spelling in the
      // LLM reply (full tag, sans :latest, bare base name) resolves.
      const installed = await getOllamaModelsFull();
      const installedMap = {};
      for (const m of installed) {
        installedMap[m.name] = m;
        installedMap[m.name.replace(/:latest$/,'')] = m;
        installedMap[m.name.replace(/:.*$/,'')] = m;
      }
      // The prompt constrains output to one pipe-separated line per model so
      // the parser below stays deterministic.
      const SEARCH_PROMPT =
        'Search ollama.com/library for models matching the query: ' + q +
        '. Return as many relevant results as you can find (aim for 10-20).' +
        ' Format each result as one line: NAME | PARAMS | SIZE_MB | DESCRIPTION' +
        ' where NAME is the exact ollama model name (e.g. llama3.2, qwen2.5-coder:7b),' +
        ' PARAMS is a single size like 7B or 8B (not a list), SIZE_MB is the download' +
        ' size in MB as a plain integer for the default/most popular tag (omit if unknown),' +
        ' and DESCRIPTION is a brief one-line description.' +
        ' Only include real Ollama model names pullable with "ollama pull".' +
        ' Do not include variant suffixes like /13B/30B in the PARAMS column.' +
        ' Return only the list lines, no preamble, headers, or markdown.';
      // 30s overall budget shared across all three provider paths.
      const ctrl = new AbortController();
      const tid = setTimeout(() => ctrl.abort(), 30000);
      let rawText = '';
      const providerUsed = provInfo.provider;
      if (provInfo.provider === 'claude') {
        // Anthropic Messages API with the server-side web_search tool.
        const r = await fetch('https://api.anthropic.com/v1/messages', {
          method:'POST', signal:ctrl.signal,
          headers:{
            'Content-Type':'application/json',
            'x-api-key':provInfo.key,
            'anthropic-version':'2023-06-01',
            'anthropic-beta':'web-search-2025-03-05'
          },
          body: JSON.stringify({
            model: forceModel || envGet('ANTHROPIC_MODEL','claude-haiku-4-5-20251001'), max_tokens:2048,
            tools:[{ type:'web_search_20250305', name:'web_search', max_uses:5 }],
            messages:[{ role:'user', content:SEARCH_PROMPT }]
          })
        });
        if (!r.ok) { clearTimeout(tid); throw new Error('Anthropic '+r.status+': '+(await r.text()).slice(0,120)); }
        const d = await r.json();
        // Concatenate only text blocks (tool-use blocks are skipped).
        rawText = (d.content||[]).filter(b=>b.type==='text').map(b=>b.text||'').join('\n');
      } else if (provInfo.provider === 'openai') {
        // OpenAI Responses API with the web_search_preview tool.
        const r = await fetch('https://api.openai.com/v1/responses', {
          method:'POST', signal:ctrl.signal,
          headers:{ 'Content-Type':'application/json', 'Authorization':'Bearer '+provInfo.key },
          body: JSON.stringify({
            model: forceModel || envGet('OPENAI_MODEL','gpt-4o-mini-search-preview'),
            tools:[{ type:'web_search_preview', search_context_size:'low' }],
            input: SEARCH_PROMPT
          })
        });
        if (!r.ok) { clearTimeout(tid); throw new Error('OpenAI '+r.status+': '+(await r.text()).slice(0,120)); }
        const d = await r.json();
        rawText = (d.output||[])
          .filter(o=>o.type==='message')
          .flatMap(o=>(o.content||[]).filter(c=>c.type==='output_text').map(c=>c.text||''))
          .join('\n');
      } else if (provInfo.provider === 'gemini') {
        // Gemini via the official SDK with grounded Google Search.
        // NOTE(review): this path does not pass ctrl.signal, so the 30s abort
        // does not cancel the SDK call -- confirm whether that is intended.
        const { GoogleGenAI } = await import('@google/genai');
        const genai = new GoogleGenAI({ apiKey:provInfo.key });
        const resp = await genai.models.generateContent({
          model: forceModel || envGet('GEMINI_MODEL','gemini-2.0-flash'),
          contents: SEARCH_PROMPT,
          config:{ tools:[{ googleSearch:{} }] }
        });
        rawText = resp.text || '';
      }
      clearTimeout(tid);
      if (!rawText.trim()) {
        res.writeHead(200,{'Content-Type':'application/json'});
        res.end(JSON.stringify({models:[], provider:providerUsed, error:'Provider returned empty response. Check API key and model.'}));
        return;
      }
      // Parse structured list: NAME | PARAMS | SIZE_MB | DESCRIPTION
      // Defensive against LLM formatting noise: strips list numbering and
      // markdown emphasis, dedupes by name, rejects names containing spaces.
      const seen = new Set();
      const models = [];
      for (const line of rawText.split('\n')) {
        const parts = line.split('|').map(s=>s.trim());
        if (parts.length < 2) continue;
        const name = parts[0].replace(/^\d+[\.\)]\s*/,'').replace(/[`*]/g,'').trim();
        if (!name || name.length < 2 || name.includes(' ')) continue;
        if (seen.has(name)) continue;
        seen.add(name);
        // PARAMS must be a single size token -- lists like "7B/13B" are dropped.
        const rawP=parts[1]?parts[1].trim():''; const params=rawP&&rawP!=='N/A'&&!/[\/,]/.test(rawP)?rawP:null;
        const sizeMbR = parts[2] ? parseInt(parts[2]) : null;
        const sizeMb  = sizeMbR && sizeMbR > 0 ? sizeMbR : null;
        // Rough runtime footprint estimate: 1.6x the download size.
        const ramMbM  = sizeMb ? Math.round(sizeMb * 1.6) : null;
        const note    = (parts[3]||'').replace(/[*`]/g,'').trim().slice(0,120) || undefined;
        const base    = name.replace(/:.*$/,'');
        const inst    = installedMap[name] || installedMap[base] || null;
        const fits    = ramMbM ? ramMbM <= ramMb : true;
        // "tight": over available RAM but within 25% of it.
        const tight   = ramMbM ? (ramMbM > ramMb && ramMbM <= Math.floor(ramMb*1.25)) : false;
        models.push({ name, tag:name+':latest', sizeMb, ramMb:ramMbM, params,
                      note, installed:!!inst, fits:!inst&&fits, tight:!inst&&tight });
      }
      res.writeHead(200,{'Content-Type':'application/json'});
      res.end(JSON.stringify({ models, ramAvailMb:ramMb, provider:providerUsed }));
    } catch(e) {
      res.writeHead(200,{'Content-Type':'application/json'});
      res.end(JSON.stringify({error: e.name==='AbortError'?'Search timed out (30s)':e.message}));
    }
    return;
  }
  if (req.method === 'GET' && url.pathname === '/api/embedmodels') {
    try {
      const chromaDir = path.resolve(PROJECT_DIR, envGet('CHROMA_PATH', './chromadb'));
      const currentModel = envGet('EMBED_MODEL', 'nomic-embed-text');
      const cols = getCollectionNames(chromaDir);
      // Read embed_model from each collection's index_meta.json (written by ingest)
      // Fall back to current EMBED_MODEL if not recorded
      const byModel = {}; // model -> [colName]
      for (const name of cols) {
        let model = currentModel;
        try {
          const { buildSegDirMap } = await import('./collections.js');
          const sdmap = buildSegDirMap(chromaDir);
          const seg = sdmap[name];
          if (seg && seg.meta && seg.meta.embed_model) model = seg.meta.embed_model;
        } catch(_) {}
        if (!byModel[model]) byModel[model] = [];
        byModel[model].push(name);
      }
      // Check Ollama availability
      const ollamaList = await getOllamaModels();
      const models = Object.entries(byModel).map(([model, collections]) => ({
        model,
        collections,
        available: ollamaList.some(m => m === model || m.replace(/:latest$/,'') === model),
      }));
      res.writeHead(200, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ models }));
    } catch(e) {
      res.writeHead(200, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ models: [], error: e.message }));
    }
    return;
  }
  if (req.method === 'POST' && url.pathname === '/api/ollama-pull') {
    // Proxy `ollama pull` to the browser, forwarding Ollama's streaming
    // progress as newline-delimited JSON. Headers are sent BEFORE the upstream
    // call, so any later failure must be reported in-band (see catch below).
    try {
      // Read the raw POST body ({"model": "..."}).
      const body = await new Promise((res, rej) => {
        let d = ''; req.on('data', c => d += c); req.on('end', () => res(d)); req.on('error', rej);
      });
      const { model } = JSON.parse(body);
      if (!model) { res.writeHead(400, {'Content-Type':'application/json'}); res.end(JSON.stringify({ok:false,error:'model required'})); return; }
      const host = envGet('OLLAMA_HOST', 'http://localhost:11434').replace(/\/$/, '');

      // Stream pull progress back to client as newline-delimited JSON
      // (X-Accel-Buffering: no asks nginx-style proxies not to buffer).
      res.writeHead(200, {
        'Content-Type': 'application/x-ndjson',
        'Transfer-Encoding': 'chunked',
        'X-Accel-Buffering': 'no',
      });

      // 10-minute overall budget for the pull itself.
      const pr = await fetch(host + '/api/pull', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ name: model, stream: true }),
        signal: AbortSignal.timeout(600000),
      });

      if (!pr.ok) {
        // Headers already sent -- report the upstream failure as an NDJSON line.
        res.end(JSON.stringify({ ok: false, error: 'Ollama returned ' + pr.status }) + '\n');
        return;
      }

      // Pipe Ollama stream to client
      const reader = pr.body.getReader();
      const decoder = new TextDecoder();
      let buf = '';
      let success = false;
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buf += decoder.decode(value, { stream: true });
        const lines = buf.split('\n');
        buf = lines.pop(); // keep incomplete last line
        for (const line of lines) {
          if (!line.trim()) continue;
          try {
            const obj = JSON.parse(line);
            // Ollama emits {"status":"success"} as its final progress record.
            if (obj.status === 'success') success = true;
            // Forward progress to client
            res.write(JSON.stringify({
              ok: !obj.error,
              status: obj.status || '',
              completed: obj.completed || 0,
              total: obj.total || 0,
              error: obj.error || null,
            }) + '\n');
          } catch(_) {} // unparseable progress line -- skip, keep streaming
        }
      }
      // Terminal record tells the client the stream is complete.
      res.end(JSON.stringify({ ok: success, done: true }) + '\n');
    } catch(e) {
      // Best-effort: headers may or may not have been sent at this point.
      try { res.write(JSON.stringify({ ok: false, error: e.message }) + '\n'); res.end(); } catch(_) {}
    }
    return;
  }
  if (req.method === 'POST' && url.pathname === '/api/ollama-delete') {
    try {
      const body = await new Promise((res, rej) => {
        let d = ''; req.on('data', c => d += c); req.on('end', () => res(d)); req.on('error', rej);
      });
      const { model } = JSON.parse(body);
      if (!model) { res.writeHead(400, {'Content-Type':'application/json'}); res.end(JSON.stringify({ok:false,error:'model required'})); return; }
      const host = envGet('OLLAMA_HOST', 'http://localhost:11434').replace(/\/$/, '');
      const pr = await fetch(host + '/api/delete', {
        method: 'DELETE',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ name: model }),
      });
      res.writeHead(200, {'Content-Type':'application/json'});
      res.end(JSON.stringify({ ok: pr.ok, error: pr.ok ? null : ('Ollama returned ' + pr.status) }));
    } catch(e) {
      res.writeHead(200, {'Content-Type':'application/json'});
      res.end(JSON.stringify({ ok: false, error: e.message }));
    }
    return;
  }
  res.writeHead(404); res.end('Not found');
  } catch(e) {
    try { res.writeHead(500); res.end('Server error: ' + e.message); } catch(_) {}
    console.error('Handler error:', e);
  }
});

// Validate ACTIVE_COLLECTIONS against SQLite -- remove stale names, update Config
// Called on startup and after every /api/save
// Drop names from ACTIVE_COLLECTIONS that no longer exist in SQLite, and
// persist the cleaned list back to Config. Best-effort: any failure is
// logged as a warning and the setting is left untouched.
async function validateActiveCollections() {
  try {
    const chromaDir = path.resolve(PROJECT_DIR, envGet('CHROMA_PATH', './chromadb'));
    const { getCollectionNames } = await import('./collections.js');
    const known = new Set(getCollectionNames(chromaDir));
    const raw = envGet('ACTIVE_COLLECTIONS', '');
    if (!raw.trim()) return;
    const requested = raw.split(',').map(s => s.trim()).filter(Boolean);
    const keep = [], drop = [];
    for (const name of requested) (known.has(name) ? keep : drop).push(name);
    if (drop.length === 0) return;
    console.log('  [WARN] ACTIVE_COLLECTIONS: removing unknown: ' + drop.join(', '));
    envSet('ACTIVE_COLLECTIONS', keep.join(','));
    console.log('  [OK]   ACTIVE_COLLECTIONS updated: ' + keep.join(','));
  } catch(e) {
    console.log('  [WARN] Could not validate ACTIVE_COLLECTIONS: ' + e.message);
  }
}
validateActiveCollections();

server.listen(PORT, '0.0.0.0', function() {
  // Banner: show localhost plus the FIRST external IPv4 address, if any.
  // FIX: the original `break` only exited the inner per-address loop, so the
  // outer loop kept running and _lip ended up as the LAST external IPv4 found.
  // A labeled break stops at the first match, matching the break's intent.
  let _lip = null;
  try {
    outer:
    for (const iface of Object.values(os.networkInterfaces())) {
      for (const n of iface) {
        if (n.family === 'IPv4' && !n.internal) { _lip = n.address; break outer; }
      }
    }
  } catch(_) {}
  console.log(`RAGWeed WebC v${VERSION} -- Config UI`);
  console.log(`  http://localhost:${PORT}`);
  if (_lip) console.log(`  http://${_lip}:${PORT}`);
  console.log(`  Config: ${ENV_FILE}`);
  console.log('  Ctrl-C to stop');
});

RAGWEED-WBC-v1.0.102-20260319-000014-473
chmod +x "$SCRIPTS_DIR/webc.js"   # heredoc above just wrote webc.js; make it executable
ok "scripts/webc.js written"      # ok: installer status helper (assumed -- defined earlier in installer)


# STEP 11c: Write scripts/ingest.js (JS ingestion pipeline)
ph "STEP 11c: Writing scripts/ingest.js"
cat > "$SCRIPTS_DIR/ingest.js" << 'RAGWEED-ING-v1.0.102-20260319-000014-473'
#!/usr/bin/env node
// VERSION: 1.0.102
// NOTE(review): the version string appears both in the comment above and the
// const below -- presumably kept in sync by the installer's bump tooling; verify.
const VERSION = '1.0.102';
/**
 * ingest.js -- JS rewrite of ingest.py + preprocessor.py
 *
 * Produces identical ChromaDB datasets and dedup/path_map stores.
 * Full curses-equivalent TUI: two progress bars, log panel, p/r/s/Ctrl-C controls.
 *
 * Writes directly to per-collection rag.sqlite3 + HNSW binary files (pure JS, no Python)
 * to produce bitwise-identical SQLite + HNSW index format.
 *
 * Usage:
 *   node scripts/ingest.js                        # all collections
 *   node scripts/ingest.js -c docs                # one collection
 *   node scripts/ingest.js -c docs refs           # subset
 *   node scripts/ingest.js -c docs --reset        # clear and rebuild
 *   node scripts/ingest.js --list                 # show status
 *   node scripts/ingest.js --zip always           # ZIP policy
 */

import fs      from 'fs';
import path    from 'path';
import crypto  from 'crypto';
import { spawnSync, spawn } from 'child_process';
import { fileURLToPath }    from 'url';
import { createRequire }    from 'module';
import os      from 'os';

const __filename = fileURLToPath(import.meta.url);
const __dirname  = path.dirname(__filename);
const _require   = createRequire(import.meta.url);

// ── CJS DEPENDENCIES (must be present after npm install) ───────────────────────
// pdf-parse's PRESENCE is verified eagerly so a broken install fails loudly at
// startup, but the module itself is loaded lazily on first use (large module;
// its test fixtures are removed at install time). mammoth/adm-zip load eagerly.
try { _require('pdf-parse'); } catch(e) { console.error('FATAL: pdf-parse not found -- run npm install'); process.exit(1); }
const mammoth = _require('mammoth');
const AdmZip  = _require('adm-zip');
// Populated on first PDF extraction (see eager presence check above).
let pdfParse = null;


// Notify web server that a collection is queryable  --  fires once per collection
// per ingest run, only if already in ACTIVE_COLLECTIONS (user must opt in via webc).
// Debounced: subsequent flushes for the same collection are no-ops.
const _notifiedCols = new Set();
function _notifyWebIfActive(name) {
  if (_notifiedCols.has(name)) return;
  _notifiedCols.add(name);
  // Only notify if this collection is already in ACTIVE_COLLECTIONS
  const active = cfg('ACTIVE_COLLECTIONS','').split(',').map(s=>s.trim()).filter(Boolean);
  if (active.length && !active.includes(name)) return;
  const port = parseInt(cfg('WEB_PORT','3000'));
  const ctrl = new AbortController();
  // FIX: clear the 2s abort timer once the request settles. The original never
  // cleared it, which kept the event loop alive ~2s after every notify and
  // fired a pointless abort() on already-completed requests.
  const tid = setTimeout(() => ctrl.abort(), 2000);
  fetch(`http://localhost:${port}/api/reload`, { method:'POST', signal:ctrl.signal })
    .catch(() => {})              // silent  --  web server may not be running
    .finally(() => clearTimeout(tid));
}

// ── PATHS ──────────────────────────────────────────────────────────────────────
// All persistent state lives under the project directory: Config (env-style
// settings file), chromadb/ (per-collection segments), data/ (dedup + caches),
// logs/, and zip_staging/ (archive extraction scratch space).
const PROJECT_DIR  = path.resolve(__dirname, '..');
const ENV_FILE     = path.join(PROJECT_DIR, 'Config');
const CHROMA_PATH  = path.join(PROJECT_DIR, 'chromadb');
const DATA_DIR     = path.join(PROJECT_DIR, 'data');
const LOGS_DIR     = path.join(PROJECT_DIR, 'logs');
const STAGING_DIR  = path.join(PROJECT_DIR, 'zip_staging');
const DEDUP_FILE   = path.join(DATA_DIR, 'dedup.json');
const DEDUP_EXPORT  = path.join(DATA_DIR, 'dedup_export.json');
const INGEST_DB     = path.join(DATA_DIR, 'ingest_db.sqlite3');
const PREPROC_FILE = path.join(DATA_DIR, 'preprocess_cache.json');
const PATH_MAP_FILE= path.join(DATA_DIR, 'path_map.json');
const LOG_FILE     = path.join(LOGS_DIR, 'ingest.log');
const PDF_TXT_DIR  = path.join(DATA_DIR, 'pdf_txt');
const PDF_TXT_CACHE= path.join(DATA_DIR, 'pdf_txt_cache.json');
const CRASH_LOG    = path.join(LOGS_DIR, 'crash.log');

// Ensure writable directories exist before any logging/caching happens.
for (const d of [DATA_DIR, LOGS_DIR, STAGING_DIR, PDF_TXT_DIR])
  fs.mkdirSync(d, { recursive: true });

// ── ENV ────────────────────────────────────────────────────────────────────────
// Load KEY=value pairs from the Config file into process.env.
// Already-set (non-empty) environment variables take precedence, and a single
// layer of surrounding quotes is stripped from values.
function loadEnv() {
  if (!fs.existsSync(ENV_FILE)) return;
  const text = fs.readFileSync(ENV_FILE, 'utf8');
  for (const line of text.split('\n')) {
    const m = line.match(/^([A-Z_][A-Z0-9_]*)=(.*)$/);
    if (!m) continue;                       // not a KEY=value line -- skip
    const [, key, rawValue] = m;
    if (process.env[key]) continue;         // environment wins over Config
    process.env[key] = rawValue.replace(/^["']|["']$/g, '');
  }
}
loadEnv();

function cfg(k, d = '') { return process.env[k] || d; }

// Ingest tunables, all overridable via the Config file (see cfg()).
// PDF and audio/video chunks are capped lower than the general CHUNK_SIZE.
const SOURCE_DIR    = cfg('SOURCE_DIR', path.join(PROJECT_DIR, 'source'));
const EMBED_MODEL   = cfg('EMBED_MODEL', 'nomic-embed-text');
const OLLAMA_HOST   = cfg('EMBED_OLLAMA_HOST', cfg('OLLAMA_HOST', 'http://localhost:11434'));
const CHUNK_SIZE     = parseInt(cfg('CHUNK_SIZE',     '2048'));
const CHUNK_SIZE_PDF = parseInt(cfg('CHUNK_SIZE_PDF', String(Math.min(CHUNK_SIZE, 1024))));
const CHUNK_SIZE_AV  = parseInt(cfg('CHUNK_SIZE_AV',  String(Math.min(CHUNK_SIZE, 512))));
const EMBED_BACKOFF_MS = parseInt(cfg('EMBED_BACKOFF_S', '5')) * 1000;
const CHUNK_OVERLAP_PCT = parseInt(cfg('CHUNK_OVERLAP_PCT', '50')); // percent of chunk size
const OCR_ENABLED      = cfg('OCR_ENABLED', 'no').toLowerCase() === 'yes';
const WHISPER_TIMEOUT  = parseInt(cfg('WHISPER_TIMEOUT_S', '600'));
const WHISPER_MODEL    = cfg('WHISPER_MODEL', '');  // path to ggml model for whisper-cli; auto-detected if blank

// Detect which whisper binary is available: whisper-cli (C++) preferred, whisper (Python) fallback
// Returns { bin, type, model } or null when neither binary responds.
function _detectWhisper() {
  const home = process.env.HOME || '';
  // Check whisper-cli (whisper.cpp). Accept exit status 0 or 1: some builds
  // exit 1 on --help. r.error === undefined means the binary was spawnable.
  const r1 = spawnSync('whisper-cli', ['--help'], { stdio: 'pipe' });
  if (r1.error === undefined && (r1.status === 0 || r1.status === 1)) {
    // Model search order: explicit Config setting first, then project-local,
    // then per-user, then system-wide ggml model locations.
    const candidates = [
      WHISPER_MODEL,
      path.join(PROJECT_DIR, 'whisper-cpp', 'models', 'ggml-base.en.bin'),
      `${home}/.whisper-cpp/models/ggml-base.en.bin`,
      '/usr/share/whisper-cpp/models/ggml-base.en.bin',
    ];
    const modelPath = candidates.find(p => p && fs.existsSync(p)) || '';
    // model may be '' here -- callers must handle a cli without a model file.
    return { bin: 'whisper-cli', type: 'cpp', model: modelPath };
  }
  // Check Python whisper (manages its own models -- no model path needed)
  const r2 = spawnSync('whisper', ['--help'], { stdio: 'pipe' });
  if (r2.error === undefined && (r2.status === 0 || r2.status === 1)) {
    return { bin: 'whisper', type: 'python', model: '' };
  }
  return null;
}
// Whisper transcription is enabled unless WHISPER_ENABLED=no in Config.
const _whisperInfo = cfg('WHISPER_ENABLED', 'yes').toLowerCase() !== 'no' ? _detectWhisper() : null;
// Note: logFile not yet available here -- whisper status logged at ingest start instead
const EMBED_TIMEOUT_MS = parseInt(cfg('EMBED_TIMEOUT_S', '300')) * 1000; // 5 min default; raise EMBED_TIMEOUT_S= in Config on slow hardware
const LIBREOFFICE_TIMEOUT = parseInt(cfg('LIBREOFFICE_TIMEOUT_S', '60')); // seconds

// ── EXTENSION SETS ─────────────────────────────────────────────────────────────
// DOCUMENT_EXTS: everything the pipeline will attempt to ingest (text, code,
// markup, and audio/video destined for transcription).
// NOTE(review): '.webm' is listed in DOCUMENT_EXTS but NOT in AUDIO_EXTS --
// confirm whether .webm files should route through the audio path.
const DOCUMENT_EXTS = new Set([
  '.pdf','.txt','.md','.docx','.doc','.html','.htm','.rtf','.odt','.epub',
  '.tex','.csv','.xml','.json','.yaml','.yml','.toml','.ini','.cfg','.conf',
  '.py','.c','.cpp','.h','.hpp','.java','.js','.ts','.go','.rs',
  '.sh','.bash','.rb','.pl','.lua','.asm','.s','.vhdl','.v','.sv',
  '.sql','.r','.m','.jl','.css','.svg',
  '.mp3','.mp4','.wav','.m4a','.ogg','.flac','.avi','.mov','.mkv','.webm',
]);
// Binary/compiled artifacts that are never ingested.
const SKIP_EXTS  = new Set(['.exe','.dll','.so','.bin','.o','.a','.pyc','.pyo','.class','.loaded_0','.loaded_1']);
// Archives routed through extraction (see zip_staging).
const ZIP_EXTS   = new Set(['.zip','.tar','.gz','.bz2','.xz','.7z','.rar']);
// Legacy OLE/Office container formats.
const OLE_EXTS   = new Set(['.doc','.xls','.ppt','.msg','.pps','.pot']);
// Media routed through whisper transcription.
const AUDIO_EXTS = new Set(['.mp3','.mp4','.wav','.m4a','.ogg','.flac','.avi','.mov','.mkv']);
// Images routed through OCR when OCR_ENABLED=yes.
const IMAGE_EXTS = new Set(['.jpg','.jpeg','.png','.tiff','.tif','.bmp','.gif','.webp']);

// ── LOGGING ────────────────────────────────────────────────────────────────────
// Append-only run log; dbgFile/lvl1File gate on DEBUG_LEVEL (2 and 1+).
const LOG_STREAM = fs.createWriteStream(LOG_FILE, { flags: 'a' });
function logFile(msg) { LOG_STREAM.write(`${new Date().toISOString()} ${msg}\n`); }
const _DEBUG_LEVEL = parseInt(cfg('DEBUG_LEVEL', '0') || '0');
function dbgFile(msg)  { if (_DEBUG_LEVEL >= 2) logFile('[DBG] ' + msg); }
function lvl1File(msg) { if (_DEBUG_LEVEL >= 1) logFile('[L1] ' + msg); }
// Warnings are logged AND accumulated per collection for warnSummary().
const _warnLog = [];
function warnFile(collection, msg) { logFile('WARN ' + msg); _warnLog.push({ collection, msg }); }
// Emit this collection's accumulated warnings to the TUI log panel,
// truncating each message to 100 characters. No-op when there are none.
function warnSummary(tui, name) {
  const mine = _warnLog.filter(w => w.collection === name);
  if (mine.length === 0) return;
  tui.log('Warnings for ' + name + ' (' + mine.length + '):', 'warn');
  for (const w of mine) {
    tui.log('  ' + w.msg.slice(0, 100), 'warn');
  }
}
// Append one line to the crash log, keeping only the newest 500 lines.
// Never throws -- crash logging must not mask the original failure.
function crashWrite(msg) {
  try {
    const ts = new Date().toISOString();
    let lines;
    if (fs.existsSync(CRASH_LOG)) {
      lines = fs.readFileSync(CRASH_LOG, 'utf8').split('\n');
    } else {
      lines = [`=== Run ${ts} v${VERSION} ===`];   // fresh log gets a run header
    }
    lines.push(msg);
    const tail = lines.slice(-500);
    fs.writeFileSync(CRASH_LOG, tail.join('\n') + '\n');
  } catch(_) {}
}

// ── DEDUP STORE ────────────────────────────────────────────────────────────────
// md5 -> { first_seen_path, all_paths, collection, chunks, partial_chunks,
//          indexed_at, size_bytes } (fields as written by markPartial /
// registerIndexed below). Corruption is non-fatal: start fresh.
function loadDedup() {
  try { if (fs.existsSync(DEDUP_FILE)) return JSON.parse(fs.readFileSync(DEDUP_FILE,'utf8')); }
  catch(_) { logFile('WARN dedup store corrupted -- starting fresh'); }
  return {};
}
// Persist both the compact working copy and a pretty-printed export.
function saveDedup(store) {
  fs.writeFileSync(DEDUP_FILE, JSON.stringify(store));
  fs.writeFileSync(DEDUP_EXPORT, JSON.stringify(store, null, 2));
}
function isIndexed(md5, store) { return md5 in store && (store[md5].chunks || 0) > 0; }
function partialChunks(md5, store) { return (store[md5] && store[md5].partial_chunks) || 0; }
// Record that `n` chunks of this content were embedded before the run was
// interrupted, so a resume can skip them. Creates the dedup entry (with
// chunks:0 -- not yet fully indexed) when this md5 is new, then persists.
function markPartial(md5, fpath, collection, n, store, srcDir) {
  const now = new Date().toISOString();
  if (md5 in store) store[md5].partial_chunks = n;
  else store[md5] = { first_seen_path: fpath, all_paths: [fpath], collection, partial_chunks: n,
                      indexed_at: now, chunks: 0, size_bytes: 0 };
  saveDedup(store);
  // Write content identity + location to SQLite if active
  if (_ingestDbActive()) {
    try {
      const db = _ingestDbOpen();
      // BigInt keeps >2^53-byte sizes exact in the INTEGER column.
      let size = BigInt(0); try { size = BigInt(fs.statSync(fpath).size); } catch(_) {}
      const relPath = srcDir ? path.relative(srcDir, fpath) : path.basename(fpath);
      // content: identity row -- first writer wins (DO NOTHING on conflict)
      db.prepare(`INSERT INTO content (md5, size_bytes, first_seen) VALUES (?,?,?)
        ON CONFLICT(md5) DO NOTHING`).run(md5, size, now);
      // content_locations: (collection, rel_path) unique -- md5 may be updated
      // if the file's content changed at the same path
      db.prepare(`INSERT INTO content_locations (md5, collection, rel_path) VALUES (?,?,?)
        ON CONFLICT(collection, rel_path) DO UPDATE SET md5=excluded.md5`).run(md5, collection, relPath);
    } catch(_) {} // SQLite mirror is best-effort; dedup.json remains authoritative
  }
}
function knownMd5ForPath(fpath, store) {
  let size; try { size = fs.statSync(fpath).size; } catch(_) { return null; }
  for (const [md5, entry] of Object.entries(store)) {
    const paths = entry.all_paths || [entry.first_seen_path || ''];
    if (paths.includes(fpath) && entry.size_bytes === size) return md5;
  }
  return null;
}
function recordPath(md5, fpath, store) {
  if (!(md5 in store)) return;
  const paths = store[md5].all_paths || [store[md5].first_seen_path || ''];
  if (!paths.includes(fpath)) { paths.push(fpath); store[md5].all_paths = paths; }
}
// Record a fully indexed file in the dedup store and (when active) mirror it
// into the ingest SQLite DB: content identity, location, per-collection chunk
// count, per-collection file row, and a 'text' capability row.
function registerIndexed(md5, fpath, collection, chunks, store, srcDir) {
  const now = new Date().toISOString();
  let size = 0; try { size = fs.statSync(fpath).size; } catch(_) {}
  // Update-or-create the dedup entry; a successful index clears any
  // partial_chunks implicitly by overwriting the authoritative fields.
  if (md5 in store) Object.assign(store[md5], { chunks, indexed_at: now, collection, size_bytes: size });
  else store[md5] = { first_seen_path: fpath, all_paths: [fpath], indexed_at: now, collection, chunks, size_bytes: size };
  saveDedup(store);
  // Write to new SQLite tables if active
  if (_ingestDbActive()) {
    try {
      const db = _ingestDbOpen();
      // BigInt keeps large byte counts exact in the INTEGER column.
      const size_b = BigInt(size);
      const relPath = srcDir ? path.relative(srcDir, fpath) : path.basename(fpath);
      // content -- identity
      db.prepare(`INSERT INTO content (md5, size_bytes, first_seen) VALUES (?,?,?)
        ON CONFLICT(md5) DO NOTHING`).run(md5, size_b, now);
      // content_locations -- where this content lives
      db.prepare(`INSERT INTO content_locations (md5, collection, rel_path) VALUES (?,?,?)
        ON CONFLICT(collection, rel_path) DO UPDATE SET md5=excluded.md5`).run(md5, collection, relPath);
      // collection_chunks -- how many chunks in this collection
      db.prepare(`INSERT INTO collection_chunks (md5, collection, chunks, ingested_at) VALUES (?,?,?,?)
        ON CONFLICT(md5, collection) DO UPDATE SET chunks=excluded.chunks, ingested_at=excluded.ingested_at
      `).run(md5, collection, chunks, now);
      // ingest_files -- per-collection file tracking
      // (first_seen preserved on conflict; last_seen/md5/chunks refreshed)
      db.prepare(`INSERT INTO ingest_files (collection, source_file, md5, size_bytes, first_seen, last_seen, chunks, superseded)
        VALUES (?,?,?,?,?,?,?,0)
        ON CONFLICT(collection, source_file) DO UPDATE SET
          md5=excluded.md5, last_seen=excluded.last_seen, chunks=excluded.chunks
      `).run(collection, fpath, md5, size_b, now, now, chunks);
      // Re-read the row id to attach the capability record.
      const row = db.prepare('SELECT id FROM ingest_files WHERE collection=? AND source_file=?').get(collection, fpath);
      if (row) {
        db.prepare(`INSERT OR IGNORE INTO ingest_file_caps (file_id, md5, cap, status, chunks, ran_at)
          VALUES (?,?,'text','ok',?,?)`).run(row.id, md5, chunks, now);
      }
      // TODO: content_containers for extracted/embedded content -- deferred
    } catch(e) {
      // SQLite mirror is best-effort; dedup.json above remains authoritative.
      logFile(`WARN ingest_db update failed for ${path.basename(fpath)}: ${e.message}`);
    }
  }
}

// ── DB-BASED DEDUP (replaces dedup.json lookups) ─────────────────────────────
// Scans all collection rag.sqlite3 files for a given source_md5.
// Returns { inTarget: bool, inOther: bool } where:
//   inTarget = md5 already embedded in the target collection (skip entirely)
//   inOther  = md5 found in another collection (copy vectors via crossIndexBatch)
// DB connections are cached for the duration of an ingest run -- one open per collection.
const _dbCheckCache = new Map(); // segDir name -> { db, stmt, binCount, collName, segDir } | null (null = unusable segment)
// Resolve (and memoize) a read-only lookup entry for segment directory `d`.
// Returns null (also cached) when the segment is missing, empty, or unopenable.
function _getDbCheckEntry(d) {
  if (_dbCheckCache.has(d)) return _dbCheckCache.get(d);
  const miss = () => { _dbCheckCache.set(d, null); return null; };

  const segDir  = path.join(CHROMA_PATH, d);
  const ragPath = path.join(segDir, 'rag.sqlite3');
  const binPath = path.join(segDir, 'data_level0.bin');
  if (!fs.existsSync(ragPath) || !fs.existsSync(binPath)) return miss();

  // Vector count comes from the HNSW data file size, not from the DB.
  const binCount = Math.floor(fs.statSync(binPath).size / SPE);
  if (binCount === 0) return miss();

  let db;
  try {
    db = new (_require('better-sqlite3'))(ragPath, { readonly: true, fileMustExist: true });
  } catch (_) { return miss(); }
  // Fold pending WAL frames in so this read-only handle sees latest commits.
  try { db.pragma('wal_checkpoint(PASSIVE)'); } catch (_) {}

  const stmt = db.prepare(
    "SELECT e.id FROM embeddings e JOIN embedding_metadata em ON em.id=e.id WHERE em.key='source_md5' AND em.string_value=? AND e.id<=? LIMIT 1"
  );

  // Collection name (used for target identification) lives in index_meta.json.
  let collName = '';
  try {
    const metaPath = path.join(segDir, 'index_meta.json');
    if (fs.existsSync(metaPath)) collName = JSON.parse(fs.readFileSync(metaPath, 'utf8')).name || '';
  } catch (_) {}

  const entry = { db, stmt, binCount, collName, segDir };
  _dbCheckCache.set(d, entry);
  return entry;
}
// Release every cached read-only DB handle; null entries mark unusable
// segments and are simply dropped. Safe to call more than once.
function closeDbCheckCache() {
  for (const cached of _dbCheckCache.values()) {
    if (!cached) continue;
    try { cached.db.close(); } catch (_) {}
  }
  _dbCheckCache.clear();
}
// Scan every segment's rag.sqlite3 for `md5`.
// Returns { inTarget, inOther }: inTarget = present in the collection whose
// segment dir is `targetSegDir`; inOther = present in some other collection.
function dbCheckMd5(md5, targetSegDir) {
  const found = { inTarget: false, inOther: false };
  if (!md5) return found;

  let segNames;
  try { segNames = fs.readdirSync(CHROMA_PATH); } catch (_) { return found; }

  for (const segName of segNames) {
    const entry = _getDbCheckEntry(segName);
    if (!entry) continue;
    try {
      const hit = entry.stmt.get(md5, entry.binCount);
      if (hit) {
        // Compare segment directory paths -- immune to collection renames.
        if (targetSegDir && entry.segDir === targetSegDir) found.inTarget = true;
        else found.inOther = true;
      }
    } catch (_) {}
    // Both flags set -- nothing more to learn from remaining segments.
    if (found.inTarget && found.inOther) break;
  }
  return found;
}

// SPE: bytes per element in data_level0.bin (dim=768)
// NOTE(review): presumably 768 float32 dims (3072 bytes) plus per-node HNSW
// level-0 link/header overhead = 3212 -- confirm against the index writer
// before changing the embedding dimension.
const SPE = 3212;

// ── INGEST DB (SQLite -- optional upgrade from dedup.json) ───────────────────
// Singleton handle for the ingest-tracking DB; opened lazily by
// _ingestDbOpen() and released by closeIngestDb().
let _idb = null;

// Open (or return the cached) ingest-tracking DB and ensure its schema exists.
// WAL journaling + synchronous=NORMAL trades a little durability for fast,
// reader-friendly writes. The schema is idempotent (IF NOT EXISTS throughout)
// so it is safe to execute on every open. Tables:
//   ingest_files       -- one row per (collection, source file path)
//   ingest_file_caps   -- per-file capability runs (e.g. text extraction)
//   content            -- content identity keyed by md5
//   content_locations  -- where each md5 lives (collection + relative path)
//   content_containers -- container relationships (see TODOs; not yet populated)
//   collection_chunks  -- chunk counts per (md5, collection)
function _ingestDbOpen() {
  if (_idb) return _idb;
  const Database = _require('better-sqlite3');
  _idb = new Database(INGEST_DB);
  _idb.pragma('journal_mode = WAL');
  _idb.pragma('synchronous = NORMAL');
  _idb.exec(`
    CREATE TABLE IF NOT EXISTS ingest_files (
      id          INTEGER PRIMARY KEY,
      collection  TEXT    NOT NULL,
      source_file TEXT    NOT NULL,
      md5         TEXT    NOT NULL,
      size_bytes  INTEGER NOT NULL DEFAULT 0,
      first_seen  TEXT    NOT NULL,
      last_seen   TEXT    NOT NULL,
      chunks      INTEGER NOT NULL DEFAULT 0,
      superseded  INTEGER NOT NULL DEFAULT 0,
      UNIQUE(collection, source_file)
    );
    CREATE TABLE IF NOT EXISTS ingest_file_caps (
      id      INTEGER PRIMARY KEY,
      file_id INTEGER NOT NULL REFERENCES ingest_files(id),
      md5     TEXT    NOT NULL,
      cap     TEXT    NOT NULL,
      status  TEXT    NOT NULL,
      detail  TEXT,
      chunks  INTEGER NOT NULL DEFAULT 0,
      ran_at  TEXT    NOT NULL,
      UNIQUE(file_id, md5, cap)
    );
    CREATE INDEX IF NOT EXISTS idx_ifiles_col  ON ingest_files(collection);
    CREATE INDEX IF NOT EXISTS idx_ifiles_md5  ON ingest_files(md5);
    CREATE INDEX IF NOT EXISTS idx_icaps_file  ON ingest_file_caps(file_id);
    CREATE INDEX IF NOT EXISTS idx_icaps_cap   ON ingest_file_caps(cap, status);
    CREATE TABLE IF NOT EXISTS content (
      md5          TEXT    PRIMARY KEY,
      size_bytes   INTEGER NOT NULL DEFAULT 0,
      first_seen   TEXT    NOT NULL
    );
    CREATE TABLE IF NOT EXISTS content_locations (
      id           INTEGER PRIMARY KEY,
      md5          TEXT    NOT NULL REFERENCES content(md5),
      collection   TEXT    NOT NULL,
      rel_path     TEXT    NOT NULL,
      UNIQUE(collection, rel_path)
    );
    CREATE INDEX IF NOT EXISTS idx_cloc_md5  ON content_locations(md5);
    CREATE INDEX IF NOT EXISTS idx_cloc_col  ON content_locations(collection);
    CREATE TABLE IF NOT EXISTS content_containers (
      id             INTEGER PRIMARY KEY,
      md5            TEXT NOT NULL REFERENCES content(md5),
      container_md5  TEXT NOT NULL REFERENCES content(md5),
      UNIQUE(md5, container_md5)
    );
    CREATE INDEX IF NOT EXISTS idx_ccon_md5  ON content_containers(md5);
    CREATE INDEX IF NOT EXISTS idx_ccon_cont ON content_containers(container_md5);
    CREATE TABLE IF NOT EXISTS collection_chunks (
      id           INTEGER PRIMARY KEY,
      md5          TEXT    NOT NULL,
      collection   TEXT    NOT NULL,
      chunks       INTEGER NOT NULL DEFAULT 0,
      ingested_at  TEXT    NOT NULL,
      UNIQUE(md5, collection)
    );
    CREATE INDEX IF NOT EXISTS idx_cchunks_md5 ON collection_chunks(md5);
    CREATE INDEX IF NOT EXISTS idx_cchunks_col ON collection_chunks(collection);
  `);
  return _idb;
}

// The SQLite ingest DB counts as active only once the file exists AND the
// migration has populated at least one ingest_files row; any error reads
// as "not active" so callers fall back gracefully.
function _ingestDbActive() {
  if (!fs.existsSync(INGEST_DB)) return false;
  try {
    const rowCount = _ingestDbOpen()
      .prepare('SELECT COUNT(*) as n FROM ingest_files')
      .get().n;
    return rowCount > 0;
  } catch (_) {
    return false;
  }
}

// Close the singleton ingest DB handle (best-effort). The cached reference is
// ALWAYS cleared -- previously a throwing close() skipped `_idb = null`, so
// _ingestDbOpen() kept returning a dead handle for the rest of the run.
function closeIngestDb() {
  if (!_idb) return;
  try { _idb.close(); } catch (_) {}
  _idb = null;
}

// ── UPGRADE DB: one-time migration -- walk sources + read chroma.sqlite3 ────────
// Populates ingest_db (content, content_locations, collection_chunks,
// ingest_files, ingest_file_caps) by walking every collection's source
// directory and cross-referencing chunk counts in the legacy chroma.sqlite3.
// No-op (with an INFO message) when chroma.sqlite3 is absent.
async function cmdUpgradeDb() {
  const chromaDb = path.join(CHROMA_PATH, 'chroma.sqlite3');
  if (!fs.existsSync(chromaDb)) {
    console.log('  INFO: chromadb/chroma.sqlite3 not found -- collections already migrated to rag.sqlite3, upgrade-db not needed.');
    return;
  }
  const Database = _require('better-sqlite3');
  const cdb = new Database(chromaDb, { readonly: true });
  const idb = _ingestDbOpen();
  // One timestamp for the whole migration run (first_seen/last_seen/ingested_at).
  const now = new Date().toISOString();

  // ── Get collection -> VECTOR segment UUID map from chroma.sqlite3 ───────────
  let colRows = [];
  try {
    colRows = cdb.prepare(`
      SELECT c.name, s.id as seg_id
      FROM collections c
      JOIN segments s ON s.collection = c.id
      WHERE s.scope = 'VECTOR'
    `).all();
  } catch(_) {
    // Fallback for schemas without a segment scope column.
    try {
      colRows = cdb.prepare(`
        SELECT c.name, s.id as seg_id
        FROM collections c JOIN segments s ON s.collection = c.id
      `).all();
    } catch(e) {
      console.log('  ERROR reading chroma.sqlite3: ' + e.message);
      cdb.close(); closeIngestDb(); return;
    }
  }
  const segMap = {};
  for (const r of colRows) segMap[r.name] = r.seg_id;

  // ── Prepared statements ──────────────────────────────────────────────────────
  const upsertContent = idb.prepare(`
    INSERT INTO content (md5, size_bytes, first_seen)
    VALUES (?, ?, ?)
    ON CONFLICT(md5) DO NOTHING
  `);
  const upsertLocation = idb.prepare(`
    INSERT INTO content_locations (md5, collection, rel_path)
    VALUES (?, ?, ?)
    ON CONFLICT(collection, rel_path) DO UPDATE SET md5=excluded.md5
  `);
  const upsertChunks = idb.prepare(`
    INSERT INTO collection_chunks (md5, collection, chunks, ingested_at)
    VALUES (?, ?, ?, ?)
    ON CONFLICT(md5, collection) DO UPDATE SET
      chunks=excluded.chunks, ingested_at=excluded.ingested_at
  `);
  const upsertIngestFile = idb.prepare(`
    INSERT INTO ingest_files (collection, source_file, md5, size_bytes, first_seen, last_seen, chunks, superseded)
    VALUES (?, ?, ?, ?, ?, ?, ?, 0)
    ON CONFLICT(collection, source_file) DO UPDATE SET
      md5=excluded.md5, last_seen=excluded.last_seen,
      chunks=MAX(chunks, excluded.chunks)
  `);
  const getFileId = idb.prepare('SELECT id FROM ingest_files WHERE collection=? AND source_file=?');
  const upsertCap = idb.prepare(`
    INSERT OR IGNORE INTO ingest_file_caps (file_id, md5, cap, status, chunks, ran_at)
    VALUES (?, ?, 'text', 'ok', ?, ?)
  `);

  // ── Count chunks for a file in chroma.sqlite3 ────────────────────────────────
  // JS-ingested: embedding_id = '{md5}_{n}' -- count with LIKE prefix
  const stCountJs = cdb.prepare(`
    SELECT COUNT(*) as n FROM embeddings
    WHERE segment_id=? AND embedding_id LIKE ?
  `);
  // Python-era: embedding_id is a UUID -- count via metadata source key, cached per segment
  const _srcCountCache = {};
  // Build (once per segment) a source_rel_path -> chunk-count map.
  function getSourceCountMap(segId) {
    if (_srcCountCache[segId]) return _srcCountCache[segId];
    const rows = cdb.prepare(`
      SELECT m.string_value as src, COUNT(DISTINCT e.id) as n
      FROM embeddings e
      JOIN embedding_metadata m ON m.id=e.id
      WHERE e.segment_id=?
        AND m.key IN ('source_rel_path')
      GROUP BY m.string_value
    `).all(segId);
    const map = {};
    for (const r of rows) if (r.src) map[r.src] = r.n;
    _srcCountCache[segId] = map;
    return map;
  }
  // Try the JS-era md5-prefix count first, then the Python-era metadata map
  // (keyed by rel path, falling back to basename). 0 = not indexed.
  function countChunks(segId, md5, fullPath, relPath) {
    if (!segId) return 0;
    const js = stCountJs.get(segId, md5 + '_%');
    if (js && js.n > 0) return js.n;
    const m = getSourceCountMap(segId);
    return m[relPath] || m[path.basename(fullPath)] || 0;
  }

  // ── Discover source collections ──────────────────────────────────────────────
  // Union of on-disk source collections and collections known to chroma.
  const cols = discoverCollections();
  const allColNames = [...new Set([...Object.keys(cols), ...Object.keys(segMap)])].sort();

  console.log(`  Collections: ${allColNames.join(', ')}`);
  console.log(`  Walking source directories and cross-referencing chroma.sqlite3...`);
  console.log('-'.repeat(74));

  let totalFiles = 0, totalChunks = 0, totalUnindexed = 0;

  for (const colName of allColNames) {
    const col = cols[colName];
    if (!col) {
      console.log(`  ${colName.padEnd(22)} -- no source directory, skipping`);
      continue;
    }
    const srcDir = col.source_dir;
    const segId  = segMap[colName];

    // Walk source dir recursively (dotfiles and unreadable entries skipped).
    const allFiles = [];
    (function walk(dir) {
      let entries; try { entries = fs.readdirSync(dir); } catch(_) { return; }
      for (const f of entries) {
        if (f.startsWith('.')) continue;
        const full = path.join(dir, f);
        try {
          const st = fs.statSync(full);
          if (st.isDirectory()) walk(full);
          else allFiles.push({ full, size: st.size });
        } catch(_) {}
      }
    })(srcDir);

    let colFiles = 0, colChunks = 0, colUnindexed = 0;

    // One transaction per collection -- keeps the migration fast and atomic.
    const scanCol = idb.transaction(() => {
      for (const { full, size } of allFiles) {
        // rel_path: relative to collection source dir (not including collection name)
        const relPath = path.relative(srcDir, full);
        let md5 = '';
        try { md5 = md5File(full); } catch(_) { continue; }
        const size_b = BigInt(size);
        const chunks = countChunks(segId, md5, full, relPath);

        upsertContent.run(md5, size_b, now);
        upsertLocation.run(md5, colName, relPath);
        if (chunks > 0) upsertChunks.run(md5, colName, chunks, now);
        upsertIngestFile.run(colName, full, md5, size_b, now, now, chunks);
        const row = getFileId.get(colName, full);
        if (row && chunks > 0) upsertCap.run(row.id, md5, chunks, now);

        // TODO: populate content_containers for extracted/embedded content
        // (ZIP contents, graphics extracted from PDFs, OLE embedded objects etc.)
        // Requires ingest loop to track container relationships -- deferred.

        colFiles++;
        colChunks += chunks;
        if (chunks === 0) colUnindexed++;
      }
    });
    scanCol();

    totalFiles     += colFiles;
    totalChunks    += colChunks;
    totalUnindexed += colUnindexed;
    const note = colUnindexed ? `  (${colUnindexed} not yet indexed)` : '';
    console.log(`  ${colName.padEnd(22)} ${String(colFiles).padStart(5)} files  ${String(colChunks).padStart(7)} chunks${note}`);
  }

  cdb.close();
  closeIngestDb();

  console.log('-'.repeat(74));
  console.log(`  Total: ${totalFiles} files, ${totalChunks} chunks`);
  if (totalUnindexed) console.log(`  ${totalUnindexed} files not yet indexed -- run ingest to add them`);
  console.log(`  content, content_locations, collection_chunks tables populated.`);
  console.log(`  ingest_db.sqlite3 is now active -- dedup.json kept as backup.`);
}

// ── PREPROCESS CACHE ───────────────────────────────────────────────────────────
// Load the preprocess cache from disk; {} when missing or unreadable.
function loadPreprocCache() {
  try {
    if (fs.existsSync(PREPROC_FILE)) {
      return JSON.parse(fs.readFileSync(PREPROC_FILE, 'utf8'));
    }
  } catch (_) {}
  return {};
}
// Persist the preprocess cache (pretty-printed for hand inspection).
function savePreprocCache(cache) {
  fs.writeFileSync(PREPROC_FILE, JSON.stringify(cache, null, 2));
}

// ── PDF TXT CACHE ─────────────────────────────────────────────────────────────
// Tracks which PDFs have had full-text extracted to data/pdf_txt/{md5}.txt
// Keyed by PDF md5. Independent of main dedup so chunk counts stay clean.
// Load the PDF full-text extraction cache; {} when missing or unreadable.
function loadPdfTxtCache() {
  try {
    if (fs.existsSync(PDF_TXT_CACHE)) {
      return JSON.parse(fs.readFileSync(PDF_TXT_CACHE, 'utf8'));
    }
  } catch (_) {}
  return {};
}
// Persist the PDF full-text extraction cache (pretty-printed).
function savePdfTxtCache(cache) {
  fs.writeFileSync(PDF_TXT_CACHE, JSON.stringify(cache, null, 2));
}
function isPdfTxtDone(md5, cache) { 
  if (!(md5 in cache)) return false;
  const p = cache[md5].txt_path;
  return p && fs.existsSync(p);
}
// Extract the full text of a PDF, trying three strategies in order:
//   1. pdftotext (poppler)       -- best/fastest for PDFs with a text layer
//   2. pdf-parse (pure JS)       -- fallback when poppler is absent or empty
//   3. pdftoppm + tesseract OCR  -- scanned/image-only PDFs (OCR_ENABLED only)
// Returns the extracted text, or null when every strategy fails.
// tuiRef (optional) receives { file_sub } progress updates during OCR.
async function extractPdfFullText(fpath, tuiRef) {
  // ── Pass 1: pdftotext (poppler)  --  best for text-layer PDFs ──────────────────
  const r = await spawnAsync('pdftotext', ['-layout', fpath, '-'], { timeoutSecs: 120 });
  if (r.status === 0 && r.stdout.trim()) return r.stdout;

  // ── Pass 2: pdf-parse JS fallback ────────────────────────────────────────────
  if (pdfParse) {
    try {
      const data = await pdfParse(fs.readFileSync(fpath), { max: 0 });
      if (data.text.trim()) return data.text;
    } catch(_) {}
  }

  // ── Pass 3: OCR  --  for scanned/image-only PDFs ─────────────────────────────
  // Requires: poppler-utils (pdftoppm) + tesseract-ocr
  if (!OCR_ENABLED) return null;
  // Probe both tools before doing any work; bail if either is missing.
  const pdftoppm = await spawnAsync('pdftoppm', ['-h'], { timeoutSecs: 5 });
  const tess     = await spawnAsync('tesseract', ['--version'], { timeoutSecs: 5 });
  if (pdftoppm.status !== 0 || tess.status !== 0) return null;

  const base   = path.basename(fpath, '.pdf');
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'ragweed-ocr-'));
  try {
    if (tuiRef) tuiRef.update({ file_sub: `OCR rendering pages: ${base}` });
    // Render at 150dpi -- enough for OCR without huge intermediate PNGs.
    const render = await spawnAsync('pdftoppm',
      ['-r', '150', '-png', fpath, path.join(tmpDir, 'page')],
      { timeoutSecs: 300 });
    if (render.status !== 0) return null;

    const pages = fs.readdirSync(tmpDir).filter(f => f.endsWith('.png')).sort();
    if (!pages.length) return null;

    // OCR each rendered page sequentially; failed pages are silently skipped.
    const texts = [];
    for (let pi = 0; pi < pages.length; pi++) {
      if (tuiRef) tuiRef.update({ file_sub: `OCR page ${pi+1}/${pages.length}: ${base}` });
      const imgPath = path.join(tmpDir, pages[pi]);
      const outBase = path.join(tmpDir, `tess_${pi}`);
      const t = await spawnAsync('tesseract', [imgPath, outBase, '-l', 'eng'],
        { timeoutSecs: 120 });
      if (t.status === 0) {
        const txtFile = outBase + '.txt';
        if (fs.existsSync(txtFile)) {
          const pageText = fs.readFileSync(txtFile, 'utf8').trim();
          if (pageText) texts.push(pageText);
        }
      }
    }
    if (tuiRef) tuiRef.update({ file_sub: '' });
    return texts.length ? texts.join('\n\n') : null;
  } finally {
    // Always remove the temp render dir, even on early return/throw.
    try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch(_) {}
  }
}

// ── PATH MAP ───────────────────────────────────────────────────────────────────
// Write PATH_MAP_FILE: maps "<collection>/<basename>" (or bare basename when
// no collection) to the list of source-relative paths that carry that name.
function writePathMap(store) {
  const rootDir = SOURCE_DIR.replace(/\/+$/, '');
  const pathMap = {};
  for (const rec of Object.values(store)) {
    const coll     = rec.collection || '';
    const allPaths = rec.all_paths || (rec.first_seen_path ? [rec.first_seen_path] : []);
    for (const abs of allPaths) {
      const fname  = path.basename(abs);
      const mapKey = coll ? `${coll}/${fname}` : fname;
      let rel;
      if (abs.startsWith(rootDir + '/')) {
        rel = abs.slice(rootDir.length + 1);
      } else {
        // Path lies outside SOURCE_DIR: rebuild the relative part from the
        // last occurrence of the collection name among the path components.
        const segs = abs.split('/');
        const at   = segs.lastIndexOf(coll);
        rel = at >= 0 ? segs.slice(at + 1).join('/') : fname;
      }
      if (!pathMap[mapKey]) pathMap[mapKey] = [];
      if (!pathMap[mapKey].includes(rel)) pathMap[mapKey].push(rel);
    }
  }
  fs.writeFileSync(PATH_MAP_FILE, JSON.stringify(pathMap, null, 2));
  logFile(`INFO path_map written: ${Object.keys(pathMap).length} entries`);
}

// ── ASYNC SPAWN (non-blocking  --  keeps event loop alive for TUI) ──────────────────
// Promise-based spawn with timeout. Resolves (never rejects) with
// { status, stdout, stderr, timedOut }; status is -1 on timeout or spawn error.
// opts: timeoutSecs (default 300), capture (default true -- set false when the
// child's output is not needed, e.g. large/binary streams).
// Children are spawned detached so the WHOLE process group can be SIGKILLed
// on timeout (catches grandchildren the tool forks).
function spawnAsync(cmd, args, opts = {}) {
  return new Promise((resolve) => {
    const timeoutMs = (opts.timeoutSecs || 300) * 1000;
    const captureOutput = opts.capture !== false;
    let stdout = '', stderr = '', timedOut = false, resolved = false;

    const stdioMode = captureOutput ? ['ignore','pipe','pipe'] : 'ignore';
    const child = spawn(cmd, args, { stdio: stdioMode, detached: true });
    const pid = child.pid;

    logFile(`SPAWN pid=${pid} cmd=${cmd} timeout=${timeoutMs/1000}s capture=${captureOutput}`);

    if (captureOutput) {
      child.stdout.on('data', d => { stdout += d; });
      child.stderr.on('data', d => { stderr += d; });
    }

    // Single resolution point -- 'close', 'error', and the no-capture timeout
    // path can all race; only the first caller wins.
    const _done = (code, why) => {
      if (resolved) return;
      resolved = true;
      logFile(`SPAWN done pid=${pid} code=${code} why=${why} timedOut=${timedOut}`);
      resolve({ status: code, stdout, stderr, timedOut });
    };

    const timer = setTimeout(() => {
      timedOut = true;
      logFile(`SPAWN timeout pid=${pid} cmd=${cmd}  --  sending SIGKILL to group`);
      // Negative pid targets the whole process group (works because detached).
      try { process.kill(-pid, 'SIGKILL'); } catch(e1) {
        logFile(`SPAWN kill-group failed: ${e1.message}  --  trying direct kill`);
        try { child.kill('SIGKILL'); } catch(e2) {
          logFile(`SPAWN direct kill failed: ${e2.message}`);
        }
      }
      // No pipes to flush -- resolve immediately rather than waiting for 'close'.
      if (!captureOutput) _done(-1, 'timeout-noio');
    }, timeoutMs);

    // A timed-out child that eventually closes still reports status -1.
    child.on('close', code => { clearTimeout(timer); _done(timedOut ? -1 : code, 'close'); });
    child.on('error', err => {
      logFile(`SPAWN error pid=${pid}: ${err.message}`);
      clearTimeout(timer); _done(-1, 'error');
    });
  });
}

// ── MD5 ────────────────────────────────────────────────────────────────────────
function md5File(fpath) {
  const h = crypto.createHash('md5');
  const fd = fs.openSync(fpath, 'r');
  const buf = Buffer.allocUnsafe(65536);
  let n;
  while ((n = fs.readSync(fd, buf, 0, 65536)) > 0) h.update(buf.slice(0, n));
  fs.closeSync(fd);
  return h.digest('hex');
}

// ── MAGIC BYTE SNIFFING ────────────────────────────────────────────────────────
// File-signature table: [signature bytes, extension]; first match wins.
// NOTE: signatures containing bytes >= 0x80 MUST be built from raw byte
// arrays -- Buffer.from(string) defaults to UTF-8, which encodes \xd0 etc.
// as TWO bytes, so the old string-built OLE and Matroska signatures could
// never match a real file.
const MAGIC = [
  [Buffer.from('%PDF'),'.pdf'],[Buffer.from('PK\x03\x04'),'.zip'],
  [Buffer.from([0xd0, 0xcf, 0x11, 0xe0]),'.ole'],[Buffer.from('<!DOCTYPE html'),'.html'],
  [Buffer.from('<!doctype html'),'.html'],[Buffer.from('<html'),'.html'],
  [Buffer.from('<?xml'),'.xml'],[Buffer.from('{'),'.json'],[Buffer.from('['),'.json'],
  [Buffer.from('#!'),'.sh'],[Buffer.from('ID3'),'.mp3'],
  [Buffer.from([0x1a, 0x45, 0xdf, 0xa3]),'.mkv'],  // Matroska/WebM
  [Buffer.from('RIFF'),'.wav'],
  [Buffer.from('OggS'),'.ogg'],
  [Buffer.from('fLaC'),'.flac'],
];
// ODF and OOXML formats are ZIP containers internally -- never reclassify them as .zip
// (their files start with the PK\x03\x04 magic, so byte sniffing alone would rename them).
const ZIP_CONTAINER_EXTS = new Set(['.odt','.ods','.odp','.docx','.xlsx','.pptx','.odg','.odf']);
// Sniff a file's real type from its first 64 bytes. Returns an extension
// ('.pdf', '.zip', ..., '.txt') or '' for unrecognized binary content.
function sniffExtension(fpath) {
  // If the file already has an extension we handle natively, trust it
  const origExt = path.extname(fpath).toLowerCase();
  if (ZIP_CONTAINER_EXTS.has(origExt)) return origExt;
  try {
    const fd = fs.openSync(fpath, 'r');
    const buf = Buffer.allocUnsafe(64);
    const n = fs.readSync(fd, buf, 0, 64, 0);
    fs.closeSync(fd);
    const raw = buf.slice(0, n);
    for (const [sig, ext] of MAGIC) if (raw.slice(0, sig.length).equals(sig)) return ext;
    // Classify as text only if the prefix is strictly valid UTF-8.
    // Buffer.toString('utf8') NEVER throws (invalid bytes become U+FFFD), so
    // the old `try { raw.toString('utf8'); return '.txt'; }` marked every
    // binary file as text. A fatal TextDecoder actually rejects bad bytes;
    // stream mode tolerates a multi-byte char truncated at the 64-byte cut.
    try {
      new TextDecoder('utf8', { fatal: true }).decode(raw, { stream: true });
      return '.txt';
    } catch(_) {}
  } catch(_) {}
  return '';
}

// ── TEXT EXTRACTION ────────────────────────────────────────────────────────────
// pdfParse, mammoth, AdmZip loaded eagerly at startup above

// Extract plain text from a file. Returns an array of { text, metadata }
// documents ([] on failure -- callers treat empty as "nothing extracted").
// Dispatch by extension:
//   .pdf            -> pdf-parse (per-page) or pdftotext
//   .docx           -> mammoth
//   .rtf            -> unrtf, falling back to LibreOffice
//   OLE/ODF/pres.   -> LibreOffice headless (presentations via HTML export)
//   images          -> tesseract OCR (when OCR_ENABLED)
//   audio           -> whisper-cli or whisper (ffmpeg-converted to WAV first)
//   .tex            -> detex, falling back to plain-text read
//   .svg / .epub    -> JS tag stripping (epub falls back to LibreOffice)
//   everything else -> plain-text read, then `strings` as a last resort
async function extractText(fpath, ext, meta) {
  try {
    if (ext === '.pdf') {
      if (pdfParse) {
        const data = await pdfParse(fs.readFileSync(fpath), { max: 0 });
        // Split on form-feeds to recover page boundaries for page_label metadata.
        const pages = data.text.split(/\f/).filter(t => t.trim());
        if (pages.length > 1)
          return pages.map((text, i) => ({ text, metadata: { ...meta, page_label: String(i+1) } }));
        return [{ text: data.text, metadata: meta }];
      }
      const r = spawnSync('pdftotext', [fpath, '-'], { encoding: 'utf8', maxBuffer: 50e6 });
      if (r.status === 0 && r.stdout.trim()) return [{ text: r.stdout, metadata: meta }];
      return [];
    }
    if (ext === '.docx' && mammoth) {
      const result = await mammoth.extractRawText({ path: fpath });
      return [{ text: result.value, metadata: meta }];
    }
    // RTF  --  use unrtf (fast, no LibreOffice startup cost)
    if (ext === '.rtf') {
      const r = await spawnAsync('unrtf', ['--text', fpath], { timeoutSecs: 30 });
      if (r.timedOut) { logFile(`WARN unrtf timeout: ${path.basename(fpath)}`); return []; }
      if (r.status === 0 && r.stdout.trim()) return [{ text: r.stdout, metadata: meta }];
      logFile(`WARN unrtf failed (${r.status}) for ${path.basename(fpath)}  --  trying LibreOffice`);
      // fall through to LO block below
    }
    // OLE legacy formats + ODF formats (.odt/.ods/.odp) -- LibreOffice is authoritative for its own formats
    // OLE legacy formats (.doc/.xls/.ppt/.msg etc.)  --  LibreOffice only
    // Presentation formats (.odp/.ppt/.pptx) -- must use HTML conversion; txt export filter not available
    const LO_EXTS = new Set([...OLE_EXTS, '.odt','.ods','.odp','.xls','.ppt']);
    const LO_PRES_EXTS = new Set(['.odp','.ppt','.pptx']); // presentations: no txt export, use html
    if (LO_EXTS.has(ext) || LO_PRES_EXTS.has(ext)) {
      const loCheck = spawnSync('libreoffice', ['--version'], { stdio: 'pipe' });
      if (loCheck.error || loCheck.status !== 0) {
        logFile(`WARN libreoffice not found  --  skipping ${path.basename(fpath)}`);
        return [];
      }
      // Presentations use html conversion (no txt export filter available)
      const useHtml = LO_PRES_EXTS.has(ext);
      const convertTo = useHtml ? 'html' : 'txt';
      const outExt    = useHtml ? '.html' : '.txt';
      const r = await spawnAsync('libreoffice',
        ['--headless','--convert-to', convertTo,'--outdir', STAGING_DIR, fpath],
        { timeoutSecs: LIBREOFFICE_TIMEOUT });
      if (r.timedOut) {
        logFile(`WARN libreoffice timeout (${LIBREOFFICE_TIMEOUT}s): ${path.basename(fpath)}`);
        return [];
      }
      if (r.status === 0) {
        // LibreOffice names its output after the input basename in STAGING_DIR.
        const out = path.join(STAGING_DIR, path.basename(fpath, ext) + outExt);
        if (fs.existsSync(out)) {
          let text = fs.readFileSync(out, 'utf8');
          fs.unlinkSync(out);
          // For HTML output: strip tags to get plain text
          if (useHtml) {
            text = text
              .replace(/<style[^>]*>[^]*?<\/style>/gi, ' ')
              .replace(/<script[^>]*>[^]*?<\/script>/gi, ' ')
              .replace(/<[^>]+>/g, ' ')
              .replace(/&nbsp;/g, ' ').replace(/&amp;/g, '&')
              .replace(/&lt;/g, '<').replace(/&gt;/g, '>')
              .replace(/&quot;/g, '"').replace(/&#39;/g, "'")
              .replace(/[ \t]{2,}/g, ' ')
              .replace(/\n{3,}/g, '\n\n')
              .trim();
          }
          if (!text.trim()) {
            logFile(`WARN libreoffice produced empty text for ${path.basename(fpath)}`);
            return [];
          }
          return [{ text, metadata: { ...meta, lo_source: path.basename(fpath) } }];
        }
        logFile(`WARN libreoffice ran but produced no output for ${path.basename(fpath)}`);
      } else {
        logFile(`WARN libreoffice exited ${r.status} for ${path.basename(fpath)}`);
      }
      return [];
    }
    if (IMAGE_EXTS.has(ext)) {
      if (OCR_ENABLED) {
        const r = await spawnAsync('tesseract', [fpath, 'stdout'], { timeoutSecs: 120 });
        if (r.status === 0 && r.stdout.trim())
          return [{ text: r.stdout, metadata: { ...meta, ocr_type: 'image' } }];
      }
      return [];
    }
    if (AUDIO_EXTS.has(ext)) {
      if (!_whisperInfo) {
        logFile(`INFO no whisper binary found  --  skipping ${path.basename(fpath)}`);
        return [];
      }
      let wavPath = fpath;
      let _tmpWav = null;
      // whisper-cli requires WAV; use ffmpeg to convert non-WAV formats
      if (_whisperInfo.type === 'cpp' && ext !== '.wav') {
        _tmpWav = path.join(STAGING_DIR, path.basename(fpath, ext) + '_conv.wav');
        // 16 kHz mono s16le -- the input format whisper-cli expects.
        const conv = await spawnAsync('ffmpeg', ['-y', '-i', fpath, '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le', _tmpWav],
          { timeoutSecs: 300, capture: false });
        if (conv.status !== 0 || !fs.existsSync(_tmpWav)) {
          logFile(`WARN ffmpeg convert failed for ${path.basename(fpath)}`);
          return [];
        }
        wavPath = _tmpWav;
      }
      let r;
      if (_whisperInfo.type === 'cpp') {
        if (!_whisperInfo.model) {
          logFile(`WARN whisper-cli found but no model file  --  set WHISPER_MODEL= in Config`);
          if (_tmpWav) try { fs.unlinkSync(_tmpWav); } catch(_) {}
          return [];
        }
        r = await spawnAsync('whisper-cli',
          ['-m', _whisperInfo.model, '-f', wavPath, '-otxt', '-of',
           path.join(STAGING_DIR, path.basename(fpath, ext)), '--no-prints'],
          { timeoutSecs: WHISPER_TIMEOUT, capture: false });
      } else {
        r = await spawnAsync('whisper',
          [fpath, '--output_format', 'txt', '--output_dir', STAGING_DIR],
          { timeoutSecs: WHISPER_TIMEOUT, capture: false });
      }
      // Temp WAV is no longer needed regardless of transcription outcome.
      if (_tmpWav) try { fs.unlinkSync(_tmpWav); } catch(_) {}
      if (r.timedOut) { logFile(`WARN whisper timeout (${WHISPER_TIMEOUT}s): ${path.basename(fpath)}`); return []; }
      if (r.status === 0) {
        const out = path.join(STAGING_DIR, path.basename(fpath, ext) + '.txt');
        if (fs.existsSync(out)) { const text = fs.readFileSync(out,'utf8'); fs.unlinkSync(out); return [{ text, metadata: meta }]; }
      }
      logFile(`WARN whisper exited ${r.status} for ${path.basename(fpath)}`);
      return [];
    }
    // .tex  --  detex strips LaTeX commands; fall back to plain text read
    if (ext === '.tex') {
      const r = await spawnAsync('detex', [fpath], { timeoutSecs: 30 });
      if (!r.timedOut && r.status === 0 && r.stdout.trim())
        return [{ text: r.stdout, metadata: meta }];
      if (r.timedOut) logFile(`WARN detex timeout: ${path.basename(fpath)}`);
      else if (r.status !== 0) logFile(`WARN detex failed (${r.status}) for ${path.basename(fpath)}  --  falling back to plain text`);
      // fall through to plain text read below
    }

    // .svg  --  extract text nodes by stripping XML tags in JS (no external tool needed)
    if (ext === '.svg') {
      try {
        const raw = fs.readFileSync(fpath, 'utf8');
        // Strip XML/SVG tags, decode common entities, collapse whitespace
        const text = raw
          .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, ' ')
          .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, ' ')
          .replace(/<[^>]+>/g, ' ')
          .replace(/&amp;/g, '&').replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&quot;/g, '"').replace(/&#39;/g, "'")
          .replace(/\s+/g, ' ').trim();
        if (text) return [{ text, metadata: meta }];
      } catch(e) { logFile(`WARN svg parse failed for ${path.basename(fpath)}: ${e.message}`); }
      return [];
    }

    // .epub  --  it's a ZIP of HTML files; unzip and strip tags in JS
    if (ext === '.epub') {
      if (AdmZip) {
        try {
          const zip = new AdmZip(fpath);
          const parts = [];
          for (const entry of zip.getEntries()) {
            const n = entry.entryName.toLowerCase();
            if (!n.endsWith('.html') && !n.endsWith('.htm') && !n.endsWith('.xhtml')) continue;
            const html = entry.getData().toString('utf8');
            const text = html
              .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, ' ')
              .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, ' ')
              .replace(/<[^>]+>/g, ' ')
              .replace(/&amp;/g, '&').replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&nbsp;/g, ' ')
              .replace(/\s+/g, ' ').trim();
            if (text) parts.push(text);
          }
          if (parts.length) return [{ text: parts.join('\n\n'), metadata: meta }];
          logFile(`WARN epub: no HTML content found in ${path.basename(fpath)}  --  trying LibreOffice`);
        } catch(e) {
          logFile(`WARN epub unzip failed for ${path.basename(fpath)}: ${e.message}  --  trying LibreOffice`);
        }
        // fall through to LO block (already ran above, so we reach here only if we didn't return)
      }
      // If AdmZip unavailable or epub parsing failed, try LibreOffice
      const r = await spawnAsync('libreoffice',
        ['--headless','--convert-to','txt','--outdir', STAGING_DIR, fpath],
        { timeoutSecs: LIBREOFFICE_TIMEOUT });
      if (!r.timedOut && r.status === 0) {
        const out = path.join(STAGING_DIR, path.basename(fpath, ext) + '.txt');
        if (fs.existsSync(out)) { const text = fs.readFileSync(out,'utf8'); fs.unlinkSync(out); return [{ text, metadata: meta }]; }
      }
      if (r.timedOut) logFile(`WARN libreoffice timeout on epub fallback: ${path.basename(fpath)}`);
      return [];
    }

    // Plain text fallback (covers .txt .md .csv .json .yaml .xml .html .js .py .c etc.)
    try { const text = fs.readFileSync(fpath, 'utf8'); if (text.trim()) return [{ text, metadata: meta }]; }
    catch(_) {}

    // Last resort for unknown/binary files with no matched handler: run strings
    // to extract printable sequences rather than reading raw binary garbage
    {
      const r = await spawnAsync('strings', ['-n', '8', fpath], { timeoutSecs: 15 });
      if (!r.timedOut && r.status === 0 && r.stdout.trim()) {
        logFile(`INFO strings fallback used for ${path.basename(fpath)}`);
        return [{ text: r.stdout, metadata: { ...meta, strings_fallback: 'true' } }];
      }
    }
    return [];
  } catch(e) {
    logFile(`WARN extractText ${path.basename(fpath)}: ${e.message}`);
    return [];
  }
}

// ── CHUNKING ───────────────────────────────────────────────────────────────────
// Pick the chunk size (tokens) for a source extension: audio/video
// transcripts and PDFs each get a dedicated size; everything else uses
// the global default.
function chunkSizeForExt(ext) {
  const avExts = new Set(['.mp4','.mp3','.wav','.m4a','.ogg','.flac','.avi','.mov','.mkv','.webm']);
  if (avExts.has(ext)) return CHUNK_SIZE_AV;
  return ext === '.pdf' ? CHUNK_SIZE_PDF : CHUNK_SIZE;
}
// Split `text` into overlapping chunks of ~chunkSize tokens (4 chars/token),
// preferring to cut at sentence punctuation, then whitespace, then hard at
// the window edge. Overlap between consecutive chunks is overlapPct percent
// of the window. Empty/whitespace-only pieces are dropped.
function chunkText(text, chunkSize = CHUNK_SIZE, overlapPct = CHUNK_OVERLAP_PCT) {
  const winChars = chunkSize * 4;
  const overlap  = Math.floor(winChars * overlapPct / 100);

  if (text.length <= winChars) return [text.trim()].filter(Boolean);

  const pieces = [];
  let pos = 0;
  while (pos < text.length) {
    const hardEnd = pos + winChars;
    if (hardEnd >= text.length) { pieces.push(text.slice(pos).trim()); break; }

    // Prefer a sentence boundary within the last 200 chars of the window...
    let cut = hardEnd;
    for (let i = hardEnd; i > hardEnd - 200 && i > pos; i--) {
      if ('.!?\n'.includes(text[i])) { cut = i + 1; break; }
    }
    // ...otherwise fall back to whitespace within the last 50 chars.
    if (cut === hardEnd) {
      for (let i = hardEnd; i > hardEnd - 50 && i > pos; i--) {
        if (' \t'.includes(text[i])) { cut = i; break; }
      }
    }

    const piece = text.slice(pos, cut).trim();
    if (piece) pieces.push(piece);
    // Step back by the overlap, but always advance at least one char.
    pos = Math.max(pos + 1, cut - overlap);
  }
  return pieces.filter(Boolean);
}

// ── OLLAMA EMBEDDING ────────────────────────────────────────────────────────────
const MAX_EMBED_CHARS = 7500 * 4;  // max chars per embed request (~7500 tokens at ~4 chars/token)
const EMBED_SPLIT_OVL = 200;       // char overlap between windows when embedText() splits long input
let _firstEmbed = true;  // first embed may need extra time for model load

// Embed a single part -- if context too long, halve and average (recursive)
// Embed one text part via Ollama POST /api/embed.
// `depth` guards the recursive context-length auto-split (max 6 halvings).
// Transient failures are retried up to 3 times with linear backoff; a
// successful embed clears _firstEmbed so later calls use the normal timeout.
// Returns the embedding vector (array of numbers); throws after exhausting
// retries or when the text cannot be halved further.
async function _embedPart(part, depth = 0) {
  if (depth > 6) throw new Error('embed: text too long even after repeated halving');
  let lastErr;
  for (let attempt = 0; attempt < 3; attempt++) {
    try {
      const controller = new AbortController();
      // First embed may still be loading the model -- allow at least 10 minutes
      const timeoutMs = _firstEmbed ? Math.max(EMBED_TIMEOUT_MS, 600000) : EMBED_TIMEOUT_MS;
      const fetchTimeout = setTimeout(() => controller.abort(), timeoutMs);
      let resp;
      try {
        resp = await fetch(`${OLLAMA_HOST}/api/embed`, {
          method: 'POST', headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ model: EMBED_MODEL, input: part }),
          signal: controller.signal,
        });
      } finally { clearTimeout(fetchTimeout); }
      if (!resp.ok) {
        let body = ''; try { body = await resp.text(); } catch(_) {}
        const msg = `Ollama embed HTTP ${resp.status}: ${body.slice(0,200).trim()}`;
        // Context length exceeded -- halve and average rather than failing
        if (resp.status === 400 && body.includes('context length')) {
          const mid = Math.floor(part.length / 2);
          const [a, b] = [part.slice(0, mid).trim(), part.slice(mid).trim()];
          if (!a || !b) throw new Error(msg);  // can't halve further meaningfully
          logFile(`INFO embed auto-split depth=${depth} len=${part.length} -> ${a.length}+${b.length}`);
          const [va, vb] = await Promise.all([_embedPart(a, depth+1), _embedPart(b, depth+1)]);
          // Element-wise mean of the two halves stands in for the whole part
          const avg = va.map((v, i) => (v + vb[i]) / 2);
          _firstEmbed = false;
          return avg;
        }
        throw new Error(msg);
      }
      const data = await resp.json();
      // Prefer embeddings[0]; fall back to a bare `embedding` field
      const vec  = data.embeddings?.[0] || data.embedding;
      if (!vec) throw new Error('No embedding returned');
      _firstEmbed = false;
      return vec;
    } catch(e) {
      // Don't retry context-length errors -- auto-split already handled above
      if (e.message.includes('context length')) throw e;
      lastErr = e;
      if (attempt < 2) await new Promise(r => setTimeout(r, EMBED_BACKOFF_MS * (attempt + 1)));
    }
  }
  throw lastErr;
}

// Embed `text` with the Ollama embed model.  Control characters are
// stripped, over-long inputs are split into overlapping windows embedded
// separately, and the window vectors are averaged element-wise.
// Throws on empty or all-control-character input.
async function embedText(text, _retries = 3) {
  if (!text || !text.trim()) throw new Error("embed called with empty text");
  // Strip null bytes and non-printable control chars (except tab/newline/cr)
  text = text.replace(/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ' ').trim();
  if (!text) throw new Error("embed called with text that is all control characters");

  // Cut into windows of MAX_EMBED_CHARS with EMBED_SPLIT_OVL chars of overlap
  const windows = [];
  if (text.length <= MAX_EMBED_CHARS) {
    windows.push(text);
  } else {
    for (let at = 0; at < text.length; at += MAX_EMBED_CHARS - EMBED_SPLIT_OVL) {
      windows.push(text.slice(at, at + MAX_EMBED_CHARS));
    }
  }

  const vectors = [];
  for (const w of windows) vectors.push(await _embedPart(w));
  if (vectors.length === 1) return vectors[0];

  // Element-wise mean across windows
  const dim  = vectors[0].length;
  const mean = new Array(dim).fill(0);
  for (const vec of vectors) {
    for (let i = 0; i < dim; i++) mean[i] += vec[i] / vectors.length;
  }
  return mean;
}

// Warm up Ollama embed model  --  first call loads model into memory (can take 60s+)
// Warm up the Ollama embed model by embedding a tiny input.  The first call
// loads the model into memory (can take 60s+), so the timeout is at least
// 120s.  Returns true iff an embedding came back; any failure returns false.
async function checkOllama() {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), Math.max(EMBED_TIMEOUT_MS, 120000));
  try {
    const resp = await fetch(`${OLLAMA_HOST}/api/embed`, {
      method: 'POST', headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model: EMBED_MODEL, input: 'warmup' }),
      signal: controller.signal,
    });
    if (!resp.ok) return false;
    const payload = await resp.json();
    return !!(payload.embeddings?.[0] || payload.embedding);
  } catch (_) {
    return false;
  } finally {
    clearTimeout(timer);
  }
}

// ── PURE JS CHROMADB-COMPATIBLE WRITER ───────────────────────────────────────
// Writes directly to chroma.sqlite3 + HNSW binary files.
// Zero Python. Zero chromadb library. Same format query.js already reads.
//
// ChromaDB on-disk format:
//   chromadb/<uuid>/rag.sqlite3      -- per-collection metadata (collections, segments, embeddings)
//   chromadb/{segUuid}/data_level0.bin -- HNSW vectors: [M0 int32s][float32 vec][int64 label]
//   chromadb/{segUuid}/index_meta.json -- { dimensionality, total_elements, M, ef_construction }
//   chromadb/{segUuid}/header.bin    -- 4-byte magic + 4-byte version (ignored by our reader)
//   chromadb/{segUuid}/length.bin    -- 8-byte little-endian int64 element count

const Database = _require('better-sqlite3');

// UUIDs  --  stable per collection name so re-runs are idempotent
// Derive a stable, uuid-v4-shaped identifier from `seed`.  Not a real v4
// uuid and not cryptographic -- just a deterministic 32-hex-digit string so
// re-runs map the same collection name to the same on-disk directories.
function deterministicUuid(seed) {
  // Rolling multiply-add hash over the seed's characters (32-bit via imul)
  const roll = (str, mult, init) =>
    Array.from(str).reduce((acc, ch) => Math.imul(mult, acc) + ch.charCodeAt(0) | 0, init);
  const h = roll(seed, 31, 0x811c9dc5);
  const hex = (Math.abs(h) * 0x100000001 + 0x123456789abcdef).toString(16).padStart(16, '0');
  const h2 = roll(seed + '2', 37, 0xdeadbeef);
  const hex2 = (Math.abs(h2) * 0x100000001 + 0xfedcba987654321).toString(16).padStart(16, '0');
  const digits = (hex + hex2).slice(0, 32);
  return [
    digits.slice(0, 8),
    digits.slice(8, 12),
    '4' + digits.slice(13, 16),
    digits.slice(16, 20),
    digits.slice(20, 32),
  ].join('-');
}

// ── SQLite schema (matches what query.js reads) ───────────────────────────────
// DDL executed against every per-collection rag.sqlite3 on open (idempotent
// via IF NOT EXISTS).  Mirrors the ChromaDB tables query.js reads:
// collections / segments / embeddings, plus embedding_metadata as a typed
// key-value table (one row per metadata key per embedding).
// The SQL string itself is executed verbatim by db.exec() -- keep unchanged.
const SCHEMA_SQL = `
  CREATE TABLE IF NOT EXISTS collections (
    id TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    topic TEXT,
    metadata TEXT
  );
  CREATE UNIQUE INDEX IF NOT EXISTS collections_name ON collections(name);
  CREATE TABLE IF NOT EXISTS segments (
    id TEXT PRIMARY KEY,
    type TEXT,
    scope TEXT,
    topic TEXT,
    collection TEXT REFERENCES collections(id)
  );
  CREATE TABLE IF NOT EXISTS embeddings (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    segment_id TEXT NOT NULL,
    embedding_id TEXT NOT NULL,
    seq_id BLOB
  );
  CREATE UNIQUE INDEX IF NOT EXISTS embeddings_embedding_id ON embeddings(embedding_id);
  CREATE TABLE IF NOT EXISTS embedding_metadata (
    id INTEGER NOT NULL REFERENCES embeddings(id),
    key TEXT NOT NULL,
    string_value TEXT,
    int_value INTEGER,
    float_value REAL,
    bool_value INTEGER
  );
  CREATE INDEX IF NOT EXISTS embedding_metadata_id ON embedding_metadata(id);
  CREATE INDEX IF NOT EXISTS embedding_metadata_key_value ON embedding_metadata(key, string_value);
`;

// Per-collection in-memory state, keyed by collection name.  Each entry
// holds the deterministic uuids, segment dir, open sqlite handle, element
// count, pending vector buffer, and the lazily-built in-memory HNSW graph.
const _colState = {};  // name -> { collUuid, segUuid, segDir, db, count, newVectors, newIds, hnswGraph, hnswVecs, hnswEntry, hnswMaxLvl, hnswCount }

// Open (or return cached) per-collection state for `name`:
//   - deterministic collection/segment uuids and segment directory
//   - per-collection rag.sqlite3 (WAL mode), schema applied, rows upserted
//   - crash recovery of a half-finished binary flush (.tmp rename)
//   - element count recovered from index_meta.json, SQL, or binary size
// Idempotent: repeat calls return the same cached state object.
function _getOrInitCol(name) {
  if (_colState[name]) return _colState[name];

  // Stable uuids per name keep re-runs idempotent (same dirs, same rows)
  const collUuid = deterministicUuid(name);
  const segUuid  = deterministicUuid(name + ':vector');
  const segDir   = path.join(CHROMA_PATH, segUuid);
  fs.mkdirSync(segDir, { recursive: true });
  // Create a human-readable symlink chromadb/<name> -> <segUuid> for browsability.
  // Overwrites stale symlinks; skips if target already exists as a real dir.
  try {
    const lnk = path.join(CHROMA_PATH, name);
    try { fs.unlinkSync(lnk); } catch(_) {}
    fs.symlinkSync(segUuid, lnk);
  } catch(_) {}

  // Per-collection rag.sqlite3 -- lives alongside data_level0.bin in the segment dir
  const dbPath = path.join(segDir, 'rag.sqlite3');
  const db = new Database(dbPath);
  db.pragma('busy_timeout = 10000');
  db.pragma('foreign_keys = OFF');
  db.pragma('journal_mode = WAL');
  try { db.pragma('wal_checkpoint(PASSIVE)'); } catch(_) {}
  db.exec(SCHEMA_SQL);

  // Upsert collection + segment rows
  db.prepare(`INSERT OR IGNORE INTO collections(id,name,topic,metadata) VALUES(?,?,?,?)`)
    .run(collUuid, name, `persistent://${name}`, JSON.stringify({'hnsw:space':'cosine'}));
  db.prepare(`INSERT OR IGNORE INTO segments(id,type,scope,topic,collection) VALUES(?,?,?,?,?)`)
    .run(segUuid, 'urn:chroma:segment/vector/hnsw-local', 'VECTOR', `persistent://${name}`, collUuid);

  // Recovery: if a previous flush wrote data_level0.bin.tmp but died before renaming,
  // complete the rename now before counting. This brings binary and SQL back into sync.
  const tmpFile  = path.join(segDir, 'data_level0.bin.tmp');
  const dataFile = path.join(segDir, 'data_level0.bin');
  if (fs.existsSync(tmpFile)) {
    try {
      fs.renameSync(tmpFile, dataFile);
      logFile(`[RECOVERY] '${name}': renamed data_level0.bin.tmp -> data_level0.bin`);
    } catch(e) {
      logFile(`[RECOVERY] '${name}': failed to rename data_level0.bin.tmp: ${e.message}`);
    }
  }
  // Element count recovery: index_meta.json first, then the SQL embeddings
  // table, then (last resort) divide the binary size by candidate layouts.
  let existingCount = 0;
  if (fs.existsSync(dataFile)) {
    const metaFile = path.join(segDir, 'index_meta.json');
    if (fs.existsSync(metaFile)) {
      try {
        const meta = JSON.parse(fs.readFileSync(metaFile, 'utf8'));
        if (meta.total_elements > 0) existingCount = meta.total_elements;
      } catch(_) {}
    }
    if (existingCount === 0) {
      // Try SQLite embeddings table (JS-ingested collections)
      const sqlCount = db.prepare(`SELECT COUNT(*) as n FROM embeddings WHERE segment_id=?`).get(segUuid);
      existingCount = sqlCount ? sqlCount.n : 0;
    }
    if (existingCount === 0) {
      // Derive from binary file size using known ChromaDB element layout
      // spe = 128 + dim*4 + 12  (M0=32 neighbors * 4 bytes + 4 prefix + float32*dim + int64 label)
      // Try common dims: 768, 384, 1536, 1024, 512, 256, 128, 64
      const fsize = fs.statSync(dataFile).size;
      for (const dim of [768, 384, 1536, 1024, 512, 256, 128, 64]) {
        const spe = 128 + dim * 4 + 12;
        if (fsize > 0 && fsize % spe === 0) { existingCount = fsize / spe; break; }
      }
    }
  }

  const state = { collUuid, segUuid, segDir, db, count: existingCount, newVectors: [], newIds: [] };
  _colState[name] = state;
  return state;
}

// Append embeddings to in-memory buffer (flushed to disk in batch)
// Append a batch of (id, embedding, document, metadata) rows for collection
// `name` into its rag.sqlite3 inside one transaction, and buffer the vectors
// for the next HNSW flush.  Duplicate embedding_ids (unique index) are
// silently skipped and do not advance the count or the sequence.
function _colAdd(name, ids, embeddings, documents, metadatas) {
  const st = _getOrInitCol(name);
  const insertEmb = st.db.prepare(
    `INSERT OR IGNORE INTO embeddings(segment_id, embedding_id, seq_id) VALUES(?,?,?)`
  );
  const insertMeta = st.db.prepare(
    `INSERT INTO embedding_metadata(id, key, string_value) VALUES(?,?,?)`
  );
  const addMany = st.db.transaction((ids, embeddings, documents, metadatas) => {
    for (let i = 0; i < ids.length; i++) {
      // seq_id: sequential integer stored as blob, matching Python ChromaDB convention.
      // st.count is incremented once per ACCEPTED row below, so the next
      // sequence number is simply st.count + 1.  (The previous expression
      // `st.count + i + 1` double-counted -- both `i` and `st.count` advance
      // per accepted row -- yielding gapped, non-sequential seq_ids.)
      const _seqId = Buffer.alloc(4); _seqId.writeUInt32BE(st.count + 1);
      const res = insertEmb.run(st.segUuid, ids[i], _seqId);
      if (res.changes === 0) continue; // duplicate  --  skip
      const rowid = res.lastInsertRowid;
      if (documents[i]) insertMeta.run(rowid, 'chroma:document', documents[i]);
      const m = metadatas[i] || {};
      for (const [k, v] of Object.entries(m)) {
        // null/undefined metadata values are dropped; everything else stringified
        if (v !== null && v !== undefined) insertMeta.run(rowid, k, String(v));
      }
      st.newVectors.push(embeddings[i]);
      st.newIds.push(ids[i]);
      st.count++;
    }
  });
  addMany(ids, embeddings, documents, metadatas);
}

// Write HNSW binary files for a collection
// ChromaDB element layout in data_level0.bin:
// ── HNSW graph maintenance (incremental insert) ───────────────────────────────
// Parameters match offline builder and ChromaDB defaults
const HNSW_M             = 16;   // max neighbors per node on layers > 0
const HNSW_M0            = 32;   // 2*M, layer-0 max neighbors
const HNSW_EF            = 200;  // ef_construction
const HNSW_ML            = 1.0 / Math.log(HNSW_M);  // level factor mL = 1/ln(M) from the HNSW paper

// Draw the insertion level for a new HNSW node using the standard
// distribution: level = floor(-ln(U) * mL) with mL = 1/ln(M) (HNSW_ML).
// The previous `Math.random() < 0.5` coin flip ignored HNSW_ML entirely
// (it corresponds to mL = 1/ln(2) ~= 1.44 instead of 1/ln(16) ~= 0.36),
// creating far too many upper-layer nodes for M=16.  Capped at 16 as before.
function hnswRandomLevel() {
  const u = Math.random();
  // Guard against ln(0) -> Infinity in the (spec-allowed) u === 0 case
  const lvl = Math.floor(-Math.log(u || Number.MIN_VALUE) * HNSW_ML);
  return Math.min(lvl, 16);
}

// Cosine similarity between two pre-normalized float arrays
// Dot product of two equal-length numeric arrays; with unit-norm inputs this
// equals their cosine similarity.
function hnswSim(a, b) {
  const n = a.length;
  let acc = 0;
  for (let k = 0; k < n; k++) acc += a[k] * b[k];
  return acc;
}

// Normalize a float array in place
// Scale `v` to unit length in place (no-op for the zero vector).
// Returns the same array for chaining.
function hnswNorm(v) {
  let sumSq = 0;
  for (const x of v) sumSq += x * x;
  const len = Math.sqrt(sumSq);
  if (len > 0) {
    for (let k = 0; k < v.length; k++) v[k] /= len;
  }
  return v;
}

// Greedy search in one layer -- returns ef nearest neighbors as [{id,sim}]
// Best-first (beam) search within a single HNSW layer.
//   st    -- collection state: hnswVecs (id -> unit vector), hnswGraph (id -> layer -> [ids])
//   query -- pre-normalized query vector
//   eps   -- entry-point ids seeding the search
//   ef    -- beam width (max results kept)
//   layer -- layer index to traverse
// Returns [{id, sim}] sorted by descending similarity.  The result is seeded
// with ALL entry points, so it may briefly exceed ef if eps is longer.
// NOTE: candidate/result lists are maintained via repeated full sorts --
// O(log) structures would be faster, but this is fine at ef <= 200.
function hnswSearchLayer(st, query, eps, ef, layer) {
  const visited = new Set(eps);
  const cands   = eps.map(id => ({ id, sim: hnswSim(query, st.hnswVecs[id]) }));
  cands.sort((a, b) => b.sim - a.sim);
  const result  = cands.slice();

  let ci = 0;
  while (ci < cands.length) {
    const c = cands[ci++];
    // Stop once the best unexpanded candidate cannot beat the worst kept result
    if (result.length >= ef && c.sim < result[result.length - 1].sim) break;
    const nbrs = (st.hnswGraph[c.id] && st.hnswGraph[c.id][layer]) || [];
    for (const nb of nbrs) {
      if (visited.has(nb)) continue;
      visited.add(nb);
      const sim = hnswSim(query, st.hnswVecs[nb]);
      // Admit the neighbor if the beam has room or it improves the worst kept
      if (result.length < ef || sim > result[result.length - 1].sim) {
        cands.push({ id: nb, sim });
        cands.sort((a, b) => b.sim - a.sim);
        result.push({ id: nb, sim });
        result.sort((a, b) => b.sim - a.sim);
        if (result.length > ef) result.pop();
      }
    }
  }
  return result;
}

// Insert one vector into the in-memory HNSW graph
// Insert one pre-normalized vector into the in-memory HNSW graph at index
// `idx`.  Standard HNSW insertion: draw a random level, greedily descend
// from the entry point down to level+1, then at each layer <= level link to
// the nearest maxM neighbors and prune back-links that exceed maxM.
function hnswInsert(st, idx, vec) {
  // Lazy-init graph structures on first use
  if (!st.hnswGraph) st.hnswGraph = {};
  if (!st.hnswVecs)  st.hnswVecs  = {};

  st.hnswVecs[idx] = vec;
  st.hnswGraph[idx] = {};

  const level = hnswRandomLevel();
  for (let l = 0; l <= level; l++) st.hnswGraph[idx][l] = [];

  // First element becomes the entry point
  if (st.hnswCount === 0) {
    st.hnswEntry  = idx;
    st.hnswMaxLvl = level;
    st.hnswCount  = 1;
    return;
  }

  let ep = [st.hnswEntry];

  // Greedy descend from top to level+1
  for (let l = st.hnswMaxLvl; l > level; l--) {
    const found = hnswSearchLayer(st, vec, ep, 1, l);
    ep = [found[0].id];
  }

  // Insert at each layer
  for (let l = Math.min(level, st.hnswMaxLvl); l >= 0; l--) {
    const ef     = Math.max(HNSW_EF, HNSW_M0);
    const found  = hnswSearchLayer(st, vec, ep, ef, l);
    ep = found.map(f => f.id);

    // Layer 0 allows up to M0 neighbors, upper layers up to M
    const maxM   = l === 0 ? HNSW_M0 : HNSW_M;
    const nbrs   = found.slice(0, maxM).map(f => f.id);
    st.hnswGraph[idx][l] = nbrs;

    // Back-connections with pruning
    for (const nb of nbrs) {
      if (!st.hnswGraph[nb])    st.hnswGraph[nb] = {};
      if (!st.hnswGraph[nb][l]) st.hnswGraph[nb][l] = [];
      st.hnswGraph[nb][l].push(idx);
      if (st.hnswGraph[nb][l].length > maxM) {
        // Keep only nb's maxM most similar neighbors
        const pruned = st.hnswGraph[nb][l]
          .map(id => ({ id, sim: hnswSim(st.hnswVecs[nb], st.hnswVecs[id]) }))
          .sort((a, b) => b.sim - a.sim)
          .slice(0, maxM)
          .map(c => c.id);
        st.hnswGraph[nb][l] = pruned;
      }
    }
  }

  // A new top level promotes this node to entry point
  if (level > st.hnswMaxLvl) {
    st.hnswMaxLvl = level;
    st.hnswEntry  = idx;
  }
  st.hnswCount++;
}

// Load existing graph from data_level0.bin into st.hnswGraph / st.hnswVecs
// Load the layer-0 graph and re-normalized vectors of an existing
// data_level0.bin into st.hnswGraph / st.hnswVecs so new inserts can link
// against them.  Element layout (JS writer): [M0 int32 neighbor slots]
// [int32 neighbor count][float32*dim vector][int64 label].
function hnswLoadExisting(st, buf, count, dim, spe) {
  st.hnswGraph = {};
  st.hnswVecs  = {};
  const neighborBytes = HNSW_M0 * 4;
  const countPfx      = 4;
  for (let i = 0; i < count; i++) {
    const base   = i * spe;
    // The count prefix is authoritative: the writer stores the true neighbor
    // count there and zero-pads unused slots.  Clamp to [0, HNSW_M0] to
    // survive corrupt/foreign-format values.
    const nCount = Math.max(0, Math.min(buf.readInt32LE(base + neighborBytes), HNSW_M0));
    const nbrs   = [];
    for (let j = 0; j < nCount; j++) {
      // Every slot below nCount is a real neighbor.  (The previous filter
      // `nb !== 0 || j === 0` wrongly discarded node 0 whenever it appeared
      // past slot 0, silently severing its incoming links on reload.)
      nbrs.push(buf.readInt32LE(base + j * 4));
    }
    st.hnswGraph[i] = { 0: nbrs.filter(n => n !== i) };  // drop self-links
    // Load vector and re-normalize (hnswSim assumes unit vectors)
    const v = new Float32Array(dim);
    const vecBase = base + neighborBytes + countPfx;
    let norm = 0;
    for (let d = 0; d < dim; d++) { v[d] = buf.readFloatLE(vecBase + d*4); norm += v[d]*v[d]; }
    norm = Math.sqrt(norm);
    if (norm > 0) for (let d = 0; d < dim; d++) v[d] /= norm;
    st.hnswVecs[i] = v;
  }
}

// Write layer-0 neighbor slots back into the binary buffer
// Serialize layer-0 adjacency for elements [startIdx, startIdx+count) back
// into the binary buffer: M0 int32 neighbor slots (zero-padded) followed by
// an int32 count prefix at each element's base offset.
function hnswWriteNeighbors(buf, st, startIdx, count, spe) {
  const end = startIdx + count;
  for (let idx = startIdx; idx < end; idx++) {
    const off   = idx * spe;
    const links = (st.hnswGraph[idx] && st.hnswGraph[idx][0]) || [];
    const used  = Math.min(links.length, HNSW_M0);
    for (let slot = 0; slot < HNSW_M0; slot++) {
      buf.writeInt32LE(slot < used ? links[slot] : 0, off + slot * 4);
    }
    buf.writeInt32LE(used, off + HNSW_M0 * 4);
  }
}

//   [M0 * int32 neighbors (zeroed)] [int32 count_prefix=M0] [float32 * dim] [int64 label (1-indexed)]
// M=16, M0=32, neighborBytes=32*4+4=132, labelBytes=8
// Flush buffered vectors for collection `name` to disk in ChromaDB-compatible
// form: append to data_level0.bin (atomic tmp+rename), maintain the in-memory
// HNSW graph, and refresh index_meta.json / header.bin / length.bin.
// No-op when nothing is buffered.
function _flushHnsw(name) {
  const st = _colState[name];
  if (!st || st.newVectors.length === 0) return;

  const dim    = st.newVectors[0].length;
  if (!Number.isFinite(dim) || dim <= 0) {
    logFile(`WARN _flushHnsw '${name}': invalid dim=${dim}, aborting flush`);
    st.newVectors = []; st.newIds = [];
    return;
  }
  const M0     = 32;
  const neighborBytes = M0 * 4 + 4;  // 32 int32 neighbors + int32 count_prefix
  const vectorBytes   = dim * 4;
  const labelBytes    = 8;
  const spe    = neighborBytes + vectorBytes + labelBytes;

  // Read existing data_level0.bin  --  ALWAYS derive count from file size, never from st.count
  const dataFile = path.join(st.segDir, 'data_level0.bin');
  let existingBuf = null;
  let existingCount = 0;
  if (fs.existsSync(dataFile)) {
    existingBuf = fs.readFileSync(dataFile);
    // Derive count from file size -- auto-detect spe to handle Python-format binaries
    const _dim = st.newVectors[0].length;
    // Try JS spe first, then Python spe
    // NOTE(review): _spJS and _spPY are numerically equal (both dim*4 + 140),
    // so the Python branch below can never be taken; kept to document intent.
    const _spJS  = (32 * 4 + 4) + (_dim * 4) + 8;   // 132 + dim*4 + 8
    const _spPY  = 128 + (_dim * 4) + 12;             // 128 + dim*4 + 12
    if (existingBuf.length % _spJS === 0) {
      existingCount = existingBuf.length / _spJS;
    } else if (existingBuf.length % _spPY === 0) {
      existingCount = existingBuf.length / _spPY;
      // Python format -- we need to rewrite it in JS format when appending
      // For now, treat as correct count and let the copy handle it
    } else {
      // Fractional -- truncate to nearest whole element
      existingCount = Math.floor(existingBuf.length / _spJS);
      logFile(`[WARN] _flushHnsw '${name}': bin size ${existingBuf.length} not divisible by spe ${_spJS}, truncating to ${existingCount} elements`);
      existingBuf = existingBuf.slice(0, existingCount * _spJS);
    }
  }

  if (!Number.isFinite(existingCount) || existingCount < 0) {
    logFile(`WARN _flushHnsw '${name}': invalid existingCount=${existingCount}, treating as 0`);
    existingCount = 0;
    existingBuf = null;
  }
  const totalCount = existingCount + st.newVectors.length;
  const newBuf = Buffer.allocUnsafe(totalCount * spe);

  // Copy existing elements
  if (existingBuf) existingBuf.copy(newBuf, 0, 0, existingCount * spe);

  // Load existing graph into memory if not already loaded
  if (!st.hnswCount && existingCount > 0 && existingBuf) {
    hnswLoadExisting(st, newBuf, existingCount, dim, spe);
    st.hnswCount = existingCount;
    // Restore entry point from index_meta.json if available
    const _metaPath = path.join(st.segDir, 'index_meta.json');
    if (fs.existsSync(_metaPath)) {
      try {
        const _m = JSON.parse(fs.readFileSync(_metaPath, 'utf8'));
        if (_m.hnsw_entry_point !== undefined) st.hnswEntry  = _m.hnsw_entry_point;
        if (_m.hnsw_max_layer   !== undefined) st.hnswMaxLvl = _m.hnsw_max_layer;
      } catch(_) {}
    }
    // Fall back to node 0 / layer 0 when meta is missing or incomplete
    if (st.hnswEntry === undefined) st.hnswEntry = 0;
    if (st.hnswMaxLvl === undefined) st.hnswMaxLvl = 0;
  }

  // Write new elements and insert into HNSW graph
  for (let i = 0; i < st.newVectors.length; i++) {
    const globalIdx = existingCount + i;
    const base = globalIdx * spe;
    // Write vector and label first (neighbor slots written after graph insert)
    newBuf.fill(0, base, base + neighborBytes);
    newBuf.writeInt32LE(M0, base + M0 * 4);
    const vec = st.newVectors[i];
    for (let d = 0; d < dim; d++) {
      newBuf.writeFloatLE(vec[d], base + neighborBytes + d * 4);
    }
    const label = BigInt(globalIdx + 1);  // labels are 1-indexed
    newBuf.writeBigInt64LE(label, base + neighborBytes + vectorBytes);

    // Insert into HNSW graph (pre-normalize vector first)
    const normVec = new Float32Array(vec);
    hnswNorm(normVec);
    hnswInsert(st, globalIdx, normVec);
  }

  // Write updated neighbor slots for all new vectors
  hnswWriteNeighbors(newBuf, st, existingCount, st.newVectors.length, spe);

  // Write to data_level0.bin.tmp first, then rename atomically.
  // This ensures binary and SQL stay in sync even if the process dies mid-flush:
  // on next startup _getOrInitCol will detect data_level0.bin.tmp and complete the rename.
  const tmpFile = path.join(st.segDir, 'data_level0.bin.tmp');
  fs.writeFileSync(tmpFile, newBuf);
  fs.renameSync(tmpFile, dataFile);

  // index_meta.json  --  merge into any existing keys, refresh counts/params
  const metaPath = path.join(st.segDir, 'index_meta.json');
  const meta = fs.existsSync(metaPath) ? JSON.parse(fs.readFileSync(metaPath, 'utf8')) : {};
  meta.dimensionality      = dim;
  meta.total_elements      = totalCount;
  meta.M                   = 16;
  meta.ef_construction     = HNSW_EF;
  meta.name                = name;
  meta.embed_model         = EMBED_MODEL;
  if (st.hnswEntry  !== undefined) meta.hnsw_entry_point = st.hnswEntry;
  if (st.hnswMaxLvl !== undefined) meta.hnsw_max_layer   = st.hnswMaxLvl;
  fs.writeFileSync(metaPath, JSON.stringify(meta));

  // header.bin  --  8 bytes: magic 0x1234 + version 1 (our reader ignores this)
  const headerPath = path.join(st.segDir, 'header.bin');
  if (!fs.existsSync(headerPath)) {
    const hbuf = Buffer.allocUnsafe(8);
    hbuf.writeInt32LE(0x1234, 0);
    hbuf.writeInt32LE(1, 4);
    fs.writeFileSync(headerPath, hbuf);
  }

  // length.bin  --  8-byte little-endian int64 element count
  const lbuf = Buffer.allocUnsafe(8);
  lbuf.writeBigInt64LE(BigInt(totalCount), 0);
  fs.writeFileSync(path.join(st.segDir, 'length.bin'), lbuf);

  // Clear new-vector buffer
  st.newVectors = [];
  st.newIds = [];
}

// ── Public bridge API (same as before) ───────────────────────────────────────
// Public bridge API: single async entry point dispatching on obj.action.
// Actions: get_or_create, count, add (persist + flush + notify web), quit
// (flush and close everything).  Never throws -- failures are recorded via
// crashWrite and surfaced as { ok:false, error }.
async function bridgeCall(obj) {
  try {
    const { action, name } = obj;
    switch (action) {
      case 'get_or_create':
        _getOrInitCol(name);
        return { ok: true };
      case 'count': {
        const st = _getOrInitCol(name);
        return { ok: true, count: st.count };
      }
      case 'add':
        _colAdd(name, obj.ids, obj.embeddings, obj.documents, obj.metadatas);
        _flushHnsw(name);
        // Notify web once per collection, first time it becomes queryable
        _notifyWebIfActive(name);
        return { ok: true, added: obj.ids.length };
      // 'delete' action intentionally removed  --  collections are permanent
      case 'quit':
        // Flush all pending state
        for (const n of Object.keys(_colState)) _flushHnsw(n);
        for (const st of Object.values(_colState)) { try { st.db.close(); } catch(_) {} }
        return { ok: true };
      default:
        return { ok: false, error: `unknown action: ${action}` };
    }
  } catch(e) {
    crashWrite(`BRIDGE ${obj.action}: ${e.message}\n${e.stack}`);
    return { ok: false, error: e.message };
  }
}

// Cross-collection indexing (batch): each source rag.sqlite3 and data_level0.bin
// loaded ONCE for all dupe files. Fully independent of source collection name.
// For files already embedded in OTHER collections (matched by md5), copy
// their chunks -- raw vectors, documents, and selected metadata -- into
// `targetName` without re-embedding.  Each source rag.sqlite3/data_level0.bin
// pair is opened exactly once for the whole batch.
//   toCrossIndex -- [{ md5, fpath }] files to satisfy
//   srcDir       -- root used to compute source_rel_path
//   tui          -- optional progress UI (may be null)
// Returns the number of chunks added; warns for md5s with no source found.
async function crossIndexBatch(toCrossIndex, targetName, srcDir, tui) {
  if (!toCrossIndex.length) return 0;
  const Database = _require('better-sqlite3');
  const targetSegUuid = deterministicUuid(targetName + ':vector');
  const md5Set = new Set(toCrossIndex.map(e => e.md5));
  const md5ToFpath = new Map(toCrossIndex.map(e => [e.md5, e.fpath]));
  let segDirs;
  try { segDirs = fs.readdirSync(CHROMA_PATH).filter(d => d !== targetSegUuid); }
  catch(_) { return 0; }
  let totalAdded = 0;
  // NOTE(review): assumes all source collections store 768-dim vectors -- confirm
  const DIM = 768;
  const seenEmbIds = new Set();
  const total = segDirs.length;
  for (let si = 0; si < total; si++) {
    if (tui && tui.isStopped()) break;
    if (!md5Set.size) break;   // every requested md5 already satisfied
    const d = segDirs[si];
    const ragPath = path.join(CHROMA_PATH, d, 'rag.sqlite3');
    const binPath = path.join(CHROMA_PATH, d, 'data_level0.bin');
    if (!fs.existsSync(ragPath) || !fs.existsSync(binPath)) continue;
    if (tui) tui.update({ stage: 'Cross-indexing ' + (si+1) + '/' + total + '...', file_sub: d.slice(0,8) });
    let srcDb;
    try { srcDb = new Database(ragPath, { readonly: true, fileMustExist: true }); } catch(_) { continue; }
    try {
      // All chunk rows in this segment whose source_md5 matches a wanted file
      const ph = Array.from(md5Set).map(() => '?').join(',');
      const rows = srcDb.prepare(
        'SELECT e.embedding_id, e.id as eid, m.string_value as src_md5 FROM embeddings e ' +
        'JOIN embedding_metadata m ON m.id=e.id AND m.key=\'source_md5\'' +
        ' WHERE m.string_value IN (' + ph + ') ORDER BY e.id'
      ).all(...md5Set);
      if (!rows.length) { srcDb.close(); continue; }
      const allEmbs = srcDb.prepare('SELECT embedding_id FROM embeddings ORDER BY id').all();
      const posMap = new Map();
      // posMap (embedding_id -> element position in the binary) is filled
      // below, capped at binElemCount
      const _binSize = fs.statSync(binPath).size;
      // Derive spe from file size using known dim -- do NOT use allEmbs.length
      // (SQLite may have more rows than binary elements if backup was incomplete)
      // NOTE(review): neighbor-block candidates are 132 (JS layout) and 128;
      // the Python layout described elsewhere carries 12 trailing bytes, not
      // the 8 assumed here -- confirm the 128 case against a real binary.
      let spe = 0;
      for (const nb of [132, 128]) {
        const _spe = nb + DIM * 4 + 8;
        if (_binSize > 0 && _binSize % _spe === 0) { spe = _spe; break; }
      }
      if (!spe) {
        logFile(`WARN crossIndex ${d.slice(0,8)}: binSize=${_binSize} not divisible by known spe for dim=${DIM}, skipping`);
        srcDb.close(); continue;
      }
      const binElemCount = _binSize / spe;
      const NEIGHBOR_BYTES = spe - DIM * 4 - 8;

      // Only map embeddings that have a corresponding vector in the binary
      allEmbs.slice(0, binElemCount).forEach((e, i) => posMap.set(e.embedding_id, i));
      if (tui) tui.update({ file_sub: 'loading ' + d.slice(0,8) });
      const srcBin = fs.readFileSync(binPath);
      if (tui) tui.update({ file_sub: '' });
      // Group matched chunk rows by their source file's md5
      const byMd5 = new Map();
      for (const row of rows) {
        if (!byMd5.has(row.src_md5)) byMd5.set(row.src_md5, []);
        byMd5.get(row.src_md5).push(row);
      }
      for (const [md5, chunkRows] of byMd5) {
        if (tui && tui.isStopped()) break;
        const fpath = md5ToFpath.get(md5);
        if (!fpath) continue;
        const ids = [], embeddings = [], documents = [], metadatas = [];
        for (const row of chunkRows) {
          if (seenEmbIds.has(row.embedding_id)) continue;   // already copied from another segment
          const pos = posMap.get(row.embedding_id);
          if (pos === undefined || (pos + 1) * spe > srcBin.length) continue;
          // Read the raw float32 vector straight out of the source binary
          const vecStart = pos * spe + NEIGHBOR_BYTES;
          const vec = new Array(DIM);
          for (let di = 0; di < DIM; di++) vec[di] = srcBin.readFloatLE(vecStart + di * 4);
          const mrows = srcDb.prepare(
            'SELECT key, string_value FROM embedding_metadata WHERE id=? ' +
            'AND key IN (\'chroma:document\',\'source_file_name\',\'page_label\',\'ocr_type\',\'ole_parent_name\')'
          ).all(row.eid);
          const m = {};
          for (const mr of mrows) m[mr.key] = mr.string_value;
          seenEmbIds.add(row.embedding_id);
          ids.push(row.embedding_id);
          embeddings.push(vec);
          documents.push(m['chroma:document'] || '');
          metadatas.push({
            source_file_name: m['source_file_name'] || path.basename(fpath),
            source_rel_path: path.relative(srcDir, fpath),
            source_md5: md5, collection: targetName, _collection: targetName,
            preprocessed_at: new Date().toISOString(),
            ...(m['page_label']      ? { page_label:      m['page_label'] }      : {}),
            ...(m['ocr_type']        ? { ocr_type:        m['ocr_type'] }        : {}),
            ...(m['ole_parent_name'] ? { ole_parent_name: m['ole_parent_name'] } : {}),
          });
        }
        if (!ids.length) continue;
        try {
          const r = await bridgeCall({ action: 'add', name: targetName, ids, embeddings, documents, metadatas });
          if (r.ok) {
            totalAdded += ids.length;
            md5Set.delete(md5);
            logFile('INFO cross-index ' + targetName + ': +' + ids.length + ' chunks for ' + path.basename(fpath));
            if (tui) tui.update({ chunks: (tui.state.chunks||0) + ids.length });
          }
        } catch(e) { logFile('WARN cross-index add ' + path.basename(fpath) + ': ' + e.message); }
      }
    } finally { try { srcDb.close(); } catch(_) {} }
  }
  for (const md5 of md5Set) warnFile(targetName, 'cross-index: no source found for md5=' + md5.slice(0,8));
  return totalAdded;
}
// Flush and close every open collection, then release the db-check cache.
// Best-effort: individual flush/close failures are swallowed so shutdown
// always completes.
function closeBridge() {
  for (const [name, st] of Object.entries(_colState)) {
    try { _flushHnsw(name); } catch(_) {}
    try { st.db.close(); } catch(_) {}
    delete _colState[name];
  }
  closeDbCheckCache();
}

// ── ZIP EXTRACTION ─────────────────────────────────────────────────────────────
// Extract an archive into STAGING_DIR/<basename>_<md5 prefix> and return the
// list of extracted file paths (dotfiles skipped, subdirectories walked).
// policy: 'never' -> [], 'ask' -> prompt via tui, anything else -> extract.
// Uses AdmZip for .zip when available; otherwise shells out to unzip/tar.
async function extractZip(fpath, policy, tui) {
  if (policy === 'never') return [];
  if (policy === 'ask') {
    const answer = await tui.prompt(`Extract ${path.basename(fpath)}? (y/N/s): `);
    // NOTE(review): any non-empty answer (including "n") falls through to
    // extraction -- presumably tui.prompt returns '' on decline; confirm
    // the y/N/s handling upstream.
    if (!answer) return [];
  }
  // Unique staging dir per file content (md5 prefix avoids name collisions)
  const destName = path.basename(fpath, path.extname(fpath)) + '_' + md5File(fpath).slice(0,8);
  const dest     = path.join(STAGING_DIR, destName);
  fs.mkdirSync(dest, { recursive: true });
  const ext = path.extname(fpath).toLowerCase();
  if (AdmZip && ext === '.zip') {
    try { new AdmZip(fpath).extractAllTo(dest, true); }
    catch(e) { logFile(`WARN ZIP adm-zip failed ${path.basename(fpath)}: ${e.message}`); return []; }
  } else {
    // Fallback: system unzip for .zip, tar for everything else
    const args = ext === '.zip' ? ['unzip','-q','-o',fpath,'-d',dest]
                                : ['tar','-xf',fpath,'-C',dest];
    const r = spawnSync(args[0], args.slice(1), { timeout: 120000 });
    if (r.status !== 0) { logFile(`WARN ZIP extract failed: ${path.basename(fpath)}`); return []; }
  }
  // Collect every extracted file (skipping dotfiles), depth-first
  const out = [];
  (function walk(dir) {
    for (const f of fs.readdirSync(dir)) {
      if (f.startsWith('.')) continue;
      const full = path.join(dir, f);
      if (fs.statSync(full).isDirectory()) walk(full); else out.push(full);
    }
  })(dest);
  return out;
}

// ── TUI ────────────────────────────────────────────────────────────────────────
// Mirrors IngestTUI from ingest.py exactly: same state fields, same bar formula,
// same ETA logic, same key bindings (p/r/s/Ctrl-C), same log panel (8 lines).
// Uses raw terminal mode + ANSI escape sequences instead of curses.

function makeTUI() {
  const state = {
    collection: '', stage: '', current_file: '', current_file_path: '',
    file_sub: '', file_n: 0, file_total: 0,
    bytes_done: 0, bytes_total: 0, bytes_skipped: 0,
    batch_n: 0, batch_total: 0,
    chunks: 0, chunks_embedded: 0, chunks_in_batch: 0, chunks_total: 0,
    resumed: 0, dupes: 0, skipped: 0, warnings: 0,
    start: Date.now(), batch_start: Date.now(), embed_start: null,
    embed_bytes_done: 0, embed_chunks_done: 0, embed_bytes_total: 0,
    embed_chunks_current: 0,  // chunks done in current file (not yet added to ratio)
    current_file_chunks: 0,   // total chunks in file being processed right now
    current_file_bytes: 0,    // bytes of file being processed right now
    embed_active_start: 0,    // timestamp of first chunk actually embedded this session
    embed_files_done: 0,      // files fully embedded this session (for ratio stability)
    chunk_size: 2048, chunk_hints: {}, paused: false, prompt_line: '',
    paused_ms: 0,         // total milliseconds spent paused or in prompts this session
  };
  const logLines  = [];
  const MAX_LOG   = 8;
  let   stopped   = false;
  let   paused    = false;
  let   stopReason= '';
  let   completed = false;
  let   promptMode = false;    // suspend TUI for ZIP ask
  let   promptResolve = null;  // resolve fn when prompting

  // ANSI helpers
  const ESC = '\x1b[';
  const out = s => process.stdout.write(s);
  const clear = () => out('\x1b[2J\x1b[H');
  const hideCursor = () => out('\x1b[?25l');
  const showCursor = () => out('\x1b[?25h');

  function cols() { return process.stdout.columns || 80; }
  function rows() { return process.stdout.rows || 24; }

  function fit(t, w) { w = w || cols() - 2; return t.length > w ? t.slice(0, w) : t; }
  function pad(t, w) { w = w || cols() - 2; return t.padEnd(w).slice(0, w); }

  function bar(pct, label, suffix, w) {
    w = w || cols() - 2;
    const maxSuffix = w - label.length - 11;
    if (suffix.length > maxSuffix) suffix = suffix.slice(0, maxSuffix - 3) + '...';
    const barW  = Math.max(w - label.length - 7 - suffix.length, 4);
    const filled= Math.floor(barW * Math.min(pct, 1.0));
    const b     = '#'.repeat(filled) + '-'.repeat(barW - filled);
    return fit(`${label}[${b}]${(pct*100).toFixed(0).padStart(3)}%${suffix}`, w);
  }

  function fmtEta(secs) {
    if (secs <= 0) return '';
    if (secs >= 3600) {
      const h = Math.floor(secs/3600), m = Math.floor((secs%3600)/60), s = Math.floor(secs%60);
      return ` eta ${h}:${String(m).padStart(2,'0')}:${String(s).padStart(2,'0')}`;
    }
    if (secs >= 60) {
      const m = Math.floor(secs/60), s = Math.floor(secs%60);
      return ` eta ${m}:${String(s).padStart(2,'0')}`;
    }
    return secs > 5 ? ' eta <1m' : '';
  }

  let _lastNonTTYLine = '';
  // Frozen snapshot of embedding progress -- only updated when a chunk completes
  let embedSnap = { eCdone: 0, eBdone: 0, bTotal: 0, filesD: 0,
                    curChunks: 0, curBytes: 0, activeStart: 0,
                    chunks: 0, bN: 0, bTot: 1, bElStr: '0s', pausedMs: 0 };
  function draw() {
    if (!process.stdout.isTTY) {
      // Non-TTY fallback: print a status line every ~5s so user sees progress
      const s = state;
      const line = `[INGEST] ${s.stage || 'working'} file=${s.file_idx||0}/${s.file_total||'?'} chunk=${s.chunk_done||0} ${s.cur_file ? s.cur_file.slice(-40) : ''}`;
      if (line !== _lastNonTTYLine) { console.log(line); _lastNonTTYLine = line; }
      return;
    }
    const s   = state;
    const now = Date.now();
    const w   = cols() - 2;
    const sep = '-'.repeat(w);

    const elSec = (now - s.start) / 1000;
    const elStr = elSec < 60
      ? `${Math.floor(elSec)}s`
      : `${Math.floor(elSec/60)}m${Math.floor(elSec%60)}s`;
    const batchEl = ((now - s.batch_start) / 1000);
    const bElStr  = batchEl < 60
      ? `${Math.floor(batchEl)}s`
      : `${Math.floor(batchEl/60)}m${Math.floor(batchEl%60)}s`;

    const status = s.paused ? '[PAUSED]' : s.stage;
    const hdr    = fit(` v${VERSION}  ${s.collection}  ${status}  elapsed ${elStr}`, w);

    // File line with size + estimated chunks
    let fsz = 0;
    try { if (s.current_file_path) fsz = fs.statSync(s.current_file_path).size; } catch(_) {}
    const fext = s.current_file_path
      ? path.extname(s.current_file_path).toLowerCase()
      : path.extname(s.current_file).toLowerCase();
    const hints  = s.chunk_hints;
    const csTok  = s.chunk_size;
    const eBdone = s.embed_bytes_done;
    const eCdone = s.embed_chunks_done + (s.embed_chunks_current || 0); // include current file for display
    let estChunks = 0;
    if (eBdone > 0 && eCdone > 0 && fsz)
      estChunks = Math.max(1, Math.floor(fsz * eCdone / eBdone));
    else if (fsz && hints[fext])
      estChunks = Math.max(1, Math.floor(fsz / (hints[fext] * 4)));
    else if (fsz)
      estChunks = Math.max(1, Math.floor(fsz / (csTok * 4)));

    const fszStr = fsz >= 1048576 ? `${(fsz/1048576).toFixed(1)}MB` : fsz > 0 ? `${Math.floor(fsz/1024)}KB` : '';
    const metaStr = fszStr + (estChunks ? `  ~${estChunks.toLocaleString()}ch` : '');
    const meta    = metaStr ? `  [${metaStr}]` : '';
    const dispN   = Math.min(s.file_n, s.file_total);
    const pfx     = ` File ${dispN}/${s.file_total}${meta}  `;
    let   fname   = s.current_file;
    const space   = w - pfx.length;
    if (fname.length > space) fname = '...' + fname.slice(-(space - 3));
    const fileLn  = fit(pfx + fname, w);
    const subLn   = s.file_sub ? fit(`   ${s.file_sub}`, w) : null;

    // Preprocess bar
    const pctF = s.file_n / Math.max(s.file_total, 1);
    const bdone = s.bytes_done, btotal = s.bytes_total, bskip = s.bytes_skipped;
    const bWorkDone = bdone, bWorkRem = Math.max(0, btotal - bskip - bdone);
    let etaSecs = 0;
    if (bWorkDone > 0 && bWorkRem > 0) etaSecs = (elSec / bWorkDone) * bWorkRem;
    else if (bWorkDone === 0 && bWorkRem > 0) {
      const doneF = Math.max(s.file_n - s.resumed - s.dupes, 1);
      const remF  = Math.max(s.file_total - s.file_n, 0);
      etaSecs = doneF > 0 ? (elSec / doneF) * remF : 0;
    }
    const preSfx = ` r=${s.resumed} d=${s.dupes} sk=${s.skipped} w=${s.warnings}` +
      (pctF >= 1 ? ' done' : fmtEta(etaSecs));
    const preLn  = bar(pctF, ' Preprocess ', preSfx, w);

    // Embedding bar -- uses frozen embedSnap, only updated on chunk completion
    const es = embedSnap;
    let pctC = es.bN / es.bTot, cTotal = 0;
    if (es.filesD >= 3 && es.eBdone > 0 && es.bTotal > 0) {
      // 3+ files done: use completed-file ratio projected across all remaining bytes
      cTotal = Math.round((es.eCdone / es.eBdone) * es.bTotal);
    } else if (es.curChunks > 0) {
      // Use known chunk count of current file as floor -- not a projection, actual computed value
      // This gives a valid cTotal for ETA even before any file completes
      cTotal = es.curChunks;
    }
    if (cTotal > 0) pctC = es.eCdone / cTotal;
    let embEta = '';
    if (es.activeStart && es.eCdone > 0 && cTotal > 0) {
      let pausedSoFar = es.pausedMs;
      if (s._pause_start) pausedSoFar += now - s._pause_start;
      const eSecs   = Math.max((now - es.activeStart) / 1000 - pausedSoFar / 1000, 0.1);
      const secPerC = eSecs / es.eCdone;
      const cRem    = Math.max(0, cTotal - es.eCdone);
      if (cRem > 0) embEta = fmtEta(cRem * secPerC);
    }
    const cDisp  = cTotal > 0 ? `~${cTotal.toLocaleString()}` : '?';
    const embSfx = ` ${es.bN}/${es.bTot} files ${es.eCdone.toLocaleString()}/${cDisp} chunks ${es.bElStr} stored=${es.chunks.toLocaleString()}${embEta}`;
    const embLn  = bar(pctC, ' Embedding  ', embSfx, w);

    const ctrl = fit(' p=pause  r=resume  s=stop  Ctrl-C=abort', w);
    const logs = logLines.slice(-MAX_LOG);
    while (logs.length < MAX_LOG) logs.push('');

    const promptLn = s.prompt_line ? fit(` ${s.prompt_line}`, w) : null;
    const screenLines = [
      sep, hdr, sep,
      fileLn,
      ...(subLn ? [subLn] : []),
      preLn, embLn, sep,
      ...logs.map(l => fit(l, w)),
      sep,
      ...(promptLn ? [promptLn, sep] : []),
      ctrl,
    ];

    out('\x1b[H'); // move to top-left without clearing (reduces flicker)
    const maxRows = rows() - 1;
    screenLines.slice(0, maxRows).forEach(line => out(pad(line, w) + '\n'));
    out('\x1b[J'); // clear from cursor to end of screen (erases stale lines from previous draw)
  }

  function addLog(msg, level) {
    const prefix = { ok:'[OK]  ', warn:'[WARN]', skip:'[SKIP]', err:'[ERR] ' }[level] || '      ';
    logLines.push(`${prefix} ${msg.trim()}`);
    if (logLines.length > MAX_LOG * 3) logLines.splice(0, logLines.length - MAX_LOG);
    // Mirror to file log with matching level prefix
    const filePrefix = { ok:'INFO', warn:'WARN', skip:'SKIP', err:'ERR' }[level] || 'INFO';
    logFile(`${filePrefix} ${msg.trim()}`);
  }

  // Raw mode input
  let rawModeOn = false;
  function startRawMode() {
    if (!process.stdin.isTTY) return;
    process.stdin.setRawMode(true);
    process.stdin.resume();
    process.stdin.setEncoding('utf8');
    rawModeOn = true;
    hideCursor();
    clear();
    process.stdin.on('data', key => {
      if (promptMode && promptResolve) {
        // In prompt mode pass keys to readline-like handler
        promptResolve(key);
        return;
      }
      if (key === 'p') { paused = true;  state.paused = true;  state._pause_start = Date.now(); addLog('Paused -- press r to resume','warn'); }
      if (key === 'r') { paused = false; state.paused = false; if (state._pause_start) { state.paused_ms += Date.now() - state._pause_start; state._pause_start = 0; } addLog('Resumed','ok'); }
      if (key === 's') { stopped = true; stopReason = 'user pressed s'; addLog('Stop requested...','warn'); }
      if (key === 'q') { addLog('Press s to stop, Ctrl-C to abort','warn'); }
      if (key === '\x03') { stopped = true; stopReason = 'Ctrl-C'; process.kill(process.pid, 'SIGINT'); }
    });
  }
  function stopRawMode() {
    if (rawModeOn) {
      try { process.stdin.setRawMode(false); } catch(_) {}
      process.stdin.pause();
      showCursor();
      rawModeOn = false;
    }
  }

  // Draw timer
  let drawTimer = null;
  function startDraw() { drawTimer = setInterval(draw, process.stdout.isTTY ? 250 : 5000); }
  function stopDraw()  { if (drawTimer) { clearInterval(drawTimer); drawTimer = null; } }

  // tty_prompt: show prompt in TUI prompt_line area, read y/n via raw stdin
  function tuiPrompt(msg) {
    return new Promise(resolve => {
      // Show the prompt inside the TUI prompt_line slot and redraw immediately
      state.prompt_line = msg;
      draw();
      // Use raw stdin -- already in raw mode during TUI, handles Ctrl-C correctly
      promptMode = true;
      let lineBuf = '';
      const _promptStart = Date.now();
      const done = (answer) => {
        promptResolve = null; promptMode = false;
        state.prompt_line = '';
        state.paused_ms += Date.now() - _promptStart; // prompt time is dead time
        draw();
        resolve(answer);
      };
      promptResolve = key => {
        if (key === '\x03') { // Ctrl-C -- stop ingest and exit cleanly
          stopped = true; stopReason = 'Ctrl-C';
          done(false);
          process.kill(process.pid, 'SIGINT');
          return;
        }
        if (key === 's' || key === 'S') { // s -- stop ingest cleanly
          stopped = true; stopReason = 'user pressed s';
          done(false);
          return;
        }
        if (key === '\r' || key === '\n') {
          const ans = lineBuf.trim().toLowerCase();
          if (ans === 'y') { done(true); return; }
          if (ans === 'n' || ans === '') { done(false); return; }
          // invalid -- re-prompt
          lineBuf = '';
          state.prompt_line = msg + '  [y/n/s only]';
          draw();
          return;
        }
        if (key === '\x7f' || key === '\b') {
          lineBuf = lineBuf.slice(0, -1);
          state.prompt_line = msg + lineBuf;
          draw();
        } else if (key === 'y' || key === 'Y') {
          done(true); // accept y immediately without needing Enter
        } else if (key === 'n' || key === 'N') {
          done(false); // accept n immediately without needing Enter
        } else {
          // invalid single keypress -- flash hint
          state.prompt_line = msg + '  [y/n/s only]';
          draw();
        }
      };
    });
  }

  return {
    state,
    update: fields => Object.assign(state, fields),
    log: (msg, level) => addLog(msg, level),
    isStopped: () => stopped,
    isPaused: () => paused,
    isCompleted: () => completed,
    setCompleted: () => { completed = true; },
    stopReason: () => stopReason,
    prompt: msg => tuiPrompt(msg),
    snapEmbed: fields => { embedSnap = Object.assign({}, embedSnap, fields); },
    start() { startRawMode(); startDraw(); },
    finish(msg) {
      stopDraw();
      draw(); // final draw
      stopRawMode();
      if (msg) { process.stdout.write('\n'); console.log(msg); }
    },
    // Pause gate: await this in work loop
    async waitIfPaused() {
      while (paused && !stopped) await new Promise(r => setTimeout(r, 300));
      if (stopped) throw new StopRequested();
    },
  };
}

class StopRequested extends Error {}

// ── SUMMARY BOX ────────────────────────────────────────────────────────────────
// Print a boxed summary to stdout: a timestamp line followed by the given
// lines, each indented two spaces, framed by dashed rules sized to the
// longest line plus four characters.
function summary(...lines) {
  const rows  = [new Date().toLocaleString(), ...lines];
  const width = 4 + rows.reduce((m, l) => Math.max(m, l.length), 0);
  const rule  = '-'.repeat(width);
  console.log(rule);
  for (const row of rows) console.log(`  ${row}`);
  console.log(rule);
}

// ── COLLECTION DISCOVERY ────────────────────────────────────────────────────────
function discoverCollections() {
  fs.mkdirSync(SOURCE_DIR, { recursive: true });
  const cols = {};
  for (const name of fs.readdirSync(SOURCE_DIR).sort()) {
    const full = path.join(SOURCE_DIR, name);
    if (name.startsWith('.') || !fs.statSync(full).isDirectory()) continue;
    cols[name] = { source_dir: full };
  }
  return cols;
}

// ── FAILED/UNINDEXED REPORT ──────────────────────────────────────────────────
// Report files recorded with zero chunks ("unindexed"). Uses the SQLite
// ingest DB when active (authoritative after --upgrade-db); otherwise falls
// back to the legacy dedup.json bookkeeping. Output goes to stdout.
async function cmdFailed() {
  if (_ingestDbActive()) {
    // SQLite path -- authoritative after --upgrade-db
    const db = _ingestDbOpen();
    const rows = db.prepare(`
      SELECT f.collection, f.source_file, c.detail
      FROM ingest_files f
      LEFT JOIN ingest_file_caps c ON c.file_id=f.id AND c.cap='text'
      WHERE f.chunks=0 AND f.superseded=0
      ORDER BY f.collection, f.source_file
    `).all();
    closeIngestDb();
    if (rows.length === 0) {
      console.log('  No unindexed files in ingest_db.sqlite3 -- all files have chunks > 0');
      return;
    }
    const rule = '-'.repeat(74);
    console.log(`  Unindexed files (chunks=0): ${rows.length}`);
    console.log(rule);
    console.log('  Collection        File');
    console.log(rule);
    for (const row of rows) {
      const colName  = (row.collection || '?').padEnd(16);
      const baseName = (row.source_file || '?').split('/').pop();
      const hasPart  = row.detail && row.detail.startsWith('partial:');
      const note     = hasPart ? ` [partial: ${row.detail.slice(8)} chunks]` : '';
      console.log(`  ${colName}  ${baseName}${note}`);
    }
    console.log(rule);
    console.log('  Re-run ingest to retry these files.');
  } else {
    // dedup.json fallback
    const dedup = loadDedup();
    const failed = Object.entries(dedup)
      .filter(([, rec]) => !rec.chunks || rec.chunks === 0)
      .sort((x, y) => (x[1].collection || '').localeCompare(y[1].collection || ''));
    if (failed.length === 0) {
      console.log('  No unindexed files in dedup.json -- all files have chunks > 0');
      return;
    }
    const rule = '-'.repeat(74);
    console.log(`  Unindexed files (chunks=0): ${failed.length}`);
    console.log(rule);
    console.log('  Collection        File');
    console.log(rule);
    for (const [hash, rec] of failed) {
      const colName  = (rec.collection || '?').padEnd(16);
      // Fall back through first-seen path, then any recorded path, then the md5.
      const srcPath  = rec.first_seen_path || (rec.all_paths && rec.all_paths[0]) || hash;
      const baseName = srcPath.split('/').pop();
      const note     = rec.partial_chunks ? ` [partial: ${rec.partial_chunks} chunks]` : '';
      console.log(`  ${colName}  ${baseName}${note}`);
    }
    console.log(rule);
    console.log('  Re-run ingest to retry these files.');
    console.log('  Tip: run ./run.sh ingest --upgrade-db to migrate to SQLite tracking.');
  }
}

// ── LIST COMMAND ───────────────────────────────────────────────────────────────
async function cmdList() {
  const cols  = discoverCollections();
  const dedup = loadDedup();
  console.log(`  Collections in ${SOURCE_DIR}`);
  console.log('-'.repeat(74));
  console.log('  Name              Chunks    Files   Dupes  Source');
  console.log('-'.repeat(74));
  for (const [name, col] of Object.entries(cols)) {
    const cr = await bridgeCall({ action: 'count', name });
    const count = cr.count || 0;
    let fileCount = 0;
    (function walk(dir) {
      for (const f of fs.readdirSync(dir)) {
        if (f.startsWith('.')) continue;
        const full = path.join(dir, f);
        if (fs.statSync(full).isDirectory()) walk(full); else fileCount++;
      }
    })(col.source_dir);
    const dupes = Object.values(dedup).filter(v => v.collection === name && (v.all_paths||[]).length > 1).length;
    const src   = col.source_dir.replace(os.homedir(), '~');
    console.log(`  ${name.padEnd(16)} ${String(count).padStart(6)}  ${String(fileCount).padStart(6)}  ${String(dupes).padStart(6)}  ${src}`);
  }
  console.log('-'.repeat(74));
}

// ── INGEST COLLECTION ──────────────────────────────────────────────────────────
async function ingestCollection(name, srcDir, opts) {
  const { reset = false, zipPolicy = 'never' } = opts;
  const tui = makeTUI();

  // Start TUI first -- before any disk I/O so screen appears immediately
  tui.update({ collection: name, stage: 'Starting...' });
  tui.start();
  await new Promise(r => setImmediate(r)); // yield so first frame renders

  // Load caches after TUI is visible -- yield between each so frames render
  tui.update({ stage: 'Loading preprocess cache...' });
  await new Promise(r => setImmediate(r));
  const pcache = loadPreprocCache();
  tui.update({ stage: 'Loading pdf cache...' });
  await new Promise(r => setImmediate(r));
  const pdfTxtCache = loadPdfTxtCache();
  tui.update({ stage: 'Scanning files...' });
  await new Promise(r => setImmediate(r)); // yield so TUI draws before disk scan

  // Show last crash from THIS run (crash.log is rotated on start by run.sh)
  if (fs.existsSync(CRASH_LOG)) {
    try {
      const lines = fs.readFileSync(CRASH_LOG,'utf8').split('\n').filter(l=>l.trim());
      if (lines.length) tui.log(`crash.log: ${lines[lines.length-1].slice(0,70)}`, 'warn');
    } catch(_) {}
  }

  tui.log(`Source:  ${srcDir}`, 'info');
  const _prof = cfg('INGEST_PROFILE','?');
  tui.log(`Model:   ${EMBED_MODEL}  profile=${_prof}  timeout=${EMBED_TIMEOUT_MS/1000}s  backoff=${EMBED_BACKOFF_MS/1000}s`, 'info');
  tui.log(`Chunks:  text=${CHUNK_SIZE}  pdf=${CHUNK_SIZE_PDF}  av=${CHUNK_SIZE_AV}  overlap=${CHUNK_OVERLAP_PCT}%`, 'info');
  tui.log(`ZIP: ${zipPolicy}  OCR: ${OCR_ENABLED?'on':'off'}  Whisper: ${_whisperInfo?_whisperInfo.type:'off'}  v${VERSION}`, 'info');

  // Scan files (show progress in TUI so blank screen is eliminated)
  tui.update({ stage: 'Scanning files...' });
  const allFiles = [];
  (function walk(dir) {
    let entries; try { entries = fs.readdirSync(dir); } catch(_) { return; }
    for (const f of entries) {
      if (f.startsWith('.')) continue;
      const full = path.join(dir, f);
      try {
        if (fs.statSync(full).isDirectory()) walk(full);
        else { allFiles.push(full); tui.update({ current_file: full.replace(srcDir + '/', '') }); }
      } catch(_) {}
    }
  })(srcDir);

  // Total bytes for ETA
  tui.update({ stage: 'Calculating...', current_file: '' });
  await new Promise(r => setImmediate(r)); // yield so scan result draws before size calc
  let bytesTotal = 0;
  for (const f of allFiles) try { bytesTotal += fs.statSync(f).size; } catch(_) {}

  tui.update({
    stage: 'Starting',
    file_total: allFiles.length,
    bytes_total: bytesTotal,
    current_file: '',
  });
  tui.log(`Files:   ${allFiles.length} found`, 'info');

  try {
    // Pre-flight: verify index is writable; auto-recover from corrupt database
    tui.update({ stage: 'Opening index database...' });
    await new Promise(r => setImmediate(r)); // yield so TUI draws before sync DB open
    {
      let _pf = await bridgeCall({ action: 'get_or_create', name });
      if (!_pf.ok && (_pf.error||'').toLowerCase().includes('malform')) {
        const msg = `Index database error: ${_pf.error}\n  Database may be corrupt  --  back up chromadb/ then run with --reset if needed`;
        tui.log(msg, 'warn');
        // NEVER auto-delete chromadb/  --  user must explicitly use --reset
      }
      if (!_pf.ok) {
        const isLocked = (_pf.error||'').toLowerCase().includes('database is locked');
        const noCol    = (_pf.error||'').toLowerCase().includes('has no column');
        const msg = isLocked
          ? `Index write failed: database is locked.\n  The web server may have the database open. Stop it with Ctrl-C, then run ./run.sh ingest again.`
          : noCol
          ? `Index write failed: ${_pf.error}\n  Schema migration incomplete  --  re-run: ./run.sh ingest`
          : `Index write failed: ${_pf.error}\n  If the database is corrupt, back up chromadb/ then run: ./run.sh ingest --reset`;
        tui.log(msg, 'warn');
        crashWrite('FATAL: ' + msg);
        tui.setCompleted();
        tui.finish();
        summary('Collection: ' + name, msg);
        return;
      }
    }
    const _cntR = await bridgeCall({ action: 'count', name });
    if (!_cntR.ok) {
      const msg = `Index count failed: ${_cntR.error}`;
      tui.log(msg, 'warn'); crashWrite('FATAL: ' + msg);
      tui.setCompleted(); tui.finish();
      summary('Collection: ' + name, msg); return;
    }
    const existingCount = _cntR.count || 0;
    tui.log(`Index ready: ${existingCount} existing chunks`, 'ok');

    // ── STAGE 1: PREPROCESSING ────────────────────────────────────────────────
    tui.update({ stage: 'Stage 1/2: Preprocessing' });
    await new Promise(r => setImmediate(r)); // yield so TUI draws new stage before sync work
    const toEmbed = [];
    const toCrossIndex = []; // files already indexed elsewhere -- copy vectors into this collection
    const userSkip = new Set((cfg('USER_SKIP_EXTS','')).split(',')
      .map(e => e.trim().toLowerCase()).filter(Boolean)
      .map(e => e.startsWith('.') ? e : '.' + e));

    let _lastYield = Date.now();
    for (const fpath of allFiles) {
      await tui.waitIfPaused();

      let ext = path.extname(fpath).toLowerCase();
      let fsize = 0;
      try { fsize = fs.statSync(fpath).size; } catch(_) {}
      tui.update({
        current_file: path.basename(fpath),
        current_file_path: fpath,
        bytes_done: (tui.state.bytes_done || 0) + fsize,
      });
      // Yield to event loop every 100ms so TUI redraws during fast dupe runs
      if (Date.now() - _lastYield >= 100) { await new Promise(r => setImmediate(r)); _lastYield = Date.now(); }

      if (SKIP_EXTS.has(ext) || userSkip.has(ext)) {
        logFile(`INFO skip (blacklisted ext ${ext}): ${path.basename(fpath)}`);
        tui.update({ skipped: tui.state.skipped + 1, file_n: tui.state.file_n + 1 });
        continue;
      }
      if (!ext || !DOCUMENT_EXTS.has(ext)) {
        ext = sniffExtension(fpath);
        if (!ext) {
          logFile(`INFO skip (unrecognised type): ${path.basename(fpath)}`);
          tui.update({ skipped: tui.state.skipped + 1, file_n: tui.state.file_n + 1 });
          continue;
        }
      }

      // Compute file MD5
      let md5;
      try { md5 = md5File(fpath); }
      catch(e) { logFile(`WARN hash: ${fpath}: ${e.message}`); tui.update({ skipped: tui.state.skipped+1, file_n: tui.state.file_n+1 }); continue; }

      // DB dedup: check all collection rag.sqlite3 files
      {
        const dbCheck = dbCheckMd5(md5, _colState[name] && _colState[name].segDir);
        if (dbCheck.inTarget) {
          logFile(`INFO dupe (db-current): ${path.basename(fpath)} [${md5.slice(0,8)}]`);
          tui.update({ dupes: tui.state.dupes+1, file_n: tui.state.file_n+1, bytes_skipped: (tui.state.bytes_skipped||0)+fsize });
          continue; // already in this collection -- skip entirely
        }
        if (dbCheck.inOther) {
          logFile(`INFO dupe (db-other): ${path.basename(fpath)} [${md5.slice(0,8)}]`);
          tui.update({ dupes: tui.state.dupes+1, file_n: tui.state.file_n+1, bytes_skipped: (tui.state.bytes_skipped||0)+fsize });
          toCrossIndex.push({ md5, fpath });
          continue; // in another collection -- copy vectors
        }
      }

      // ZIP -- exclude ODF/OOXML containers (they are ZIP internally but handled by LibreOffice)
      if (ZIP_EXTS.has(ext) && !ZIP_CONTAINER_EXTS.has(path.extname(fpath).toLowerCase())) {
        tui.update({ file_sub: `Extracting ${path.basename(fpath)}...` });
        const extracted = await extractZip(fpath, zipPolicy, tui);
        tui.update({ file_sub: '' });
        for (const ep of extracted) {
          const eext = path.extname(ep).toLowerCase();
          let emd5; try { emd5 = md5File(ep); } catch(_) { continue; }
          {
            const dbCheck = dbCheckMd5(emd5, _colState[name] && _colState[name].segDir);
            if (dbCheck.inTarget) { continue; }
            if (dbCheck.inOther)  { toCrossIndex.push({ md5: emd5, fpath: ep }); continue; }
          }
          toEmbed.push([ep, eext, {
            source_file_name: path.basename(ep),
            source_rel_path: path.relative(srcDir, ep),
            source_md5: emd5, collection: name,
            preprocessed_at: new Date().toISOString(),
            zip_source: fpath, zip_md5: md5,
          }]);
        }
        tui.update({ file_n: tui.state.file_n + 1 });
        continue;
      }

      toEmbed.push([fpath, ext, {
        source_file_name: path.basename(fpath),
        source_rel_path: path.relative(srcDir, fpath),
        source_md5: md5, collection: name,
        preprocessed_at: new Date().toISOString(),
      }]);

      // PDF full-text: queue TXT version if not yet extracted
      // Runs even if the PDF itself was already indexed (catches pre-existing PDFs)
      if (ext === '.pdf' && !isPdfTxtDone(md5, pdfTxtCache)) {
        tui.update({ file_sub: `pdf2txt: ${path.basename(fpath)}` });
        const fullText = await extractPdfFullText(fpath, tui);
        tui.update({ file_sub: '' });
        if (fullText) {
          const txtPath = path.join(PDF_TXT_DIR, `${md5}.txt`);
          fs.writeFileSync(txtPath, fullText);
          pdfTxtCache[md5] = { txt_path: txtPath, source: fpath, extracted_at: new Date().toISOString() };
          savePdfTxtCache(pdfTxtCache);
          // Queue the TXT for embedding under a synthetic md5 (pdf_md5 + '_txt')
          const txtMd5 = md5 + '_txt';
          if (true) {
            toEmbed.push([txtPath, '.txt', {
              source_file_name: path.basename(fpath),
              source_rel_path: path.relative(srcDir, fpath),
              source_md5: md5, txt_md5: txtMd5,
              collection: name, pdf2txt: 'true',
              preprocessed_at: new Date().toISOString(),
            }]);
            tui.log(`pdf2txt: ${path.basename(fpath)}  (${(fullText.length/1024).toFixed(0)}KB)`, 'ok');
          }
        } else {
          tui.log(`pdf2txt failed: ${path.basename(fpath)}`, 'warn');
          tui.update({ warnings: tui.state.warnings + 1 });
        }
      }

      tui.update({ file_n: tui.state.file_n + 1 });
    }

    // Also catch already-indexed PDFs that predate the pdf2txt feature
    _lastYield = Date.now();
    for (const fpath of allFiles) {
      const ext = path.extname(fpath).toLowerCase();
      if (ext !== '.pdf') continue;
      const knownMd5 = md5File(fpath);
      if (!knownMd5) continue;
      if (isPdfTxtDone(knownMd5, pdfTxtCache)) continue;  // already done
      const txtMd5 = knownMd5 + '_txt';
      // Check DB whether txt chunks already exist for this pdf
      if (dbCheckMd5(txtMd5, _colState[name] && _colState[name].segDir).inTarget) continue;
      // Extract TXT for this previously-indexed PDF
      tui.update({ file_sub: `pdf2txt (backfill): ${path.basename(fpath)}` });
      const fullText = await extractPdfFullText(fpath, tui);
      tui.update({ file_sub: '' });
      if (fullText) {
        const txtPath = path.join(PDF_TXT_DIR, `${knownMd5}.txt`);
        fs.writeFileSync(txtPath, fullText);
        pdfTxtCache[knownMd5] = { txt_path: txtPath, source: fpath, extracted_at: new Date().toISOString() };
        savePdfTxtCache(pdfTxtCache);
        toEmbed.push([txtPath, '.txt', {
          source_file_name: path.basename(fpath),
          source_rel_path: path.relative(srcDir, fpath),
          source_md5: knownMd5, txt_md5: txtMd5,
          collection: name, pdf2txt: 'true',
          preprocessed_at: new Date().toISOString(),
        }]);
        tui.log(`pdf2txt backfill: ${path.basename(fpath)}`, 'ok');
      }
    }

    tui.update({ stage: 'Stage 1/2: Preprocessing', file_n: allFiles.length });
    tui.log(`Preprocessing done: ${toEmbed.length} to embed`, 'ok');
    savePreprocCache(pcache);

    if (!toEmbed.length) {
      if (toCrossIndex.length) {
        const crossTotal = await crossIndexBatch(toCrossIndex, name, srcDir, tui);
        if (crossTotal > 0) {
          _flushHnsw(name);
          logFile(`INFO cross-indexed ${crossTotal} chunks for ${toCrossIndex.length} dupe files in ${name}`);
          tui.log(`Cross-indexed ${crossTotal} chunks from other collections`, 'ok');
          warnSummary(tui, name);
          await new Promise(r => setTimeout(r, 1500));
          tui.finish();
          summary(`Collection: ${name}`, `Cross-indexed ${crossTotal} chunks from ${toCrossIndex.length} files.`);
          return;
        }
      }
      tui.setCompleted();
      tui.finish();
      summary(`Collection: ${name}`, `Already up to date -- ${existingCount} chunks.`);
      return;
    }

    // ── STAGE 2: EMBEDDING ────────────────────────────────────────────────────
    tui.update({
      stage: 'Stage 2/2: Embedding',
      batch_total: toEmbed.length, batch_n: 0,
      embed_start: Date.now(), embed_bytes_total: 0,
      embed_bytes_done: 0, embed_chunks_done: 0, embed_chunks_current: 0,
      current_file_chunks: 0, current_file_bytes: 0, embed_active_start: 0, embed_files_done: 0,
    });

    // Compute embed bytes total
    let eBytesTotal = 0;
    for (const [fp] of toEmbed) try { eBytesTotal += fs.statSync(fp).size; } catch(_) {}
    tui.update({ embed_bytes_total: eBytesTotal });

    // Pre-flight: verify embed model is available and matches this collection's dim.
    // If collection already has vectors we must use the same model that built them.
    // If the required model is missing from Ollama, attempt to pull it automatically.
    {
      const _DIM_TO_MODEL_LOCAL = { 384:'all-minilm', 768:'nomic-embed-text', 1024:'mxbai-embed-large' };
      const segUuidPf = deterministicUuid(name + ':vector');
      const segDirPf  = path.join(CHROMA_PATH, segUuidPf);
      const metaPath  = path.join(segDirPf, 'index_meta.json');
      let requiredModel = EMBED_MODEL;

      // If collection already has an index, enforce its dim's model
      if (fs.existsSync(metaPath)) {
        try {
          const existMeta = JSON.parse(fs.readFileSync(metaPath, 'utf8'));
          const existDim  = existMeta.dimensionality;
          const existModel = existDim ? _DIM_TO_MODEL_LOCAL[existDim] : null;
          if (existModel && existModel !== EMBED_MODEL) {
            tui.log(`Collection '${name}' has existing vectors at dim=${existDim}  --  switching to model '${existModel}' (was '${EMBED_MODEL}')`, 'warn');
            requiredModel = existModel;
            process.env.EMBED_MODEL = requiredModel;
          }
        } catch(_) {}
      }

      // Check model is in Ollama; auto-pull if not
      const ollamaBase = (cfg('EMBED_OLLAMA_HOST','') || OLLAMA_HOST).replace(/\/+$/, '');
      let modelOk = false;
      const _checkStart = Date.now();
      const _checkTicker = setInterval(() => {
        const _cs = Math.round((Date.now() - _checkStart) / 1000);
        tui.update({ file_sub: `checking Ollama for model '${requiredModel}'... ${_cs}s` });
      }, 2000);
      tui.update({ file_sub: `checking Ollama for model '${requiredModel}'...` });
      try {
        const r = await fetch(`${ollamaBase}/api/tags`, { signal: AbortSignal.timeout(30000) });
        if (r.ok) {
          const d = await r.json();
          modelOk = (d.models||[]).some(m => (m.name||'').startsWith(requiredModel));
        }
      } catch(e) {
        tui.log(`Ollama not reachable at ${ollamaBase}: ${e.message}`, 'warn');
      } finally {
        clearInterval(_checkTicker);
        tui.update({ file_sub: '' });
      }

      if (!modelOk) {
        tui.update({ file_sub: `model '${requiredModel}' not found  --  pulling from Ollama...` });
        tui.log(`Model '${requiredModel}' not in Ollama  --  attempting auto-pull...`, 'warn');
        const pullResult = await spawnAsync('ollama', ['pull', requiredModel], { timeoutSecs: 600, capture: false });
        if (pullResult.status === 0) {
          tui.log(`Model '${requiredModel}' pulled successfully`, 'ok');
          modelOk = true;
        } else {
          const msg = `Required embed model '${requiredModel}' is not available and could not be pulled.`
            + ` Ensure Ollama is running and has internet access, then run the installer to resolve this permanently.`;
          tui.log(msg, 'err');
          crashWrite('FATAL: ' + msg);
          tui.setCompleted(); tui.finish();
          summary('Collection: ' + name, msg);
          return;
        }
      }
    }

    // Warmup embed model (confirms model is loaded and responsive)
    tui.update({ file_sub: `warming up ${EMBED_MODEL} (may take 60s on cold start)...` });
    const _warmupStart = Date.now();
    const _warmupTicker = setInterval(() => {
      const _ws = Math.round((Date.now() - _warmupStart) / 1000);
      tui.update({ file_sub: `warming up ${EMBED_MODEL}... ${_ws}s` });
    }, 2000);
    const _warmupOk = await checkOllama();
    clearInterval(_warmupTicker);
    tui.update({ file_sub: '' });
    if (!_warmupOk) {
      const msg = `Ollama embed model '${EMBED_MODEL}' not responding at ${OLLAMA_HOST}  --  check Ollama is running`;
      tui.log(msg, 'warn'); crashWrite('FATAL: ' + msg);
      tui.setCompleted(); tui.finish(); summary('Collection: ' + name, msg); return;
    }
    tui.update({ file_sub: '' });
    let chunksAdded = 0;
    const embedStart = Date.now();

    for (let fIdx = 0; fIdx < toEmbed.length; fIdx++) {
      await tui.waitIfPaused();

      const [fpath, ext, meta] = toEmbed[fIdx];
      const md5 = meta.source_md5;
      let fsize = 0; try { fsize = fs.statSync(fpath).size; } catch(_) {}

      tui.update({
        batch_n: fIdx + 1,
        current_file: `[${fIdx+1}/${toEmbed.length}] ${path.basename(fpath)}`,
        current_file_path: fpath,
        batch_start: Date.now(),
        embed_chunks_current: 0,
      });

      // Guard: another iteration may have already indexed this (DB check)
      {
        if (dbCheckMd5(md5, _colState[name] && _colState[name].segDir).inTarget) {
          logFile(`INFO dupe (db-guard): ${path.basename(fpath)} [${md5.slice(0,8)}]`);
          continue;
        }
      }

      // Show what we're doing inside slow files (whisper, large PDFs)
      const _fname = path.basename(fpath);
      const _isAV  = ['.mp4','.mp3','.wav','.m4a','.webm','.mkv','.avi','.ogg','.flac','.mov'].includes(ext);
      const _extHint = _isAV ? 'transcribing' : 'extracting';
      const _extractStart = Date.now();
      // For AV files: tick a live elapsed counter so the user knows whisper is running
      let _tickInterval = null;
      if (_isAV) {
        tui.update({ file_sub: `transcribing: ${_fname}  0s` });
        _tickInterval = setInterval(() => {
          const elapsed = Math.round((Date.now() - _extractStart) / 1000);
          const timeout = WHISPER_TIMEOUT;
          tui.update({ file_sub: `transcribing: ${_fname}  ${elapsed}s / ${timeout}s max` });
        }, 1000);
      } else {
        tui.update({ file_sub: `${_extHint}: ${_fname}` });
      }
      let pages;
      try { pages = await extractText(fpath, ext, meta); }
      catch(e) { logFile(`WARN extract ${_fname}: ${e.message}`); tui.update({ warnings: tui.state.warnings+1 }); }
      finally { if (_tickInterval) clearInterval(_tickInterval); tui.update({ file_sub: '' }); }
      if (!pages || !pages.length) { tui.update({ skipped: tui.state.skipped+1 }); continue; }
      if (!pages || !pages.length) { tui.update({ skipped: tui.state.skipped+1 }); continue; }

      // Resume from partial if this file was interrupted mid-way.
      // Use _colState[name].segDir (authoritative -- immune to renames) to find existing chunks.
      let resumeFrom = 0;
      if (_colState[name]) {
        const _segDir = _colState[name].segDir;
        const _ragPath = path.join(_segDir, 'rag.sqlite3');
        const _binPath = path.join(_segDir, 'data_level0.bin');
        if (fs.existsSync(_ragPath) && fs.existsSync(_binPath)) {
          const _freshBinCount = Math.floor(fs.statSync(_binPath).size / SPE);
          try {
            const _rdb = new (_require('better-sqlite3'))(_ragPath, { readonly: true, fileMustExist: true });
            try {
              const _row = _rdb.prepare(
                'SELECT COUNT(*) as n FROM embeddings e ' +
                'JOIN embedding_metadata em ON em.id=e.id ' +
                "WHERE em.key='source_md5' AND em.string_value=? AND e.id<=?"
              ).get(md5, _freshBinCount);
              if (_row && _row.n > 0) {
                resumeFrom = _row.n;
                logFile(`INFO resume: ${path.basename(fpath)} from chunk ${resumeFrom} [${md5.slice(0,8)}]`);
              }
            } finally { try { _rdb.close(); } catch(_) {} }
          } catch(_) {}
        }
      }
      let consecutiveEmbedFails = 0;

      let chunkIdx = 0, fileChunksAdded = 0;
      // Compute total chunks once before the loop (not O(pages^2) per page)
      const _cszPre = chunkSizeForExt(ext);
      const totalChunks = pages.reduce((a,p) => a + chunkText(p.text, _cszPre).length, 0);
      // Expose current file's size+chunks so draw() can seed ratio before first file completes
      tui.update({ current_file_chunks: totalChunks, current_file_bytes: fsize });

      if (resumeFrom > 0) {
        tui.update({ file_sub: `resuming from chunk ${resumeFrom}` });
        // Exclude already-done portion from bytes_total so ratio stays session-only
        const doneFrac = totalChunks > 0 ? Math.min(resumeFrom / totalChunks, 1) : 0;
        const alreadyBytes = Math.round(fsize * doneFrac);
        tui.update({ embed_bytes_total: Math.max(0, tui.state.embed_bytes_total - alreadyBytes) });
      }
      let pageN = 0;
      for (const { text, metadata } of pages) {
        pageN++;
        const _csz = chunkSizeForExt(ext);
        const chunks = chunkText(text, _csz);
        for (const chunk of chunks) {
          // Skip already-written chunks (partial resume)
          if (chunkIdx < resumeFrom) { chunkIdx++; continue; }

          const _chunkStart = Date.now();
          const _chunkN = chunkIdx + 1;
          const _chunkOf = totalChunks > 0 ? ` of ~${totalChunks}` : '';
          tui.update({ file_sub: `embedding chunk ${_chunkN}${_chunkOf}...` });
          if (!chunk || !chunk.trim()) { chunkIdx++; continue; } // skip empty chunks
          let vec;
          try { vec = await embedText(chunk); consecutiveEmbedFails = 0; }
          catch(e) {
            consecutiveEmbedFails++;
            logFile(`WARN embed ${path.basename(fpath)} chunk ${chunkIdx}: ${e.message} (fail ${consecutiveEmbedFails})`);
            tui.update({ warnings: tui.state.warnings+1,
              file_sub: `embed failed ${consecutiveEmbedFails}x: ${e.message.slice(0,40)}` });
            if (consecutiveEmbedFails >= 3) {
              logFile(`WARN skipping ${path.basename(fpath)}: 3 consecutive embed failures`);
              tui.log(`Skipping ${path.basename(fpath)}: embed failed ${consecutiveEmbedFails}x (${e.message.slice(0,40)})`, 'warn');
          logFile(`WARN skip embed ${path.basename(fpath)}: ${consecutiveEmbedFails}x failures: ${e.message}`);
              break;
            }
            chunkIdx++; continue;
          }

          // Write immediately  --  don't accumulate
          const cleanMeta = {};
          for (const [k,v] of Object.entries(metadata)) cleanMeta[k] = String(v);
          // Log source_rel_path for first chunk of each file to verify correct storage
          if (chunkIdx === 0 && cleanMeta.source_rel_path) {
            logFile(`INFO source_rel_path: ${cleanMeta.collection}/${cleanMeta.source_rel_path}`);
          }
          try {
            const r = await bridgeCall({ action: 'add', name,
              ids: [`${md5}_${chunkIdx}`],
              embeddings: [vec],
              documents: [chunk],
              metadatas: [cleanMeta],
            });
            if (!r.ok) throw new Error(r.error || 'bridge error');
          } catch(e) {
            logFile(`WARN index add chunk ${chunkIdx} ${path.basename(fpath)}: ${e.message}`);
            tui.update({ warnings: tui.state.warnings+1 });
            chunkIdx++; continue;
          }

          chunkIdx++; fileChunksAdded++;

          const _chunkMs = Date.now() - _chunkStart;
          const _chunkSec = (_chunkMs/1000).toFixed(1);
          const _newActiveStart = tui.state.embed_active_start || Date.now();
          const _newCurrent = tui.state.embed_chunks_current + 1;
          tui.update({
            chunks_embedded: tui.state.chunks_embedded + 1,
            embed_chunks_current: _newCurrent,
            embed_active_start: _newActiveStart,
            file_sub: `chunk ${chunkIdx}${totalChunks>0?' /~'+totalChunks:''} done (${_chunkSec}s)`,
          });
          // Freeze embedding snap on chunk completion -- draw() reads this
          const _ts = tui.state;
          const _bElSec = (_ts.batch_start ? (Date.now() - _ts.batch_start) / 1000 : 0);
          const _bElStr = _bElSec < 60 ? `${Math.floor(_bElSec)}s`
            : `${Math.floor(_bElSec/60)}m${Math.floor(_bElSec%60)}s`;
          tui.snapEmbed({
            eCdone: _ts.embed_chunks_done + _newCurrent,
            eBdone: _ts.embed_bytes_done,
            bTotal: _ts.embed_bytes_total,
            filesD: _ts.embed_files_done || 0,
            curChunks: totalChunks,
            curBytes: fsize,
            activeStart: _newActiveStart,
            chunks: _ts.chunks,
            bN: _ts.batch_n,
            bTot: Math.max(_ts.batch_total, 1),
            bElStr: _bElStr,
            pausedMs: _ts.paused_ms || 0,
          });
          chunksAdded++;
          tui.update({ chunks: existingCount + chunksAdded });
        }
      }

      if (fileChunksAdded === 0 && resumeFrom === 0) continue;

      // For pdf2txt entries, register under the txt_md5 key
      const regMd5 = meta.txt_md5 || md5;
      const regPath = fpath;
      registerIndexed(regMd5, regPath, name, chunkIdx, {}, srcDir);

      // Commit completed file bytes+chunks to ratio fields (completed files only)
      tui.update({
        embed_bytes_done: tui.state.embed_bytes_done + fsize,
        embed_chunks_done: tui.state.embed_chunks_done + tui.state.embed_chunks_current,
        embed_chunks_current: 0,
        current_file_chunks: 0,
        current_file_bytes: 0,
        embed_files_done: tui.state.embed_files_done + 1,
        file_sub: '',
      });
    }

    // Flush newly embedded vectors before cross-indexing to keep them separate
    _flushHnsw(name);

    // Process cross-index entries (files that were dupes but from another collection)
    if (toCrossIndex.length) {
      const crossTotal = await crossIndexBatch(toCrossIndex, name, srcDir, tui);
      if (crossTotal > 0) {
        _flushHnsw(name);
        chunksAdded += crossTotal;
        logFile(`INFO cross-indexed ${crossTotal} chunks for ${toCrossIndex.length} dupe files in ${name}`);
        tui.log(`Cross-indexed ${crossTotal} chunks from other collections`, 'ok');
      }
    }

    tui.setCompleted();
    tui.update({ stage: 'Complete', chunks: existingCount + chunksAdded });
    tui.log(`Done: +${chunksAdded.toLocaleString()} chunks  total ${(existingCount+chunksAdded).toLocaleString()}  time ${((Date.now()-embedStart)/1000).toFixed(0)}s`, 'ok');
    logFile(`INFO done: +${chunksAdded} chunks total ${existingCount+chunksAdded} time ${((Date.now()-embedStart)/1000).toFixed(0)}s`);
    warnSummary(tui, name);

    // Hold TUI for a moment so user can read final state
    await new Promise(r => setTimeout(r, 1500));
    tui.finish();

    summary(
      `Collection: ${name}`,
      `Chunks added:  ${chunksAdded.toLocaleString()}`,
      `Total chunks:  ${(existingCount + chunksAdded).toLocaleString()}`,
    );

  } catch(e) {
    if (e instanceof StopRequested || tui.isStopped()) {
      tui.finish();
      summary(`Stopped: ${name}`, `Chunks so far: ${tui.state.chunks}`, `Run ingest again to resume.`);
    } else {
      crashWrite(`EXCEPTION ${name}: ${e.message}\n${e.stack}`);
      tui.log(`[FATAL] ${e.message}`, 'err');
      await new Promise(r => setTimeout(r, 1500));
      tui.finish();
      throw e;
    }
  }
}

// ── ENTRY POINT ────────────────────────────────────────────────────────────────
async function main() {
  // CLI driver: destructively parse flags out of argv, dispatch one-shot
  // subcommands, then ingest the requested (or all discovered) collections.
  const argv = process.argv.slice(2);

  // Remove a boolean flag from argv; returns whether it was present.
  const takeFlag = (flag) => {
    const at = argv.indexOf(flag);
    if (at < 0) return false;
    argv.splice(at, 1);
    return true;
  };

  // Remove "--flag value" from argv and return the value; returns null (and
  // leaves argv untouched) when the flag is absent or has no following value.
  const takeValue = (flag) => {
    const at = argv.indexOf(flag);
    if (at < 0 || !argv[at + 1]) return null;
    const value = argv[at + 1];
    argv.splice(at, 2);
    return value;
  };

  // Remove "--flag v1 v2 ..." (values up to the next dash-prefixed argument);
  // returns the collected values, or null when none followed the flag.
  const takeValues = (flag) => {
    const at = argv.indexOf(flag);
    if (at < 0) return null;
    argv.splice(at, 1);
    const values = [];
    while (argv[at] && !argv[at].startsWith('-')) values.push(argv.splice(at, 1)[0]);
    return values.length ? values : null;
  };

  const doList = takeFlag('--list') || takeFlag('-l');
  // --reset intentionally removed: no mechanism to delete collections exists
  const names = takeValues('-c') || takeValues('--collection');

  // ZIP policy resolution order: CLI value (alias-normalized), then Config
  // (alias-normalized), then the literal default 'never'.
  const ZIP_ALIAS = { y:'always', n:'never', a:'ask', yes:'always', no:'never' };
  let zipPol = takeValue('--zip');
  if (zipPol) zipPol = ZIP_ALIAS[zipPol] || zipPol;
  if (!zipPol) zipPol = ZIP_ALIAS[cfg('ZIP_POLICY','never')] || cfg('ZIP_POLICY','never') || 'never';

  const doFailed    = takeFlag('--failed') || takeFlag('--unindexed');
  const doUpgradeDb = takeFlag('--upgrade-db');

  // One-shot subcommands: run, close the bridge, and return.
  if (doList)      { await cmdList();      closeBridge(); return; }
  if (doFailed)    { await cmdFailed();    closeBridge(); return; }
  if (doUpgradeDb) { await cmdUpgradeDb(); closeBridge(); return; }

  const cols = discoverCollections();
  if (Object.keys(cols).length === 0) {
    console.error(`  [ERROR] No collections found in ${SOURCE_DIR}`);
    closeBridge(); process.exit(1);
  }

  // Without -c, every discovered collection is a target.
  const targets = names || Object.keys(cols);
  if (names) {
    logFile(`Targeting collection(s): ${targets.join(', ')}`);
  } else {
    logFile(`No -c flag: ingesting all ${targets.length} collection(s): ${targets.join(', ')}`);
  }

  for (const name of targets) {
    if (!(name in cols)) {
      console.error(`  [ERROR] No directory for collection: ${name}`);
      console.error(`  Available: ${Object.keys(cols).join(', ')}`);
      continue;
    }
    try {
      await ingestCollection(name, cols[name].source_dir, { reset: false, zipPolicy: zipPol });
    } catch (e) {
      // One failing collection must not abort the remaining targets.
      crashWrite(`FATAL ${name}: ${e.message}\n${e.stack}`);
      console.error(`\n  [FATAL] ${name}: ${e.message}`);
    }
  }

  writePathMap(loadDedup());
  console.log('  path_map.json updated.');
  closeBridge();
}

// ── TERMINAL CLEANUP -- always restore cursor and raw mode on any exit ────────
function terminalCleanup() {
  // Best-effort terminal restore; each step is isolated so a failure in one
  // (e.g. stdout already closed) cannot prevent the others from running.
  const emit = (seq) => { try { process.stdout.write(seq); } catch (_) {} };
  emit('\x1b[?25h');                                  // make the cursor visible again
  try {
    if (process.stdin.isTTY) process.stdin.setRawMode(false);
  } catch (_) {}
  emit('\x1b[?1049l');                                // leave the alternate screen if it was entered
}
// The 'exit' handler runs on every exit path, including the explicit
// process.exit() calls in the signal handlers below.
process.on('exit',    ()  => terminalCleanup());
// 130 and 143 follow the 128+signal-number shell convention (SIGINT=2, SIGTERM=15).
process.on('SIGINT',  ()  => { terminalCleanup(); process.exit(130); });
process.on('SIGTERM', ()  => { terminalCleanup(); process.exit(143); });

// Launch the CLI. Any exception that escapes main() is written to the crash
// log, echoed to stderr, and converted into a non-zero exit after the bridge
// is shut down.
main().catch(err => {
  crashWrite(`FATAL: ${err.message}\n${err.stack}`);
  console.error(`[FATAL] ${err.message}`);
  closeBridge();
  process.exit(1);
});
RAGWEED-ING-v1.0.102-20260319-000014-473
chmod +x "$SCRIPTS_DIR/ingest.js"   # make the generated script executable
ok "ingest.js written"

# STEP 13: system_prompt.txt (never overwrite if exists)
# =============================================================================
ph "STEP 13: System prompt"

if [ ! -f "$PROJECT_DIR/system_prompt.txt" ]; then
    # First install only: seed a default system prompt. The quoted heredoc
    # delimiter keeps the body literal (no expansion) and doubles as the
    # RAGWEED-SYS marker listed in the manifest at the top of this installer.
    cat > "$PROJECT_DIR/system_prompt.txt" << 'RAGWEED-SYS-v1.0.102-20260319-000014-473'
You are a research assistant with access to a curated knowledge base on cybersecurity,
topical information assembled by the supplier of this RAGWeed content.
Answer questions based strictly on the retrieved sources provided to you.
Use plain prose. Cite sources inline as [N]. Do not use bullet points or headers.
Be precise and scholarly. If sources are insufficient, say so explicitly.
RAGWEED-SYS-v1.0.102-20260319-000014-473
    ok "system_prompt.txt created"
else
    # Reinstall/upgrade path: the user may have customized the prompt -- keep it.
    ok "system_prompt.txt exists  --  preserved"
fi

# STEP 13b: annotation_prompt.txt (never overwrite if exists)
# =============================================================================
ph "STEP 13b: Annotation prompt"

if [ ! -f "$SCRIPTS_DIR/annotation_prompt.txt" ]; then
    # First install only: default prompt used when annotating retrieved
    # excerpts. Quoted delimiter => body written literally, no expansion.
    cat > "$SCRIPTS_DIR/annotation_prompt.txt" << 'RAGWEED-ANP-v1.0.102-20260319-000014-473'
Judge relevance by conceptual and semantic content, not literal word matches -- ignore spelling variations, capitalisation differences, and phrasing differences. Write one concise sentence (3-5 lines maximum) explaining how this excerpt relates to the query. Only respond IRRELEVANT (one word, nothing more) if the excerpt has no meaningful connection to the subject matter of the query. If the excerpt contains one or more relevant quotes, include them verbatim in the sentence. Output only the sentence or the word IRRELEVANT, no preamble, no extra commentary.
RAGWEED-ANP-v1.0.102-20260319-000014-473
    ok "scripts/annotation_prompt.txt created"
else
    # Reinstall/upgrade path: preserve any user customization.
    ok "scripts/annotation_prompt.txt exists  --  preserved"
fi

ph "STEP 13b2: help.txt"

cat > "$SCRIPTS_DIR/help.txt" << 'RAGWEED-HLP-v1.0.102-20260319-000014-473'
RAGWeed -- Usage: ./run.sh [command]

QUERY
  ./run.sh                     TUI query interface (interactive)
  ./run.sh query "text"        One-shot: run query, print answer, exit
  ./run.sh query -c <col> "text"           One collection
  ./run.sh query -c <c1> -c <c2> "text"   Multiple collections
  ./run.sh web                 Web query interface (port WEB_PORT, default 3000)

CONFIGURATION
  ./run.sh webc                Web config UI (port WEBC_PORT, default 3001)
  ./run.sh config              Interactive TUI configuration menu

INGEST
  ./run.sh ingest              Index documents from SOURCE_DIR
    -c <name>               Collection name (default: basename of SOURCE_DIR)
    --zip always|never|ask     ZIP file handling policy
    --list                     List collections and exit
    --failed                   List unindexed (zero-chunk) files and exit
    --upgrade-db               Migrate dedup.json to SQLite (one-time)

ANNOTATION TESTING
  ./run.sh annotate            Auto-test local models using Config settings
    --auto                     Test RAM-fitting models, stop at threshold
    --all                      Test all models regardless of RAM or threshold
    --model <name>             Test a specific model only
    --threshold <n>            F1 threshold % override
    --verbose                  Show full model responses

UTILITIES
  ./run.sh logs                Show recent query logs
  ./run.sh diagnose            Check Node.js, Ollama, collections, Config
  ./run.sh man                 Display the manual page
  ./run.sh install             Re-run installer (preserves Config and data)
  ./run.sh help                Show this help

INGEST CONFIG KEYS  (tune via webc Ingest tab or set in Config)
  INGEST_PROFILE       low|medium|high  (RAM-based preset)
                       low=<4GB RAM, medium=4-8GB, high=>8GB
  CHUNK_SIZE           Tokens per chunk for text/code files
  CHUNK_SIZE_PDF       Tokens per chunk for PDF files
  CHUNK_SIZE_AV        Tokens per chunk for audio/video transcripts
  CHUNK_OVERLAP_PCT    Overlap between chunks
  EMBED_TIMEOUT_S      Seconds per Ollama embed call
  EMBED_BACKOFF_S      Seconds between embed retries
  WHISPER_TIMEOUT_S    Seconds allowed per audio/video transcription

ANNOTATION CONFIG KEYS  (set via webc Debug/Optimize tab)
  ANNOTATION_MODEL                   Local model for annotation
  ANNOTATION_TEST_THRESHOLD          F1% threshold for auto mode (default: 95)
  ANNOTATION_TEST_STOP_AT_THRESHOLD  Stop at first passing model (yes/no)
  ANNOTATION_TEST_FIT_ONLY           Only test RAM-fitting models (yes/no)

The Advise button in webc (Ingest section) shows hardware-tuned
recommendations. Profile presets apply all related keys at once.
Changing profile never deletes index data.
RAGWEED-HLP-v1.0.102-20260319-000014-473
ok "scripts/help.txt written"

ph "STEP 13c: prompts.json (per-provider prompts)"

if [ ! -f "$SCRIPTS_DIR/prompts.json" ]; then
    # Migrate existing .txt files into JSON if present
    node << 'RAGWEED-PROMPTS-INIT'
const fs = require('fs');
const path = require('path');
const sd = process.env.SCRIPTS_DIR || path.join(process.env.PROJECT_DIR || process.cwd(), 'scripts');
const pd = process.env.PROJECT_DIR || process.cwd();
const out = path.join(sd, 'prompts.json');
let sysDefault = '';
let annotDefault = '';
try { sysDefault = fs.readFileSync(path.join(pd, 'system_prompt.txt'), 'utf8').trim(); } catch(_) {}
try { annotDefault = fs.readFileSync(path.join(sd, 'annotation_prompt.txt'), 'utf8').trim(); } catch(_) {}
const annotLocal = 'Does this excerpt relate to the query?\nIf yes: write one sentence explaining how.\nIf no: reply with only the word IRRELEVANT.\nNo other text. No explanation. No preamble.';
const prompts = {
  system_default: sysDefault,
  system_claude: '', system_openai: '', system_gemini: '', system_local: '',
  annotation_default: annotDefault,
  annotation_claude: '', annotation_openai: '', annotation_gemini: '',
  annotation_local: annotLocal
};
fs.writeFileSync(out, JSON.stringify(prompts, null, 2));
console.log('  ok  prompts.json created');
RAGWEED-PROMPTS-INIT
else
    ok "scripts/prompts.json exists  --  preserved"
fi

ph "STEP 13d: annotation_test_runner.cjs"

# Always write (managed file -- overwrite on upgrade)
cat > "$SCRIPTS_DIR/annotation_test_runner.cjs" << 'RAGWEED-ATR-v1.0.102-20260319-000014-473'
// RAGWEED_VERSION=1.0.102
// annotation_test_runner.cjs -- RAGWeed annotation prompt test runner
// Copyright (c) Fred Cohen, 2026 - ALL RIGHTS RESERVED - Patents: https://all.net/Notices.html
// Standalone: node scripts/annotation_test_runner.cjs [--model <n>] [--verbose] [--auto]
// Module:     const runner = require('./annotation_test_runner.cjs');
//             runner.run({ model, prompt, ollamaHost, onResult, onDone })
'use strict';
const fs   = require('fs');
const path = require('path');

const TEST_CASES = [
  // ── Clear IRRELEVANT: completely unrelated domains ──────────────────────
  { id:'irr-001', category:'clear-irrelevant', expected:'IRRELEVANT',
    query: 'What are the best practices for securing a corporate network?',
    excerpt: `The cultivation of sourdough bread relies on a symbiotic culture of wild yeast and lactic acid bacteria. The starter must be maintained through regular feedings of flour and water, typically at a ratio of one part starter to one part water to one part flour by weight. Fermentation temperature dramatically affects flavor development: cooler temperatures slow activity and encourage more acidic notes from lactobacilli, while warmer temperatures favor yeast activity and a milder tang. Bulk fermentation typically takes between four and twelve hours depending on ambient temperature and starter strength. Shaping requires developing surface tension without degassing the dough excessively. The final proof can occur at room temperature for two to four hours or retarded overnight in the refrigerator, which further develops flavor complexity. Scoring the loaf before baking allows controlled expansion and prevents blowouts. Baking in a covered Dutch oven for the first twenty minutes traps steam and produces a thin, blistered crust with good ear development.`,
    notes: 'Bread baking vs network security -- no connection' },

  { id:'irr-002', category:'clear-irrelevant', expected:'IRRELEVANT',
    query: 'How does malware evade antivirus detection?',
    excerpt: `The migration of monarch butterflies represents one of the most remarkable navigational feats in the animal kingdom. Each autumn, eastern North American monarchs travel up to four thousand kilometers from their summer breeding grounds to overwintering sites in the oyamel fir forests of central Mexico. Individual butterflies undertake this journey without prior experience of the route, relying on a time-compensated sun compass that integrates circadian clock information with solar position. On overcast days, monarchs can navigate using polarized light patterns detected through specialized photoreceptors. The butterflies also appear to use the Earth's magnetic field as a backup orientation system. Spring remigration northward occurs in multiple generations, with successive cohorts leapfrogging northward as milkweed becomes available. The entire round trip spans three to four generations, yet the population reliably returns to the same overwintering groves each year. Conservation threats include habitat loss along migratory corridors, milkweed decline due to herbicide use, and deforestation of overwintering forests.`,
    notes: 'Butterfly migration vs malware -- no connection' },

  { id:'irr-003', category:'clear-irrelevant', expected:'IRRELEVANT',
    query: 'What risk management frameworks apply to financial institutions?',
    excerpt: `Tidal locking occurs when the gravitational gradient across an orbiting body causes it to rotate at the same rate as its orbital period, keeping one face permanently toward its primary. The Moon is the most familiar example: its synchronous rotation means the same hemisphere always faces Earth. This state is reached over geological timescales through tidal dissipation, which converts rotational kinetic energy into heat within the tidally stressed body. The timescale for tidal locking scales inversely with the mass of the primary and the cube of the orbital distance, meaning close-in bodies around massive primaries lock quickly. Many exoplanets in the habitable zones of red dwarf stars are expected to be tidally locked, raising questions about atmospheric circulation and climate habitability. The permanent dayside of a locked planet would experience intense irradiation while the nightside would be perpetually dark, potentially causing atmospheric collapse through condensation of gases on the cold nightside.`,
    notes: 'Planetary science vs financial risk -- no connection' },

  { id:'irr-004', category:'clear-irrelevant', expected:'IRRELEVANT',
    query: 'How do you implement zero trust architecture?',
    excerpt: `The Maillard reaction is a non-enzymatic browning process that occurs when amino acids and reducing sugars are heated together, producing hundreds of distinct flavor and aroma compounds. Named after French chemist Louis-Camille Maillard who first described it in 1912, the reaction proceeds optimally above 140 degrees Celsius and is inhibited by water, which is why moisture must be driven from a food surface before browning can occur. The reaction is responsible for the characteristic flavors of roasted coffee, seared meat, toasted bread, and grilled vegetables. Unlike caramelization, which involves only sugars, the Maillard reaction requires a nitrogen source in the form of amino acids. The specific flavor compounds produced depend on which amino acids and sugars are present, the temperature, the pH, and the water activity of the food. Controlling these variables allows cooks to achieve targeted flavor profiles in complex dishes.`,
    notes: 'Food chemistry vs zero trust -- no connection' },

  // ── Clear RELEVANT: direct specific match ──────────────────────────────
  { id:'rel-001', category:'clear-relevant', expected:'RELEVANT',
    query: 'What are the best practices for securing a corporate network?',
    excerpt: `Network segmentation divides a corporate infrastructure into isolated zones so that a compromise in one area cannot spread freely to others. The most common approach uses VLANs at the switching layer combined with firewall policies that enforce explicit permit rules between segments. The principle of least privilege should guide all inter-segment rules: only the specific ports and protocols required for legitimate business functions should be allowed. A demilitarized zone should separate internet-facing services from the internal network, with no direct routing between the DMZ and sensitive internal systems. Privileged management networks should be isolated from general user traffic, and out-of-band management channels should be used for network device administration. Microsegmentation using software-defined networking extends these principles to east-west traffic within data centers. Regular penetration tests and red team exercises validate that segmentation controls cannot be bypassed through misconfigured rules, dual-homed hosts, or spanning tree anomalies. Logging and alerting on inter-segment traffic violations provides early warning of lateral movement attempts.`,
    notes: 'Direct match -- network security best practices with specifics' },

  { id:'rel-002', category:'clear-relevant', expected:'RELEVANT',
    query: 'How does malware evade antivirus detection?',
    excerpt: `Antivirus evasion has evolved from simple signature modification to sophisticated multi-layered techniques. Polymorphic malware uses an encryption engine that re-encrypts the payload with a different key on each infection, changing the binary signature while preserving functionality. Metamorphic engines go further by rewriting the entire code without encryption, substituting equivalent instruction sequences, inserting junk instructions, and transposing independent code blocks. Packers and crypters wrap the payload in a protective layer that decompresses or decrypts only at runtime, causing static analysis to see only the packer stub rather than the malicious payload. Process hollowing injects malicious code into the memory of a legitimately-spawned process after emptying its original code, allowing the malware to execute under a trusted process identity. Living-off-the-land techniques abuse built-in system tools such as PowerShell, WMI, and certutil to perform malicious actions without dropping any binary to disk, bypassing file-based detection entirely. Fileless malware stores its payload in the registry or in memory only, leaving minimal forensic artifacts.`,
    notes: 'Direct match -- malware evasion techniques with specifics' },

  { id:'rel-003', category:'clear-relevant', expected:'RELEVANT',
    query: 'What risk management frameworks apply to financial institutions?',
    excerpt: `Financial institutions operate under multiple overlapping risk management frameworks that address different dimensions of institutional risk. Basel III, developed by the Basel Committee on Banking Supervision, establishes minimum capital adequacy ratios, liquidity coverage requirements, and leverage limits for internationally active banks. The net stable funding ratio introduced under Basel III requires banks to maintain sufficient stable funding relative to their asset profile over a one-year horizon. COSO ERM provides a broader enterprise risk management framework applicable across industries but widely adopted in financial services for its structured approach to identifying, assessing, and responding to risks across strategic, operational, reporting, and compliance categories. The NIST Cybersecurity Framework has been adopted by many financial institutions to manage technology risk alongside traditional financial risk frameworks. Stress testing regimes mandated by regulators such as the Federal Reserve require institutions to demonstrate capital adequacy under severely adverse macroeconomic scenarios. Internal capital adequacy assessment processes require boards to make forward-looking judgments about capital needs beyond minimum regulatory requirements.`,
    notes: 'Direct match -- financial risk frameworks with specifics' },

  { id:'rel-004', category:'clear-relevant', expected:'RELEVANT',
    query: 'How do you implement zero trust architecture?',
    excerpt: `Zero trust architecture implementation begins with a thorough discovery phase to enumerate all users, devices, applications, and data flows across the organization. Traditional perimeter-based security assumed that anything inside the network boundary was trustworthy; zero trust discards this assumption entirely and requires every access request to be authenticated, authorized, and continuously validated regardless of network location. Identity becomes the new perimeter: every user and device must authenticate with strong multi-factor authentication before accessing any resource. Device health attestation verifies that endpoints meet security posture requirements such as current patch levels and enabled endpoint protection before granting access. Microsegmentation replaces flat network architectures with fine-grained access controls that limit which workloads can communicate with which. Just-in-time and just-enough-access provisioning ensures that elevated privileges are granted only when needed and automatically revoked. Comprehensive logging of all access decisions feeds a security information and event management system for anomaly detection and incident response.`,
    notes: 'Direct match -- zero trust implementation steps' },

  // ── Hard cases: tangential or partially relevant ───────────────────────
  { id:'hard-001', category:'hard-tangential', expected:'RELEVANT',
    query: 'What are the best practices for securing a corporate network?',
    excerpt: `Physical security controls form the foundation of any comprehensive security program and are frequently underestimated relative to technical controls. Tailgating attacks, where an unauthorized individual follows an authorized employee through a controlled door, bypass electronic access controls entirely and require procedural countermeasures such as security culture training and mantrap vestibules in high-security areas. Server rooms and data centers should be protected by multi-factor physical access controls, with all entries and exits logged and reviewed regularly. Unattended workstations in common areas represent easy targets for malicious USB device insertion or cold boot attacks against memory-resident credentials. Cable security and clean desk policies reduce the risk of sensitive information exposure. Surveillance cameras with sufficient retention periods support forensic investigations after incidents. Physical access logs should be correlated with logical access logs to detect anomalies such as a user's badge accessing a facility while their account is simultaneously being used from a remote location.`,
    notes: 'Physical security is legitimately part of corporate security posture' },

  { id:'hard-002', category:'hard-tangential', expected:'RELEVANT',
    query: 'How does malware evade antivirus detection?',
    excerpt: `Software protection and obfuscation tools are widely used by commercial software vendors to defend their intellectual property against reverse engineering. Code obfuscators rename variables and functions to meaningless identifiers, insert semantically equivalent but syntactically different instruction sequences, flatten control flow graphs to eliminate recognizable loop and branch structures, and encrypt string constants that would otherwise reveal program logic. These transformations make static analysis difficult and time-consuming without changing the program's external behavior. Virtual machine protectors go further by translating native code into a proprietary bytecode executed by a custom interpreter, forcing analysts to reverse engineer the interpreter before they can understand the protected code. While developed for legitimate software protection, these same techniques are extensively applied by malware authors to hinder analysis and signature creation by antivirus vendors.`,
    notes: 'Obfuscation techniques directly overlap with malware evasion methods' },

  { id:'hard-003', category:'hard-tangential', expected:'IRRELEVANT',
    query: 'What risk management frameworks apply to financial institutions?',
    excerpt: `Healthcare organizations have increasingly adopted formal enterprise risk management programs modeled on practices from financial services and influenced by regulatory expectations from accreditation bodies. Clinical risk encompasses patient safety events, adverse outcomes, and malpractice exposure. Operational risk covers supply chain disruption, workforce shortages, and facility failures. Compliance risk addresses the complex web of federal and state regulations governing patient privacy, billing practices, and clinical quality reporting. Many healthcare systems have implemented three lines of defense models with clinical departments as the first line, risk management and compliance functions as the second, and internal audit as the third. The Joint Commission and other accreditation bodies evaluate risk management maturity as part of their survey processes. Despite structural similarities with financial risk frameworks, healthcare risk management operates under distinct regulatory regimes and liability environments that differ substantially from banking and insurance.`,
    notes: 'Healthcare risk mgmt -- explicitly different from financial institutions per last sentence' },

  { id:'hard-004', category:'hard-tangential', expected:'RELEVANT',
    query: 'How do you implement zero trust architecture?',
    excerpt: `The widespread shift to remote and hybrid work arrangements fundamentally challenged assumptions embedded in traditional corporate security architectures. VPN-centric models that backhauled all remote traffic through a central gateway created performance bottlenecks as the proportion of remote workers increased from a small minority to a majority of the workforce virtually overnight. Security teams discovered that perimeter defenses designed to protect a defined network boundary became ineffective when that boundary dissolved and users accessed resources from home networks, coffee shops, and hotels over connections of unknown security status. The inability to validate device health and enforce consistent security policies across heterogeneous personal and corporate devices exposed significant gaps in visibility and control. These operational pressures accelerated adoption of cloud-delivered security services, identity-centric access controls, and endpoint detection and response capabilities that do not depend on network location -- the core architectural principles of zero trust.`,
    notes: 'Remote work challenges directly motivated and describe zero trust adoption' },

  // ── Format compliance tests ────────────────────────────────────────────
  { id:'fmt-001', category:'format-irrelevant', expected:'IRRELEVANT',
    query: 'How do you implement zero trust architecture?',
    excerpt: `The Amazon basin contains the largest tropical rainforest on Earth, covering approximately 5.5 million square kilometers across nine countries. The forest plays a critical role in the global carbon cycle, storing an estimated 150 to 200 billion tons of carbon in its biomass and soils. Annual precipitation varies from 1500 to over 3000 millimeters depending on location, with most rainfall driven by moisture recycling as water evaporates from forest canopy and re-condenses inland. The river system drains roughly 40 percent of South America and discharges approximately 20 percent of all freshwater entering the world's oceans. Biodiversity within the basin is extraordinary: the region contains roughly 10 percent of all species on Earth, including over 40,000 plant species, 1,300 bird species, and 3,000 freshwater fish species. Deforestation driven by cattle ranching, soy cultivation, and illegal logging has removed approximately 17 percent of the original forest cover, with scientists warning that a tipping point of 20 to 25 percent loss could trigger a transition to savannah across large portions of the eastern basin.`,
    notes: 'Tests clean [IRRELEVANT!!!] output without prefix or explanation' },

  { id:'fmt-002', category:'format-relevant', expected:'RELEVANT',
    query: 'What are the best practices for securing a corporate network?',
    excerpt: `Vulnerability management programs must balance the impracticality of patching everything immediately against the risk of leaving known vulnerabilities exposed. The first step is maintaining a complete and current asset inventory, since vulnerabilities cannot be managed in assets that are unknown. Scanning frequency should be calibrated to asset criticality and exposure: internet-facing systems warrant daily scanning while internal workstations may be scanned weekly. Vulnerability findings should be triaged using a combination of CVSS base scores, exploitability intelligence from sources such as CISA's known exploited vulnerabilities catalog, and asset criticality derived from the organization's business impact analysis. Critical vulnerabilities with known active exploitation should be remediated within 24 to 72 hours on internet-facing systems. Compensating controls such as network segmentation or web application firewalls may be acceptable for vulnerabilities that cannot be immediately patched. Metrics including mean time to remediate and patch coverage percentage should be reported to leadership regularly. Exception management processes formalize risk acceptance for vulnerabilities that cannot be remediated within policy timelines.`,
    notes: 'Tests that response includes direct quotes from the excerpt' },

  // ── Edge cases ─────────────────────────────────────────────────────────
  { id:'edge-001', category:'edge-case', expected:'IRRELEVANT',
    query: 'What are the best practices for securing a corporate network?',
    excerpt: `Quarterly review meeting agenda: 1. Review of Q3 sales figures by region. 2. Update on new product launch timeline. 3. HR policy changes effective January. 4. Facilities update -- parking lot resurfacing scheduled for November. 5. Any other business. Please come prepared with your department reports. Refreshments will be provided. Meeting duration estimated at ninety minutes. Please confirm attendance with the executive assistant by end of week. Dial-in information for remote participants will be distributed separately.`,
    notes: 'Short content-free administrative text' },

  { id:'edge-002', category:'edge-case', expected:'IRRELEVANT',
    query: 'How does malware evade antivirus detection?',
    excerpt: `Security awareness is increasingly recognized as a critical component of any organization's defense posture. Employees at all levels represent both the greatest vulnerability and potentially the strongest line of defense against social engineering attacks. Training programs should cover phishing recognition, safe browsing habits, password hygiene, physical security awareness, and incident reporting procedures. Regular simulated phishing exercises provide measurable data on employee susceptibility and the effectiveness of training interventions. Executive buy-in is essential for security culture programs to succeed, as visible leadership engagement signals that security is a genuine organizational priority rather than a compliance checkbox. Security awareness programs should be tailored to different roles and risk profiles rather than delivering identical content to all employees. Metrics such as phishing simulation click rates, training completion rates, and reported incident rates help demonstrate program effectiveness and identify areas requiring additional attention.`,
    notes: 'Generic security awareness content -- no specific malware evasion techniques' }
];

// Failure-mode classifiers used by diagnose().  Each .test is invoked as
// test(expected, rawReply, rawReply): `a` is the model's RAW free-text reply,
// NOT the normalized verdict.  The previous version compared `a` directly to
// the tokens 'RELEVANT'/'IRRELEVANT', so fn-stretch could never fire (a raw
// reply cannot equal 'RELEVANT' and also match the hedging regex) and
// fn-miss/fn-noquote misclassified.  isIrrelevantToken mirrors the reduction
// normalize() applies, duplicated here so each classifier is self-contained.
const isIrrelevantToken = (t) =>
  !t || t.trim().length < 5 ||
  t.replace(/[^a-zA-Z]/g, '').toUpperCase() === 'IRRELEVANT';
const FAILURE_MODES = {
  'fp-verbose': { test:(e,a)=>e==='IRRELEVANT'&&!isIrrelevantToken(a)&&/does not (directly |)relate|not relevant|unrelated|no (direct |)connection/i.test(a), desc:'Explains irrelevance instead of [IRRELEVANT!!!]' },
  'fp-echo':    { test:(e,a)=>e==='IRRELEVANT'&&/if the excerpt|reply with|option [12]/i.test(a), desc:'Echoes prompt instructions in response' },
  'fp-prefix':  { test:(e,a)=>e==='IRRELEVANT'&&/^[12]\.|^option [12]/i.test(a.trim()), desc:'Adds numbered prefix to [IRRELEVANT!!!]' },
  'fn-stretch': { test:(e,a)=>e==='IRRELEVANT'&&!isIrrelevantToken(a)&&/could be|might|implies|suggests|potentially/i.test(a), desc:'Calls irrelevant content relevant via tenuous connection' },
  'fn-miss':    { test:(e,a)=>e==='RELEVANT'&&isIrrelevantToken(a)&&a.length>2, desc:'Marks clearly relevant content as irrelevant' },
  'fn-empty':   { test:(e,a,raw)=>e==='RELEVANT'&&(!raw||raw.trim().length<5), desc:'Returns empty response for relevant content' },
  'fn-noquote': { test:(e,a)=>e==='RELEVANT'&&!isIrrelevantToken(a)&&!/"/.test(a), desc:'Relevant annotation missing direct quotes' },
};

// Reduce a raw model reply to a binary verdict.
// Empty/near-empty replies and any reply whose letters spell exactly
// "IRRELEVANT" (ignoring case and punctuation, e.g. "[IRRELEVANT!!!]")
// collapse to 'IRRELEVANT'; everything else counts as 'RELEVANT'.
function normalize(text) {
  const trimmed = (text || '').trim();
  const lettersOnly = trimmed.replace(/[^a-zA-Z]/g, '').toUpperCase();
  const isIrrelevant = trimmed.length < 5 || lettersOnly === 'IRRELEVANT';
  return isIrrelevant ? 'IRRELEVANT' : 'RELEVANT';
}

// Tally which failure modes each FAILED result matches, then return the
// modes sorted most-frequent-first as [{mode, count, desc}].  Each
// classifier receives (expected, rawReply, rawReply) -- see FAILURE_MODES.
function diagnose(results) {
  const tally = {};
  results.filter(r => !r.pass).forEach(r => {
    const raw = r.rawResponse || '';
    for (const mode of Object.keys(FAILURE_MODES)) {
      if (FAILURE_MODES[mode].test(r.expected, raw, raw)) {
        tally[mode] = (tally[mode] || 0) + 1;
      }
    }
  });
  return Object.entries(tally)
    .sort((x, y) => y[1] - x[1])
    .map(([mode, count]) => ({ mode, count, desc: FAILURE_MODES[mode].desc }));
}

// Map the most frequent diagnosed failure mode to prompt-tuning advice.
// Returns null when the diagnosis list is empty; a generic fallback for
// modes without a dedicated remedy.
function suggestFix(diagnosis) {
  if (!diagnosis.length) return null;
  const adviceByMode = {
    'fp-echo':    'Model echoes instructions -- simplify prompt, remove numbered options, use imperative form only.',
    'fp-prefix':  'Model adds "1." or "Option 1:" prefix -- remove numbered list format from prompt.',
    'fp-verbose': 'Model explains irrelevance instead of [IRRELEVANT!!!] -- add explicit prohibition: "Do not explain. Do not say does not relate."',
    'fn-stretch': 'Model too lenient -- add "direct and specific" qualifier, add examples of what counts as irrelevant.',
    'fn-miss':    'Model too strict -- lower threshold, add examples of what counts as relevant.',
    'fn-empty':   'Model returns empty for relevant content -- model may be too small or confused by prompt format. Try simpler prompt or larger model.',
    'fn-noquote': 'Model omits quotes -- strengthen quote instruction: "You MUST include a verbatim quote."',
  };
  const topMode = diagnosis[0].mode;
  return adviceByMode[topMode] || 'Review failure details above.';
}

// Query Ollama for installed models, excluding embedding-only models and
// (unless fitOnly === false) models unlikely to fit in available RAM.
// Returns [{name, sizeMb}] sorted smallest-first; [] on any failure.
async function getOllamaModels(ollamaHost, fitOnly) {
  // MemAvailable from /proc/meminfo (Linux only); stays null when unreadable.
  let availableMb = null;
  try {
    const meminfo = require('fs').readFileSync('/proc/meminfo', 'utf8');
    const matched = meminfo.match(/MemAvailable:\s+(\d+)/);
    if (matched) availableMb = Math.floor(parseInt(matched[1]) / 1024);
  } catch (_) {}

  try {
    const resp = await fetch(`${ollamaHost}/api/tags`, { signal: AbortSignal.timeout(5000) });
    if (!resp.ok) return [];
    const body = await resp.json();
    const candidates = (body.models || []).map(m => ({
      name: m.name,
      sizeMb: m.size ? Math.round(m.size / 1024 / 1024) : null
    }));
    const usable = candidates.filter(m => {
      // Embedding models cannot chat; never offer them.
      if (/embed|minilm|arctic-embed|nomic-embed|bge|snowflake|mxbai/i.test(m.name)) return false;
      // Loading needs roughly 1.6x the on-disk size; skip models that will not fit.
      if (fitOnly !== false && availableMb && m.sizeMb && Math.round(m.sizeMb * 1.6) > availableMb) return false;
      return true;
    });
    // Smallest first so auto mode probes the cheapest models before large ones.
    return usable.sort((a, b) => (a.sizeMb || 99999) - (b.sizeMb || 99999));
  } catch (_) { return []; }
}

// Execute a single test case against `model` via Ollama's /api/chat.
// Returns {id, category, expected, actual, pass, rawResponse, ms};
// transport/HTTP failures become actual:'ERROR' with the error message
// in rawResponse rather than throwing.
async function runOne(tc, prompt, model, ollamaHost) {
  const fullPrompt = `Query: ${tc.query}\n\nExcerpt:\n${tc.excerpt}\n\n${prompt}`;
  const started = Date.now();
  const record = (actual, pass, rawResponse) => ({
    id: tc.id, category: tc.category, expected: tc.expected,
    actual, pass, rawResponse, ms: Date.now() - started
  });
  try {
    const resp = await fetch(`${ollamaHost}/api/chat`, {
      method: 'POST', headers: { 'Content-Type': 'application/json' },
      signal: AbortSignal.timeout(120000),
      body: JSON.stringify({
        model, stream: false,
        options: { temperature: 0, num_ctx: 3072, num_predict: 300 },
        messages: [{ role: 'user', content: fullPrompt }]
      })
    });
    if (!resp.ok) throw new Error('Ollama HTTP ' + resp.status);
    const payload = await resp.json();
    // Strip <think> chain-of-thought blocks some models emit before answering.
    const raw = (payload.message?.content || '').trim().replace(/<think>[\s\S]*?<\/think>/gi, '').trim();
    const verdict = normalize(raw);
    return record(verdict, verdict === tc.expected, raw);
  } catch (e) {
    return record('ERROR', false, e.message);
  }
}

// Drive every test case sequentially against one model and compute summary
// metrics (pass counts, precision/recall/F1, failure-mode diagnosis).
// Optional callbacks: onResult(result, i, total) after each case,
// onDone(summary, results) at the end, onError(err, testCase) on an
// unexpected throw.  Returns { summary, results }.
async function run({ model, prompt, ollamaHost, onResult, onDone, onError, cases }) {
  const suite = cases || TEST_CASES;
  const results = [];
  for (const tc of suite) {
    try {
      const outcome = await runOne(tc, prompt, model, ollamaHost);
      results.push(outcome);
      if (onResult) onResult(outcome, results.length, suite.length);
    } catch (e) { if (onError) onError(e, tc); }
  }
  const count = (pred) => results.filter(pred).length;
  const passed = count(r => r.pass);
  const tp = count(r => r.pass && r.expected === 'RELEVANT');
  const tn = count(r => r.pass && r.expected === 'IRRELEVANT');
  const fp = count(r => !r.pass && r.expected === 'IRRELEVANT');
  const fn = count(r => !r.pass && r.expected === 'RELEVANT');
  // Degenerate denominators default to a perfect score (tiny custom suites).
  const precision = (tp + fp > 0) ? tp / (tp + fp) : 1;
  const recall = (tp + fn > 0) ? tp / (tp + fn) : 1;
  const f1 = (precision + recall > 0) ? 2 * precision * recall / (precision + recall) : 0;
  const diagnosis = diagnose(results);
  const summary = {
    passed, failed: results.length - passed, tp, tn, fp, fn,
    precision, recall, f1, total: suite.length,
    diagnosis, suggestion: suggestFix(diagnosis)
  };
  if (onDone) onDone(summary, results);
  return { summary, results };
}

// CLI entry point: parse flags, fill unset options from Config (then hard
// defaults), load the annotation prompt from prompts.json if present, and
// run either one model (--model) or auto mode ranking every fitting model.
if (require.main === module) {
  const args = process.argv.slice(2);
  // null = "not given on the command line"; Config or a hard default fills it in below.
  let model=null, promptOverride=null, verbose=false, autoMode=false, allMode=false, f1Threshold=null, stopAtThreshold=null, fitOnly=null;
  for (let i=0;i<args.length;i++) {
    if (args[i]==='--model') model=args[++i];
    else if (args[i]==='--prompt') promptOverride=args[++i];
    else if (args[i]==='--verbose') verbose=true;
    else if (args[i]==='--auto') autoMode=true;
    // --all implies auto mode, tests every model (no early stop) and skips the RAM-fit filter.
    else if (args[i]==='--all') { autoMode=true; allMode=true; stopAtThreshold=false; fitOnly=false; }
    else if (args[i]==='--threshold') f1Threshold=parseFloat(args[++i]);
    else if (args[i]==='--no-stop') stopAtThreshold=false;
    else if (args[i]==='--no-fit-only') fitOnly=false;
  }
  const projDir = process.env.PROJECT_DIR || path.resolve(__dirname, '..');
  let ollamaHost='http://localhost:11434', defaultModel='', defaultPrompt='Reply with EXACTLY ONE of these two options:\n1. If the excerpt is NOT relevant to the query, reply with only the string: [IRRELEVANT!!!]\n2. If the excerpt IS relevant, write one or two sentences explaining how, including direct quotes.\n\nBe strict: tangential connections and vague implications should be [IRRELEVANT!!!].';
  // Config is plain KEY=value lines; a missing/unreadable Config silently keeps the hard defaults.
  try {
    const c=fs.readFileSync(path.join(projDir,'Config'),'utf8');
    const oh=c.match(/^OLLAMA_HOST=(.+)$/m); if(oh)ollamaHost=oh[1].trim();
    const lm=c.match(/^ANNOTATION_MODEL=(.+)$/m)||c.match(/^LOCAL_LLM_MODEL=(.+)$/m);
    if(lm&&lm[1].trim())defaultModel=lm[1].trim();
    // ANNOTATION_TEST_THRESHOLD is a percentage in Config; held as a 0..1 fraction here.
    if(f1Threshold===null){const th=c.match(/^ANNOTATION_TEST_THRESHOLD=(.+)$/m);if(th&&th[1].trim())f1Threshold=parseFloat(th[1].trim())/100;}
    if(stopAtThreshold===null){const st=c.match(/^ANNOTATION_TEST_STOP_AT_THRESHOLD=(.+)$/m);if(st)stopAtThreshold=st[1].trim().toLowerCase()!=='no';}
    if(fitOnly===null){const fo=c.match(/^ANNOTATION_TEST_FIT_ONLY=(.+)$/m);if(fo)fitOnly=fo[1].trim().toLowerCase()!=='no';}
  } catch(_){}
  if(f1Threshold===null) f1Threshold=0.95;
  if(stopAtThreshold===null) stopAtThreshold=true;
  if(fitOnly===null) fitOnly=true;
  // Default to auto mode when no explicit mode or model given -- Config is the authority
  if(!autoMode && !model) autoMode=true;
  // prompts.json (installer-managed) may override the built-in annotation prompt.
  try {
    const p=JSON.parse(fs.readFileSync(path.join(projDir,'scripts','prompts.json'),'utf8'));
    if(p.annotation_local)defaultPrompt=p.annotation_local;
  } catch(_){}
  const prompt=promptOverride||defaultPrompt;

  console.log(`\nRAGWeed Annotation Prompt Test Runner`);
  console.log(`Copyright (c) Fred Cohen, 2026 - ALL RIGHTS RESERVED`);
  console.log(`Host: ${ollamaHost}  Cases: ${TEST_CASES.length}`);

  (async()=>{
    if (autoMode) {
      console.log('\nAuto mode: querying available models...');
      const models = await getOllamaModels(ollamaHost, fitOnly);
      if (!models.length) { console.error('No models found or Ollama unavailable.'); process.exit(1); }
      try { const mem=require('fs').readFileSync('/proc/meminfo','utf8'); const mt=mem.match(/MemAvailable:\s+(\d+)/); if(mt) console.log(`Available RAM: ${Math.floor(parseInt(mt[1])/1024)}MB -- models exceeding this are excluded`); } catch(_){}
      console.log(`Found ${models.length} fitting LLM model(s): ${models.map(m=>m.name+(m.sizeMb?` (${m.sizeMb}MB)`:``)).join(', ')}`);
      console.log(`F1 threshold: ${(f1Threshold*100).toFixed(0)}%${allMode?' (--all: testing all models)':''}`);
      let bestModel=null, bestF1=0;
      const rankings=[];
      // Models arrive smallest-first; with stopAtThreshold the first one meeting
      // the F1 bar is recommended immediately (cheapest acceptable model wins).
      for (const m of models) {
        console.log(`\n${'─'.repeat(72)}\nTesting: ${m.name}${m.sizeMb?` (${m.sizeMb}MB)`:''}`);
        const { summary } = await run({ model:m.name, prompt, ollamaHost,
          onResult:(r)=>{ const icon=r.pass?'✓':'✗'; process.stdout.write(`${icon} ${r.id.padEnd(10)} ${r.ms}ms\n`); if(verbose||!r.pass) process.stdout.write(`  ${r.rawResponse.slice(0,100)}\n`); }
        });
        console.log(`${summary.passed}/${summary.total} passed  F1=${(summary.f1*100).toFixed(1)}%  TP=${summary.tp} TN=${summary.tn} FP=${summary.fp} FN=${summary.fn}`);
        if (summary.f1 > bestF1) { bestF1=summary.f1; bestModel=m.name; }
        rankings.push({name:m.name,sizeMb:m.sizeMb,f1:summary.f1,passed:summary.passed});
        if (stopAtThreshold && summary.f1 >= f1Threshold) {
          console.log(`\n✓ RECOMMENDED: ${m.name} meets F1 threshold of ${(f1Threshold*100).toFixed(0)}%`);
          if (summary.suggestion) console.log(`  Prompt note: ${summary.suggestion}`);
          process.exit(0);
        }
      }
      console.log(`\n${'═'.repeat(72)}`);
      console.log(`Final ranking (${rankings.length} models):`);
      rankings.sort((a,b)=>b.f1-a.f1).forEach(r=>console.log(`  ${r.f1>=f1Threshold?'✓':'✗'} ${r.name.padEnd(30)} F1=${(r.f1*100).toFixed(1)}%  ${r.passed}/${TEST_CASES.length} passed`));
      if(bestModel&&bestF1>=f1Threshold)console.log(`\n✓ RECOMMENDED: ${bestModel} (F1=${(bestF1*100).toFixed(1)}%)`);else console.log(`\nNo model met threshold ${(f1Threshold*100).toFixed(0)}%. Best: ${bestModel} F1=${(bestF1*100).toFixed(1)}%`);
    } else {
      // Single-model mode: --model wins, else ANNOTATION_MODEL / LOCAL_LLM_MODEL from Config.
      model=model||defaultModel;
      if (!model){console.error('No model. Set ANNOTATION_MODEL in Config or use --model or --auto');process.exit(1);}
      console.log(`Model: ${model}\n${'─'.repeat(72)}`);
      await run({ model, prompt, ollamaHost,
        onResult:(r)=>{ const icon=r.pass?'✓':'✗'; console.log(`${icon} ${r.id.padEnd(10)} ${r.category.padEnd(22)} ${r.ms}ms${r.pass?'':' ['+r.expected+'->'+r.actual+']'}`); if(verbose||!r.pass)console.log(`  ${r.rawResponse.slice(0,120)}`); },
        onDone:(s)=>{ console.log('─'.repeat(72)); console.log(`\n${s.passed}/${s.total} passed  TP=${s.tp} TN=${s.tn} FP=${s.fp} FN=${s.fn}`); console.log(`Precision=${(s.precision*100).toFixed(1)}%  Recall=${(s.recall*100).toFixed(1)}%  F1=${(s.f1*100).toFixed(1)}%`); if(s.diagnosis.length){console.log('\nFailure modes:');for(const d of s.diagnosis)console.log(`  [${d.mode}] x${d.count}: ${d.desc}`);}else console.log('\nNo failure modes detected.'); if(s.suggestion)console.log(`\nSuggestion: ${s.suggestion}`); }
      });
    }
  })();
}

// Public API: run() for programmatic use (e.g. the web UI), plus the test
// fixtures and helpers exposed for reuse and unit testing.
module.exports = { run, TEST_CASES, normalize, diagnose, suggestFix, getOllamaModels };
RAGWEED-ATR-v1.0.102-20260319-000014-473
ok "scripts/annotation_test_runner.cjs written"


# =============================================================================
# STEP 11f: Write scripts/config.sh (interactive configuration TUI)
# =============================================================================
ph "STEP 11f: Writing scripts/config.sh"

cat > "$SCRIPTS_DIR/config.sh" << 'RAGWEED-CFG-v1.0.102-20260319-000014-473'
#!/usr/bin/env bash
# RAGWEED_VERSION=1.0.102
# RAGWeed v10 -- two-pane config TUI (ASCII borders, full navigation)
ENV_FILE="${PROJECT_DIR}/Config"
[ -f "$ENV_FILE" ] || touch "$ENV_FILE"
set -a; source "$ENV_FILE" 2>/dev/null; set +a

# env_get KEY [DEFAULT] -- print KEY's value from $ENV_FILE (last entry wins,
# surrounding double quotes stripped), or DEFAULT when the key is absent or
# empty.  The previous one-liner's `pipeline || echo default` could never
# fire: the pipeline's exit status is tr's (success) even when grep matches
# nothing, so callers silently got an empty string instead of the default.
env_get() {
    local v
    v=$(grep -E "^${1}=" "$ENV_FILE" 2>/dev/null | tail -1 | cut -d= -f2- | tr -d '"')
    if [ -n "$v" ]; then
        printf '%s\n' "$v"
    else
        printf '%s\n' "${2:-}"
    fi
}
# env_set KEY VALUE -- persist KEY=VALUE into $ENV_FILE, replacing any
# existing entries for KEY.  The previous sed-based replacement interpolated
# VALUE unescaped into the s||| expression, so values containing '|', '&' or
# '\' (passwords, URLs) corrupted the Config line.  Delete-then-append avoids
# sed metacharacters entirely; note the key moves to the end of the file and
# duplicate entries are collapsed to one (env_get takes the last entry anyway).
env_set() {
    local k="$1" v="$2" tmp
    if grep -qE "^${k}=" "$ENV_FILE" 2>/dev/null; then
        tmp=$(mktemp) || return 1
        grep -vE "^${k}=" "$ENV_FILE" > "$tmp"
        # cat-over rather than mv to preserve $ENV_FILE's inode and permissions
        cat "$tmp" > "$ENV_FILE"
        rm -f "$tmp"
    fi
    printf '%s=%s\n' "$k" "$v" >> "$ENV_FILE"
}
# masked VALUE -- display form for secrets: first 8 chars + "..." when long,
# "***" for short non-empty values, "(not set)" when empty.
masked() {
    local v="$1"
    if [ ${#v} -gt 8 ]; then
        echo "${v:0:8}..."
    elif [ -n "$v" ]; then
        echo "***"
    else
        echo "(not set)"
    fi
}
# pad STRING WIDTH -- left-justify STRING into exactly WIDTH columns,
# truncating with a trailing '~' when it is too long.
pad() {
    local s="$1" n="$2"
    if [ ${#s} -gt "$n" ]; then
        s="${s:0:$((n-1))}~"
    fi
    printf "%-*s" "$n" "$s"
}
# Terminal helpers (tput errors are silenced so the TUI degrades gracefully
# on dumb terminals):
#   at ROW COL  move cursor          clr        clear to end of line
#   bold/sgr0   bold on / attrs off  rev/unrev  standout (reverse) on/off
#   drain       swallow any pending buffered keystrokes (20ms polls) so a
#               held-down key does not queue up stale navigation events
at()  { tput cup "$1" "$2" 2>/dev/null; }
clr() { tput el  2>/dev/null; }
bold(){ tput bold 2>/dev/null; }
sgr0(){ tput sgr0 2>/dev/null; }
rev() { tput smso 2>/dev/null; }
unrev(){ tput rmso 2>/dev/null; }
drain(){ while IFS= read -rsn1 -t 0.02 _j 2>/dev/null; do :; done; }

# List locally-installed Ollama model names, one per line; outputs nothing
# when Ollama is unreachable or returns malformed JSON (the picker simply
# shows empty).  Honors OLLAMA_HOST from the sourced Config, falling back to
# the previous hard-coded localhost default.  The unused
# `import { createRequire } from 'module'` line was dead code and is removed.
ollama_models() {
    curl -sf "${OLLAMA_HOST:-http://localhost:11434}/api/tags" 2>/dev/null \
        | node --input-type=module -e "
let d='';
process.stdin.on('data',c=>d+=c);
process.stdin.on('end',()=>{
  try{ const j=JSON.parse(d); (j.models||[]).forEach(m=>process.stdout.write(m.name+'\n')); }catch(_){}
});
" 2>/dev/null
}
# Print the names of all collections in the vector store, one per line.
# CHROMA_PATH (default ./chromadb) is resolved relative to PROJECT_DIR.
# Emits "(none)" when the store is empty, "(unavailable)" on any error.
chroma_col_list() {
    node --input-type=module -e "
import { getCollectionNames } from './scripts/collections.js';
const chromaDir = (process.env.CHROMA_PATH||'./chromadb').replace(/^([^/])/,process.env.PROJECT_DIR+'/'+'\$1');
try {
    const names = getCollectionNames(chromaDir);
    names.forEach(n => process.stdout.write(n+'\n'));
    if (!names.length) process.stdout.write('(none)\n');
} catch(e){ process.stdout.write('(unavailable)\n'); }
" 2>/dev/null
}

# -- Sections ------------------------------------------------------------------
# "Title|KEY:Label:default|..."   default="*"=masked  "*list"=multi-select
# Each entry is a pane title followed by "|"-separated KEY:Label:default
# triples.  Special defaults: "*" renders the stored value masked (secrets),
# "*list" opens the collection multi-select picker.  The pseudo-key
# "system_prompt" opens $EDITOR on system_prompt.txt instead of a Config key.
SECTIONS=(
    "LLM Provider|LLM_PROVIDER:Active provider:claude"
    "Claude|ANTHROPIC_API_KEY:API key:*|ANTHROPIC_MODEL:Model:claude-sonnet-4-6"
    "OpenAI|OPENAI_API_KEY:API key:*|OPENAI_MODEL:Model:gpt-4o"
    "Gemini|GEMINI_API_KEY:API key:*|GEMINI_MODEL:Model:gemini-2.5-pro"
    "Local / Ollama|LOCAL_LLM_MODEL:Model name:(none)|OLLAMA_HOST:Ollama host:http://localhost:11434"
    "Embeddings|EMBED_MODEL:Embed model:nomic-embed-text|EMBED_OLLAMA_HOST:Embed host:http://localhost:11434"
    "Retrieval|TOP_K:Top-K results:64|MIN_SCORE:Min score (0=all):0|MAX_TOKENS:Max tokens:4096|CONTEXT_CHUNKS:Context chunks:64|ACTIVE_COLLECTIONS:Active collections:*list"
    "Paths|CHROMA_PATH:ChromaDB path:./chromadb|SOURCE_DIR:Source dir:./source"
    "Web|WEB_PORT:Web port:3000|WEB_PASSWORD:Password:*"
    "Debug|DEBUG_LEVEL:Debug level:0|system_prompt:System prompt:edit"
)
NSEC=${#SECTIONS[@]}

# -- Layout --------------------------------------------------------------------
# Compute the global layout geometry from the current terminal size.
# Globals set: COLS ROWS L_W L_COL DIV_COL R_COL R_W BODY_TOP BODY_BOT BODY_H
get_dims() {
    COLS=$(tput cols  2>/dev/null || echo 80)
    ROWS=$(tput lines 2>/dev/null || echo 24)
    L_W=18                      # left (section) pane width
    L_COL=1                     # left pane start column
    DIV_COL=$((L_W + 3))        # vertical divider column
    R_COL=$((DIV_COL + 2))      # right (settings) pane start column
    R_W=$((COLS - R_COL - 2))   # right pane width ...
    if [ "$R_W" -lt 24 ]; then  # ... clamped to a usable minimum
        R_W=24
    fi
    BODY_TOP=3                  # first body row (below header + rule)
    BODY_BOT=$((ROWS - 3))      # last body row (above footer rule)
    BODY_H=$((BODY_BOT - BODY_TOP + 1))
}

# Print a full-terminal-width rule made of the single character $1
# (no trailing newline; falls back to 80 columns without a terminal).
hline() {
    local ch="$1" width
    width=$(tput cols 2>/dev/null || echo 80)
    printf '%*s' "$width" '' | tr ' ' "$ch"
}

# Repaint the static chrome: clear screen, header line, horizontal rules,
# column headers, the vertical pane divider, and the footer key hints.
draw_frame() {
    get_dims
    tput clear 2>/dev/null

    # Header line 0: title left, env file right
    at 0 0
    bold; printf " RAGWeed Configuration"; sgr0
    local ef=" ${ENV_FILE}"
    at 0 $((COLS - ${#ef} - 1)); printf "%s" "$ef"

    # Line 1: horizontal rule (ASCII dashes)
    at 1 0; hline "-"

    # Column header line 2
    at 2 $L_COL;   rev; pad "Section" $L_W; unrev
    at 2 $R_COL;   rev; pad "Settings" $R_W; unrev

    # Vertical divider
    local r
    for (( r=BODY_TOP; r<=BODY_BOT; r++ )); do
        at $r $DIV_COL; printf "|"
    done

    # Footer rule + status
    at $((ROWS-2)) 0; hline "-"
    at $((ROWS-1)) 0
    printf " arrows=navigate  Space/Enter=edit  s=save  q=quit  r=redraw"
}

# Paint the left (section) pane.
#   $1 = selected section index, $2 = focus ("left" or "right").
# The selected row is reverse-video when the left pane has focus, bold with a
# ">" marker when focus is on the right pane; leftover rows are blanked.
draw_left() {
    local sel="$1" focus="$2"   # focus: "left" or "right"
    get_dims
    local i title
    for (( i=0; i<NSEC && i<BODY_H; i++ )); do
        title="${SECTIONS[$i]%%|*}"
        at $((BODY_TOP+i)) $L_COL
        if [ $i -eq $sel ] && [ "$focus" = "left" ]; then
            rev; printf " %-*s" $((L_W-1)) "$(pad "$title" $((L_W-2)))"; unrev
        elif [ $i -eq $sel ]; then
            bold; printf ">%-*s" $((L_W-1)) "$(pad "$title" $((L_W-2)))"; sgr0
        else
            printf " %-*s" $((L_W-1)) "$(pad "$title" $((L_W-2)))"
        fi
    done
    # Blank any rows below the last section so stale text never lingers.
    for (( i=NSEC; i<BODY_H; i++ )); do
        at $((BODY_TOP+i)) $L_COL; printf "%-*s" $L_W " "
    done
}

# Split section $1 of SECTIONS into its field triples.
# Sets globals: fields_str (raw "|"-joined field list), FIELDS (array of
# "KEY:Label:default" triples) and NFIELDS (their count).
get_fields() {
    local entry="${SECTIONS[$1]}"
    fields_str="${entry#*|}"   # drop the leading "Title|"
    IFS='|' read -ra FIELDS <<< "$fields_str"
    NFIELDS=${#FIELDS[@]}
}

# Print the display value for one field triple "KEY:Label:default".
#   default "*"     -> masked secret read from Config
#   default "*list" -> active-collection list, "(all)" when unset
#   key "system_prompt" -> line count of system_prompt.txt
get_field_val() {
    local spec="$1"
    local key="${spec%%:*}"
    local rest="${spec#*:}"
    local default="${rest#*:}"
    case "$key" in
        system_prompt)
            echo "$(wc -l < "${PROJECT_DIR}/system_prompt.txt" 2>/dev/null||echo 0) lines"
            ;;
        *)
            case "$default" in
                '*')     masked "$(env_get "$key")" ;;
                '*list') env_get "$key" "(all)" ;;
                *)       env_get "$key" "$default" ;;
            esac
            ;;
    esac
}

# Paint the right (settings) pane for one section.
#   $1 = section index, $2 = selected field index, $3 = focus ("left"/"right").
# The selected field is reverse-video only when the right pane has focus.
# Non-selected long values wrap onto indented continuation rows; the selected
# value is truncated via pad() instead so the highlight stays one row tall.
draw_right() {
    local sel="$1" fsel="$2" focus="$3"
    get_dims
    get_fields "$sel"
    local section="${SECTIONS[$sel]}"
    local title="${section%%|*}"

    # Clear right pane
    local r
    for (( r=BODY_TOP; r<=BODY_BOT; r++ )); do
        at $r $R_COL; printf "%-*s" $((COLS - R_COL - 1)) " "
    done

    local row=$BODY_TOP
    at $row $R_COL; bold; printf " %s" "$title"; sgr0
    row=$((row+2))

    local i
    for (( i=0; i<NFIELDS && row<=BODY_BOT; i++ )); do
        local field="${FIELDS[$i]}"
        local key="${field%%:*}"; local rest="${field#*:}"; local label="${rest%%:*}"
        local default="${rest#*:}"
        local val; val=$(get_field_val "$field")
        # "  [n] Label : " prefix; its printed length bounds the value width.
        local line_pre; line_pre=$(printf "  [%d] %-22s : " $((i+1)) "$label")
        local pre_len=${#line_pre}
        local val_w=$(( R_W - pre_len - 1 ))
        [ $val_w -lt 8 ] && val_w=8

        at $row $R_COL
        if [ $i -eq $fsel ] && [ "$focus" = "right" ]; then
            rev
            printf "%s%-*s" "$line_pre" $val_w "$(pad "$val" $val_w)"
            unrev
        else
            printf "%s" "$line_pre"
            # Long values wrap downward
            local remaining="$val"
            printf "%-*s" $val_w "${remaining:0:$val_w}"
            remaining="${remaining:$val_w}"
            while [ -n "$remaining" ] && [ $((row+1)) -le $BODY_BOT ]; do
                row=$((row+1))
                at $row $R_COL
                local indent; indent=$(printf "%*s" $pre_len " ")
                printf "%s%-*s" "$indent" $val_w "${remaining:0:$val_w}"
                remaining="${remaining:$val_w}"
            done
        fi
        row=$((row+1))
    done

    # Hint for list sections
    # (section index 4 == "Local / Ollama" -- keep in sync with SECTIONS)
    if [ $row -le $BODY_BOT ]; then
        case $sel in
            4) at $((row+1)) $R_COL; printf "  [l] List installed Ollama models" ;;
        esac
    fi
}

# Overwrite the bottom status line with the given message, space-padded to
# the full width so any previous text is erased.
status() {
    local msg="$*"
    get_dims
    at $((ROWS-1)) 0
    printf " %-*s" $((COLS-2)) "$msg"
}

# -- Collection multi-select picker --------------------------------------------
# Full-screen checkbox picker for ACTIVE_COLLECTIONS.  Loads the available
# names via chroma_col_list, pre-checks those already listed (an empty
# setting means "all" so everything starts checked), and on Enter writes the
# comma-joined selection back to Config and re-sources it.  q cancels.
pick_collections() {
    get_dims
    # Get all available collections
    local all_cols
    mapfile -t all_cols < <(chroma_col_list 2>/dev/null)
    if [ ${#all_cols[@]} -eq 0 ]; then
        tput rmcup 2>/dev/null; tput cnorm 2>/dev/null
        echo "No collections found in ChromaDB."; read -rp "Enter to continue..." _
        tput smcup 2>/dev/null; tput civis 2>/dev/null
        return
    fi

    # Current selection
    local cur_val; cur_val=$(env_get ACTIVE_COLLECTIONS "")
    local -a selected=()
    local c
    for c in "${all_cols[@]}"; do
        # NOTE(review): $c is interpolated into the regex unescaped -- names
        # containing regex metacharacters could mismatch; confirm name charset.
        if [ -z "$cur_val" ] || echo "$cur_val" | grep -qE "(^|,)${c}(,|$)"; then
            selected+=("1")
        else
            selected+=("0")
        fi
    done

    local pick=0
    local ncols=${#all_cols[@]}

    tput clear 2>/dev/null
    while true; do
        at 0 0; bold; printf " Select Active Collections"; sgr0
        at 1 0; hline "-"
        at 2 0; printf " Space=toggle  a=all  n=none  Enter=confirm  q=cancel"
        at 3 0; hline "-"

        local i
        for (( i=0; i<ncols; i++ )); do
            at $((4+i)) 2
            local check="[ ]"; [ "${selected[$i]}" = "1" ] && check="[*]"
            if [ $i -eq $pick ]; then
                rev; printf "%s %-30s" "$check" "${all_cols[$i]}"; unrev
            else
                printf "%s %-30s" "$check" "${all_cols[$i]}"
            fi
        done

        at $((4+ncols+1)) 0; hline "-"
        at $((4+ncols+2)) 0
        local sel_names=""
        for (( i=0; i<ncols; i++ )); do
            [ "${selected[$i]}" = "1" ] && sel_names="${sel_names:+$sel_names,}${all_cols[$i]}"
        done
        printf " Active: %s" "${sel_names:-(all)}"

        IFS= read -rsn1 key2 2>/dev/null
        case "$key2" in
            $'\x1b')
                # Arrow keys arrive as ESC [ A/B; read the tail, then drain.
                IFS= read -rsn1 -t 0.05 a; IFS= read -rsn1 -t 0.05 b; drain
                case "${a}${b}" in
                    '[A') [ $pick -gt 0 ] && pick=$((pick-1)) ;;
                    '[B') [ $pick -lt $((ncols-1)) ] && pick=$((pick+1)) ;;
                esac ;;
            ' ') # toggle
                [ "${selected[$pick]}" = "1" ] && selected[$pick]="0" || selected[$pick]="1" ;;
            a|A) for (( i=0; i<ncols; i++ )); do selected[$i]="1"; done ;;
            n|N) for (( i=0; i<ncols; i++ )); do selected[$i]="0"; done ;;
            ''|$'\r')
                # sel_names was rebuilt during the draw pass above
                env_set ACTIVE_COLLECTIONS "$sel_names"
                set -a; source "$ENV_FILE" 2>/dev/null; set +a
                break ;;
            q|Q) break ;;
        esac
    done
}

# -- Edit a single field -------------------------------------------------------
# Edit field $2 of section $1.  Dispatch:
#   key "system_prompt"  -> open $EDITOR (default nano) on system_prompt.txt
#   default "*list"      -> collection multi-select picker
#   anything else        -> inline prompt on the bottom line, with per-key
#                           option hints shown one row above.
# Saves via env_set and re-sources Config so the change takes effect live.
edit_field() {
    local sel="$1" fidx="$2"
    get_fields "$sel"
    [ $fidx -lt 0 ] || [ $fidx -ge $NFIELDS ] && return
    local field="${FIELDS[$fidx]}"
    local key="${field%%:*}"; local rest="${field#*:}"; local label="${rest%%:*}"; local default="${rest#*:}"

    # System prompt: full-screen editor
    if [ "$key" = "system_prompt" ]; then
        tput rmcup 2>/dev/null; tput cnorm 2>/dev/null
        ${EDITOR:-nano} "${PROJECT_DIR}/system_prompt.txt"
        tput smcup 2>/dev/null; tput civis 2>/dev/null
        return
    fi

    # Active collections: picker
    if [ "$default" = "*list" ]; then
        pick_collections
        tput clear 2>/dev/null
        return
    fi

    # Hints
    get_dims
    local hint=""
    case "$key" in
        LLM_PROVIDER)    hint="Options: claude / openai / gemini / local" ;;
        ANTHROPIC_MODEL) hint="Options: claude-opus-4-6 / claude-sonnet-4-6 / claude-haiku-4-5-20251001" ;;
        OPENAI_MODEL)    hint="Options: gpt-4o / gpt-4o-mini / gpt-4-turbo / o1-mini / o3-mini" ;;
        GEMINI_MODEL)    hint="Options: gemini-2.5-pro / gemini-2.5-flash / gemini-1.5-pro" ;;
        EMBED_MODEL)     hint="Options: nomic-embed-text / mxbai-embed-large / all-minilm" ;;
        DEBUG_LEVEL)     hint="Options: 0=off  1=errors  2=verbose" ;;
        WEB_PASSWORD)    hint="Leave blank to disable password" ;;
    esac
    if [ -n "$hint" ]; then
        at $((ROWS-2)) 0; printf " %-*s" $((COLS-2)) "$hint"
    fi

    # Secrets never echo their current value into the prompt.
    local cur_val
    if [ "$default" = "*" ]; then cur_val=""; else cur_val=$(env_get "$key" "$default"); fi

    at $((ROWS-1)) 0; printf "%-*s" $((COLS-1)) " "
    at $((ROWS-1)) 0; tput cnorm 2>/dev/null
    local v
    IFS= read -rp " $label [$cur_val]: " v
    tput civis 2>/dev/null

    # Empty input keeps the old value, except WEB_PASSWORD where blank clears it.
    if [ -n "$v" ] || [ "$key" = "WEB_PASSWORD" ]; then
        env_set "$key" "$v"
        set -a; source "$ENV_FILE" 2>/dev/null; set +a
    fi

    # Clear hint line
    [ -n "$hint" ] && { at $((ROWS-2)) 0; printf "%-*s" $((COLS-1)) " "; }
}

# -- Main loop -----------------------------------------------------------------
# Switch to the alternate screen with a hidden cursor for the session; the
# trap restores the terminal on every exit path (including Ctrl-C).
tput smcup 2>/dev/null
tput civis 2>/dev/null
trap 'tput rmcup 2>/dev/null; tput cnorm 2>/dev/null' EXIT INT TERM

cur=0        # current section (left pane)
fsel=0       # current field (right pane)
focus="left" # "left" or "right"

draw_frame
draw_left  $cur "$focus"
draw_right $cur $fsel "$focus"
status " arrows=navigate  Space/Enter=edit  s=save  q=quit  r=redraw"

while true; do
    IFS= read -rsn1 key 2>/dev/null
    case "$key" in
        $'\x1b')
            # Arrow keys arrive as ESC [ A/B/C/D; read the two trailing bytes
            # with a short timeout, then drain any leftover sequence bytes.
            IFS= read -rsn1 -t 0.05 a 2>/dev/null
            IFS= read -rsn1 -t 0.05 b 2>/dev/null
            drain
            seq="${a}${b}"
            case "$seq" in
                '[A')  # up
                    if [ "$focus" = "left" ]; then
                        [ $cur -gt 0 ] && cur=$((cur-1)) && fsel=0
                    else
                        [ $fsel -gt 0 ] && fsel=$((fsel-1))
                    fi
                    draw_left $cur "$focus"; draw_right $cur $fsel "$focus" ;;
                '[B')  # down
                    if [ "$focus" = "left" ]; then
                        [ $cur -lt $((NSEC-1)) ] && cur=$((cur+1)) && fsel=0
                    else
                        get_fields "$cur"
                        [ $fsel -lt $((NFIELDS-1)) ] && fsel=$((fsel+1))
                    fi
                    draw_left $cur "$focus"; draw_right $cur $fsel "$focus" ;;
                '[C')  # right arrow: enter right pane
                    focus="right"; fsel=0
                    draw_left $cur "$focus"; draw_right $cur $fsel "$focus"
                    status " up/dn=field  Space/Enter=edit  Left=back to sections" ;;
                '[D')  # left arrow: back to left pane
                    focus="left"
                    draw_left $cur "$focus"; draw_right $cur $fsel "$focus"
                    status " arrows=navigate  Space/Enter=edit  s=save  q=quit  r=redraw" ;;
            esac ;;
        ' '|'')  # space or enter
            if [ "$focus" = "left" ]; then
                # Enter right pane
                focus="right"; fsel=0
                draw_left $cur "$focus"; draw_right $cur $fsel "$focus"
                status " up/dn=field  Space/Enter=edit  Left=back to sections"
            else
                # Edit highlighted field
                edit_field $cur $fsel
                draw_frame
                draw_left  $cur "$focus"
                draw_right $cur $fsel "$focus"
                status " up/dn=field  Space/Enter=edit  Left=back to sections"
            fi ;;
        [1-9])
            # Shortcut: jump to field N and edit
            # (top-level code, so a plain variable stands in for `local`)
            local_n=$((key - 1))
            get_fields "$cur"
            if [ $local_n -lt $NFIELDS ]; then
                focus="right"; fsel=$local_n
                draw_left $cur "$focus"; draw_right $cur $fsel "$focus"
                edit_field $cur $fsel
                draw_frame; draw_left $cur "$focus"; draw_right $cur $fsel "$focus"
                status " up/dn=field  Space/Enter=edit  Left=back to sections"
            fi ;;
        l|L)
            # Section-specific helper key; index 4 == "Local / Ollama"
            case $cur in
                4)  # Ollama: show list on normal screen
                    tput rmcup 2>/dev/null; tput cnorm 2>/dev/null; tput clear 2>/dev/null
                    echo " Installed Ollama models:"; echo ""
                    ollama_models | sed 's/^/  /'
                    echo ""; read -rp " Enter to continue..." _
                    tput smcup 2>/dev/null; tput civis 2>/dev/null
                    draw_frame; draw_left $cur "$focus"; draw_right $cur $fsel "$focus"
                    status " up/dn=field  Space/Enter=edit  Left=back to sections" ;;
            esac ;;
        r|R)
            draw_frame; draw_left $cur "$focus"; draw_right $cur $fsel "$focus"
            status " arrows=navigate  Space/Enter=edit  s=save  q=quit  r=redraw" ;;
        s|S)
            # Every edit is persisted immediately; "save" just confirms and exits.
            get_dims; at $((ROWS-1)) 0
            printf " Saved.%-*s" $((COLS-9)) " "
            sleep 0.6; break ;;
        q|Q|$'\x03'|$'\x04')  # q, Ctrl-C, Ctrl-D
            break ;;
    esac
done

RAGWEED-CFG-v1.0.102-20260319-000014-473
chmod +x "$SCRIPTS_DIR/config.sh"
ok "config.sh written"

# =============================================================================
# STEP 13c: Create visible symlink for nvm in project dir
# nvm lives in ~/.nvm (system-level Node version manager) -- symlink for browsability.
ph "STEP 13c: Visible symlink for nvm"
if [ -d "$HOME/.nvm" ] && [ ! -e "$PROJECT_DIR/nvm" ]; then
    # Best-effort: never fail the installer if the symlink cannot be created.
    ln -s "$HOME/.nvm" "$PROJECT_DIR/nvm" 2>/dev/null && ok "symlink: nvm -> ~/.nvm" || true
fi

# Take AFTER snapshot -- everything now installed
set +e
# Find previous version's AFTER dir (for verification section diff).
# Use a glob piped through `sort -V` via process substitution instead of
# parsing `ls` output (SC2045: word-splitting breaks on unusual paths), and
# keep the loop in the current shell so _prev_after_ver survives it.
_prev_after_ver=""
while IFS= read -r _vd; do
    [ -d "$_vd" ] || continue   # skip the literal pattern when nothing matched
    _vn=$(basename "$(dirname "$_vd")")
    [ "$_vn" != "${RAGWEED_INSTALL_VERSION}" ] && _prev_after_ver="$_vn"
done < <(printf '%s\n' "$_OLD_DIR"/*/AFTER 2>/dev/null | sort -V)
_after_dir="$_OLD_DIR/${RAGWEED_INSTALL_VERSION}/AFTER"
mkdir -p "$_after_dir"
# Copy every managed file that exists, preserving its relative subdirectory.
_after_count=0
for _rel in "${_MANAGED_FILES[@]}"; do
    _fpath="$PROJECT_DIR/$_rel"
    [ -f "$_fpath" ] || continue
    _dest_subdir="$_after_dir/$(dirname "$_rel")"
    mkdir -p "$_dest_subdir"
    cp -a "$_fpath" "$_dest_subdir/"
    _after_count=$((_after_count + 1))
done
# Also mirror whole auxiliary trees (best-effort).
for _subdir in scripts docs local; do
    [ -d "$PROJECT_DIR/$_subdir" ] && cp -a "$PROJECT_DIR/$_subdir/." "$_after_dir/$_subdir/" 2>/dev/null || true
done
ok "$_after_count files in AFTER snapshot: OLD/${RAGWEED_INSTALL_VERSION}/AFTER/"
set -e

# ============ START VERIFICATION SECTION ============
# STEP 14: Verification
# =============================================================================
ph "STEP 14: Verification"

set +e
export _RW_INSTALL_VER="$RAGWEED_INSTALL_VERSION"
node --input-type=module << 'VERIFYEOF'
import fs     from 'fs';
import path   from 'path';
import crypto from 'crypto';

const proj   = process.env.PROJECT_DIR || '.';
const oldDir = path.join(proj, 'OLD');

// ── File presence check ───────────────────────────────────────────────────────
// [relative path, human label] pairs; every one must exist after install.
const checks = [
    ['scripts/query.js',  'query engine'],
    ['scripts/web.js',    'web server'],
    ['scripts/config.sh', 'config menu'],
    ['scripts/webc.js',   'web config'],
    ['run.sh',            'launcher'],
    ['Config',              'configuration'],
];
let ok = true;  // overall verdict; drives the process exit code at the bottom
for (const [f, label] of checks) {
    const fp = path.join(proj, f);
    if (fs.existsSync(fp)) {
        const n = fs.readFileSync(fp,'utf8').split('\n').length;
        process.stdout.write(`  ✓  ${label} (${n} lines)\n`);
    } else {
        process.stdout.write(`  ✗  ${label} MISSING: ${f}\n`);
        ok = false;
    }
}

// ── Change report ─────────────────────────────────────────────────────────────
// Every file the installer owns; basis for all snapshot comparisons below.
const managed = [
    'run.sh', 'Config', 'package.json', 'scripts/pickle.js', 'scripts/collections.js',
    'scripts/query.js', 'scripts/web.js', 'scripts/index.html', 'scripts/app.js',
    'scripts/lf.js', 'scripts/webc.js', 'scripts/ingest.js', 'scripts/config.sh',
    'system_prompt.txt', 'scripts/annotation_prompt.txt', 'scripts/help.txt', 'scripts/prompts.json',
    'scripts/annotation_test_runner.cjs', 'bump.js', 'inject_preflight.js', 'docs/ragweed.1', 'patch.js',
];
const currentVer = process.env._RW_INSTALL_VER || '';

// Patterns matching every embedded version-stamp format used across the
// managed files (shell, JS, JSON, heredoc sentinel tags).
const VERSION_RE = [
    /\/\/ VERSION: \d+\.\d+\.\d+/g,
    /# VERSION: \d+\.\d+\.\d+/g,
    /RAGWEED_VERSION=\d+\.\d+\.\d+/g,
    /const VERSION = '\d+\.\d+\.\d+'/g,
    /RAGWEED_VERSION="\d+\.\d+\.\d+"/g,
    /RAGWEED_INSTALL_VERSION="\d+\.\d+\.\d+"/g,
    /"version": "\d+\.\d+\.\d+"/g,
    /RAGWEED-[A-Z0-9]+-v[\d.]+-\d{8}-\d{6}-\d{3}/g,
];
// Normalize all version stamps to a fixed token so two files differing only
// by version compare equal.
function stripVersions(t) {
    for (const re of VERSION_RE) t = t.replace(re, '__VER__');
    return t;
}
// Pull the first x.y.z stamp out of a file; null when none found/unreadable.
function extractVer(fpath) {
    try {
        const t = fs.readFileSync(fpath, 'utf8');
        for (const re of VERSION_RE) {
            const m = t.match(re);
            if (m) {
                const v = m[0].match(/\d+\.\d+\.\d+/);
                if (v) return v[0];
            }
        }
    } catch(_) {}
    return null;
}
// md5 of the raw file bytes; null when unreadable.
function md5raw(fpath) {
    try { return crypto.createHash('md5').update(fs.readFileSync(fpath)).digest('hex'); }
    catch(_) { return null; }
}
// md5 after version stamps are normalized away; null when unreadable.
function md5stripped(fpath) {
    try { return crypto.createHash('md5').update(stripVersions(fs.readFileSync(fpath,'utf8'))).digest('hex'); }
    catch(_) { return null; }
}
// Map a version-directory name to a numeric 4-tuple sort key.
// BUGFIX: parseInt on a non-numeric suffix (e.g. "1.0.0-rc") returned NaN,
// which poisons the arithmetic comparator used on these keys (NaN-NaN is NaN,
// making the sort order unstable/undefined); coerce NaN to 0 instead.
function semverKey(v) {
    if (v === 'unknown') return [0,0,0,0];
    // Handle suffixes like 10.8.181-2 -- sort after base version
    const [base, suffix] = v.split('-');
    const p = base.split('.').map(Number);   // non-numeric parts become NaN, caught by ||0
    const s = suffix ? parseInt(suffix, 10) : 0;
    return [p[0]||0, p[1]||0, p[2]||0, Number.isNaN(s) ? 0 : s];
}

// ── Helper: compare two file paths, report in standard format ─────────────────
// Returns { sym, line, added, removed, changed, bumped } where `line` is a
// pre-formatted report row.  Classification order: missing from both, new,
// gone, byte-identical, version-bump-only (identical after stamp stripping),
// then real content change with line/char deltas computed ignoring
// version/timestamp stamps.
function compareFiles(rel, aPath, bPath) {
    const hasA = aPath && fs.existsSync(aPath);
    const hasB = bPath && fs.existsSync(bPath);
    if (!hasA && !hasB) return { sym: '!', line: `  !  ${rel}  (missing from both snapshots)\n`, added: false, removed: false, changed: false, bumped: false };
    if (!hasA) return { sym: '+', line: `  +  ${rel}  (NEW)\n`, added: true, removed: false, changed: false, bumped: false };
    if (!hasB) return { sym: '-', line: `  -  ${rel}  (GONE)\n`, added: false, removed: true, changed: false, bumped: false };
    if (md5raw(aPath) === md5raw(bPath)) {
        const ver = extractVer(bPath) || '?';
        return { sym: '\u2713', line: `  \u2713  ${rel}  (v${ver} unchanged)\n`, added: false, removed: false, changed: false, bumped: false };
    }
    if (md5stripped(aPath) === md5stripped(bPath)) {
        const oldVer = extractVer(aPath) || '?';
        const newVer = extractVer(bPath) || '?';
        return { sym: '=', line: `  =  ${rel}  (v${oldVer} -> v${newVer})\n`, added: false, removed: false, changed: false, bumped: true };
    }
    // Real content change
    const oldTxt   = fs.readFileSync(aPath, 'utf8');
    const newTxt   = fs.readFileSync(bPath, 'utf8');
    const oldLines = oldTxt.split('\n');
    const newLines = newTxt.split('\n');
    const dChars   = newTxt.length - oldTxt.length;
    // Positional line comparison (not a real diff), with stamps masked out.
    const strip    = s => s.replace(/\d+\.\d+\.\d+/g, '__V__').replace(/\d{8}-\d{6}-\d{3}/g, '__T__');
    let diffCount  = 0;
    const maxLen   = Math.max(oldLines.length, newLines.length);
    for (let i = 0; i < maxLen; i++) {
        if (strip(oldLines[i]||'') !== strip(newLines[i]||'')) diffCount++;
    }
    const netLines = newLines.length - oldLines.length;
    const diffStr  = diffCount === 1 ? '1 line changed' : `${diffCount} lines changed`;
    const netStr   = netLines !== 0 ? `, ${netLines >= 0 ? '+' : ''}${netLines} net` : '';
    const charStr  = dChars   !== 0 ? `, ${dChars >= 0 ? '+' : ''}${dChars} chars` : '';
    return { sym: '*', line: `\x1b[1m  *  ${rel}  (${diffStr}${netStr}${charStr})\x1b[0m\n`, added: false, removed: false, changed: true, bumped: false };
}

// Print one comparison section: a header, one compareFiles row per
// [aPath, bPath, rel] pair, then a summary tally.
function printSection(label, pairs) {
    process.stdout.write(`\n  i  ${label}:\n`);
    let n = 0, r = 0, c = 0, b = 0;   // new / removed / changed / bumped counts
    for (const [aPath, bPath, rel] of pairs) {
        const res = compareFiles(rel, aPath, bPath);
        process.stdout.write(res.line);
        if (res.added)   n++;
        if (res.removed) r++;
        if (res.changed) c++;
        if (res.bumped)  b++;
    }
    const parts = [];
    if (n) parts.push(`${n} new`);
    if (r) parts.push(`${r} removed`);
    if (c) parts.push(`${c} changed`);
    if (b) parts.push(`${b} version-bumped only`);
    process.stdout.write(`\n  ${parts.join(', ') || 'no changes'}\n`);
}

// ── Locate BEFORE and AFTER snapshots ─────────────────────────────────────────
// Version directories under OLD/, sorted ascending by semver key.
const verDirs = fs.existsSync(oldDir)
    ? fs.readdirSync(oldDir, { withFileTypes: true })
        .filter(e => e.isDirectory()).map(e => e.name)
        .sort((a,b) => { const ka=semverKey(a),kb=semverKey(b); for(let i=0;i<4;i++) if(ka[i]!==kb[i]) return ka[i]-kb[i]; return 0; })
    : [];

const curVerDir = path.join(oldDir, currentVer);
const curBefore = path.join(curVerDir, 'BEFORE');
const curAfter  = path.join(curVerDir, 'AFTER');
// Highest version other than the one being installed right now.
const prevVer   = verDirs.filter(v => v !== currentVer).pop() || null;
const prevAfter = prevVer ? path.join(oldDir, prevVer, 'AFTER') : null;

const hasBefore    = fs.existsSync(curBefore);
const hasAfter     = fs.existsSync(curAfter);
const hasPrevAfter = prevAfter && fs.existsSync(prevAfter);

process.stdout.write('\n');

// ── GONE: in previous AFTER but not in current BEFORE ─────────────────────────
// Detect files that disappeared (or appeared) between installs by scanning
// the actual snapshot trees, not just the managed list.
if (hasPrevAfter && hasBefore) {
    // Check managed list AND scan actual directory contents
    const allPrevAfter = new Set();
    const allCurBefore = new Set();
    // Recursively collect relative file paths under dir into the Set `out`.
    function scanDir(dir, base, out) {
        if (!fs.existsSync(dir)) return;
        for (const e of fs.readdirSync(dir, { withFileTypes: true })) {
            const rel = base ? base + '/' + e.name : e.name;
            if (e.isDirectory()) scanDir(path.join(dir, e.name), rel, out);
            else out.add(rel);
        }
    }
    scanDir(prevAfter, '', allPrevAfter);
    scanDir(curBefore, '', allCurBefore);
    // Remove verification artifacts from scan
    const _skip = new Set(['verification_section.sh']);
    const gone = [...allPrevAfter].filter(r => !allCurBefore.has(r) && !_skip.has(r));
    const newFiles = [...allCurBefore].filter(r => !allPrevAfter.has(r) && !_skip.has(r));
    if (gone.length) {
        process.stdout.write(`  i  GONE before install (in v${prevVer} AFTER, missing from BEFORE):\n`);
        for (const rel of gone) process.stdout.write(`  -  ${rel}\n`);
        process.stdout.write('\n');
    }
    if (newFiles.length) {
        process.stdout.write(`  i  NEW since v${prevVer} (not in prev AFTER, present in BEFORE):\n`);
        for (const rel of newFiles) process.stdout.write(`  +  ${rel}\n`);
        process.stdout.write('\n');
    }
}

// ── Changes from previous: prev AFTER -> current BEFORE (first) ───────────────
// What changed on disk between the previous install finishing and this one
// starting (i.e. out-of-band edits).
if (hasPrevAfter && hasBefore) {
    const prevBeforePairs = managed.map(rel => [
        path.join(prevAfter,  rel),
        path.join(curBefore, rel),
        rel
    ]);
    // Cheap pre-pass: skip the full report when everything is byte-identical.
    const anyChange = prevBeforePairs.some(([a, b]) => {
        if (!a || !b) return true;
        const hasA = fs.existsSync(a), hasB = fs.existsSync(b);
        if (hasA !== hasB) return true;
        if (!hasA && !hasB) return false;
        return md5raw(a) !== md5raw(b);
    });
    if (!anyChange) {
        process.stdout.write(`\n  i  v${prevVer} AFTER -> v${currentVer} BEFORE: identical -- no changes between installs\n`);
    } else {
        printSection(`Changes from v${prevVer}  (prev AFTER -> current BEFORE)`, prevBeforePairs);
    }
} else {
    process.stdout.write(`\n  i  No previous AFTER snapshot -- first comparable install\n`);
}

// ── This install: BEFORE -> AFTER ─────────────────────────────────────────────
// What this install itself changed, including unmanaged stragglers.
if (hasBefore && hasAfter) {
    // Scan actual directories for new/gone files not in managed list
    const allBefore = new Set(); const allAfter = new Set();
    // Recursively collect relative file paths under dir into the Set `out`.
    function scanDir2(dir, base, out) {
        if (!fs.existsSync(dir)) return;
        for (const e of fs.readdirSync(dir, { withFileTypes: true })) {
            const rel = base ? base + '/' + e.name : e.name;
            if (e.isDirectory()) scanDir2(path.join(dir, e.name), rel, out);
            else out.add(rel);
        }
    }
    const _skip2 = new Set(['verification_section.sh']);
    scanDir2(curBefore, '', allBefore); scanDir2(curAfter, '', allAfter);
    const newInstalled = [...allAfter].filter(r => !allBefore.has(r) && !_skip2.has(r) && !managed.includes(r));
    const goneInstall  = [...allBefore].filter(r => !allAfter.has(r)  && !_skip2.has(r) && !managed.includes(r));
    if (newInstalled.length) {
        process.stdout.write(`  i  NEW files installed (not in managed list):\n`);
        for (const rel of newInstalled) process.stdout.write(`  +  ${rel}\n`);
        process.stdout.write('\n');
    }
    if (goneInstall.length) {
        process.stdout.write(`  i  FILES REMOVED by install (not in managed list):\n`);
        for (const rel of goneInstall) process.stdout.write(`  -  ${rel}\n`);
        process.stdout.write('\n');
    }
}
// Managed-file report runs even when one of the snapshots is absent.
const beforeAfterPairs = managed.map(rel => [
    hasBefore ? path.join(curBefore, rel) : null,
    hasAfter  ? path.join(curAfter,  rel) : null,
    rel
]);
printSection(`This install v${currentVer}  BEFORE -> AFTER`, beforeAfterPairs);

// ── Version consistency check ──────────────────────────────────────────────────
// package.json's version is authoritative; flag any installed managed file
// whose embedded stamp disagrees with it.
const installVer = (() => {
    try { return JSON.parse(fs.readFileSync(path.join(proj, 'package.json'), 'utf8')).version || null; }
    catch(_) { return null; }
})();
if (installVer) {
    const mismatched = managed
        .map(rel => ({ rel, fp: path.join(proj, rel) }))
        .filter(({ fp }) => fs.existsSync(fp))
        .map(({ rel, fp }) => ({ rel, ver: extractVer(fp) }))
        .filter(({ ver }) => ver && ver !== '?' && ver !== installVer);
    if (mismatched.length) {
        process.stdout.write(`\n  \x1b[1m!! VERSION MISMATCH -- these files are not at v${installVer}:\x1b[0m\n`);
        for (const { rel, ver } of mismatched) process.stdout.write(`     ${rel} (v${ver})\n`);
        ok = false;
    } else {
        process.stdout.write(`\n  \u2713  all files at v${installVer}\n`);
    }
}

// Exit non-zero so the (set +e guarded) caller can see verification failed.
process.exit(ok ? 0 : 1);

VERIFYEOF
set -e

# Check collection status
echo ""
info "Collection status:"
set +e
node --input-type=module << 'COLLCHECK'
import fs   from 'fs';
import path from 'path';
import { createRequire } from 'module';
// createRequire lets this ESM script load better-sqlite3 (a CJS module).
const require = createRequire(import.meta.url);

const proj      = process.env.PROJECT_DIR || '.';
const chromaDir = path.join(proj, 'chromadb');

// Nothing ingested yet: report and exit cleanly.
if (!fs.existsSync(chromaDir)) {
    process.stdout.write('  i  No chromadb directory yet\n');
    process.exit(0);
}

// ── Read dim + count directly from binary index files ────────────────────────
// HNSW data_level0.bin element layout (as assumed by the probe below):
//   [M0*4 neighbor bytes][dim*4 float32 vector bytes][12 label bytes]
// ChromaDB default M=16 -> M0=32, so neighborBytes = 128 and
//   spe (size per element) = 128 + dim*4 + 12
// e.g. nomic-embed-text dim=768 -> spe = 128 + 3072 + 12 = 3212
// count = filesize / spe.  We probe common dims; when none divides the file
// size exactly we report the raw size instead.
// NOTE(review): layout constants match the code's probe, but confirm against
// the hnswlib/ChromaDB on-disk format for other M values.
function readBinaryInfo(segDir) {
    const binPath = path.join(segDir, 'data_level0.bin');
    let size = 0;
    try { size = fs.statSync(binPath).size; } catch(_) { return null; }
    if (size === 0) return null;
    // Probe common embedding dims (most likely first); first exact divisor wins.
    for (const dim of [768, 384, 1536, 1024, 512, 256, 64, 128]) {
        const spe = 128 + dim * 4 + 12;
        if (size % spe === 0) return { dim, count: size / spe };
    }
    // Unknown dim -- report size only
    return { dim: null, count: null, size };
}

// ── Collect names from SQLite (best-effort, no checkpoint) ───────────────────
// Map of segment/collection UUID -> human-readable name, seeded from an
// optional JSON sidecar and then refined from the ChromaDB sqlite file.
const nameMap = {};
const namesFile = path.join(chromaDir, 'collection_names.json');
if (fs.existsSync(namesFile)) {
    try { Object.assign(nameMap, JSON.parse(fs.readFileSync(namesFile,'utf8'))); } catch(_) {}
}

const dbPath = path.join(chromaDir, 'chroma.sqlite3');
if (fs.existsSync(dbPath)) {
    let db = null;
    try {
        const Database = require('better-sqlite3');
        try { db = new Database(dbPath, { readonly: true, fileMustExist: true }); }
        catch(_) {}
        if (db) {
            // Try progressively looser queries; older ChromaDB schemas differ.
            // First query that returns rows wins.
            for (const sql of [
                "SELECT c.name, s.id FROM collections c JOIN segments s ON s.collection=c.id WHERE s.scope='VECTOR'",
                "SELECT c.name, s.id FROM collections c JOIN segments s ON s.collection=c.id",
                "SELECT name, id FROM collections",
            ]) {
                try {
                    const rows = db.prepare(sql).all();
                    for (const r of rows) if (r.id && r.name) nameMap[r.id] = r.name;
                    if (rows.length > 0) break;
                } catch(_) {}
            }
            try { db.close(); } catch(_) {}
        }
    } catch(e) {
        process.stdout.write('  i  SQLite: ' + e.message + '\n');
        try { db && db.close(); } catch(_) {}
    }
}

// ── Report each collection directory ─────────────────────────────────────────
// Each vector-segment subdirectory (named by UUID) is reported with its
// resolved name, embedding dim and chunk count.
let ready = 0, problems = 0;
try {
    for (const entry of fs.readdirSync(chromaDir, { withFileTypes: true })) {
        if (!entry.isDirectory()) continue;
        const uuid = entry.name;
        const dir  = path.join(chromaDir, uuid);
        const bin  = readBinaryInfo(dir);
        if (!bin) continue;  // not a vector segment dir or empty

        // Fall back to a shortened UUID when no friendly name is known.
        let name = nameMap[uuid] || uuid.slice(0,8);

        // index_meta.json overrides if present
        const metaPath = path.join(dir, 'index_meta.json');
        if (fs.existsSync(metaPath)) {
            try {
                const meta = JSON.parse(fs.readFileSync(metaPath, 'utf8'));
                if (meta.name)           name     = meta.name;
                if (meta.label_to_id)    bin.count = Object.keys(meta.label_to_id).length;
                if (meta.dimensionality) bin.dim   = meta.dimensionality;
            } catch(_) {}
        }

        // Refresh human-readable symlink
        // (best-effort: unlink any stale link first, then relink name -> uuid)
        if (name !== uuid) try {
            const lnk = path.join(chromaDir, name);
            try { fs.unlinkSync(lnk); } catch(_) {}
            if (!fs.existsSync(lnk)) fs.symlinkSync(uuid, lnk);
        } catch(_) {}

        const dimStr   = bin.dim   !== null ? bin.dim   : '?';
        const countStr = bin.count !== null ? bin.count : `~${bin.size}b`;
        process.stdout.write(`  ✓  ${name}  dim=${dimStr}  chunks=${countStr}\n`);
        ready++;
    }
} catch(e) {
    process.stdout.write('  ⚠  Error reading chromadb dir: ' + e.message + '\n');
    problems++;
}
process.stdout.write('\n  ' + ready + ' ready, ' + problems + ' with issues\n');
COLLCHECK
set -e
# ============ END VERIFICATION SECTION ============

# Extract this installer's verification section into the AFTER snapshot and
# compare it with the previous version's copy (best effort; never aborts).
set +e
_ver_sec_file="verification_section.sh"
_cur_ver_sec="$_OLD_DIR/${RAGWEED_INSTALL_VERSION}/AFTER/$_ver_sec_file"
_prev_ver_sec=""
[ -n "$_prev_after_ver" ] && _prev_ver_sec="$_OLD_DIR/${_prev_after_ver}/AFTER/$_ver_sec_file"

# Capture everything between the START/END markers from the running installer.
sed -n '/^# ============ START VERIFICATION SECTION ============/,/^# ============ END VERIFICATION SECTION ============/p' \
    "$_INSTALLER_PATH" > "$_cur_ver_sec" 2>/dev/null

if [ -f "$_cur_ver_sec" ]; then
    if [ -f "$_prev_ver_sec" ]; then
        _vdiff=$(diff "$_prev_ver_sec" "$_cur_ver_sec" 2>/dev/null)
        if [ -z "$_vdiff" ]; then
            ok "Verification section unchanged from v${_prev_after_ver}"
        else
            warn "Verification section CHANGED from v${_prev_after_ver}:"
            echo "$_vdiff" | head -40
        fi
    elif [ -z "$_prev_ver_sec" ]; then
        ok "Verification section extracted (no previous version to compare)"
    fi
fi
set -e

# Final banner: confirm the installed version and point at the help entry point.
printf '\n%s\n' "================================================================"
printf '%s\n' "  RAGWeed v${RAGWEED_INSTALL_VERSION} installation complete"
printf '%s\n\n' "================================================================"
printf '%s\n\n' "  ./run.sh help for help"
