#!/usr/bin/env bash
set -euo pipefail

## -----------------------------------------------------------------------------
## Cricket ESP32-P4 Build Helper (fail-fast, proves model selection)
## Usage: build_cricket.sh
## -----------------------------------------------------------------------------

## Report a fatal error and stop the script.
## All arguments are joined with single spaces, prefixed with "ERROR: " and
## written to stderr; the shell then exits with status 1.
## Backslash sequences in the message are printed literally.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}
## Print a progress message on stdout, prefixed with "==> ".
## All arguments are joined with single spaces into one line.
info() {
  printf '==> %s\n' "$*"
}

## Resolve a model file name inside a directory, tolerating a different
## `_pthresh<digits>` / `_pthreshNN` token in the name.
##
## Arguments:
##   $1 - directory to search (top level only)
##   $2 - requested model file name
## Output (stdout, no trailing newline):
##   - the requested name, when that exact file exists in the directory;
##   - otherwise the single *.tflite file whose name, with extension and
##     `_pthresh...` tokens removed, equals the requested name treated the
##     same way;
##   - "AMBIGUOUS:<name> <name> ..." when several files qualify.
## Returns:
##   0 when something was printed, 1 when nothing matched.
resolve_compatible_model_name() {
  local dir="$1"
  local requested="$2"
  local strip_re='s/_pthresh([0-9][0-9][0-9]?|NN)//g'
  local wanted_key path file_name file_key
  local -a hits=()

  ## Fast path: the exact file is present, no fuzzy matching needed.
  if [[ -f "$dir/$requested" ]]; then
    printf "%s" "$requested"
    return 0
  fi

  ## Comparison key = name without its last extension and without any
  ## _pthresh token.
  wanted_key="$(sed -E "$strip_re" <<<"${requested%.*}")"

  while IFS= read -r -d '' path; do
    file_name="${path##*/}"
    file_key="$(sed -E "$strip_re" <<<"${file_name%.*}")"
    if [[ "$file_key" == "$wanted_key" ]]; then
      hits+=("$file_name")
    fi
  done < <(find "$dir" -maxdepth 1 -type f -name '*.tflite' -print0)

  case "${#hits[@]}" in
    0) return 1 ;;
    1) printf "%s" "${hits[0]}" ;;
    *) printf "AMBIGUOUS:%s" "${hits[*]}" ;;
  esac
  return 0
}

info "Cricket ESP32-P4 Build Helper"

## Per-user checkout locations, selected by $USER.
## PROJECT_WATER_MOML_DEFAULT may be empty; in that case the caller has to
## provide PROJECT_WATER_MOML through the environment.
case "${USER:-}" in
  uhuman)
    CRICKET_DIR="/Users/uhuman/PycharmProjects/PlatformIO/Projects/M5Tab5-HalUD/platforms/Cricket_ESP32_P4"
    PROJECT_WATER_MOML_DEFAULT="/Users/uhuman/PycharmProjects/projectWater_vs/cricket_AudioData_and_Models/projectWater/config/project_config.moml"
    ;;
  ming)
    CRICKET_DIR="/Users/ming/Development/esp32/cricket"
    PROJECT_WATER_MOML_DEFAULT=""
    ;;
  *)
    die "Unknown user '${USER:-<unset>}' - add your path to this script"
    ;;
esac

if [[ ! -d "$CRICKET_DIR" ]]; then
  die "Cricket directory not found: $CRICKET_DIR"
fi
info "Directory: $CRICKET_DIR"

## An exported PROJECT_WATER_MOML takes precedence over the per-user default.
PROJECT_WATER_MOML="${PROJECT_WATER_MOML:-$PROJECT_WATER_MOML_DEFAULT}"
if [[ -z "$PROJECT_WATER_MOML" ]]; then
  die "PROJECT_WATER_MOML is required (export PROJECT_WATER_MOML=/absolute/path/to/project_config.moml)"
fi
if [[ ! -f "$PROJECT_WATER_MOML" ]]; then
  die "PROJECT_WATER_MOML not found: $PROJECT_WATER_MOML"
fi
## Exported so child processes started later can read it.
export PROJECT_WATER_MOML
info "MOML source: $PROJECT_WATER_MOML"

## Read a scalar value for a key from the MOML file at $PROJECT_WATER_MOML.
##
## Globals:   PROJECT_WATER_MOML (read) - path of the file to search
## Arguments: $1 - key name; it is interpolated into an extended regex, so it
##                 must not contain regex metacharacters
## Outputs:   the value on stdout, without a trailing newline
## Exits:     via die when the key is absent or its value is empty
##
## Parsing rules:
##   - a line matches when it is "<optional whitespace>KEY<optional whitespace>:";
##     if the key occurs several times, the last occurrence wins;
##   - everything from the first '#' onward is treated as a comment, even
##     inside quotes;
##   - leading/trailing whitespace (including a CR from CRLF files) is removed;
##   - one pair of matching surrounding quotes ('...' or "...") is removed.
##
## Trimming is done with parameter expansion rather than by piping through
## xargs: xargs parses quotes and backslashes itself, so a value containing an
## apostrophe or an unbalanced quote made it fail, backslashes were dropped and
## inner whitespace was collapsed.
moml_scalar() {
  local key="$1"
  local line value
  ## `|| true`: a no-match grep must not trip errexit/pipefail, so that the
  ## explicit die below can report the missing key.
  line="$(grep -E "^[[:space:]]*${key}[[:space:]]*:" "$PROJECT_WATER_MOML" | tail -n 1 || true)"
  [[ -n "$line" ]] || die "Missing MOML key: $key"

  ## Keep what follows the first ':' and drop any trailing comment.
  value="${line#*:}"
  value="${value%%#*}"

  ## Trim leading, then trailing, whitespace.
  value="${value#"${value%%[![:space:]]*}"}"
  value="${value%"${value##*[![:space:]]}"}"

  ## Remove one pair of matching surrounding quotes.
  case "$value" in
    \"*\" | \'*\') value="${value:1:${#value}-2}" ;;
  esac

  [[ -n "$value" ]] || die "Empty MOML key: $key"
  printf "%s" "$value"
}

## Model type as written in the MOML file, plus a lower-cased copy used for
## pattern matching.
MODEL_TYPE_RAW="$(moml_scalar "audioMatrix_model_type")"
MODEL_TYPE_LC="$(tr '[:upper:]' '[:lower:]' <<<"$MODEL_TYPE_RAW")"

## Destination: the tflite_models folder inside the Cricket checkout.
CRICKET_TFLITE_DIR="$CRICKET_DIR/main/tflite_models"

## Source: the project root is the parent of the directory holding the MOML
## file; models live under models/audioMatrix_models below that root.
PROJECT_WATER_PROJECT_DIR="$(cd "$(dirname "$PROJECT_WATER_MOML")/.." && pwd)"
PROJECT_WATER_MODELS_DIR="$PROJECT_WATER_PROJECT_DIR/models/audioMatrix_models"

if [[ ! -d "$PROJECT_WATER_MODELS_DIR" ]]; then
  die "Project model directory not found: $PROJECT_WATER_MODELS_DIR"
fi
if [[ ! -d "$CRICKET_TFLITE_DIR" ]]; then
  die "Cricket tflite_models directory not found: $CRICKET_TFLITE_DIR"
fi

## -----------------------------------------------------------------------------
## Sync the model file(s) named in the MOML file into $CRICKET_TFLITE_DIR.
##
## A lower-cased model type matching ^temporal.+s$ is handled as a split
## encoder + head pair; every other type is handled as a single inference model.
## For the head and for the single model, a file already in the Cricket dir
## that resolves to a *different* compatible name is kept and nothing is copied.
## -----------------------------------------------------------------------------
if [[ "$MODEL_TYPE_LC" =~ ^temporal.+s$ ]]; then
  SPLIT_ENCODER_MODEL="$(moml_scalar "INFERENCE_AUDIOMATRIX_ENCODER_MODEL")"
  SPLIT_HEAD_MODEL="$(moml_scalar "INFERENCE_AUDIOMATRIX_HEAD_MODEL")"
  ## The encoder must exist under its exact name and is always copied,
  ## overwriting any copy already in the Cricket dir.
  [[ -f "$PROJECT_WATER_MODELS_DIR/$SPLIT_ENCODER_MODEL" ]] || die "Split encoder model missing: $PROJECT_WATER_MODELS_DIR/$SPLIT_ENCODER_MODEL"
  cp -f "$PROJECT_WATER_MODELS_DIR/$SPLIT_ENCODER_MODEL" "$CRICKET_TFLITE_DIR/"
  info "Synced split encoder: $SPLIT_ENCODER_MODEL"

  ## Look in the Cricket dir first. `|| true` tolerates the non-zero status the
  ## resolver returns when nothing matches (the result is then empty).
  LOCAL_HEAD_RESOLVED="$(resolve_compatible_model_name "$CRICKET_TFLITE_DIR" "$SPLIT_HEAD_MODEL" || true)"
  if [[ -n "${LOCAL_HEAD_RESOLVED:-}" && "$LOCAL_HEAD_RESOLVED" == AMBIGUOUS:* ]]; then
    die "Ambiguous compatible split head models already in Cricket dir for $SPLIT_HEAD_MODEL: ${LOCAL_HEAD_RESOLVED#AMBIGUOUS:}"
  fi
  if [[ -n "${LOCAL_HEAD_RESOLVED:-}" && "$LOCAL_HEAD_RESOLVED" != "$SPLIT_HEAD_MODEL" ]]; then
    ## A differently named compatible head is already present locally: keep it.
    info "Preserving local compatible split head override in Cricket dir: requested=$SPLIT_HEAD_MODEL resolved=$LOCAL_HEAD_RESOLVED"
  else
    ## Nothing local, or the exact requested name is present locally:
    ## (re)copy from the project models dir, accepting a single compatible
    ## variant when the exact name is not there.
    SOURCE_HEAD_RESOLVED="$(resolve_compatible_model_name "$PROJECT_WATER_MODELS_DIR" "$SPLIT_HEAD_MODEL" || true)"
    [[ -n "${SOURCE_HEAD_RESOLVED:-}" ]] || die "Split head model missing: $PROJECT_WATER_MODELS_DIR/$SPLIT_HEAD_MODEL"
    [[ "$SOURCE_HEAD_RESOLVED" != AMBIGUOUS:* ]] || die "Ambiguous compatible split head models in projectWater models dir for $SPLIT_HEAD_MODEL: ${SOURCE_HEAD_RESOLVED#AMBIGUOUS:}"
    cp -f "$PROJECT_WATER_MODELS_DIR/$SOURCE_HEAD_RESOLVED" "$CRICKET_TFLITE_DIR/"
    if [[ "$SOURCE_HEAD_RESOLVED" == "$SPLIT_HEAD_MODEL" ]]; then
      info "Synced split head: $SPLIT_HEAD_MODEL"
    else
      info "Synced compatible split head variant: requested=$SPLIT_HEAD_MODEL resolved=$SOURCE_HEAD_RESOLVED"
    fi
  fi
else
  ## Single-model case: same local-override / source-sync logic as for the
  ## split head above, applied to INFERENCE_AUDIOMATRIX_MODEL.
  MONO_MODEL="$(moml_scalar "INFERENCE_AUDIOMATRIX_MODEL")"
  LOCAL_MONO_RESOLVED="$(resolve_compatible_model_name "$CRICKET_TFLITE_DIR" "$MONO_MODEL" || true)"
  if [[ -n "${LOCAL_MONO_RESOLVED:-}" && "$LOCAL_MONO_RESOLVED" == AMBIGUOUS:* ]]; then
    die "Ambiguous compatible inference models already in Cricket dir for $MONO_MODEL: ${LOCAL_MONO_RESOLVED#AMBIGUOUS:}"
  fi
  if [[ -n "${LOCAL_MONO_RESOLVED:-}" && "$LOCAL_MONO_RESOLVED" != "$MONO_MODEL" ]]; then
    ## A differently named compatible model is already present locally: keep it.
    info "Preserving local compatible inference override in Cricket dir: requested=$MONO_MODEL resolved=$LOCAL_MONO_RESOLVED"
  else
    ## Nothing local, or the exact requested name is present locally:
    ## (re)copy from the project models dir.
    SOURCE_MONO_RESOLVED="$(resolve_compatible_model_name "$PROJECT_WATER_MODELS_DIR" "$MONO_MODEL" || true)"
    [[ -n "${SOURCE_MONO_RESOLVED:-}" ]] || die "Inference model missing: $PROJECT_WATER_MODELS_DIR/$MONO_MODEL"
    [[ "$SOURCE_MONO_RESOLVED" != AMBIGUOUS:* ]] || die "Ambiguous compatible inference models in projectWater models dir for $MONO_MODEL: ${SOURCE_MONO_RESOLVED#AMBIGUOUS:}"
    cp -f "$PROJECT_WATER_MODELS_DIR/$SOURCE_MONO_RESOLVED" "$CRICKET_TFLITE_DIR/"
    if [[ "$SOURCE_MONO_RESOLVED" == "$MONO_MODEL" ]]; then
      info "Synced inference model: $MONO_MODEL"
    else
      info "Synced compatible inference variant: requested=$MONO_MODEL resolved=$SOURCE_MONO_RESOLVED"
    fi
  fi
fi

## Source ESP-IDF (suppress verbose output)
## ~/export-esp.sh is preferred; ~/esp/esp-idf/export.sh is the fallback.
## NOTE(review): stdout and stderr of the export script are discarded, so a
## failure inside it would end this script without any message - confirm that
## this is acceptable.
if [[ ! -f "$HOME/export-esp.sh" && ! -f "$HOME/esp/esp-idf/export.sh" ]]; then
  die "ESP-IDF not found. Expected: ~/export-esp.sh or ~/esp/esp-idf/export.sh"
fi
if [[ -f "$HOME/export-esp.sh" ]]; then
  source "$HOME/export-esp.sh" > /dev/null 2>&1
else
  source "$HOME/esp/esp-idf/export.sh" > /dev/null 2>&1
fi

if ! command -v idf.py >/dev/null 2>&1; then
  die "idf.py not on PATH after sourcing ESP-IDF"
fi

cd "$CRICKET_DIR" || die "Cannot cd to: $CRICKET_DIR"
info "Working directory: $PWD"

## Timestamped log file; the model-selection lines are parsed from it later.
printf -v LOG '/tmp/cricket_build_%s.log' "$(date +%Y%m%d_%H%M%S)"
info "Build log: $LOG"

## Run the configure step explicitly so the model-selection lines are printed
## on every run. This step creates/truncates the log.
info "Reconfigure (forces CMake selection output)..."
idf.py reconfigure -DPROJECT_WATER_MOML="$PROJECT_WATER_MOML" 2>&1 \
  | tee "$LOG"

## The build output is appended to the same log.
info "Build..."
idf.py build -DPROJECT_WATER_MOML="$PROJECT_WATER_MOML" 2>&1 \
  | tee -a "$LOG"

echo ""

## -----------------------------------------------------------------------------
## Model provenance: must come from CMake output (NOT “newest file” heuristics)
##
## Scans the build log ($LOG) for the model-selection lines:
##   single model: "-- Using TFLite model[ (<tag>)]: <name>" together with
##                 "-- TFLite binary symbol: <symbol>"
##   split model:  "-- Using split temporal TFLite encoder: <name>" and
##                 "-- Using split temporal TFLite head: <name>"
## Sets MODEL_NAME (the head's name in the split case) for the checks below.
## Repeated identical lines are accepted; differing lines are a hard failure.
## -----------------------------------------------------------------------------

## `|| true`: a grep with no match must not abort the script under errexit;
## the empty result is handled by the branches below.
MODEL_NAME_LINES="$(grep -E -- "-- Using TFLite model( \\([^)]+\\))?:" "$LOG" || true)"
SPLIT_ENCODER_LINES="$(grep -E -- "-- Using split temporal TFLite encoder:" "$LOG" || true)"
SPLIT_HEAD_LINES="$(grep -E -- "-- Using split temporal TFLite head[[:space:]]*:" "$LOG" || true)"

if [[ -n "$MODEL_NAME_LINES" ]]; then
  ## Allow duplicates if identical; hard-fail only if conflicting.
  MODEL_NAME_UNIQ="$(printf "%s\n" "$MODEL_NAME_LINES" | sed '/^$/d' | sort -u)"
  MODEL_NAME_UNIQ_COUNT="$(printf "%s\n" "$MODEL_NAME_UNIQ" | wc -l | tr -d ' ')"
  ## $'\n' inserts a real newline; a "\n" inside double quotes would be printed
  ## literally because die does not interpret backslash escapes.
  [[ "$MODEL_NAME_UNIQ_COUNT" == "1" ]] || die "Conflicting '-- Using TFLite model' lines found:"$'\n'"$MODEL_NAME_UNIQ"

  MODEL_NAME_LINE="$MODEL_NAME_UNIQ"
  ## If the symbol line appears more than once, the last occurrence is used.
  MODEL_SYM_LINE="$(grep -E -- "-- TFLite binary symbol:" "$LOG" | tail -n 1 || true)"
  [[ -n "$MODEL_SYM_LINE" ]] || die "Missing CMake line: '-- TFLite binary symbol:' (expected when embedding model)."

  ## Strip the fixed prefix (and optional parenthesised tag) to get the name.
  MODEL_NAME="$(printf '%s\n' "$MODEL_NAME_LINE" | sed -E 's/^-- Using TFLite model( \([^)]+\))?:[[:space:]]+//')"
  [[ -n "$MODEL_NAME" ]] || die "Parsed empty model name from: $MODEL_NAME_LINE"
  info "Model selected by CMake: $MODEL_NAME"

  MODEL_SYM="$(printf '%s\n' "$MODEL_SYM_LINE" | sed -E 's/^-- TFLite binary symbol:[[:space:]]+//')"
  [[ -n "$MODEL_SYM" ]] || die "Parsed empty symbol from: $MODEL_SYM_LINE"
  info "Embedded symbol: $MODEL_SYM"
elif [[ -n "$SPLIT_ENCODER_LINES" && -n "$SPLIT_HEAD_LINES" ]]; then
  SPLIT_ENCODER_UNIQ="$(printf "%s\n" "$SPLIT_ENCODER_LINES" | sed '/^$/d' | sort -u)"
  SPLIT_HEAD_UNIQ="$(printf "%s\n" "$SPLIT_HEAD_LINES" | sed '/^$/d' | sort -u)"
  [[ "$(printf "%s\n" "$SPLIT_ENCODER_UNIQ" | wc -l | tr -d ' ')" == "1" ]] || die "Conflicting split encoder lines found:"$'\n'"$SPLIT_ENCODER_UNIQ"
  [[ "$(printf "%s\n" "$SPLIT_HEAD_UNIQ" | wc -l | tr -d ' ')" == "1" ]] || die "Conflicting split head lines found:"$'\n'"$SPLIT_HEAD_UNIQ"

  SPLIT_ENCODER_NAME="$(printf '%s\n' "$SPLIT_ENCODER_UNIQ" | sed -E 's/^-- Using split temporal TFLite encoder:[[:space:]]+//')"
  SPLIT_HEAD_NAME="$(printf '%s\n' "$SPLIT_HEAD_UNIQ" | sed -E 's/^-- Using split temporal TFLite head[[:space:]]*:[[:space:]]+//')"
  [[ -n "$SPLIT_ENCODER_NAME" ]] || die "Parsed empty split encoder name from: $SPLIT_ENCODER_UNIQ"
  [[ -n "$SPLIT_HEAD_NAME" ]] || die "Parsed empty split head name from: $SPLIT_HEAD_UNIQ"
  ## The later filename and on-disk checks run against the head model.
  MODEL_NAME="$SPLIT_HEAD_NAME"
  info "Split encoder selected by CMake: $SPLIT_ENCODER_NAME"
  info "Split head selected by CMake: $SPLIT_HEAD_NAME"
else
  die "No model-selection lines found in CMake output."
fi

## -----------------------------------------------------------------------------
## Enforce output mode token in selected filename
##
## $MODEL_NAME must contain exactly one of the tokens _softmax_ or _logits_.
## Both present, or neither present, prints the filename and a reason on
## stdout and exits with status 1. Otherwise MODEL_OUTPUT_MODE is set.
## -----------------------------------------------------------------------------
HAS_SOFTMAX=0
HAS_LOGITS=0
case "$MODEL_NAME" in
  *_softmax_*) HAS_SOFTMAX=1 ;;
esac
case "$MODEL_NAME" in
  *_logits_*) HAS_LOGITS=1 ;;
esac

## Dispatch on the two flags side by side: "<softmax><logits>".
case "${HAS_SOFTMAX}${HAS_LOGITS}" in
  11)
    printf '%s\n' "Selected model filename: $MODEL_NAME"
    printf '%s\n' "Mode parse reason: ambiguous (contains both _softmax_ and _logits_)"
    exit 1
    ;;
  00)
    printf '%s\n' "Selected model filename: $MODEL_NAME"
    printf '%s\n' "Mode parse reason: missing required token (_softmax_ or _logits_)"
    exit 1
    ;;
  01)
    MODEL_OUTPUT_MODE="logits"
    ;;
  10)
    MODEL_OUTPUT_MODE="softmax"
    ;;
esac
printf '%s\n' "MODEL_OUTPUT_MODE: $MODEL_OUTPUT_MODE"

printf '\n'

## -----------------------------------------------------------------------------
## Disk check: CMake-selected model must exist
## -----------------------------------------------------------------------------
TFL_DIR="$CRICKET_DIR/main/tflite_models"
[[ -d "$TFL_DIR" ]] || die "tflite_models dir missing: $TFL_DIR"
[[ -f "$TFL_DIR/$MODEL_NAME" ]] || die "CMake-selected model missing on disk: $TFL_DIR/$MODEL_NAME"
info "Selected model exists on disk: $MODEL_NAME"

echo ""

## -----------------------------------------------------------------------------
## Artifact must exist
## -----------------------------------------------------------------------------
BIN="$CRICKET_DIR/build/cricket.bin"
[[ -f "$BIN" ]] || die "Build artifact missing: $BIN"
info "Built: $BIN"
## Size via `wc -c` rather than `stat -f '%z bytes'`: that stat form is
## BSD/macOS-only (GNU stat reads -f as "file system" mode), whereas `wc -c`
## behaves the same everywhere. Some implementations pad the count with
## spaces, hence the tr.
BIN_SIZE_BYTES="$(wc -c < "$BIN" | tr -d '[:space:]')"
info "Size: $BIN_SIZE_BYTES bytes"

echo "✅ Build complete and successful!"
echo "To flash: ./flash_cricket.sh"
