#!/bin/bash
# Telemetry health check with fail-closed behavior
# Returns 0 on success, 1 on failure with alert

TELEMETRY_FILE="/home/ccuser/shared/kpi/marketing-telemetry.json"
LOG_FILE="/home/ccuser/shared/kpi/telemetry-health.log"
ALERT_SCRIPT="/home/ccuser/shared/scripts/telemetry-alert.sh"
MAX_FAILURES=3

# Append a timestamped line to the health log.
# Arguments: $1 - message text (severity prefix included by the caller)
log() {
    printf '%s: %s\n' "$(date)" "$1" >> "$LOG_FILE"
}

# Fail closed: log the reason, trigger the alert script, and exit 1.
# Every failure path goes through here so the alert is always attempted
# before the script terminates.
# Arguments: $1 - message text to log
fail() {
    log "$1"
    # A failing alert script must not change the exit status of the health
    # check itself; record the problem and still exit 1 below.
    "$ALERT_SCRIPT" || log "WARNING: Alert script $ALERT_SCRIPT exited with status $?"
    exit 1
}

# Check if file exists
if [ ! -f "$TELEMETRY_FILE" ]; then
    fail "ERROR: Telemetry file not found"
fi

# Check last update time (basic staleness check).
# A non-zero jq status means jq is unavailable or the file could not be
# read/parsed; that is a hard failure, not a "never updated" condition.
if ! LAST_UPDATED=$(jq -r '.last_updated' "$TELEMETRY_FILE" 2>/dev/null); then
    fail "ERROR: Unable to read telemetry file with jq"
fi
if [ "$LAST_UPDATED" = "null" ] || [ -z "$LAST_UPDATED" ]; then
    # Don't fail on first check, just warn
    log "WARNING: Telemetry never updated"
fi

# Check consecutive failures
if ! FAILURES=$(jq -r '.telemetry.daily_job.consecutive_failures' "$TELEMETRY_FILE" 2>/dev/null); then
    fail "ERROR: Unable to read consecutive failure count with jq"
fi

# The counter must be a non-negative integer. A missing field (jq prints
# "null"), an empty value, or any other non-numeric value would make the
# numeric comparison below error out and silently take the "healthy" branch,
# so treat it as a failure instead.
if ! [[ "$FAILURES" =~ ^[0-9]+$ ]]; then
    fail "ERROR: Consecutive failure count is missing or not a number: '$FAILURES'"
fi

if [ "$FAILURES" -ge "$MAX_FAILURES" ]; then
    fail "CRITICAL: $FAILURES consecutive failures exceeds threshold $MAX_FAILURES"
fi

log "OK: Telemetry health check passed"
exit 0
