From 886d8c923d0cee4ee939e8316429e76b9a24ed99 Mon Sep 17 00:00:00 2001
From: "Ehsan.Asadi" <ehsan.asadi@zoodfood.com>
Date: Tue, 30 Dec 2025 19:47:18 +0330
Subject: [PATCH] fix ci

---
 helm/peikarband/values-production.yaml |  28 +++++-
 scripts/diagnose-502.sh                | 126 +++++++++++++++++++++++++
 2 files changed, 150 insertions(+), 4 deletions(-)
 create mode 100755 scripts/diagnose-502.sh

diff --git a/helm/peikarband/values-production.yaml b/helm/peikarband/values-production.yaml
index 8ec6e5a..86cbe7d 100644
--- a/helm/peikarband/values-production.yaml
+++ b/helm/peikarband/values-production.yaml
@@ -36,11 +36,11 @@ podAnnotations:
 
 resources:
   limits:
-    cpu: 200m
-    memory: 256Mi
+    cpu: 500m
+    memory: 512Mi
   requests:
-    cpu: 50m
-    memory: 128Mi
+    cpu: 100m
+    memory: 256Mi
 
 autoscaling:
   enabled: false
@@ -95,6 +95,26 @@ redis:
       name: "peikarband-prod-secrets"
       key: "redis-password"
 
+# Override readiness probe for production (Reflex needs more time to start)
+readinessProbe:
+  httpGet:
+    path: /ping
+    port: 8000
+  initialDelaySeconds: 60  # Increased from 10 to allow Reflex to fully start
+  periodSeconds: 10
+  timeoutSeconds: 5
+  failureThreshold: 3
+
+# Override liveness probe
+livenessProbe:
+  httpGet:
+    path: /ping
+    port: 8000
+  initialDelaySeconds: 90  # Increased from 30
+  periodSeconds: 15
+  timeoutSeconds: 5
+  failureThreshold: 3
+
 configMap:
   data:
     APP_NAME: "peikarband"
diff --git a/scripts/diagnose-502.sh b/scripts/diagnose-502.sh
new file mode 100755
index 0000000..c410424
--- /dev/null
+++ b/scripts/diagnose-502.sh
@@ -0,0 +1,126 @@
+#!/bin/bash
+# Diagnostic script for 502 Bad Gateway error
+# Usage: diagnose-502.sh [NAMESPACE]   (NAMESPACE defaults to "production")
+
+# No `set -e`: one failing check (RBAC, missing metrics, ...) must not abort the
+# remaining checks. `pipefail` lets the `|| echo` fallbacks after pipelines fire.
+set -uo pipefail
+
+NAMESPACE=${1:-production}
+APP_NAME="peikarband"
+SELECTOR="app.kubernetes.io/name=$APP_NAME"
+
+echo "🔍 Diagnosing 502 Bad Gateway for $APP_NAME in namespace $NAMESPACE"
+echo "=========================================="
+echo ""
+
+# Colors
+RED='\033[0;31m'
+NC='\033[0m'
+
+# 1. Check Pods (stderr stays visible so cluster/auth errors are not read as "no pods")
+echo "1️⃣ Checking Pods..."
+echo "-------------------"
+PODS=$(kubectl get pods -n "$NAMESPACE" -l "$SELECTOR" --no-headers)
+if [[ -z "$PODS" ]]; then
+    echo -e "${RED}❌ No pods found!${NC}"
+    exit 1
+fi
+
+kubectl get pods -n "$NAMESPACE" -l "$SELECTOR"
+echo ""
+
+# Check pod status (only the first matching pod is inspected from here on)
+POD_STATUS=$(kubectl get pods -n "$NAMESPACE" -l "$SELECTOR" -o jsonpath='{.items[0].status.phase}' 2>/dev/null || echo "Unknown")
+POD_NAME=$(kubectl get pods -n "$NAMESPACE" -l "$SELECTOR" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
+
+if [[ "$POD_STATUS" != "Running" ]]; then
+    echo -e "${RED}❌ Pod is not Running! Status: $POD_STATUS${NC}"
+    echo ""
+    echo "Pod events:"
+    kubectl describe pod -n "$NAMESPACE" "$POD_NAME" | tail -20
+    echo ""
+fi
+
+# 2. Check Service
+echo "2️⃣ Checking Service..."
+echo "----------------------"
+kubectl get svc -n "$NAMESPACE" -l "$SELECTOR"
+echo ""
+
+# 3. Check Ingress
+echo "3️⃣ Checking Ingress..."
+echo "-----------------------"
+kubectl get ingress -n "$NAMESPACE" -l "$SELECTOR"
+echo ""
+
+# 4. Check Pod Logs
+if [[ -n "$POD_NAME" ]]; then
+    echo "4️⃣ Recent Pod Logs (last 30 lines)..."
+    echo "--------------------------------------"
+    kubectl logs -n "$NAMESPACE" "$POD_NAME" --tail=30 || echo "Could not fetch logs"
+    echo ""
+
+    # 5. Check Readiness/Liveness
+    echo "5️⃣ Checking Probe Status..."
+    echo "----------------------------"
+    READY=$(kubectl get pod -n "$NAMESPACE" "$POD_NAME" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || echo "Unknown")
+    echo "Ready: $READY"
+    # Check container status
+    CONTAINER_STATUS=$(kubectl get pod -n "$NAMESPACE" "$POD_NAME" -o jsonpath='{.status.containerStatuses[0].ready}' 2>/dev/null || echo "Unknown")
+    echo "Container Ready: $CONTAINER_STATUS"
+    echo ""
+fi
+
+# 6. Test from inside pod
+if [[ -n "$POD_NAME" && "$POD_STATUS" == "Running" ]]; then
+    echo "6️⃣ Testing /ping endpoint from inside pod..."
+    echo "---------------------------------------------"
+    # -f: treat HTTP >= 400 as failure; --max-time: never hang on a stuck app.
+    kubectl exec -n "$NAMESPACE" "$POD_NAME" -- curl -sf --max-time 5 http://localhost:8000/ping || echo -e "${RED}❌ /ping failed!${NC}"
+    echo ""
+fi
+
+# 7. Check Service Endpoints
+echo "7️⃣ Checking Service Endpoints..."
+echo "---------------------------------"
+SVC_NAME=$(kubectl get svc -n "$NAMESPACE" -l "$SELECTOR" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
+if [[ -n "$SVC_NAME" ]]; then
+    kubectl get endpoints -n "$NAMESPACE" "$SVC_NAME"
+    ENDPOINTS=$(kubectl get endpoints -n "$NAMESPACE" "$SVC_NAME" -o jsonpath='{.subsets[0].addresses[*].ip}' 2>/dev/null || echo "")
+    if [[ -z "$ENDPOINTS" ]]; then
+        echo -e "${RED}❌ No endpoints! Service cannot route traffic.${NC}"
+        echo "This is likely because readiness probe is failing."
+    fi
+    echo ""
+fi
+
+# 8. Check Resources
+if [[ -n "$POD_NAME" ]]; then
+    echo "8️⃣ Checking Resource Usage..."
+    echo "------------------------------"
+    kubectl top pod -n "$NAMESPACE" "$POD_NAME" 2>/dev/null || echo "Metrics not available"
+    echo ""
+fi
+
+# 9. Check Events
+echo "9️⃣ Recent Events..."
+echo "-------------------"
+kubectl get events -n "$NAMESPACE" --sort-by='.lastTimestamp' | grep -F -- "$APP_NAME" | tail -10 || echo "No recent events"
+echo ""
+
+# 10. NetworkPolicy check
+echo "🔟 Checking NetworkPolicy..."
+echo "---------------------------"
+kubectl get networkpolicy -n "$NAMESPACE" -l "$SELECTOR" || echo "No NetworkPolicy found"
+echo ""
+
+echo "=========================================="
+echo "✅ Diagnosis complete!"
+echo ""
+echo "Common fixes:"
+echo "1. If pod is CrashLoopBackOff: Check logs and resource limits"
+echo "2. If no endpoints: Readiness probe is failing - check /ping endpoint"
+echo "3. If NetworkPolicy exists: Check if it allows ingress traffic"
+echo "4. If resources exhausted: Increase limits in values-production.yaml"
+