From 886d8c923d0cee4ee939e8316429e76b9a24ed99 Mon Sep 17 00:00:00 2001
From: "Ehsan.Asadi"
Date: Tue, 30 Dec 2025 19:47:18 +0330
Subject: [PATCH] fixe ci

---
NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy. Before applying, confirm the indentation of
the two context lines under "redis:" in the second values-production.yaml
hunk (assumed to be four spaces), restore the author e-mail on the From:
header, and regenerate with git format-patch so the blob id on the
scripts/diagnose-502.sh index line matches the revised script.

 helm/peikarband/values-production.yaml |  28 +++++-
 scripts/diagnose-502.sh                | 155 +++++++++++++++++++++++++
 2 files changed, 179 insertions(+), 4 deletions(-)
 create mode 100755 scripts/diagnose-502.sh

diff --git a/helm/peikarband/values-production.yaml b/helm/peikarband/values-production.yaml
index 8ec6e5a..86cbe7d 100644
--- a/helm/peikarband/values-production.yaml
+++ b/helm/peikarband/values-production.yaml
@@ -36,11 +36,11 @@ podAnnotations:
 
 resources:
   limits:
-    cpu: 200m
-    memory: 256Mi
+    cpu: 500m
+    memory: 512Mi
   requests:
-    cpu: 50m
-    memory: 128Mi
+    cpu: 100m
+    memory: 256Mi
 
 autoscaling:
   enabled: false
@@ -95,6 +95,26 @@ redis:
     name: "peikarband-prod-secrets"
     key: "redis-password"
 
+# Override readiness probe for production (Reflex needs more time to start)
+readinessProbe:
+  httpGet:
+    path: /ping
+    port: 8000
+  initialDelaySeconds: 60  # Increased from 10 to allow Reflex to fully start
+  periodSeconds: 10
+  timeoutSeconds: 5
+  failureThreshold: 3
+
+# Override liveness probe
+livenessProbe:
+  httpGet:
+    path: /ping
+    port: 8000
+  initialDelaySeconds: 90  # Increased from 30
+  periodSeconds: 15
+  timeoutSeconds: 5
+  failureThreshold: 3
+
 configMap:
   data:
     APP_NAME: "peikarband"
diff --git a/scripts/diagnose-502.sh b/scripts/diagnose-502.sh
new file mode 100755
index 0000000..c410424
--- /dev/null
+++ b/scripts/diagnose-502.sh
@@ -0,0 +1,155 @@
+#!/bin/bash
+# Diagnostic script for 502 Bad Gateway error
+#
+# Usage: diagnose-502.sh [namespace]   (namespace defaults to "production")
+#
+# Every check is best-effort: a failing kubectl call is reported and the
+# script moves on, so one missing resource or permission does not hide the
+# remaining checks. That is why "set -e" is deliberately not used. pipefail
+# is enabled so that "cmd | grep | tail || fallback" sees grep's failure.
+
+set -uo pipefail
+
+NAMESPACE=${1:-production}
+APP_NAME="peikarband"
+SELECTOR="app.kubernetes.io/name=$APP_NAME"
+
+# Colors
+RED='\033[0;31m'
+NC='\033[0m'
+
+if ! command -v kubectl >/dev/null 2>&1; then
+  echo -e "${RED}❌ kubectl not found in PATH${NC}" >&2
+  exit 1
+fi
+
+echo "🔍 Diagnosing 502 Bad Gateway for $APP_NAME in namespace $NAMESPACE"
+echo "=========================================="
+echo ""
+
+# 1. Check Pods
+echo "1️⃣ Checking Pods..."
+echo "-------------------"
+# A kubectl failure (wrong context, RBAC, unreachable cluster) is reported on
+# its own so that it is not mistaken for "no pods".
+if ! PODS=$(kubectl get pods -n "$NAMESPACE" -l "$SELECTOR" --no-headers 2>/dev/null); then
+  echo -e "${RED}❌ Could not list pods (check kubectl context and permissions)${NC}" >&2
+  exit 1
+fi
+if [ -z "$PODS" ]; then
+  echo -e "${RED}❌ No pods found!${NC}"
+  exit 1
+fi
+
+kubectl get pods -n "$NAMESPACE" -l "$SELECTOR"
+echo ""
+
+# Check pod status (only the first matching pod is inspected in detail)
+POD_STATUS=$(kubectl get pods -n "$NAMESPACE" -l "$SELECTOR" -o jsonpath='{.items[0].status.phase}' 2>/dev/null || echo "Unknown")
+POD_NAME=$(kubectl get pods -n "$NAMESPACE" -l "$SELECTOR" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
+
+if [ "$POD_STATUS" != "Running" ]; then
+  echo -e "${RED}❌ Pod is not Running! Status: $POD_STATUS${NC}"
+  echo ""
+  # Without a pod name "kubectl describe pod" would dump every pod in the
+  # namespace, so only describe when the name lookup succeeded.
+  if [ -n "$POD_NAME" ]; then
+    echo "Pod events:"
+    kubectl describe pod -n "$NAMESPACE" "$POD_NAME" | tail -20
+    echo ""
+  fi
+fi
+
+# 2. Check Service
+echo "2️⃣ Checking Service..."
+echo "----------------------"
+kubectl get svc -n "$NAMESPACE" -l "$SELECTOR"
+echo ""
+
+# 3. Check Ingress
+echo "3️⃣ Checking Ingress..."
+echo "-----------------------"
+kubectl get ingress -n "$NAMESPACE" -l "$SELECTOR"
+echo ""
+
+# 4. Check Pod Logs
+if [ -n "$POD_NAME" ]; then
+  echo "4️⃣ Recent Pod Logs (last 30 lines)..."
+  echo "--------------------------------------"
+  kubectl logs -n "$NAMESPACE" "$POD_NAME" --tail=30 || echo "Could not fetch logs"
+  echo ""
+fi
+
+# 5. Check Readiness/Liveness
+if [ -n "$POD_NAME" ]; then
+  echo "5️⃣ Checking Probe Status..."
+  echo "----------------------------"
+  READY=$(kubectl get pod -n "$NAMESPACE" "$POD_NAME" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || echo "Unknown")
+  echo "Ready: $READY"
+
+  # Check container status
+  CONTAINER_STATUS=$(kubectl get pod -n "$NAMESPACE" "$POD_NAME" -o jsonpath='{.status.containerStatuses[0].ready}' 2>/dev/null || echo "Unknown")
+  echo "Container Ready: $CONTAINER_STATUS"
+  echo ""
+fi
+
+# 6. Test from inside pod
+if [ -n "$POD_NAME" ] && [ "$POD_STATUS" = "Running" ]; then
+  echo "6️⃣ Testing /ping endpoint from inside pod..."
+  echo "---------------------------------------------"
+  # -f makes curl exit non-zero on HTTP >= 400, so an error response from
+  # the app is reported as a failure; --max-time bounds a hung request.
+  kubectl exec -n "$NAMESPACE" "$POD_NAME" -- curl -sf --max-time 5 http://localhost:8000/ping || echo -e "${RED}❌ /ping failed!${NC}"
+  echo ""
+fi
+
+# 7. Check Service Endpoints
+echo "7️⃣ Checking Service Endpoints..."
+echo "---------------------------------"
+SVC_NAME=$(kubectl get svc -n "$NAMESPACE" -l "$SELECTOR" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
+if [ -n "$SVC_NAME" ]; then
+  kubectl get endpoints -n "$NAMESPACE" "$SVC_NAME"
+  ENDPOINTS=$(kubectl get endpoints -n "$NAMESPACE" "$SVC_NAME" -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null || echo "")
+  if [ -z "$ENDPOINTS" ]; then
+    echo -e "${RED}❌ No endpoints! Service cannot route traffic.${NC}"
+    echo "This is likely because readiness probe is failing."
+  fi
+  echo ""
+fi
+
+# 8. Check Resources
+if [ -n "$POD_NAME" ]; then
+  echo "8️⃣ Checking Resource Usage..."
+  echo "------------------------------"
+  kubectl top pod -n "$NAMESPACE" "$POD_NAME" 2>/dev/null || echo "Metrics not available"
+  echo ""
+fi
+
+# 9. Check Events
+echo "9️⃣ Recent Events..."
+echo "-------------------"
+# With pipefail a grep that matches nothing fails the pipeline, so the
+# fallback message is actually printed.
+kubectl get events -n "$NAMESPACE" --sort-by='.lastTimestamp' | grep -F -- "$APP_NAME" | tail -10 || echo "No recent events"
+echo ""
+
+# 10. NetworkPolicy check
+echo "🔟 Checking NetworkPolicy..."
+echo "---------------------------"
+# kubectl exits 0 when nothing matches, so test the output, not the status.
+NETPOLS=$(kubectl get networkpolicy -n "$NAMESPACE" -l "$SELECTOR" --no-headers 2>/dev/null || echo "")
+if [ -n "$NETPOLS" ]; then
+  kubectl get networkpolicy -n "$NAMESPACE" -l "$SELECTOR"
+else
+  echo "No NetworkPolicy found"
+fi
+echo ""
+
+echo "=========================================="
+echo "✅ Diagnosis complete!"
+echo ""
+echo "Common fixes:"
+echo "1. If pod is CrashLoopBackOff: Check logs and resource limits"
+echo "2. If no endpoints: Readiness probe is failing - check /ping endpoint"
+echo "3. If NetworkPolicy exists: Check if it allows ingress traffic"
+echo "4. If resources exhausted: Increase limits in values-production.yaml"