# infrastructure/pool-scaler.yaml

# Pool scaler - watches for pending Argo workflow pods and scales the
# high-compute pool via the Civo API. Handles scale-up from zero since
# the cluster autoscaler can't do this without a node template.
# The cluster autoscaler still handles scale-down.
# Crossplane Object that has the kubernetes provider create the pool-scaler
# Deployment from the embedded manifest below.
apiVersion: kubernetes.crossplane.io/v1alpha2
kind: Object
metadata:
  name: pool-scaler-deployment
  namespace: crossplane-system
spec:
  providerConfigRef:
    name: kubernetes-provider
  forProvider:
    manifest:
      apiVersion: apps/v1
      kind: Deployment
      metadata:
        name: pool-scaler
        namespace: kube-system
      spec:
        # Single replica: the loop below keeps no shared state or leader
        # election, so extra replicas would only issue duplicate scale calls.
        replicas: 1
        selector:
          matchLabels:
            app: pool-scaler
        template:
          metadata:
            labels:
              app: pool-scaler
          spec:
            # The script lists pods in the argo namespace and lists nodes, so
            # this service account must be allowed to do both.
            # NOTE(review): the account and its RBAC are defined elsewhere.
            serviceAccountName: node-labeler
            containers:
              - name: scaler
                # NOTE(review): `latest` is a mutable tag; pin a specific
                # version or digest once a known-good one is chosen. The
                # script needs bash, kubectl, curl, tr and grep in the image.
                image: bitnami/kubectl:latest
                command:
                  - /bin/bash
                  - -c
                  - |
                    echo "Pool scaler started. Watching for pending workflow pods..."
                    while true; do
                      # Count pending pods in the argo namespace whose
                      # nodeSelector mentions high-compute. grep -c prints 0
                      # but exits 1 when nothing matches, hence `|| true`.
                      PENDING=$(kubectl get pods -n argo --field-selector=status.phase=Pending \
                        -o jsonpath='{.items[*].spec.nodeSelector}' 2>/dev/null \
                        | tr '}' '\n' | grep -c 'high-compute' || true)
                      if [ "${PENDING:-0}" -gt 0 ]; then
                        # List existing high-compute nodes. A failed API call
                        # must not be read as "zero nodes": the PUT below sets
                        # an absolute count of 1 and could overwrite a larger
                        # pool size.
                        if ! NODE_LIST=$(kubectl get nodes \
                          -l kubernetes.civo.com/node-pool=high-compute \
                          --no-headers 2>/dev/null); then
                          echo "$(date): could not list high-compute nodes; skipping scale-up this round"
                          sleep 15
                          continue
                        fi
                        # Count non-empty lines (empty output means no nodes).
                        HC_NODES=$(printf '%s' "$NODE_LIST" | grep -c . || true)
                        if [ "$HC_NODES" -eq 0 ]; then
                          echo "$(date): $PENDING pending workflow pods, no high-compute nodes. Scaling pool to 1..."
                          # -w appends the HTTP status on its own final line so
                          # it can be split from the response body below.
                          # Transport failures are reported as status 000.
                          RESPONSE=$(curl -sS --max-time 30 -w '\n%{http_code}' -X PUT \
                            "${CIVO_API_URL}/v2/kubernetes/clusters/${CIVO_CLUSTER_ID}/pools/high-compute" \
                            -H "Authorization: bearer ${CIVO_API_KEY}" \
                            -H "Content-Type: application/json" \
                            -d "{\"count\": 1, \"region\": \"${CIVO_REGION}\"}") || RESPONSE=$'\n000'
                          HTTP_CODE=${RESPONSE##*$'\n'}
                          BODY=${RESPONSE%$'\n'*}
                          echo "$BODY"
                          case "$HTTP_CODE" in
                            2??)
                              # Wait for node to provision before checking again
                              sleep 180
                              ;;
                            *)
                              echo "$(date): scale request failed (HTTP $HTTP_CODE); retrying on the next poll"
                              ;;
                          esac
                        fi
                      fi
                      sleep 15
                    done
                # All Civo connection settings come from the civo-api-access
                # Secret; nothing sensitive is inlined in this manifest.
                env:
                  - name: CIVO_API_URL
                    valueFrom:
                      secretKeyRef:
                        key: api-url
                        name: civo-api-access
                  - name: CIVO_API_KEY
                    valueFrom:
                      secretKeyRef:
                        key: api-key
                        name: civo-api-access
                  - name: CIVO_CLUSTER_ID
                    valueFrom:
                      secretKeyRef:
                        key: cluster-id
                        name: civo-api-access
                  - name: CIVO_REGION
                    valueFrom:
                      secretKeyRef:
                        key: region
                        name: civo-api-access
                resources:
                  requests:
                    cpu: 10m
                    memory: 32Mi
                  limits:
                    cpu: 50m
                    memory: 64Mi