---
# deployment for production at http://lab.llvm.org:8011/
# The deployment for staging is in a separate file!
apiVersion: apps/v1
kind: Deployment
metadata:
  name: mlir-nvidia-production
spec:
  # number of instances we want to run
  replicas: 1
  selector:
    matchLabels:
      app: mlir-nvidia-production
  # define strategy for updating the images
  strategy:
    rollingUpdate:
      # do not deploy more replicas, as the buildbot server
      # can't handle multiple workers with the same credentials
      maxSurge: 0
      # Allow having 0 replicas during updates.
      maxUnavailable: 1
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: mlir-nvidia-production
    spec:
      containers:
        # the image and version we want to run
        - image: gcr.io/sanitizer-bots/buildbot-mlir-nvidia:14
          name: mlir-nvidia-production
          env:
            # connect to the production environment, running on port 9990
            - name: BUILDBOT_PORT
              value: "9990"
          # reserve "<number of cores> - 1" for this image; Kubernetes also
          # needs <1 core for its management tools
          resources:
            limits:
              cpu: "15"
              memory: 10Gi
              # also request to use the GPU
              # Note: this does not work in terraform at the moment
              nvidia.com/gpu: "1"
            requests:
              cpu: "15"
              memory: 10Gi
              nvidia.com/gpu: "1"
          volumeMounts:
            # mount the secrets into a folder
            - mountPath: /secrets
              mountPropagation: None
              name: buildbot-token
      # specify the node pool on which to deploy
      nodeSelector:
        pool: nvidia-16core-pool
      restartPolicy: Always
      volumes:
        # declare the secret as a volume so we can mount it
        - name: buildbot-token
          secret:
            optional: false
            secretName: password-mlir-nvidia
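
# A minimal sketch of how this manifest is typically brought up, assuming the
# worker password lives in a local file. The key name "token" and the file
# path below are illustrative assumptions, not taken from this repository;
# only the secret name (password-mlir-nvidia) comes from the manifest above:
#
#   # create the Secret referenced by secretName above
#   kubectl create secret generic password-mlir-nvidia \
#     --from-file=token=/path/to/worker-password
#
#   # deploy (or update) this manifest
#   kubectl apply -f <this file>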