1# Default values for ollama-helm.
2# This is a YAML-formatted file.
3# Declare variables to be passed into your templates.
9 # -- Enable Knative integration
11 # -- Knative service container concurrency
12 containerConcurrency: 0
13 # -- Knative service timeout seconds
15 # -- Knative service response start timeout seconds
16 responseStartTimeoutSeconds: 300
17 # -- Knative service idle timeout seconds
18 idleTimeoutSeconds: 300
19 # -- Knative service annotations
22 # -- Time to keep completed Knative model bootstrap Jobs before cleanup. Set to null to disable TTL-based cleanup.
23 ttlSecondsAfterFinished: 300
26 # -- Docker image registry
27 repository: cgr.dev/chainguard-private/ollama
28 # -- Docker pull policy
29 pullPolicy: IfNotPresent
30 # -- Docker image tag, overrides the image tag whose default is the chart appVersion.
31 tag: latest@sha256:18e9b8ee15ab2db7e073fec281b30b44168895b8887b638b80f5ebba925ba72b
32# -- Docker registry secret names as an array
34# -- String to partially override template (will maintain the release name)
36# -- String to fully override template
38# -- String to fully override namespace
42 # Port Ollama is listening on
45 # -- Enable GPU integration
47 # -- Enable DRA GPU integration
48 # If enabled, it will use DRA instead of Device Driver Plugin and create a ResourceClaim and GpuClaimParameters
50 # -- DRA GPU DriverClass
51 draDriverClass: "gpu.nvidia.com"
52 # -- Existing DRA GPU ResourceClaim Template
53 draExistingClaimTemplate: ""
54 # -- GPU type: 'nvidia' or 'amd'
55 # If 'ollama.gpu.enabled' is true, the default value is 'nvidia'
56 # If set to 'amd', this will add the 'rocm' suffix to the image tag if 'image.tag' is not overridden
57 # This is because AMD and CPU/CUDA are different images
59 # -- Specify the number of GPUs
60 # If you use MIG section below then this parameter is ignored
62 # -- only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice
63 nvidiaResource: "nvidia.com/gpu"
64 # nvidiaResource: "nvidia.com/mig-1g.10gb" # example
65 # If you want to use more than one NVIDIA MIG you can use the following syntax (then nvidiaResource is ignored and only the configuration in the following MIG section is used)
68 # -- Enable multiple mig devices
69 # If enabled you will have to specify the mig devices
70 # If enabled is set to false this section is ignored
72 # -- Specify the mig devices and the corresponding number
77 # -- List of models to pull at container startup
78 # The more you add, the longer the container will take to start if models are not present
83 # -- List of models to load in memory at container startup
88 # -- List of models to create at container startup, there are two options
89 # 1. Create a raw model
90 # 2. Load a model from configMaps, configMaps must be created before and are loaded as volume in "/models" directory.
92 # - name: llama3.1-ctx32768
93 # configMapRef: my-configmap
94 # configMapKeyRef: configmap-key
95 # - name: llama3.1-ctx32768
98 # PARAMETER num_ctx 32768
100 # -- Automatically remove models present on the disk but not specified in the values file
102 # -- Add insecure flag for pulling at container startup
104 # -- Override ollama-data volume mount path, default: "/root/.ollama"
107# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
109 # -- Specifies whether a service account should be created
111 # -- Automatically mount a ServiceAccount's API credentials?
113 # -- Annotations to add to the service account
115 # -- The name of the service account to use.
116 # If not set and create is true, a name is generated using the fullname template
118# -- Map of annotations to add to the pods
120# -- Map of labels to add to the pods
122# -- Pod Security Context
123podSecurityContext: {}
126# -- Priority Class Name
128# -- Container Security Context
133# readOnlyRootFilesystem: true
137# -- Specify runtime class
145 # -- Service node port when service type is 'NodePort'
147 # -- Load Balancer IP address
149 # -- Annotations to add to the service
151 # -- Labels to add to the service
153 # -- IP Families for the service
158 # -- IP Family Policy for the service
163# Configure Deployment
165 # -- Labels to add to the deployment
167# Configure the ingress resource that allows you to access the
169 # -- Enable ingress controller resource
171 # -- IngressClass that will be used to implement the Ingress (Kubernetes 1.18+)
173 # -- Additional annotations for the Ingress resource.
175 # kubernetes.io/ingress.class: traefik
176 # kubernetes.io/ingress.class: nginx
177 # kubernetes.io/tls-acme: "true"
179 # The list of hostnames to be covered with this ingress record.
185 # -- The tls configuration for hostnames to be covered with this ingress record.
187 # - secretName: chart-example-tls
189 # - chart-example.local
190# Configure resource requests and limits
191# ref: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
208# Configure extra options for liveness probe
209# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
211 # -- Enable livenessProbe
213 # -- Request path for livenessProbe
215 # -- Initial delay seconds for livenessProbe
216 initialDelaySeconds: 60
217 # -- Period seconds for livenessProbe
219 # -- Timeout seconds for livenessProbe
221 # -- Failure threshold for livenessProbe
223 # -- Success threshold for livenessProbe
225# Configure extra options for readiness probe
226# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
228 # -- Enable readinessProbe
230 # -- Request path for readinessProbe
232 # -- Initial delay seconds for readinessProbe
233 initialDelaySeconds: 30
234 # -- Period seconds for readinessProbe
236 # -- Timeout seconds for readinessProbe
238 # -- Failure threshold for readinessProbe
240 # -- Success threshold for readinessProbe
242# Configure autoscaling
244 # -- Enable autoscaling
246 # -- Number of minimum replicas
248 # -- Number of maximum replicas
250 # -- CPU usage to target replica
251 targetCPUUtilizationPercentage: 80
252 # -- targetMemoryUtilizationPercentage: 80
253# -- Additional volumes on the output Deployment definition.
257# secretName: mysecret
260# -- Additional volumeMounts on the output Deployment definition.
263# mountPath: "/etc/foo"
266# -- Additional arguments on the output Deployment definition.
268# -- Additional environments variables on the output Deployment definition.
269# For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go
271# - name: OLLAMA_DEBUG
274# -- Additional environment variables from external sources (like ConfigMap)
277# name: my-env-configmap
279# Enable persistence using Persistent Volume Claims
280# ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/
282 # -- Enable persistence using PVC
284 # -- Ollama server data Persistent Volume access modes
285 # Must match those of existing PV or dynamic provisioner
286 # Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
289 # -- Ollama server data Persistent Volume annotations
291 # -- If you'd like to bring your own PVC for persisting Ollama state, pass the name of the
292 # created + ready PVC here. If set, this Chart will not create the default PVC.
293 # Requires server.persistentVolume.enabled: true
295 # -- Ollama server data Persistent Volume size
297 # -- Ollama server data Persistent Volume Storage Class
298 # If defined, storageClassName: <storageClass>
299 # If set to "-", storageClassName: "", which disables dynamic provisioning
300 # If undefined (the default) or set to null, no storageClassName spec is
301 # set, choosing the default provisioner. (gp2 on AWS, standard on
302 # GKE, AWS & OpenStack)
304 # -- Ollama server data Persistent Volume Binding Mode
305 # If defined, volumeMode: <volumeMode>
306 # If empty (the default) or set to null, no volumeBindingMode spec is
307 # set, choosing the default mode.
309 # -- Subdirectory of Ollama server data Persistent Volume to mount
310 # Useful if the volume's root directory is not empty
312 # -- Pre-existing PV to attach this claim to
313 # Useful if a CSI auto-provisions a PV for you and you want to always
314 # reference the PV moving forward
316# -- Node labels for pod assignment.
318# -- Tolerations for pod assignment
320# -- Affinity for pod assignment
322# -- Lifecycle for pod assignment (override ollama.models startup pull/run)
324# How to replace existing pods
326 # -- Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate
328# -- Topology Spread Constraints for pod assignment
329topologySpreadConstraints: {}
330# -- Wait for a grace period
331terminationGracePeriodSeconds: 120
332# -- Init containers to add to the pod
334# - name: startup-tool
340# -- Use the host's IPC namespace.
342# -- Use the host's PID namespace.
344# -- Use the host's network namespace.
346# -- Extra K8s manifests to deploy
349# kind: PersistentVolume
354# - apiVersion: scheduling.k8s.io/v1
359# globalDefault: false
360# description: "This priority class should be used for XYZ service pods only."
362# Test connection pods
365 # -- Labels to add to the tests
367 # -- Annotations to add to the tests