1# Default values for ollama-helm.
2# This is a YAML-formatted file.
3# Declare variables to be passed into your templates.
9 # -- Enable Knative integration
11 # -- Knative service container concurrency
12 containerConcurrency: 0
13 # -- Knative service timeout seconds
15 # -- Knative service response start timeout seconds
16 responseStartTimeoutSeconds: 300
17 # -- Knative service idle timeout seconds
18 idleTimeoutSeconds: 300
19 # -- Knative service annotations
22 # -- Time to keep completed Knative model bootstrap Jobs before cleanup. Set to null to disable TTL-based cleanup.
23 ttlSecondsAfterFinished: 300
26 # -- Docker image registry
27 repository: cgr.dev/chainguard-private/ollama
28 # -- Docker pull policy
29 pullPolicy: IfNotPresent
30 # -- Docker image tag, overrides the image tag whose default is the chart appVersion.
31 tag: latest@sha256:18e9b8ee15ab2db7e073fec281b30b44168895b8887b638b80f5ebba925ba72b
32# -- Docker registry secret names as an array
34# -- String to partially override template (will maintain the release name)
36# -- String to fully override template
38# -- String to fully override namespace
42 # Port Ollama is listening on
45 # -- Enable GPU integration
47 # -- Enable DRA GPU integration
48 # If enabled, it will use DRA instead of Device Driver Plugin and create a ResourceClaim and GpuClaimParameters
50 # -- DRA GPU DriverClass
51 draDriverClass: "gpu.nvidia.com"
52 # -- Existing DRA GPU ResourceClaim Template
53 draExistingClaimTemplate: ""
54 # -- GPU type: 'nvidia' or 'amd'
55 # If 'ollama.gpu.enabled' is true, the default value is 'nvidia'
56 # If set to 'amd', this will add the 'rocm' suffix to the image tag if 'image.tag' is not overridden
57 # This is because AMD and CPU/CUDA are different images
59 # -- Specify the number of GPUs
60 # If you use MIG section below then this parameter is ignored
62 # -- only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice
63 nvidiaResource: "nvidia.com/gpu"
64 # nvidiaResource: "nvidia.com/mig-1g.10gb" # example
65 # If you want to use more than one NVIDIA MIG you can use the following syntax (then nvidiaResource is ignored and only the configuration in the following MIG section is used)
68 # -- Enable multiple mig devices
69 # If enabled you will have to specify the mig devices
70 # If enabled is set to false this section is ignored
72 # -- Specify the mig devices and the corresponding number
77 # -- List of models to pull at container startup
78 # The more you add, the longer the container will take to start if models are not present
83 # -- List of models to load in memory at container startup
88 # -- List of models to create at container startup, there are two options
89 # 1. Create a raw model
90 # 2. Load a model from configMaps, configMaps must be created before and are loaded as volume in "/models" directory.
92 # - name: llama3.1-ctx32768
93 # configMapRef: my-configmap
94 # configMapKeyRef: configmap-key
95 # - name: llama3.1-ctx32768
98 # PARAMETER num_ctx 32768
100 # -- Automatically remove models present on the disk but not specified in the values file
102 # -- Add insecure flag for pulling at container startup
104 # -- Override ollama-data volume mount path, default: "/root/.ollama"
107# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
109 # -- Specifies whether a service account should be created
111 # -- Automatically mount a ServiceAccount's API credentials?
113 # -- Annotations to add to the service account
115 # -- The name of the service account to use.
116 # If not set and create is true, a name is generated using the fullname template
118# -- Map of annotations to add to the pods
120# -- Map of labels to add to the pods
122# -- Pod Security Context
123podSecurityContext: {}
126# -- Priority Class Name
128# -- Container Security Context
133# readOnlyRootFilesystem: true
137# -- Specify runtime class
145 # -- Service node port when service type is 'NodePort'
147 # -- Load Balancer IP address
149 # -- Annotations to add to the service
151 # -- Labels to add to the service
153 # -- IP Families for the service
158 # -- IP Family Policy for the service
163# Configure Deployment
165 # -- Labels to add to the deployment
167# Configure the ingress resource that allows you to access the
169 # -- Enable ingress controller resource
171 # -- IngressClass that will be used to implement the Ingress (Kubernetes 1.18+)
173 # -- Additional annotations for the Ingress resource.
175 # kubernetes.io/ingress.class: traefik
176 # kubernetes.io/ingress.class: nginx
177 # kubernetes.io/tls-acme: "true"
179 # The list of hostnames to be covered with this ingress record.
185 # -- The tls configuration for hostnames to be covered with this ingress record.
187 # - secretName: chart-example-tls
189 # - chart-example.local
190# Configure resource requests and limits
191# ref: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
208# Configure extra options for liveness probe
209# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
211 # -- Enable livenessProbe
213 # -- Request path for livenessProbe
215 # -- Initial delay seconds for livenessProbe
216 initialDelaySeconds: 60
217 # -- Period seconds for livenessProbe
219 # -- Timeout seconds for livenessProbe
221 # -- Failure threshold for livenessProbe
223 # -- Success threshold for livenessProbe
225# Configure extra options for readiness probe
226# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes
228 # -- Enable readinessProbe
230 # -- Request path for readinessProbe
232 # -- Initial delay seconds for readinessProbe
233 initialDelaySeconds: 30
234 # -- Period seconds for readinessProbe
236 # -- Timeout seconds for readinessProbe
238 # -- Failure threshold for readinessProbe
240 # -- Success threshold for readinessProbe
242# Configure autoscaling
244 # -- Enable autoscaling
246 # -- Number of minimum replicas
248 # -- Number of maximum replicas
250 # -- CPU usage to target replica
251 targetCPUUtilizationPercentage: 80
252 # -- targetMemoryUtilizationPercentage: 80
253# -- Additional volumes on the output Deployment definition.
257# secretName: mysecret
260# -- Additional volumeMounts on the output Deployment definition.
263# mountPath: "/etc/foo"
266# -- Additional arguments on the output Deployment definition.
268# -- Additional environments variables on the output Deployment definition.
269# For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go
271# - name: OLLAMA_DEBUG
274# -- Additional environment variables from external sources (like ConfigMap)
277# name: my-env-configmap
279# Enable persistence using Persistent Volume Claims
280# ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/
282 # -- Enable persistence using PVC
284 # -- Ollama server data Persistent Volume access modes
285 # Must match those of existing PV or dynamic provisioner
286 # Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
289 # -- Ollama server data Persistent Volume annotations
291 # -- If you'd like to bring your own PVC for persisting Ollama state, pass the name of the
292 # created + ready PVC here. If set, this Chart will not create the default PVC.
293 # Requires server.persistentVolume.enabled: true
295 # -- Ollama server data Persistent Volume size
297 # -- Ollama server data Persistent Volume Storage Class
298 # If defined, storageClassName: <storageClass>
299 # If set to "-", storageClassName: "", which disables dynamic provisioning
300 # If undefined (the default) or set to null, no storageClassName spec is
301 # set, choosing the default provisioner. (gp2 on AWS, standard on
302 # GKE, AWS & OpenStack)
304 # -- Ollama server data Persistent Volume Binding Mode
305 # If defined, volumeMode: <volumeMode>
306 # If empty (the default) or set to null, no volumeBindingMode spec is
307 # set, choosing the default mode.
309 # -- Subdirectory of Ollama server data Persistent Volume to mount
310 # Useful if the volume's root directory is not empty
312 # -- Pre-existing PV to attach this claim to
313 # Useful if a CSI auto-provisions a PV for you and you want to always
314 # reference the PV moving forward
316# -- Node labels for pod assignment.
318# -- Tolerations for pod assignment
320# -- Affinity for pod assignment
322# -- Lifecycle for pod assignment (override ollama.models startup pull/run)
324# How to replace existing pods
326 # -- Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate
328# -- Topology Spread Constraints for pod assignment
329topologySpreadConstraints: {}
330# -- Wait for a grace period
331terminationGracePeriodSeconds: 120
332# -- Init containers to add to the pod
334# - name: startup-tool
340# -- Use the host's IPC namespace.
342# -- Use the host's PID namespace.
344# -- Use the host's network namespace.
346# -- Extra K8s manifests to deploy
349# kind: PersistentVolume
354# - apiVersion: scheduling.k8s.io/v1
359# globalDefault: false
360# description: "This priority class should be used for XYZ service pods only."
362# Test connection pods
365 # -- Labels to add to the tests
367 # -- Annotations to add to the tests