spark-operator

Helm chart

Last changed

Request a free trial

Contact our team to test out this Helm chart and related images for free. Please also indicate any other images you would like to evaluate.

Tag:

# Licensed under the Apache License, Version 2.0 (the "License");

# you may not use this file except in compliance with the License.

# You may obtain a copy of the License at

# https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS,

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

# See the License for the specific language governing permissions and

# limitations under the License.

# Default values for spark-operator.

# This is a YAML-formatted file.

# Declare variables to be passed into your templates.

# -- String to partially override release name.

nameOverride: ""

# -- String to fully override release name.

fullnameOverride: ""

# -- Common labels to add to the resources.

commonLabels: {}

# Image used by the Spark operator.

image:

# -- Image registry.

registry: cgr.dev

# -- Image repository.

repository: chainguard-private/spark-operator

# -- Image tag.

# @default -- If not set, the chart appVersion will be used.

tag: latest@sha256:68988784d6770df9e8222c6e700fb94ba25519757a2bc75084272e7cd8fee3a6

# -- Image pull policy.

pullPolicy: IfNotPresent

# -- Image pull secrets for private image registry.

pullSecrets: []

# - name: <secret-name>

# Helm hook configuration.

hook:

# -- Whether to create a Helm pre-install/pre-upgrade hook Job to update CRDs.

upgradeCrd: false

# Image used by the Helm hook Job.

image:

# -- Image registry.

registry: cgr.dev

# -- Image repository.

repository: chainguard-private/kubectl

# -- Image tag.

# @default -- If not set, the chart appVersion will be used.

tag: latest@sha256:2ad180bbbcc8d809f3a9ab75202adeddec89ee5554a46aff8ed5d0429f18a151

# -- Node selector for the Helm hook Job.

nodeSelector: {}

# -- Affinity for the Helm hook Job.

affinity: {}

# -- List of node taints to tolerate for the Helm hook Job.

tolerations: []

controller:

# -- Number of replicas of controller.

replicas: 1

# -- Feature gates to enable or disable specific features.

featureGates:

- name: PartialRestart

enabled: false

- name: LoadSparkDefaults

enabled: false

# -- The number of old revisions to retain to allow rollback.

revisionHistoryLimit: 10

leaderElection:

# -- Specifies whether to enable leader election for controller.

enable: true

# -- Leader election lease duration.

leaseDuration: 15s

# -- Leader election renew deadline.

renewDeadline: 10s

# -- Leader election retry period.

retryPeriod: 2s

# -- Reconcile concurrency, higher values might increase memory usage.

workers: 10

# -- Configure the verbosity of logging, can be one of `debug`, `info`, `error`.

logLevel: info

# -- Configure the encoder of logging, can be one of `console` or `json`.

logEncoder: console

# -- Grace period after a successful spark-submit during which "driver pod not found" errors will be retried. Useful if the driver pod can take some time to be created.

driverPodCreationGracePeriod: 10s

# -- Specifies the maximum number of Executor pods that can be tracked by the controller per SparkApplication.

maxTrackedExecutorPerApp: 1000

# -- Timestamp precision for ScheduledSparkApplication run names.

# Valid values: nanos (default), micros, millis, seconds, minutes.

# Shorter precisions produce shorter names which helps with Kubernetes name length limits.

# NOTE: Using lower precisions such as "seconds" or "minutes" increases the risk of name

# collisions if multiple runs are created within the same time unit (for example during

# reconciliation loops or manual re-triggers). A collision will cause run creation to fail.

# Choose a precision compatible with your scheduling frequency: "minutes" is only suitable

# for jobs scheduled at most once per minute, "seconds" for jobs scheduled at most once per second.

scheduledSparkApplicationTimestampPrecision: nanos

uiService:

100

# -- Specifies whether to create service for Spark web UI.

101

enable: true

102

uiIngress:

103

# -- Specifies whether to create ingress for Spark web UI.

104

# `controller.uiService.enable` must be `true` to enable ingress.

105

enable: false

106

# -- Ingress URL format.

107

# Required if `controller.uiIngress.enable` is true.

108

urlFormat: ""

109

# -- Optionally set the ingressClassName.

110

ingressClassName: ""

111

# -- Optionally set default TLS configuration for the Spark UI's ingress. `ingressTLS` in the SparkApplication spec overrides this.

112

tls: []

113

# - hosts:

114

# - "*.example.com"

115

# secretName: "example-secret"

116

# -- Optionally set default ingress annotations for the Spark UI's ingress. `ingressAnnotations` in the SparkApplication spec overrides this.

117

annotations: {}

118

# key1: value1

119

# key2: value2

120

batchScheduler:

121

# -- Specifies whether to enable batch scheduler for spark jobs scheduling.

122

# If enabled, users can specify batch scheduler name in spark application.

123

enable: false

124

# -- Specifies a list of kube-scheduler names for scheduling Spark pods.

125

kubeSchedulerNames: []

126

# - default-scheduler

127

# -- Default batch scheduler to be used if not specified by the user.

128

# If specified, this value must be either "volcano" or "yunikorn". Specifying any other

129

# value will cause the controller to error on startup.

130

default: ""

131

serviceAccount:

132

# -- Specifies whether to create a service account for the controller.

133

create: true

134

# -- Optional name for the controller service account.

135

136

# -- Extra annotations for the controller service account.

137

annotations: {}

138

# -- Auto-mount service account token to the controller pods.

139

automountServiceAccountToken: true

140

rbac:

141

# -- Specifies whether to create RBAC resources for the controller.

142

create: true

143

# -- Extra annotations for the controller RBAC resources.

144

annotations: {}

145

# -- Extra labels for controller pods.

146

labels: {}

147

# key1: value1

148

# key2: value2

149

150

# -- Extra annotations for controller pods.

151

annotations: {}

152

# key1: value1

153

# key2: value2

154

155

# -- Volumes for controller pods.

156

volumes:

157

# Create a tmp directory to write Spark artifacts to for deployed Spark apps.

158

- name: tmp

159

emptyDir:

160

sizeLimit: 1Gi

161

# -- Node selector for controller pods.

162

nodeSelector: {}

163

# -- Affinity for controller pods.

164

affinity: {}

165

# -- List of node taints to tolerate for controller pods.

166

tolerations: []

167

# -- Priority class for controller pods.

168

priorityClassName: ""

169

# -- Security context for controller pods.

170

podSecurityContext:

171

fsGroup: 185

172

# -- Topology spread constraints rely on node labels to identify the topology domain(s) that each Node is in.

173

# Ref: [Pod Topology Spread Constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/).

174

# The labelSelector field in topology spread constraint will be set to the selector labels for controller pods if not specified.

175

topologySpreadConstraints: []

176

# - maxSkew: 1

177

# topologyKey: topology.kubernetes.io/zone

178

# whenUnsatisfiable: ScheduleAnyway

179

# - maxSkew: 1

180

# topologyKey: kubernetes.io/hostname

181

# whenUnsatisfiable: DoNotSchedule

182

183

# -- Whether to use user namespace or not

184

# Kubernetes version 1.30 for feature beta (1.33 for GA) or higher is required with support from OS and OCI runtime

185

# ref: https://kubernetes.io/docs/concepts/workloads/pods/user-namespaces/

186

hostUsers: null

187

# -- Environment variables for controller containers.

188

env: []

189

# -- Environment variable sources for controller containers.

190

envFrom: []

191

# -- Volume mounts for controller containers.

192

volumeMounts:

193

# Mount a tmp directory to write Spark artifacts to for deployed Spark apps.

194

- name: tmp

195

mountPath: "/tmp"

196

readOnly: false

197

# -- Pod resource requests and limits for controller containers.

198

# Note that each job submission will spawn a JVM within the controller pods using "/usr/local/openjdk-11/bin/java -Xmx128m".

199

# Kubernetes may kill these Java processes at will to enforce resource limits. When that happens, you will see the following error:

200

# 'failed to run spark-submit for SparkApplication [...]: signal: killed' - when this happens, you may want to increase memory limits.

201

resources: {}

202

# limits:

203

# cpu: 100m

204

# memory: 300Mi

205

# requests:

206

# cpu: 100m

207

# memory: 300Mi

208

209

# -- Security context for controller containers.

210

securityContext:

211

readOnlyRootFilesystem: true

212

privileged: false

213

allowPrivilegeEscalation: false

214

runAsNonRoot: true

215

capabilities:

216

drop:

217

- ALL

218

seccompProfile:

219

type: RuntimeDefault

220

# -- Sidecar containers for controller pods.

221

sidecars: []

222

# Pod disruption budget for controller to avoid service degradation.

223

podDisruptionBudget:

224

# -- Specifies whether to create pod disruption budget for controller.

225

# Ref: [Specifying a Disruption Budget for your Application](https://kubernetes.io/docs/tasks/run-application/configure-pdb/)

226

enable: false

227

# -- The number of pods that must be available.

228

# Requires `controller.replicas` to be greater than 1.

229

minAvailable: 1

230

pprof:

231

# -- Specifies whether to enable pprof.

232

enable: false

233

# -- Specifies pprof port.

234

port: 6060

235

# -- Specifies pprof service port name.

236

portName: pprof

237

# Workqueue rate limiter configuration forwarded to the controller-runtime Reconciler.

238

workqueueRateLimiter:

239

# -- Specifies the average rate of items processed by the workqueue rate limiter.

240

bucketQPS: 50

241

# -- Specifies the maximum number of items that can be in the workqueue at any given time.

242

bucketSize: 500

243

maxDelay:

244

# -- Specifies whether to enable max delay for the workqueue rate limiter.

245

# This is useful to avoid losing events when the workqueue is full.

246

enable: true

247

# -- Specifies the maximum delay duration for the workqueue rate limiter.

248

duration: 6h

249

webhook:

250

# -- Specifies whether to enable webhook.

251

enable: true

252

# -- Number of replicas of webhook server.

253

replicas: 1

254

# -- The number of old revisions to retain to allow rollback.

255

revisionHistoryLimit: 10

256

leaderElection:

257

# -- Specifies whether to enable leader election for webhook.

258

enable: true

259

# -- Configure the verbosity of logging, can be one of `debug`, `info`, `error`.

260

logLevel: info

261

# -- Configure the encoder of logging, can be one of `console` or `json`.

262

logEncoder: console

263

# -- Specifies webhook port.

264

port: 9443

265

# -- Specifies webhook service port name.

266

portName: webhook

267

# -- Specifies how unrecognized errors are handled.

268

# Available options are `Ignore` or `Fail`.

269

failurePolicy: Fail

270

# -- Specifies the timeout seconds of the webhook, the value must be between 1 and 30.

271

timeoutSeconds: 10

272

resourceQuotaEnforcement:

273

# -- Specifies whether to enable the ResourceQuota enforcement for SparkApplication resources.

274

enable: false

275

serviceAccount:

276

# -- Specifies whether to create a service account for the webhook.

277

create: true

278

# -- Optional name for the webhook service account.

279

280

# -- Extra annotations for the webhook service account.

281

annotations: {}

282

# -- Auto-mount service account token to the webhook pods.

283

automountServiceAccountToken: true

284

rbac:

285

# -- Specifies whether to create RBAC resources for the webhook.

286

create: true

287

# -- Extra annotations for the webhook RBAC resources.

288

annotations: {}

289

# -- Extra labels for webhook pods.

290

labels: {}

291

# key1: value1

292

# key2: value2

293

294

# -- Extra annotations for webhook pods.

295

annotations: {}

296

# key1: value1

297

# key2: value2

298

299

# -- Sidecar containers for webhook pods.

300

sidecars: []

301

# -- Volumes for webhook pods.

302

volumes:

303

# Create a dir for the webhook to generate its certificates in.

304

- name: serving-certs

305

emptyDir:

306

sizeLimit: 500Mi

307

# -- Node selector for webhook pods.

308

nodeSelector: {}

309

# -- Affinity for webhook pods.

310

affinity: {}

311

# -- List of node taints to tolerate for webhook pods.

312

tolerations: []

313

# -- Priority class for webhook pods.

314

priorityClassName: ""

315

# -- Security context for webhook pods.

316

podSecurityContext:

317

fsGroup: 185

318

# -- Topology spread constraints rely on node labels to identify the topology domain(s) that each Node is in.

319

# Ref: [Pod Topology Spread Constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/).

320

# The labelSelector field in topology spread constraint will be set to the selector labels for webhook pods if not specified.

321

topologySpreadConstraints: []

322

# - maxSkew: 1

323

# topologyKey: topology.kubernetes.io/zone

324

# whenUnsatisfiable: ScheduleAnyway

325

# - maxSkew: 1

326

# topologyKey: kubernetes.io/hostname

327

# whenUnsatisfiable: DoNotSchedule

328

329

# -- Whether to use user namespace or not

330

# Kubernetes version 1.30 for feature beta (1.33 for GA) or higher is required with support from OS and OCI runtime

331

# ref: https://kubernetes.io/docs/concepts/workloads/pods/user-namespaces/

332

hostUsers: null

333

# -- Environment variables for webhook containers.

334

env: []

335

# -- Environment variable sources for webhook containers.

336

envFrom: []

337

# -- Volume mounts for webhook containers.

338

volumeMounts:

339

# Mount a dir for the webhook to generate its certificates in.

340

- name: serving-certs

341

mountPath: /etc/k8s-webhook-server/serving-certs

342

subPath: serving-certs

343

readOnly: false

344

# -- Pod resource requests and limits for webhook pods.

345

resources: {}

346

# limits:

347

# cpu: 100m

348

# memory: 300Mi

349

# requests:

350

# cpu: 100m

351

# memory: 300Mi

352

353

# -- Security context for webhook containers.

354

securityContext:

355

readOnlyRootFilesystem: true

356

privileged: false

357

allowPrivilegeEscalation: false

358

runAsNonRoot: true

359

capabilities:

360

drop:

361

- ALL

362

seccompProfile:

363

type: RuntimeDefault

364

# Pod disruption budget for webhook to avoid service degradation.

365

podDisruptionBudget:

366

# -- Specifies whether to create pod disruption budget for webhook.

367

# Ref: [Specifying a Disruption Budget for your Application](https://kubernetes.io/docs/tasks/run-application/configure-pdb/)

368

enable: false

369

# -- The number of pods that must be available.

370

# Requires `webhook.replicas` to be greater than 1.

371

minAvailable: 1

372

spark:

373

# -- List of namespaces in which to run Spark jobs.

374

# If an empty string is included, all namespaces will be allowed.

375

# Namespaces specified here will be watched in addition to those matching jobNamespaceSelector.

376

# Make sure the namespaces already exist.

377

jobNamespaces:

378

- default

379

# -- Label selector to filter namespaces to watch.

380

# Supports standard Kubernetes label selector syntax (e.g., 'spark-operator=enabled,env in (prod,staging)').

381

# Namespaces matching this selector will be watched in addition to those in jobNamespaces.

382

# When specified, requires ClusterRole permission to list and watch namespaces.

383

# Leave empty to disable namespace selector functionality.

384

jobNamespaceSelector: ""

385

serviceAccount:

386

# -- Specifies whether to create a service account for spark applications.

387

create: true

388

# -- Optional name for the spark service account.

389

390

# -- Optional annotations for the spark service account.

391

annotations: {}

392

# -- Auto-mount service account token to the spark applications pods.

393

automountServiceAccountToken: true

394

rbac:

395

# -- Specifies whether to create RBAC resources for spark applications.

396

create: true

397

# -- Optional annotations for the spark application RBAC resources.

398

annotations: {}

399

prometheus:

400

metrics:

401

# -- Specifies whether to enable prometheus metrics scraping.

402

enable: true

403

# -- Metrics port.

404

port: 8080

405

# -- Metrics port name.

406

portName: metrics

407

# -- Metrics serving endpoint.

408

endpoint: /metrics

409

# -- Metrics prefix, will be added to all exported metrics.

410

prefix: ""

411

# -- Job Start Latency histogram buckets. Specified in seconds.

412

jobStartLatencyBuckets: "30,60,90,120,150,180,210,240,270,300"

413

# -- Labels to be added to the Spark Operator standard metrics, e.g., "label1Key,label2Key".

414

# Defaults to 'app_type' if not set.

415

labels: ""

416

# Prometheus pod monitor for controller pods

417

podMonitor:

418

# -- Specifies whether to create pod monitor.

419

# Note that prometheus metrics should be enabled as well.

420

create: false

421

# -- Pod monitor labels

422

labels: {}

423

# -- The label to use to retrieve the job name from

424

jobLabel: spark-operator-podmonitor

425

# -- Prometheus metrics endpoint properties. `metrics.portName` will be used as the port.

426

podMetricsEndpoint:

427

scheme: http

428

interval: 5s

429

certManager:

430

# -- Specifies whether to use [cert-manager](https://cert-manager.io) to generate certificate for webhook.

431

# `webhook.enable` must be set to `true` to enable cert-manager.

432

enable: false

433

# -- The reference to the issuer.

434

# @default -- A self-signed issuer will be created and used if not specified.

435

issuerRef: {}

436

# group: cert-manager.io

437

# kind: ClusterIssuer

438

# name: selfsigned

439

# -- The duration of the certificate validity (e.g. `2160h`).

440

# See [cert-manager.io/v1.Certificate](https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.Certificate).

441

# @default -- `2160h` (90 days) will be used if not specified.

442

duration: ""

443

# -- The duration before the certificate expiration to renew the certificate (e.g. `720h`).

444

# See [cert-manager.io/v1.Certificate](https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.Certificate).

445

# @default -- 1/3 of issued certificate’s lifetime.

446

renewBefore: ""

447

The trusted source for open source

Talk to an expert

Privacy

Terms

© 2026 Chainguard, Inc. All Rights Reserved.
Chainguard® and the Chainguard logo are registered trademarks of Chainguard, Inc. in the United States and/or other countries.
The other respective trademarks mentioned on this page are owned by the respective companies and use of them does not imply any affiliation or endorsement.

spark-operator

The trusted source for open source

Product

Solutions

Customers

Resources

Company