Skip to content

Commit 38db747

Browse files
committed
Attempt nvidia-device-plugin app fixes
1 parent 07ea2d0 commit 38db747

3 files changed

Lines changed: 20 additions & 4 deletions

File tree

helm/nvidia-device-plugin/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ name: nvidia-device-plugin
33
description: NVIDIA Device Plugin for Kubernetes
44
type: application
55
version: 0.1.0
6-
appVersion: "v0.17.1"
6+
appVersion: "v0.18.0"

helm/nvidia-device-plugin/templates/daemonset.yaml

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ spec:
2525
- image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
2626
imagePullPolicy: {{ .Values.image.pullPolicy }}
2727
name: nvidia-device-plugin-ctr
28-
args: ["--fail-on-init-error=false"]
28+
command: ["nvidia-device-plugin"]
2929
securityContext:
3030
{{- toYaml .Values.securityContext | nindent 10 }}
3131
resources:
@@ -35,8 +35,19 @@ spec:
3535
env:
3636
- name: PASS_DEVICE_SPECS
3737
value: "true"
38+
{{- if typeIs "bool" .Values.failOnInitError }}
39+
- name: FAIL_ON_INIT_ERROR
40+
value: {{ .Values.failOnInitError | quote }}
41+
{{- else }}
3842
- name: FAIL_ON_INIT_ERROR
3943
value: "false"
44+
{{- end }}
45+
{{- if typeIs "string" .Values.deviceDiscoveryStrategy }}
46+
{{- if ne .Values.deviceDiscoveryStrategy "" }}
47+
- name: DEVICE_DISCOVERY_STRATEGY
48+
value: {{ .Values.deviceDiscoveryStrategy }}
49+
{{- end }}
50+
{{- end }}
4051
{{- if .Values.env }}
4152
{{- toYaml .Values.env | nindent 8 }}
4253
{{- else }}
@@ -50,5 +61,4 @@ spec:
5061
value: "compute,utility"
5162
{{- end }}
5263
volumes:
53-
{{- toYaml .Values.volumes | nindent 8 }}
54-
hostNetwork: true
64+
{{- toYaml .Values.volumes | nindent 8 }}

helm/nvidia-device-plugin/values.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@ image:
33
tag: v0.18.0
44
pullPolicy: IfNotPresent
55

6+
failOnInitError: false
7+
8+
# Device discovery strategy: "auto", "nvml", or "tegra"
9+
# Leave empty (or null) to omit the DEVICE_DISCOVERY_STRATEGY env var so the plugin falls back to its default, "auto"
10+
deviceDiscoveryStrategy: ""
11+
612
nodeSelector:
713
kubernetes.io/arch: amd64
814
gpu: "true"

0 commit comments

Comments
 (0)