Skip to content

Commit 7c0f320

Browse files
authored
Merge pull request #4 from fourhu/master
Fixed the issue where the DCU device plugin failed to run on K100
2 parents af86d04 + f7408f3 commit 7c0f320

File tree

3 files changed: 53 additions and 43 deletions.

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ RUN cd /device-plugin && go build -o ./k8s-device-plugin cmd/k8s-device-plugin/m
2020
FROM ubuntu:20.04
2121
ENV TZ=Asia/Dubai
2222
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
23-
RUN apt-get update && apt-get -y install libhwloc-dev libdrm-dev pciutils
23+
RUN apt-get update && apt-get -y install libhwloc-dev libdrm-dev pciutils libelf-dev kmod
2424
ENV LD_LIBRARY_PATH=/opt/hygondriver/hip/lib:/opt/hygondriver/llvm/lib:/opt/hygondriver/lib:/opt/hygondriver/lib64:/opt/hyhal/lib:/opt/hyhal/lib64:/opt/hygondriver/.hyhal/lib:/opt/hygondriver/.hyhal/lib64:
2525
ENV PATH=/opt/hygondriver/bin:/opt/hygondriver/llvm/bin:/opt/hygondriver/hip/bin:/opt/hygondriver/hip/bin/hipify:/opt/hyhal/bin:/opt/hygondriver/.hyhal/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
2626
ENV C_INCLUDE_PATH=/opt/hygondriver/include:/opt/hyhal/include:/opt/hygondriver/llvm/include:/opt/hygondriver/.hyhal/include:

internal/pkg/dcu/server.go

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -95,7 +95,7 @@ func (p *Plugin) Start() error {
9595
}
9696
p.count = 0
9797

98-
cmd := exec.Command("hy-smi", "--showmeminfo", "vram")
98+
cmd := exec.Command("/opt/hyhal/bin/hy-smi", "--showmeminfo", "vram")
9999
out, err := cmd.CombinedOutput()
100100
if err != nil {
101101
log.Fatalf("cmd.Run() failed with %s\n", err)
@@ -124,7 +124,7 @@ func (p *Plugin) Start() error {
124124
p.count++
125125
}
126126

127-
cmd = exec.Command("hy-smi", "--showproductname")
127+
cmd = exec.Command("/opt/hyhal/bin/hy-smi", "--showproductname")
128128
out, err = cmd.CombinedOutput()
129129
if err != nil {
130130
log.Fatalf("cmd.Run() failed with %s\n", err)
@@ -148,7 +148,7 @@ func (p *Plugin) Start() error {
148148
index++
149149
}
150150

151-
cmd = exec.Command("hy-smi", "--showbus")
151+
cmd = exec.Command("/opt/hyhal/bin/hy-smi", "--showbus")
152152
out, err = cmd.CombinedOutput()
153153
if err != nil {
154154
log.Fatalf("cmd.Run() failed with %s\n", err)
@@ -167,7 +167,7 @@ func (p *Plugin) Start() error {
167167
}
168168
fmt.Println("collecting pcibus=", p.pcibusid)
169169

170-
cmd = exec.Command("hy-virtual", "--show-device-info")
170+
cmd = exec.Command("/opt/hyhal/bin/hy-virtual", "--show-device-info")
171171
out, err = cmd.CombinedOutput()
172172
if err != nil {
173173
log.Fatalf("cmd.Run() failed with %s\n", err)

k8s-dcu-plugin.yaml

Lines changed: 48 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -21,55 +21,65 @@ spec:
2121
- key: CriticalAddonsOnly
2222
operator: Exists
2323
containers:
24-
- image: projecthami/dcu-vgpu-device-plugin:v1.0.1
24+
- image: projecthami/dcu-vgpu-device-plugin:master
2525
#command: ["/bin/bash","-c","source /opt/hygondriver/env.sh && sleep infinity"]
26-
command: ["/root/k8s-device-plugin"]
26+
command: [ "/root/k8s-device-plugin" ]
2727
name: dcu-dp-cntr
2828
env:
29-
- name: NODE_NAME
30-
valueFrom:
31-
fieldRef:
32-
fieldPath: spec.nodeName
33-
- name: HYGONPATH
34-
value: /opt/dtk
35-
- name: BASH_ENV
36-
value: ~/.bashrc
29+
- name: NODE_NAME
30+
valueFrom:
31+
fieldRef:
32+
fieldPath: spec.nodeName
33+
- name: HYGONPATH
34+
value: /opt/dtk
35+
- name: BASH_ENV
36+
value: ~/.bashrc
3737
securityContext:
3838
privileged: true
3939
allowPrivilegeEscalation: true
4040
capabilities:
41-
drop: ["ALL"]
42-
add: ["SYS_ADMIN"]
41+
drop: [ "ALL" ]
42+
add: [ "SYS_ADMIN" ]
4343
volumeMounts:
44-
- name: dp
45-
mountPath: /var/lib/kubelet/device-plugins
46-
- name: sys
47-
mountPath: /sys
48-
- name: hwpath
49-
mountPath: /usr/share/hwdata
50-
- name: hygonloc
51-
mountPath: /opt/hygondriver/
52-
- name: lib
53-
mountPath: /usr/local/vgpu
54-
- name: hyhal
55-
mountPath: /opt/hyhal
56-
volumes:
5744
- name: dp
58-
hostPath:
59-
path: /var/lib/kubelet/device-plugins
45+
mountPath: /var/lib/kubelet/device-plugins
6046
- name: sys
61-
hostPath:
62-
path: /sys
47+
mountPath: /sys
48+
- name: dev
49+
mountPath: /dev
50+
- name: vdev
51+
mountPath: /etc/vdev
6352
- name: hwpath
64-
hostPath:
65-
path: /usr/share/hwdata
53+
mountPath: /usr/share/hwdata
6654
- name: hygonloc
67-
hostPath:
68-
path: /opt/dtk
55+
mountPath: /opt/hygondriver/
6956
- name: lib
70-
hostPath:
71-
path: /usr/local/vgpu
57+
mountPath: /usr/local/vgpu
7258
- name: hyhal
73-
hostPath:
74-
path: /opt/hyhal
75-
59+
mountPath: /opt/hyhal
60+
volumes:
61+
- name: dp
62+
hostPath:
63+
path: /var/lib/kubelet/device-plugins
64+
- name: sys
65+
hostPath:
66+
path: /sys
67+
- name: dev
68+
hostPath:
69+
path: /dev
70+
- name: vdev
71+
hostPath:
72+
path: /etc/vdev
73+
type: Directory
74+
- name: hwpath
75+
hostPath:
76+
path: /usr/share/hwdata
77+
- name: hygonloc
78+
hostPath:
79+
path: /opt/dtk
80+
- name: lib
81+
hostPath:
82+
path: /usr/local/vgpu
83+
- name: hyhal
84+
hostPath:
85+
path: /opt/hyhal

0 commit comments

Comments (0)