From 37bb3e17208f588f87fab85da7729c2301b81065 Mon Sep 17 00:00:00 2001 From: fourhu Date: Tue, 17 Dec 2024 17:30:42 +0800 Subject: [PATCH 1/2] fix device plugin failing to get showproductname on k100 Signed-off-by: fourhu --- Dockerfile | 2 +- internal/pkg/dcu/server.go | 8 ++-- k8s-dcu-plugin.yaml | 86 +++++++++++++++++++++----------------- 3 files changed, 53 insertions(+), 43 deletions(-) diff --git a/Dockerfile b/Dockerfile index fb822c8..9379785 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,7 @@ RUN cd /device-plugin && go build -o ./k8s-device-plugin cmd/k8s-device-plugin/m FROM ubuntu:20.04 ENV TZ=Asia/Dubai RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone -RUN apt-get update && apt-get -y install libhwloc-dev libdrm-dev pciutils +RUN apt-get update && apt-get -y install libhwloc-dev libdrm-dev pciutils libelf-dev kmod ENV LD_LIBRARY_PATH=/opt/hygondriver/hip/lib:/opt/hygondriver/llvm/lib:/opt/hygondriver/lib:/opt/hygondriver/lib64:/opt/hyhal/lib:/opt/hyhal/lib64:/opt/hygondriver/.hyhal/lib:/opt/hygondriver/.hyhal/lib64: ENV PATH=/opt/hygondriver/bin:/opt/hygondriver/llvm/bin:/opt/hygondriver/hip/bin:/opt/hygondriver/hip/bin/hipify:/opt/hyhal/bin:/opt/hygondriver/.hyhal/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin ENV C_INCLUDE_PATH=/opt/hygondriver/include:/opt/hyhal/include:/opt/hygondriver/llvm/include:/opt/hygondriver/.hyhal/include: diff --git a/internal/pkg/dcu/server.go b/internal/pkg/dcu/server.go index 59e99bc..1497c61 100644 --- a/internal/pkg/dcu/server.go +++ b/internal/pkg/dcu/server.go @@ -95,7 +95,7 @@ func (p *Plugin) Start() error { } p.count = 0 - cmd := exec.Command("hy-smi", "--showmeminfo", "vram") + cmd := exec.Command("/opt/hyhal/bin/hy-smi", "--showmeminfo", "vram") out, err := cmd.CombinedOutput() if err != nil { log.Fatalf("cmd.Run() failed with %s\n", err) @@ -124,7 +124,7 @@ func (p *Plugin) Start() error { p.count++ } - cmd = exec.Command("hy-smi", "--showproductname") + cmd = 
exec.Command("/opt/hyhal/bin/hy-smi", "--showproductname") out, err = cmd.CombinedOutput() if err != nil { log.Fatalf("cmd.Run() failed with %s\n", err) @@ -148,7 +148,7 @@ func (p *Plugin) Start() error { index++ } - cmd = exec.Command("hy-smi", "--showbus") + cmd = exec.Command("/opt/hyhal/bin/hy-smi", "--showbus") out, err = cmd.CombinedOutput() if err != nil { log.Fatalf("cmd.Run() failed with %s\n", err) @@ -167,7 +167,7 @@ func (p *Plugin) Start() error { } fmt.Println("collecting pcibus=", p.pcibusid) - cmd = exec.Command("hy-virtual", "--show-device-info") + cmd = exec.Command("/opt/hyhal/bin/hy-virtual", "--show-device-info") out, err = cmd.CombinedOutput() if err != nil { log.Fatalf("cmd.Run() failed with %s\n", err) diff --git a/k8s-dcu-plugin.yaml b/k8s-dcu-plugin.yaml index 545c152..495e5d7 100644 --- a/k8s-dcu-plugin.yaml +++ b/k8s-dcu-plugin.yaml @@ -21,55 +21,65 @@ spec: - key: CriticalAddonsOnly operator: Exists containers: - - image: projecthami/dcu-vgpu-device-plugin:v1.0.1 + - image: swr.cn-central-221.ovaijisuan.com/hami/dcu-vgpu-device-plugin:master #command: ["/bin/bash","-c","source /opt/hygondriver/env.sh && sleep infinity"] - command: ["/root/k8s-device-plugin"] + command: [ "/root/k8s-device-plugin" ] name: dcu-dp-cntr env: - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: HYGONPATH - value: /opt/dtk - - name: BASH_ENV - value: ~/.bashrc + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: HYGONPATH + value: /opt/dtk + - name: BASH_ENV + value: ~/.bashrc securityContext: privileged: true allowPrivilegeEscalation: true capabilities: - drop: ["ALL"] - add: ["SYS_ADMIN"] + drop: [ "ALL" ] + add: [ "SYS_ADMIN" ] volumeMounts: - - name: dp - mountPath: /var/lib/kubelet/device-plugins - - name: sys - mountPath: /sys - - name: hwpath - mountPath: /usr/share/hwdata - - name: hygonloc - mountPath: /opt/hygondriver/ - - name: lib - mountPath: /usr/local/vgpu - - name: hyhal - mountPath: 
/opt/hyhal - volumes: - name: dp - hostPath: - path: /var/lib/kubelet/device-plugins + mountPath: /var/lib/kubelet/device-plugins - name: sys - hostPath: - path: /sys + mountPath: /sys + - name: dev + mountPath: /dev + - name: vdev + mountPath: /etc/vdev - name: hwpath - hostPath: - path: /usr/share/hwdata + mountPath: /usr/share/hwdata - name: hygonloc - hostPath: - path: /opt/dtk + mountPath: /opt/hygondriver/ - name: lib - hostPath: - path: /usr/local/vgpu + mountPath: /usr/local/vgpu - name: hyhal - hostPath: - path: /opt/hyhal - + mountPath: /opt/hyhal + volumes: + - name: dp + hostPath: + path: /var/lib/kubelet/device-plugins + - name: sys + hostPath: + path: /sys + - name: dev + hostPath: + path: /dev + - name: vdev + hostPath: + path: /etc/vdev + type: Directory + - name: hwpath + hostPath: + path: /usr/share/hwdata + - name: hygonloc + hostPath: + path: /opt/dtk + - name: lib + hostPath: + path: /usr/local/vgpu + - name: hyhal + hostPath: + path: /opt/hyhal From f7408f310a199da1ccde0b05f8edc7e9e1acdaa6 Mon Sep 17 00:00:00 2001 From: fourhu Date: Tue, 17 Dec 2024 17:49:06 +0800 Subject: [PATCH 2/2] rename image Signed-off-by: fourhu --- k8s-dcu-plugin.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s-dcu-plugin.yaml b/k8s-dcu-plugin.yaml index 495e5d7..5e96901 100644 --- a/k8s-dcu-plugin.yaml +++ b/k8s-dcu-plugin.yaml @@ -21,7 +21,7 @@ spec: - key: CriticalAddonsOnly operator: Exists containers: - - image: swr.cn-central-221.ovaijisuan.com/hami/dcu-vgpu-device-plugin:master + - image: projecthami/dcu-vgpu-device-plugin:master #command: ["/bin/bash","-c","source /opt/hygondriver/env.sh && sleep infinity"] command: [ "/root/k8s-device-plugin" ] name: dcu-dp-cntr