From b276a85b5b87a18741715b62ed26180ac531b6a5 Mon Sep 17 00:00:00 2001 From: shenlian Date: Thu, 10 Jul 2025 10:24:39 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E5=88=A0=E9=99=A4k8s=20tool?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ascend_deployer/downloader/config.ini | 2 +- .../kubernetes/driver_installer/README.rst | 59 ------------------- .../driver_installer/image/dockerfile | 14 ----- .../install_scripts/driver-install.sh | 30 ---------- .../driver_installer/install_scripts/exec.sh | 45 -------------- .../yaml/310-arm-installer.yaml | 31 ---------- .../yaml/310-x86-installer.yaml | 31 ---------- .../yaml/310P-arm-installer.yaml | 31 ---------- .../yaml/310P-x86-installer.yaml | 31 ---------- .../yaml/910-arm-installer.yaml | 31 ---------- .../yaml/910-x86-installer.yaml | 31 ---------- 11 files changed, 1 insertion(+), 335 deletions(-) delete mode 100644 ascend_deployer/tools/kubernetes/driver_installer/README.rst delete mode 100644 ascend_deployer/tools/kubernetes/driver_installer/image/dockerfile delete mode 100644 ascend_deployer/tools/kubernetes/driver_installer/install_scripts/driver-install.sh delete mode 100644 ascend_deployer/tools/kubernetes/driver_installer/install_scripts/exec.sh delete mode 100644 ascend_deployer/tools/kubernetes/driver_installer/yaml/310-arm-installer.yaml delete mode 100644 ascend_deployer/tools/kubernetes/driver_installer/yaml/310-x86-installer.yaml delete mode 100644 ascend_deployer/tools/kubernetes/driver_installer/yaml/310P-arm-installer.yaml delete mode 100644 ascend_deployer/tools/kubernetes/driver_installer/yaml/310P-x86-installer.yaml delete mode 100644 ascend_deployer/tools/kubernetes/driver_installer/yaml/910-arm-installer.yaml delete mode 100644 ascend_deployer/tools/kubernetes/driver_installer/yaml/910-x86-installer.yaml diff --git a/ascend_deployer/downloader/config.ini b/ascend_deployer/downloader/config.ini index 03f66623..c2d84c94 100644 --- a/ascend_deployer/downloader/config.ini +++ b/ascend_deployer/downloader/config.ini @@ -12,5 +12,5 @@ skip_confirm = 0 [obs_downloader_config] download_url = https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/MindX/OpenSource/ascend-deployer/obs_downloader_config.zip -md5 = 78f9ae3aa87a33cc6e183b6a63742cbc +md5 = 78f9ae3aa87a33cc6e183b6a63742cb diff --git a/ascend_deployer/tools/kubernetes/driver_installer/README.rst b/ascend_deployer/tools/kubernetes/driver_installer/README.rst deleted file mode 100644 index bcaebdda..00000000 --- a/ascend_deployer/tools/kubernetes/driver_installer/README.rst +++ /dev/null @@ -1,59 +0,0 @@ - - -简介 -======= - -本工具提供给客户在已有集群的情况下在新节点加入集群后自动安装驱动的功能,支持在集群条件下华为NPU的驱动与固件的安装。 - -快速指南 -=========== - -本工具主要支持存在已使用ascend-deployer的环境安装的集群的场景,如果遇到问题,请参考\ `ascend-deployer用户指南 `__\。 - -安装内容 -------------- - -工具支持安装的内容为驱动以及固件 - -使用样例 -------------- - -以下以Ubuntu_20.04_aarch64系统,服务器上插Atlas 300I Pro推理卡为例快速展示工具的使用方式。 - -1. 准备好驱动固件的run文件(可通过ascend-deployer下载,也可自行官方下载解压)Ascend-hdk-310P-npu-driver_23.0.rc1_linux-x86-64.run和Ascend-hdk-310P-npu-firmware_6.3.0.1.241.run,。 -和驱动的依赖文件,在ascend-deployer下载中的Ubuntu_20.04_aarch64文件夹下,也可根据操作系统自己准备,这里是net-tools和pciutils的deb文件 - -2. 将上述文件与工具中的driver-install.sh dockerfile文件放置于同一文件夹下进行镜像构建,构建出的镜像名为arm-310p-installer:v1(也可以自己指定), -上传进节点可以拉取到的镜像仓中,可以为本地仓。 - - :: - - docker build -t arm-310p-installer:v1 . - 文件如下 - . - |- dockerfile - |- driver-install.sh - |- Ubuntu_20.04_aarch64 - | |- net-tools_1.60_arm64.deb - | |- pciutils_5.30_arm64.deb - |_ run_from_a310_zip - |- Ascend-hdk-310P-npu-driver_23.0.rc1_linux-x86-64.run - |- Ascend-hdk-310P-firmware_6.3.0.1.241.run - |_ install.sh - - -3. 将工具中的kubernetes的yaml进行apply(主节点),重点关注containers里的镜像是否和上面构建出的镜像正确对应,以及nodeselector字段是否为筛选arm和300p的标签 -部署后查看daemonset判断是否创建成功 - - :: - - kubectl apply -f 310-arm-installer.yaml - -4. 将节点使用kubeadm join命令加入到集群中,在主节点上给节点打上相应标签,然后等待驱动安装完毕 - - :: - - kubectl label node [worker] --overwrite host-arch=huawei-arm - kubectl label node [worker] --overwrite accelerator=huawei-Ascend310P - - 执行以上命令后,然后等待驱动安装完毕 \ No newline at end of file diff --git a/ascend_deployer/tools/kubernetes/driver_installer/image/dockerfile b/ascend_deployer/tools/kubernetes/driver_installer/image/dockerfile deleted file mode 100644 index b9d83093..00000000 --- a/ascend_deployer/tools/kubernetes/driver_installer/image/dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM ubuntu:20.04 - -RUN sed -i "s@http://.*ubuntu.com@http://repo.huaweicloud.com@g" /etc/apt/sources.list && \ - apt update && \ - apt install -y --no-install-recommends iproute2 iputils-ping ssh openssl sshpass && \ - apt clean - -WORKDIR /root - -COPY ./* ./ - -EXPOSE 22 - -ENTRYPOINT ["bash","driver-install.sh"] \ No newline at end of file diff --git a/ascend_deployer/tools/kubernetes/driver_installer/install_scripts/driver-install.sh b/ascend_deployer/tools/kubernetes/driver_installer/install_scripts/driver-install.sh deleted file mode 100644 index 5b2dd98f..00000000 --- a/ascend_deployer/tools/kubernetes/driver_installer/install_scripts/driver-install.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -ping -c 1 172.17.0.1 > /dev/null - -if [ $? -eq 0 ];then - hostIp=172.17.0.1 -else - hostIp=$(ip route|awk '/default/ {print $3}') -fi - - -mkdir -p /root/.ssh /mnt/.ssh -ssh-keygen -t rsa -b 4096 -f ~/.ssh/id_rsa -N "" -cat ~/.ssh/id_rsa.pub >> /mnt/.ssh/authorized_keys - -cd /root -cp *-driver*.run /mnt -cp *firmware*.run /mnt -cp install.sh /mnt -cp exec.sh /mnt - -mkdir -p /mnt/pkgs -cp *.deb /mnt/pkgs -cp *.rpm /mnt/pkgs - -ssh -o "StrictHostKeyChecking=no" root@$hostIp groupadd HwHiAiUser -ssh root@$hostIp useradd -g HwHiAiUser -d /home/HwHiAiUser -m HwHiAiUser -s /bin/bash -ssh root@$hostIp bash /root/exec.sh - -tail -f /var/log/ascend_seclog/ascend_install.log diff --git a/ascend_deployer/tools/kubernetes/driver_installer/install_scripts/exec.sh b/ascend_deployer/tools/kubernetes/driver_installer/install_scripts/exec.sh deleted file mode 100644 index 1a3cd8bc..00000000 --- a/ascend_deployer/tools/kubernetes/driver_installer/install_scripts/exec.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash - -if command -v dpkg >/dev/null 2>&1; then - dpkg --force-all -i /root/pkgs/*.deb -elif command -v rpm >/dev/null 2>&1; then - rpm -iUv /root/pkgs/*.rpm --nodeps --force -else - echo "Unknown package manager" -fi - -driver_installed=$(find /usr/local/Ascend/driver -name "version.info"|wc -l) -firmware_installed=$(find /usr/local/Ascend/firmware -name "version.info"|wc -l) -if [ $driver_installed -eq 0 ];then - bash /root/*-driver*.run --nox11 --full --install-for-all --quiet -fi -if [ $firmware_installed -eq 0 ]; then - bash /root/*firmware*.run --nox11 --full --quiet -fi -if [ $driver_installed -eq 1 ]&&[ $firmware_installed -eq 1 ]; then - firmware_filename=$(ls /root/Ascend-hdk-*-firmware*) - basename="${firmware_filename%.*}" - firmware_part=$(echo $basename | cut -d "-" -f 5) - if [[ "${firmware_part:0:8}"=="firmware" ]]; then - version_part=$(echo $firmware_part | cut -d "_" -f 2 ) - else - echo "wrong firmware file format" - exit 1 - fi - present_version=$(awk -F= '($1=="Version"){print $2}' /usr/local/Ascend/firmware/version.info) - if [[ "$present_version" != "$version_part" ]]; then - bash /root/*firmware*.run --nox11 --upgrade --quiet - fi - driver_filename=$(ls /root/Ascend-hdk-*-driver*) - driver_part=$(echo $driver_filename | cut -d "-" -f 5) - if [[ "${driver_part:0:6}"=="driver" ]]; then - version_part=$(echo $driver_part | cut -d "_" -f 2) - else - echo "wrong npu file format" - exit 1 - fi - present_version=$(awk -F= '($1=="Version"){print $2}' /usr/local/Ascend/driver/version.info) - if [[ "$present_version" != "$version_part" ]]; then - bash /root/*-driver*.run --nox11 --upgrade --quiet - fi -fi diff --git a/ascend_deployer/tools/kubernetes/driver_installer/yaml/310-arm-installer.yaml b/ascend_deployer/tools/kubernetes/driver_installer/yaml/310-arm-installer.yaml deleted file mode 100644 index 5362b23a..00000000 --- a/ascend_deployer/tools/kubernetes/driver_installer/yaml/310-arm-installer.yaml +++ /dev/null @@ -1,31 +0,0 @@ -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: driver-installer-310-arm -spec: - selector: - matchLabels: - app: 310-arm-driver - template: - metadata: - labels: - app: 310-arm-driver - spec: - containers: - - name: arm-310-installer - image: arm-310-installer:v1 - volumeMounts: - - name: run-file - mountPath: /mnt - - name: log-file - mountPath: /var/log - volumes: - - name: run-file - hostPath: - path: /root - - name: log-file - hostPath: - path: /var/log - nodeSelector: - host-arch: huawei-arm - accelerator: huawei-Ascend310 \ No newline at end of file diff --git a/ascend_deployer/tools/kubernetes/driver_installer/yaml/310-x86-installer.yaml b/ascend_deployer/tools/kubernetes/driver_installer/yaml/310-x86-installer.yaml deleted file mode 100644 index fbb41436..00000000 --- a/ascend_deployer/tools/kubernetes/driver_installer/yaml/310-x86-installer.yaml +++ /dev/null @@ -1,31 +0,0 @@ -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: driver-installer-310-x86 -spec: - selector: - matchLabels: - app: 310-x86-driver - template: - metadata: - labels: - app: 310-x86-driver - spec: - containers: - - name: x86-310-installer - image: x86-310-installer:v1 - volumeMounts: - - name: run-file - mountPath: /mnt - - name: log-file - mountPath: /var/log - volumes: - - name: run-file - hostPath: - path: /root - - name: log-file - hostPath: - path: /var/log - nodeSelector: - host-arch: huawei-x86 - accelerator: huawei-Ascend310 \ No newline at end of file diff --git a/ascend_deployer/tools/kubernetes/driver_installer/yaml/310P-arm-installer.yaml b/ascend_deployer/tools/kubernetes/driver_installer/yaml/310P-arm-installer.yaml deleted file mode 100644 index 5936f03c..00000000 --- a/ascend_deployer/tools/kubernetes/driver_installer/yaml/310P-arm-installer.yaml +++ /dev/null @@ -1,31 +0,0 @@ -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: driver-installer-310p-arm -spec: - selector: - matchLabels: - app: 310p-arm-driver - template: - metadata: - labels: - app: 310p-arm-driver - spec: - containers: - - name: arm-310p-installer - image: arm-310p-installer:v1 - volumeMounts: - - name: run-file - mountPath: /mnt - - name: log-file - mountPath: /var/log - volumes: - - name: run-file - hostPath: - path: /root - - name: log-file - hostPath: - path: /var/log - nodeSelector: - host-arch: huawei-arm - accelerator: huawei-Ascend310P \ No newline at end of file diff --git a/ascend_deployer/tools/kubernetes/driver_installer/yaml/310P-x86-installer.yaml b/ascend_deployer/tools/kubernetes/driver_installer/yaml/310P-x86-installer.yaml deleted file mode 100644 index bf6b3951..00000000 --- a/ascend_deployer/tools/kubernetes/driver_installer/yaml/310P-x86-installer.yaml +++ /dev/null @@ -1,31 +0,0 @@ -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: driver-installer-310p-x86 -spec: - selector: - matchLabels: - app: 310p-x86-driver - template: - metadata: - labels: - app: 310p-x86-driver - spec: - containers: - - name: x86-310p-installer - image: x86-310p-installer:v1 - volumeMounts: - - name: run-file - mountPath: /mnt - - name: log-file - mountPath: /var/log - volumes: - - name: run-file - hostPath: - path: /root - - name: log-file - hostPath: - path: /var/log - nodeSelector: - host-arch: huawei-x86 - accelerator: huawei-Ascend310P \ No newline at end of file diff --git a/ascend_deployer/tools/kubernetes/driver_installer/yaml/910-arm-installer.yaml b/ascend_deployer/tools/kubernetes/driver_installer/yaml/910-arm-installer.yaml deleted file mode 100644 index d83b4f85..00000000 --- a/ascend_deployer/tools/kubernetes/driver_installer/yaml/910-arm-installer.yaml +++ /dev/null @@ -1,31 +0,0 @@ -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: driver-installer-910-arm -spec: - selector: - matchLabels: - app: 910-arm-driver - template: - metadata: - labels: - app: 910-arm-driver - spec: - containers: - - name: arm-910-installer - image: arm-910-installer:v1 - volumeMounts: - - name: run-file - mountPath: /mnt - - name: log-file - mountPath: /var/log - volumes: - - name: run-file - hostPath: - path: /root - - name: log-file - hostPath: - path: /var/log - nodeSelector: - host-arch: huawei-arm - accelerator: huawei-Ascend910 \ No newline at end of file diff --git a/ascend_deployer/tools/kubernetes/driver_installer/yaml/910-x86-installer.yaml b/ascend_deployer/tools/kubernetes/driver_installer/yaml/910-x86-installer.yaml deleted file mode 100644 index 4d3787d6..00000000 --- a/ascend_deployer/tools/kubernetes/driver_installer/yaml/910-x86-installer.yaml +++ /dev/null @@ -1,31 +0,0 @@ -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: driver-installer-910-x86 -spec: - selector: - matchLabels: - app: 910-x86-driver - template: - metadata: - labels: - app: 910-x86-driver - spec: - containers: - - name: x86-910-installer - image: x86-910-installer:v1 - volumeMounts: - - name: run-file - mountPath: /mnt - - name: log-file - mountPath: /var/log - volumes: - - name: run-file - hostPath: - path: /root - - name: log-file - hostPath: - path: /var/log - nodeSelector: - host-arch: huawei-x86 - accelerator: huawei-Ascend910 \ No newline at end of file -- Gitee From c52926230f8848e548d32c7bcb798fe9ff002770 Mon Sep 17 00:00:00 2001 From: shenlian Date: Thu, 10 Jul 2025 10:25:56 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E5=88=A0=E9=99=A4k8s=20tool?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ascend_deployer/downloader/config.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ascend_deployer/downloader/config.ini b/ascend_deployer/downloader/config.ini index c2d84c94..03f66623 100644 --- a/ascend_deployer/downloader/config.ini +++ b/ascend_deployer/downloader/config.ini @@ -12,5 +12,5 @@ skip_confirm = 0 [obs_downloader_config] download_url = https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/MindX/OpenSource/ascend-deployer/obs_downloader_config.zip -md5 = 78f9ae3aa87a33cc6e183b6a63742cb +md5 = 78f9ae3aa87a33cc6e183b6a63742cbc -- Gitee