diff --git a/ascend_deployer/downloader/other_downloader.py b/ascend_deployer/downloader/other_downloader.py index bad6725a3917b53e36d08f2c0a4fc3a3ee260701..14b58cc752650f2370613ab4de85823b7d4bda78 100644 --- a/ascend_deployer/downloader/other_downloader.py +++ b/ascend_deployer/downloader/other_downloader.py @@ -84,7 +84,7 @@ class OtherDownloader: other_pkgs = [pkg for pkg in other_pkgs if "tfplugin" not in pkg.filename] download_dir = os.path.join(self._base_dir, "resources", "{0}_{1}".format(soft_ver.name, soft_ver.version)) self._mk_download_dir(other_pkgs, download_dir, soft_ver) - if soft_ver.name in ("CANN", "NPU", "FaultDiag", "MindIE-image"): + if soft_ver.name in ("CANN", "NPU", "FaultDiag", "MindIE-image", "NPU-Container"): results = self._collect_pkgs_by_arch(arch, download_dir, self._base_dir, other_pkgs) else: results = [] diff --git a/ascend_deployer/downloader/software/NPU-Container_25.0.RC1.json b/ascend_deployer/downloader/software/NPU-Container_25.0.RC1.json new file mode 100644 index 0000000000000000000000000000000000000000..e4156cd33455f806da41975d2227c8409198956c --- /dev/null +++ b/ascend_deployer/downloader/software/NPU-Container_25.0.RC1.json @@ -0,0 +1,37 @@ +{ + "name": "NPU-Container", + "version": "25.0.RC1", + "default": true, + "required_soft": [ + { + "name": "NPU", + "version": "25.0.RC1" + } + ], + "other": [ + { + "filename": "910A2_aarch64.tar.gz", + "url": "https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/images/npu_container/910A2.tar.gz", + "sha256": "07d77fada8971f1a3d7befdebfd93fa05d5c7233f395ad048fbcdc055fbba3d2", + "dest": "resources/npu_container" + }, + { + "filename": "910A2_x86_64.tar.gz", + "url": "https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/images/npu_container/910A2.tar.gz", + "sha256": "07d77fada8971f1a3d7befdebfd93fa05d5c7233f395ad048fbcdc055fbba3d2", + "dest": "resources/npu_container" + }, + { + "filename": "openeuler-with-kmod-22.03-lts-aarch64.tar.gz", + "url": 
"https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/images/os/openeuler-with-kmod-22.03-lts-aarch64.tar.gz", + "sha256": "935e00ad4c0de79a31c0e0212e94fc6e7a68fb4896b125ec63c027e90689b01b", + "dest": "resources/npu_container" + }, + { + "filename": "openeuler-with-kmod-22.03-lts-x86_64.tar.gz", + "url": "https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/images/os/openeuler-with-kmod-22.03-lts-x86_64.tar.gz", + "sha256": "0055a5e721460bbe8757cdf4ee0fa256eb43daf24e467c52a4e2aa4fe56206b0", + "dest": "resources/npu_container" + } + ] + } \ No newline at end of file diff --git a/ascend_deployer/library/install_npu_by_container.py b/ascend_deployer/library/install_npu_by_container.py new file mode 100644 index 0000000000000000000000000000000000000000..fb47f81f189075c7530e714504db10e2f6d9eb3d --- /dev/null +++ b/ascend_deployer/library/install_npu_by_container.py @@ -0,0 +1,351 @@ +#!/usr/bin/env python3 +# coding: utf-8 +# Copyright 2025 Huawei Technologies Co., Ltd +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# =========================================================================== + +from datetime import datetime +import glob +import os +from pathlib import Path +import platform +import re +import time + +from ansible.module_utils.basic import AnsibleModule +from ansible.module_utils.check_utils import CheckUtil +from ansible.module_utils import common_info +from ansible.module_utils.common_info import NPUCardName, get_os_and_arch, ARCH + + +class Base: + + step_install = "install" + step_build = "build" + + def __init__(self): + self.module = AnsibleModule( + argument_spec=dict( + resources_dir=dict(type="str", required=True), + step=dict(type="str", required=True), + image_name=dict(type="str", required=False), + ) + ) + self.facts = dict() + self.resources_dir = os.path.expanduser(self.module.params["resources_dir"]) + self.step = self.module.params["step"] + self.image_name = self.module.params.get("image_name") + self.os_and_arch = get_os_and_arch() + self.npu_name = self._get_npu_name() + self.npu_container = os.path.join(self.resources_dir, "npu_container") + self.docker_dir = os.path.join(self.npu_container, self.npu_name) + self.needed_commands = ["docker", "unzip", "tar", "modprobe"] + self._extract_tar() + self._validator() + + @staticmethod + def _get_npu_name(): + npu = CheckUtil.get_card() + if npu.endswith("b"): + return NPUCardName.A910A2 + if npu.endswith("93"): + return NPUCardName.A910A3 + if npu.endswith("910"): + return NPUCardName.A910A1 + return npu + + def _extract_tar(self): + pattern = "{}_ko_files*.tar.gz".format(self.os_and_arch) + matched_files = glob.glob(os.path.join(self.npu_container, pattern)) + if not matched_files: + self.module.fail_json( + changed=False, + rc=1, + msg="[ASCEND][[ERROR]]no ko_files tar.gz pkg found, pattern: {}".format(pattern) + ) + # format: 910A2_aarch64.tar.gz + pkg = [os.path.join(self.npu_container, "{}_{}.tar.gz".format(self.npu_name, ARCH))] + matched_files + for p in pkg: + command = "tar -xf {} -C 
{}".format(p, self.npu_container) + self.run(command) + + def _validator(self): + if not Path(self.docker_dir).exists(): + self.module.fail_json( + changed=False, + rc=1, + msg="[ASCEND][[ERROR]]check the docker dir failed: {} is not existed.".format(self.docker_dir) + ) + for cmd in self.needed_commands: + if not self.module.get_bin_path(cmd): + self.module.fail_json( + changed=False, + rc=1, + msg="[ASCEND][[ERROR]]check the required command failed: {} is not existed.".format(cmd) + ) + + def run(self, cmd): + return_code, out, err = self.module.run_command(cmd, use_unsafe_shell=True) + output = out + err + if return_code != 0: + self.module.fail_json( + changed=False, + rc=1, + msg="[ASCEND][[ERROR]]run command: {} failed, output: {}".format(cmd, output) + ) + return output + + +class ProcessNPUDriver(Base): + """ + A class to handle the NPU driver directory. + """ + + def __init__(self): + super(ProcessNPUDriver, self).__init__() + self.npu_info = common_info.get_npu_info() + self.driver_parent_dir = os.path.join(self.resources_dir, "npu") + self.driver_file_path = None + self.messages = [] + + def _find_files(self, path, pattern): + self.messages.append("try to find {} for {}".format(path, pattern)) + matched_files = glob.glob(os.path.join(path, pattern)) + self.messages.append("find files: " + ",".join(matched_files)) + if len(matched_files) > 0: + return matched_files[0] + return "" + + def _find_npu_files(self): + arch = ARCH + if arch == "x86_64": + arch = "x86?64" # old package mix x86-64 and x86_64 + uniform_npu_scene = npu_scene = self.npu_info.get("scene") + # uniform package has higher priority + uni_package_path = common_info.get_scene_dict(self.resources_dir).get(uniform_npu_scene) + if uni_package_path: + driver_file_path = self._find_files(uni_package_path, r"*npu-driver*linux*%s*.run" % arch) + self.driver_file_path = driver_file_path or self.driver_file_path + return + + def extract_npu_driver(self): + self._find_npu_files() + if not 
self.driver_file_path: + self.module.fail_json( + changed=False, + rc=1, + msg="[ASCEND][[ERROR]]Do not find npu driver run file in {}.".format(self.resources_dir) + ) + driver_dir = os.path.join(self.driver_parent_dir, "npu_driver") + command = "bash {} --noexec --extract={}".format( + self.driver_file_path, driver_dir) + self.run(command) + return driver_dir + + +class ImageController(Base): + + base_image_name = "openeuler-with-kmod" + base_image_version = "22.03-lts" + timeout = 60 + + def __init__(self): + super(ImageController, self).__init__() + self.image_version = datetime.now().strftime("%Y%m%d") + self.container_version = datetime.now().strftime("%Y%m%d%H%M") + self.ko_folder = "/lib/modules/{}/npu_driver".format(os.uname().release) + self.install_ko_script = "install_ko.sh" + self.ascend_tool_path = "/usr/local/Ascend/driver/tools" + + def _load_base_image(self): + base_image_pkg_name = "{}-{}-{}.tar.gz".format(self.base_image_name, self.base_image_version, ARCH) + command = "docker load -i {}/{}".format(self.npu_container, base_image_pkg_name) + self.run(command) + + def build_image(self): + image_name = "npu_driver_image:v{}".format(self.image_version) + self._load_base_image() + command = "docker build -f {}/Dockerfile -t {} {}".format(self.docker_dir, image_name, self.docker_dir) + self.run(command) + command = "docker save {} -o {}/{}".format(image_name, self.npu_container, image_name) + self.run(command) + return image_name + + def load_image(self): + if not self.image_name: + self.module.fail_json( + changed=False, + rc=1, + msg="[ASCEND][[ERROR]]Input image name is empty." 
+ ) + image_file = os.path.join(self.npu_container, self.image_name) + command = "docker load -i {}".format(image_file) + self.run(command) + + def run_container(self): + container_name = "npu_driver_container_{}".format(self.container_version) + command = "docker run " \ + "--privileged " \ + "-it -d " \ + "-v /lib:/mnt/lib " \ + "-v /usr/local:/usr/local " \ + "-v /root/.bashrc:/host_bashrc " \ + "-v /etc:/etc " \ + "--name {} {}".format(container_name, self.image_name) + self.run(command) + # query log + start = 0 + command = "docker logs {}".format(container_name) + while start <= self.timeout: + out = self.run(command) + if re.search(r"Finished", out): + return container_name + start += 1 + time.sleep(1) + self.module.fail_json( + msg="container started failed, please check the container log: 'docker logs {}'".format(container_name), + rc=1, + changed=True, + ) + + def install_ko(self): + script = os.path.join(self.ko_folder, self.install_ko_script) + self.run("bash {}".format(script)) + # make binary to executable + command = "chmod +x {}/*".format(self.ascend_tool_path) + self.run(command) + + def destroy(self, container_name): + commands = [ + "docker rm -f {}".format(container_name), + "docker image rm -f {}".format(self.image_name), + "docker image rm -f {}:{}".format(self.base_image_name, self.base_image_version), + ] + for command in commands: + self.run(command) + + +class InstallNPUDriverByContainer(Base): + """ + A class to handle the installation of NPU drivers using Docker containers. 
""" + + def __init__(self): + super(InstallNPUDriverByContainer, self).__init__() + self.image_controller = ImageController() + self.npu_driver = ProcessNPUDriver() + self.kernel_version = platform.uname().release + self.env_sh = Path("/etc/profile.d/ascend.sh") + self._validator() + + def _find_ko_files(self): + """ + - /resources/npu_container/{os_and_arch}_ko_files*/{}_ko_files + - 4.19.90 + - 5.10.0 + """ + folder_pattern = "{}_ko_files*".format(self.os_and_arch) + matched_folder = glob.glob(os.path.join(self.npu_container, folder_pattern)) + matched_folder = [i for i in matched_folder if os.path.isdir(i)] + if not matched_folder: + self.module.fail_json( + changed=False, + rc=1, + msg="[ASCEND][[ERROR]]folder({}) does not exist.".format(folder_pattern) + ) + unzipped_ko_files_dir = matched_folder[0] + # process kernel + # kernel example: 4.19.90-vhulk2111.1.0.h963.eulerosv2r10.aarch64, 5.10.0-60.18.0.50.oe2203.aarch64 + # process it as: 4.19.90, 5.10.0 + # split string with "-", get the first element + simple_kernel = self.kernel_version.split("-")[0] + all_existed_version = [ + i for i in os.listdir(unzipped_ko_files_dir) if os.path.isdir(os.path.join(unzipped_ko_files_dir, i)) + ] + if not all_existed_version: + self.module.fail_json( + changed=False, + rc=1, + msg="[ASCEND][[ERROR]]no ko files is found in {}.".format(unzipped_ko_files_dir) + ) + + origin_ko_files_dir = os.path.join(unzipped_ko_files_dir, all_existed_version[0]) + if simple_kernel in all_existed_version: + origin_ko_files_dir = os.path.join(unzipped_ko_files_dir, simple_kernel) + + return origin_ko_files_dir + + def _copy_resources(self): + driver_dir = self.npu_driver.extract_npu_driver() + origin_ko_files_dir = self._find_ko_files() + commands = [ + "cp -r {}/driver {} ".format(driver_dir, self.docker_dir), + "mkdir -p {}/ko_files".format(self.docker_dir), + "cp {}/*.ko {}/ko_files".format(origin_ko_files_dir, self.docker_dir) + ] + for cmd in commands: + self.run(cmd) + + def 
_query_result(self, container_name): + command = "bash {}".format(str(self.env_sh)) + max_retry = 10 + for i in range(max_retry): + if not Path(self.env_sh).exists(): + time.sleep((i + 1) ** 2) + continue + self.run(command) + if not self.module.get_bin_path("npu-smi"): + self.module.fail_json( + changed=False, + rc=1, + msg="[ASCEND][[ERROR]]npu-smi command not found, " \ + "please check the log of the container by 'docker logs {}'".format(container_name) + ) + + def execute(self): + """ + Main method to install the NPU driver by running a Docker container. + """ + if self.step == self.step_build: + self._copy_resources() + image_name = self.image_controller.build_image() + self.facts["image_name"] = image_name + self.module.exit_json( + rc=0, + changed=True, + msg="build {} successfully.".format(image_name), + ansible_facts=self.facts, + ) + elif self.step == self.step_install: + container_name = self.image_controller.run_container() + self.image_controller.install_ko() + self._query_result(container_name) + self.image_controller.destroy(container_name) + self.module.exit_json( + rc=0, + changed=True, + msg="install npu driver by container successfully." 
+ ) + else: + self.module.fail_json( + changed=False, + rc=1, + msg="[ASCEND][[ERROR]]Invalid step: {}".format(self.step) + ) + + +if __name__ == "__main__": + installer = InstallNPUDriverByContainer() + installer.execute() diff --git a/ascend_deployer/module_utils/compatibility_config.py b/ascend_deployer/module_utils/compatibility_config.py index 3e134d7de7fc47924192d73ed8c097ddfa82304e..579f854c878de62a79f3176da16a220a2ebb47a7 100644 --- a/ascend_deployer/module_utils/compatibility_config.py +++ b/ascend_deployer/module_utils/compatibility_config.py @@ -63,6 +63,7 @@ class Tags: DRIVER = "driver" FIRMWARE = "firmware" NPU = "npu" + NPU_BY_CONTAINER = "npu_by_container" MCU = "mcu" TFPLUGIN = "tfplugin" NNAE = "nnae" @@ -105,7 +106,8 @@ class Tags: DRIVER, FIRMWARE, NPU, - MCU + MCU, + NPU_BY_CONTAINER } # The tfplugin component was removed after version 8.0.0. @@ -187,8 +189,9 @@ class HardwareOSTags: A300T_A2_SUPPORT_TAGS = (Tags.BASIC_TAGS | Tags.MINDCLUSTER_TAGS | Tags.AI_FRAMEWORKS_TAGS) # 800i-a2 support mindie_image - ATLAS_800I_A2_SUPPORT_TAGS = (Tags.BASIC_TAGS | Tags.MINDCLUSTER_TAGS | {Tags.MINDIE_IMAGE, Tags.OFFLINE_DEV, - Tags.OFFLINE_RUN}) + ATLAS_800I_A2_SUPPORT_TAGS = ( + Tags.BASIC_TAGS | Tags.MINDCLUSTER_TAGS | + {Tags.MINDIE_IMAGE, Tags.OFFLINE_DEV, Tags.OFFLINE_RUN}) ATLAS_800I_A3_SUPPORT_TAGS = (Tags.BASIC_TAGS | Tags.MINDCLUSTER_TAGS) - {Tags.DOCKER_IMAGES} diff --git a/ascend_deployer/playbooks/install/install_npu_by_container.yml b/ascend_deployer/playbooks/install/install_npu_by_container.yml new file mode 100644 index 0000000000000000000000000000000000000000..4ab90c156fbb29f81c3a26f8509fe5b22914c1ca --- /dev/null +++ b/ascend_deployer/playbooks/install/install_npu_by_container.yml @@ -0,0 +1,26 @@ +- hosts: + - worker[0] + - other_build_image + name: build image for npu driver + tasks: + - name: build npu driver image + install_npu_by_container: + resources_dir: "{{ resource_path }}" + step: 'build' + + - name: fetch noded image + scp: + 
ip: "{{ inventory_hostname }}" + port: "{{ansible_ssh_port|default('22')}}" + remote_user: "{{ ansible_ssh_user }}" + passwd: "{{ ansible_ssh_pass|default('') }}" + src: "{{ resource_path }}/npu_container/{{ image_name }}" + dest: "{{ resource_path }}/npu_container/" + fetch: 'true' + delegate_to: localhost + + - name: install npu driver by container + install_npu_by_container: + resources_dir: "{{ resource_path }}" + step: 'install' + image_name: "{{ image_name }}" \ No newline at end of file diff --git a/ascend_deployer/playbooks/process/process_install.yml b/ascend_deployer/playbooks/process/process_install.yml index 83470ce7b7a41bfc944a0f8dd0e0d9abdc0a49c9..a1ef09445566c3548a9ed34b5f4e2283e908289e 100644 --- a/ascend_deployer/playbooks/process/process_install.yml +++ b/ascend_deployer/playbooks/process/process_install.yml @@ -18,6 +18,10 @@ import_playbook: ../install/install_npu.yml tags: driver,firmware,npu +- name: install npu driver by container + import_playbook: ../install/install_npu_by_container.yml + tags: npu_by_container + - name: install atlasedge import_playbook: ../install/install_atlasedge.yml tags: atlasedge diff --git a/ascend_deployer/scripts/pkg_utils.py b/ascend_deployer/scripts/pkg_utils.py index d93c1ffe07853fe10e15a59ede5f8ad19b94415e..c5876c8e081931301c11018128cf750ddaa5a115 100644 --- a/ascend_deployer/scripts/pkg_utils.py +++ b/ascend_deployer/scripts/pkg_utils.py @@ -215,6 +215,11 @@ tags_map = { 'name_keywords': ['mindie-image'], 'path_keywords': ['MindIE-image', ], }, + 'npu_by_container': { + 'need_nexus': False, + 'name_keywords': ['*', "npu"], + 'path_keywords': ['npu_container', 'run_from_*_zip'], + }, } pkg_run_paths = ( diff --git a/ascend_deployer/tools/npu_container/910A2/Dockerfile b/ascend_deployer/tools/npu_container/910A2/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..32bc3925837d5a702701b730dfa55adaed6f01bb --- /dev/null +++ b/ascend_deployer/tools/npu_container/910A2/Dockerfile @@ -0,0 
+1,16 @@ +FROM openeuler-with-kmod:22.03-lts + +WORKDIR /app + +COPY davinci.conf . +COPY dms_events_conf.lst . +COPY ./driver /app/driver +COPY ./ko_files /app/ko_files +COPY install_ko.sh . +COPY installation.py . +COPY entrypoint.sh . + +RUN chmod +x /app/entrypoint.sh +RUN chmod +x /app/install_ko.sh + +CMD ["/app/entrypoint.sh"] \ No newline at end of file diff --git a/ascend_deployer/tools/npu_container/910A2/davinci.conf b/ascend_deployer/tools/npu_container/910A2/davinci.conf new file mode 100644 index 0000000000000000000000000000000000000000..41c033a387a8d34774d5a30d71a88083af180d9d --- /dev/null +++ b/ascend_deployer/tools/npu_container/910A2/davinci.conf @@ -0,0 +1 @@ +DAVINCI_HOME_PATH=/usr/local/Ascend \ No newline at end of file diff --git a/ascend_deployer/tools/npu_container/910A2/dms_events_conf.lst b/ascend_deployer/tools/npu_container/910A2/dms_events_conf.lst new file mode 100644 index 0000000000000000000000000000000000000000..899d83bf3fe6f2b2e353413952215cb51e243a6b --- /dev/null +++ b/ascend_deployer/tools/npu_container/910A2/dms_events_conf.lst @@ -0,0 +1,217 @@ +# event_code serverity + +# H2D heartbeat +0x40F84E00 3 + +# memory +0x80E01805 0 +0x80E18400 1 +0x80E01801 3 +0x80E18401 0 +0x80E18402 3 +0x80F38006 0 +0x80F38008 2 +0x80F38003 0 +0x80F2180D 2 +0x80E18006 0 +0x80E18005 1 +0x80E18008 2 +0x80E1800A 2 +0x80E18009 2 +0x80E01809 0 +0x80E00209 1 +0x80E0020B 3 +0x80E18000 1 +0x80E1800F 0 + +# SOC & L3D L3T CPU core +0x80A18006 0 +0x80A18005 1 +0x80A18008 2 +0x80A38006 0 +0x80A38008 2 +0x80A38003 0 +0x80A58006 0 +0x80A58008 2 +0x80A58003 0 +0x8C2FA009 0 +0x80CD8809 3 +0x80818C00 1 + +# PM/IAM +0x8C084E00 1 +0x8C0E4E00 1 +0x8C0A4E00 1 +0x8C104E00 1 +0x8C0C4E00 1 +0x8C204E00 2 +0x8C124E00 1 +0x8C044E00 1 +0x8C064E00 1 +0x8C03A000 2 +0x8C2FA001 2 +0x8C464E00 1 + +# RTS +0x80C98008 1 +0x80C98002 1 +0x80C98003 1 +0x80C98009 1 +0x80C98007 0 +0x80F78006 0 +0x80C98006 0 +0x80C78008 2 +0x80F78009 0 +0x80F78003 2 +0x80F78008 2 +0x80FA4E00 2 
+0x80CD8006 0 +0x80CD8008 2 +0x80CD8003 1 +0x80FB8000 0 +0x812E4E00 2 +0x80CF8000 2 +0x80F78C02 2 +0x80F78C03 2 +0x80F78C04 2 +0x80CB800A 1 +0x80C9800A 1 +0x80818C05 2 +0x80818C06 0 +0x80C98001 2 +0x80CB8001 2 + +#DSA +0x81318006 0 +0x81318008 1 + +#TS Subsys disp +0x81338006 0 +0x81338008 2 +0x81338002 2 +0x81338004 0 + +#DSA Subsys disp +0x81938006 0 +0x81938004 0 +0x81938002 2 +0x81938008 2 + +#AIC Subsys disp +0x813B8006 0 +0x813B8008 2 +0x813B8002 2 +0x813B8004 0 + +# LPM +0x80E24E00 2 +0x80E20207 2 +0x80E3A201 2 +0x80E3A203 2 +0x80E39200 2 +0x80E21007 1 +0x80E38008 2 +0x80E38003 1 +0x80E21E01 2 +0x80E21008 2 + +# PCIe +0x80B98000 2 +0x80B98006 0 +0x80B98008 2 + +# PCIe DISP +0x81978002 2 +0x81978004 0 +0x81978006 0 +0x81978008 2 + +# Network +0x80BD8008 2 +0x80BD8000 2 +0x80BD8003 2 +0x80BD8009 2 +0x80BB8008 2 +0x80BB8009 2 +0x80BB8000 2 +0x80BB8003 2 +0x80BB800A 2 +0x81AB8003 2 +0x81AB8008 2 +0x81AB800C 2 +0x81078605 2 +0x81AD8605 2 +0x81078603 1 +0x81078607 1 +0x8C1F8608 1 +0x4C1F8608 1 + +#NIC Subsys disp +0x81958006 0 +0x81958004 0 +0x81958008 2 +0x81958002 2 + +#aicpu +0x8C1FA006 2 +0x8C17A005 1 +0x8C1DA005 1 +0x8C19A005 1 + +#TEEDrv +0x80E78000 2 +0x80E78008 2 + +#HSM +0x80E58E03 2 +0x80E58E02 2 + +#HCCS +0x819B8003 0 +0x819B8006 0 +0x819B8605 1 +0x819B800D 1 +0x819B800A 2 + +#DVPP AA/SMMU +0x814D8006 0 +0x814D8008 0 +0x814D8004 3 +0x81498004 2 + +#DVPP +0x80D38009 1 +0x80D58000 1 +0x80D58009 1 +0x80D98008 1 +0x80DD8000 1 +0x80DD8003 1 +0x80DD8008 1 +0x80DD8007 0 + +#DVPP Subsys disp +0x81478002 2 +0x81478004 0 +0x81478006 0 +0x81478008 2 + +#PERI subsys disp +0x815F8002 2 +0x815F8004 0 +0x815F8006 0 +0x815F8008 2 + +#SLLC +0x80B78000 2 +0x80B78005 1 +0x80B78006 0 + +#SIOE +0x80B58000 2 + +#HCCS LINK +0x81A3880C 0 + +#TLS +0x80818200 1 +0x80818201 2 \ No newline at end of file diff --git a/ascend_deployer/tools/npu_container/910A2/entrypoint.sh b/ascend_deployer/tools/npu_container/910A2/entrypoint.sh new file mode 100644 index 
0000000000000000000000000000000000000000..b13048ec6919af7f0cb33af5615777966e4c5695 --- /dev/null +++ b/ascend_deployer/tools/npu_container/910A2/entrypoint.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +python3 /app/installation.py +tail -f /dev/null \ No newline at end of file diff --git a/ascend_deployer/tools/npu_container/910A2/install_ko.sh b/ascend_deployer/tools/npu_container/910A2/install_ko.sh new file mode 100644 index 0000000000000000000000000000000000000000..fcf8a7db1ad75183bcd45e9f747d2396e9c4581c --- /dev/null +++ b/ascend_deployer/tools/npu_container/910A2/install_ko.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +echo "start depmod..." +depmod +echo "finish depmod." +kernel_version=$(uname -r) +dir="/lib/modules/$kernel_version/npu_driver" +if [ -d "$dir" ]; then + echo "Files in $dir:" + for file in "$dir"/*.ko; do + if [ -f "$file" ]; then + module_name=$(basename "$file" .ko) + echo "Loading module: $module_name" + modprobe "$module_name" + if [ $? -eq 0 ]; then + echo "Module $module_name loaded successfully." + else + echo "Failed to load module $module_name." + fi + fi + done +else + echo "Directory $dir does not exist." +fi \ No newline at end of file diff --git a/ascend_deployer/tools/npu_container/910A2/installation.py b/ascend_deployer/tools/npu_container/910A2/installation.py new file mode 100644 index 0000000000000000000000000000000000000000..101d129e9b0cdb26190ea52319f8c1aaaf41d225 --- /dev/null +++ b/ascend_deployer/tools/npu_container/910A2/installation.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python3 +# coding: utf-8 +# Copyright 2025 Huawei Technologies Co., Ltd +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# =========================================================================== + +import glob +import os +from pathlib import Path +import re +import shutil +import subprocess +from typing import List +import logging + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" +) + + +class Commands: + """ + A class to handle shell commands execution. + """ + @staticmethod + def run(command: str) -> None: + """ + Execute a shell command. + :param command: The command to execute. + :return: The output of the command. + """ + result = subprocess.run(command, check=True, shell=True) + if result.returncode != 0: + raise RuntimeError( + f"Command '{command}' failed with return code {result.returncode}. " + f"Output: {result.stdout.strip()}, Error: {result.stderr.strip() if result.stderr else ''}" + ) + + +class Installation: + """ + A class to handle the installation of NPU drivers in a container environment. 
""" + + def __init__(self): + self.kernel_version = os.uname().release + self.ko_folder = f"/mnt/lib/modules/{self.kernel_version}/npu_driver" + self.ascend_tool_path = "/usr/local/Ascend/driver/tools" + + def _setup(self): + """ + Setup the installation environment + """ + paths = [ + "/usr/local/Ascend", + "/usr/local/Ascend/driver/lib64/common", + self.ko_folder + ] + + for path in paths: + if os.path.exists(path): + logging.info(f"Removing existing directory: {path}") + shutil.rmtree(path) + os.makedirs(path, exist_ok=True) + logging.info(f"Created directory: {path}") + + logging.info("Setup completed successfully.") + + def _copy_resources(self): + """ + Copy necessary resources for installation. + """ + commands = [ + "cp -r /app/driver /usr/local/Ascend/", + "cp -r /app/davinci.conf /mnt/lib/", + "cp -r /app/dms_events_conf.lst /etc/", + "cp /usr/local/Ascend/driver/lib64/*.so /usr/local/Ascend/driver/lib64/common", + f"cp /app/ko_files/*.ko {self.ko_folder}", + ] + for command in commands: + Commands.run(command) + logging.info("Resources copied successfully.") + + @staticmethod + def _update_permissions(): + """ + Update permissions for the copied resources. + """ + commands = [ + "chmod 777 /mnt/lib/davinci.conf", + "chmod 777 /etc/dms_events_conf.lst" + ] + for command in commands: + Commands.run(command) + logging.info("Permission updated successfully.") + + @staticmethod + def _extract_array(content: str, key: str) -> List[str]: + """ + Extract an array of strings from a given content based on a key. + :param content: The content to search in. + :param key: The key to search for. + :return: A list of strings extracted from the content. 
+ """ + pattern = rf"{key}=\((.*?)\)" + match = re.search(pattern, content, re.DOTALL) + if not match: + raise ValueError(f"Key '{key}' not found in content.") + return match.group(1).strip().split() + + def _update_specific_func(self): + target_dir = "/usr/local/Ascend/driver/device" + specific_func_file = "/app/driver/script/specific_func.inc" + + with open(specific_func_file, 'r', encoding="utf-8") as file: + content = file.read() + + src_names = self._extract_array(content, "src_names") + dst_names = self._extract_array(content, "dst_names") + + if len(src_names) != len(dst_names): + raise ValueError( + "The number of source names and destination names do not match.") + for (i, src) in enumerate(src_names): + src_file = f"{target_dir}/{src}" + dst_file = f"{target_dir}/{dst_names[i]}" + + if not glob.glob(src_file): + raise FileNotFoundError( + f"Source file '{src_file}' does not exist.") + + Commands.run(f"mv {src_file} {dst_file}") + logging.info(f"Renaming {src_file} to {dst_file} successfully.") + + @staticmethod + def _install_ko(): + command = "bash /app/install_ko.sh" + Commands.run(command) + logging.info(f"Installed ko files successfully.") + + def _make_file_executable(self): + command = f"chmod +x {self.ascend_tool_path}/*" + Commands.run(command) + logging.info(f"Make all the file in {self.ascend_tool_path} to executable successfully.") + + def _configure_env(self): + """ + Configure the environment for the installation. 
+ """ + env_path = Path("/etc/profile.d/ascend.sh") + env_content = ( + "export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common:" + "/usr/local/Ascend/driver/lib64/driver:$LD_LIBRARY_PATH\n" + f"export PATH=$PATH:{self.ascend_tool_path}/" + ) + + try: + with env_path.open("w", encoding="utf-8") as env_file: + env_file.write(env_content) + logging.info("Environment configured successfully.") + except Exception as e: + raise RuntimeError(f"Failed to configure environment: {e}") from e + + # add env command to bashrc + # mount: /root/.bashrc:/host_bashrc + bashrc_path = Path("/host_bashrc") + env_command = f"bash {env_path}\n" + try: + if bashrc_path.exists(): + content = bashrc_path.read_text(encoding="utf-8") + if env_command.strip() not in content: + with bashrc_path.open("a", encoding="utf-8") as bashrc_file: + bashrc_file.write(env_command) + except Exception as e: + raise RuntimeError(f"Failed to add env command to bashrc: {e}") from e + + def install(self): + """ + Main installation method that orchestrates the setup, copying of resources, + updating permissions, installing kernel objects, and configuring the environment. 
""" + self._setup() + self._copy_resources() + self._update_permissions() + self._update_specific_func() + self._configure_env() + + +if __name__ == "__main__": + installer = Installation() + installer.install() + logging.info("Finished setting pre-env") + diff --git a/ascend_deployer/tools/npu_container/Dockerfile b/ascend_deployer/tools/npu_container/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..c9df8dd5a1bb5ce3e99b8e4a5069ea6fcdf17126 --- /dev/null +++ b/ascend_deployer/tools/npu_container/Dockerfile @@ -0,0 +1,5 @@ +FROM openeuler/openeuler:22.03-lts + +RUN yum update && yum install -y kmod + +CMD ["/bin/bash"] \ No newline at end of file diff --git a/ascend_deployer/tools/npu_container/README.md b/ascend_deployer/tools/npu_container/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b6dbf94e6a0e65b5eaa40b078475b015196d991c --- /dev/null +++ b/ascend_deployer/tools/npu_container/README.md @@ -0,0 +1,9 @@ +## How to build openeuler-with-kmod:22.03-lts + +### Command + +`docker build -t openeuler-with-kmod:22.03-lts .` + +### Dependency + +Please make sure the openeuler/openeuler:22.03-lts image exists. \ No newline at end of file