From facea5d2f7d34aa5a421e700612ae8acbe36879e Mon Sep 17 00:00:00 2001 From: Pingsheng Pan Date: Mon, 24 Feb 2025 13:49:37 +0800 Subject: [PATCH] Optimize dcu and add confidential container FAQ --- ...350\257\225\347\233\264\351\200\232DCU.md" | 7 +-- ...72\345\257\206\345\256\271\345\231\250.md" | 44 +++++++++++++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git "a/sig/Hygon Arch/content/2-CSV\346\265\213\350\257\225\346\226\207\346\241\243/4-KATA-3/4-\346\265\213\350\257\225\347\233\264\351\200\232DCU.md" "b/sig/Hygon Arch/content/2-CSV\346\265\213\350\257\225\346\226\207\346\241\243/4-KATA-3/4-\346\265\213\350\257\225\347\233\264\351\200\232DCU.md" index 003d9193f..b0aa02534 100644 --- "a/sig/Hygon Arch/content/2-CSV\346\265\213\350\257\225\346\226\207\346\241\243/4-KATA-3/4-\346\265\213\350\257\225\347\233\264\351\200\232DCU.md" +++ "b/sig/Hygon Arch/content/2-CSV\346\265\213\350\257\225\346\226\207\346\241\243/4-KATA-3/4-\346\265\213\350\257\225\347\233\264\351\200\232DCU.md" @@ -61,7 +61,7 @@ dmesg | grep -i IOMMU 由于国内网络目前访问不了dockerhub 需要使用代理,代理参数agent.https_proxy=http://ip:port agent.no_proxy=10.*.*.*,172.*.*.* http://ip:port 为代理地址 ``` - sudo sed -i -e 's#^\(kernel_params\).*=.*$#\1 = \"agent.https_proxy=http://ip:port agent.no_proxy=10.*.*.*,172.*.*.* agent.enable_signature_verification=false \"#g' /opt/confidential-containers/share/defaults/kata-containers/configuration-qemu-csv-dcu.toml + sudo sed -i -e 's#^\(kernel_params\).*=.*$#\1 = \"agent.https_proxy=http://ip:port agent.no_proxy=10.*.*.*,172.*.*.* agent.aa_kbc_params=offline_fs_kbc::null agent.enable_signature_verification=false \"#g' /opt/confidential-containers/share/defaults/kata-containers/configuration-qemu-csv-dcu.toml ``` - 设置内存 ``` @@ -74,13 +74,14 @@ dmesg | grep -i IOMMU - 找到DCU设备 ``` - lspci -nn | grep "Display" |grep "Haiguang" + lspci -nn | grep -e "Display" -e "Co-processor" | grep "Haiguang" ``` - 结果如下: + 如果是Z100卡,那么显示的是Display,如果是K100/K100-AI卡,显示的是Co-processor ``` 
- $ lspci -nn | grep "Display" |grep "Haiguang" + $ lspci -nn | grep -e "Display" -e "Co-processor" | grep "Haiguang" 03:00.0 Display controller: Chengdu Haiguang IC Design Co., Ltd. Device [1d94:53b7] (rev 01) ``` diff --git "a/sig/Hygon Arch/content/2-CSV\346\265\213\350\257\225\346\226\207\346\241\243/9-FAQ/3-\346\234\272\345\257\206\345\256\271\345\231\250.md" "b/sig/Hygon Arch/content/2-CSV\346\265\213\350\257\225\346\226\207\346\241\243/9-FAQ/3-\346\234\272\345\257\206\345\256\271\345\231\250.md" index 67536f0c8..d9516398e 100644 --- "a/sig/Hygon Arch/content/2-CSV\346\265\213\350\257\225\346\226\207\346\241\243/9-FAQ/3-\346\234\272\345\257\206\345\256\271\345\231\250.md" +++ "b/sig/Hygon Arch/content/2-CSV\346\265\213\350\257\225\346\226\207\346\241\243/9-FAQ/3-\346\234\272\345\257\206\345\256\271\345\231\250.md" @@ -221,6 +221,24 @@ EOF ``` 2)重新打包替换原有的镜像 + 4、containerd 私有仓库账号密码配置 + + 在/etc/containerd/config.toml 配置文件中添加docker.xx.cn:5005 仓库账号密码配置,参考如下: + ``` + [plugins."io.containerd.grpc.v1.cri".registry.configs] + [plugins."io.containerd.grpc.v1.cri".registry.configs."docker.xx.cn:5005".tls] + insecure_skip_verify = true + [plugins."io.containerd.grpc.v1.cri".registry.configs."docker.xx.cn:5005".auth] + username = "user" + password = "password" + + [plugins."io.containerd.grpc.v1.cri".registry.headers] + + [plugins."io.containerd.grpc.v1.cri".registry.mirrors] + [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.xx.cn:5005"] + endpoint = ["https://docker.xx.cn:5005/"] + ``` + 4、请参考[2-4-2-测试CSV机密容器](https://openanolis.cn/sig/Hygon-Arch/doc/896792319448421882)获取启动机密容器,私有仓库不需要在**kernel_params 中添加代理操作**,将其中的dockerhub 仓库中对的镜像地址换成自己的私有仓库地址 @@ -243,3 +261,29 @@ EOF runtimeClassName: kata-qemu-csv EOF ``` +#### Q: kubelet inotify_add_watch : no space left on device 问题 + +修复方法 +``` +# sysctl fs.inotify.max_user_watches=1048576 +``` +设置完后重新测试 +#### Q: kata[56795]: time="2025-02-25T20:48:15.555280431+08:00" level=error msg="qemu-system-x86_64: 
sev_launch_start: LAUNCH_START ret=1 fw_error=24 'Part-specific integrity check failure'" name=containerd-shim-v2 pid=56795 qemuPid=56806 + +修复方法: + +1、将平台复位 +``` +# hag csv platform_shutdown +# hag csv factory_reset +``` +2、重新构建证书链 +``` +# hag general hgsc_import +# hag csv export_cert_chain +# cat pdh.cert pek.cert oca.cert cek.cert hsk.cert hrk.cert > cert_chain.cert +# mv -f cert_chain.cert /opt/csv/ +``` +注:**如果机器不能联网**,需要将hag csv export_cert_chain 步骤使用[2-3-5-测试CSV虚拟机迁移](https://openanolis.cn/sig/Hygon-Arch/doc/944532750000611833) 中 **离线导出证书** 章节步骤进行替换制作证书 + +修复后重新进行测试 -- Gitee