diff --git a/anolis-courses/Cloud_Kernel_Series/index.yaml b/anolis-courses/Cloud_Kernel_Series/index.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48a767f6ccbccc20f91ce606b43316a036c963f5 --- /dev/null +++ b/anolis-courses/Cloud_Kernel_Series/index.yaml @@ -0,0 +1,14 @@ +name: Cloud Kernel 系列课程 +desc: 本教程分两节介绍如何在 Anolis 上编写并插入一个内核模块以及当内核发生panic时如何分析vmcore +type: course +total_time: "90 min" +level: "beginner" +chapters: + - name: "在 Anolis 上编写并插入一个内核模块" + desc: "学习内核模块的编写方法和插入方法,并在 Anolis 上进行实践" + content: "kernel_module" + live_time: "30 min" + - name: "在 Anolis 上对内核panic进行debug" + desc: "学习内核panic的debug方法,在 Anolis 上对内核panic产生的vmcore进行分析,找出内核发生panic的原因" + content: "vmcore_debug" + live_time: "60 min" \ No newline at end of file diff --git a/anolis-courses/Cloud_Kernel_Series/kernel_module/assets/insmod.png b/anolis-courses/Cloud_Kernel_Series/kernel_module/assets/insmod.png new file mode 100644 index 0000000000000000000000000000000000000000..9dae29b88e7fd3e10f79662166975929c45ffed5 Binary files /dev/null and b/anolis-courses/Cloud_Kernel_Series/kernel_module/assets/insmod.png differ diff --git a/anolis-courses/Cloud_Kernel_Series/kernel_module/assets/rmmod.png b/anolis-courses/Cloud_Kernel_Series/kernel_module/assets/rmmod.png new file mode 100644 index 0000000000000000000000000000000000000000..ef1ea38f51961df9fe9361f964b879391ce6876c Binary files /dev/null and b/anolis-courses/Cloud_Kernel_Series/kernel_module/assets/rmmod.png differ diff --git a/anolis-courses/Cloud_Kernel_Series/kernel_module/assets/true_makefile.png b/anolis-courses/Cloud_Kernel_Series/kernel_module/assets/true_makefile.png new file mode 100644 index 0000000000000000000000000000000000000000..fa591e665c94a8125fa1ddc2447d935ec4d42a46 Binary files /dev/null and b/anolis-courses/Cloud_Kernel_Series/kernel_module/assets/true_makefile.png differ diff --git a/anolis-courses/Cloud_Kernel_Series/kernel_module/assets/wrong_makefile.png 
b/anolis-courses/Cloud_Kernel_Series/kernel_module/assets/wrong_makefile.png new file mode 100644 index 0000000000000000000000000000000000000000..23d4e208e18cdca91244f0fe9047d1e916e44af6 Binary files /dev/null and b/anolis-courses/Cloud_Kernel_Series/kernel_module/assets/wrong_makefile.png differ diff --git a/anolis-courses/Cloud_Kernel_Series/kernel_module/finish.md b/anolis-courses/Cloud_Kernel_Series/kernel_module/finish.md new file mode 100644 index 0000000000000000000000000000000000000000..8e96a0ba4f17e265fe58b8b0df2346402cbfae3e --- /dev/null +++ b/anolis-courses/Cloud_Kernel_Series/kernel_module/finish.md @@ -0,0 +1,5 @@ +恭喜你完成了内核模块的编写、编译、插入和卸载 + +如果还有其他问题,欢迎加入**钉钉交流群**咨询(搜索群号:33311793) +或者访问Cloud Kernel Sig 主页 +链接:https://openanolis.cn/sig/Cloud-Kernel diff --git a/anolis-courses/Cloud_Kernel_Series/kernel_module/index.yaml b/anolis-courses/Cloud_Kernel_Series/kernel_module/index.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d327e9fa1ce8d6fff99fd833c1e6fe459f52aacc --- /dev/null +++ b/anolis-courses/Cloud_Kernel_Series/kernel_module/index.yaml @@ -0,0 +1,15 @@ +name: 在 Anolis 上编写并插入一个内核模块 +desc: 本课程将介绍如何在 Anolis 上编写、编译和插入一个内核模块。 +image: "Anolis OS 8.4 ANCK 64位" +live_time: "30 min" +machine: x86_64-2c8g # cpu架构-机器规格,该字段为空,默认 x86_64-2c4g +details: + steps: + start: start.md + finish: finish.md + - name: 第一步:编写一个内核模块 + content: step1.md + - name: 第二步:编译内核模块并插入 + content: step2.md + - name: 第三步:查看内核模块输出与卸载内核模块 + content: step3.md \ No newline at end of file diff --git a/anolis-courses/Cloud_Kernel_Series/kernel_module/start.md b/anolis-courses/Cloud_Kernel_Series/kernel_module/start.md new file mode 100644 index 0000000000000000000000000000000000000000..af567407a3f50f212f357f90bdcd97ac9bbf8e38 --- /dev/null +++ b/anolis-courses/Cloud_Kernel_Series/kernel_module/start.md @@ -0,0 +1,11 @@ +# 本课程将介绍如何编写一个内核模块 + +系统环境:龙蜥操作系统(Anolis OS) + +## 内核模块介绍 +Linux内核模块是一个目标文件,其中包含可以在运行时扩展内核功能的代码,当不再需要内核模块时,可以将其卸载。大多数设备驱动程序以内核模块的形式使用。 + +## 课程结构 
+1. 编写模块代码与Makefile +2. 编译和插入模块 +3. 卸载模块 diff --git a/anolis-courses/Cloud_Kernel_Series/kernel_module/step1.md b/anolis-courses/Cloud_Kernel_Series/kernel_module/step1.md new file mode 100644 index 0000000000000000000000000000000000000000..51fa8b90dd032afbad84bd7914b9320354269142 --- /dev/null +++ b/anolis-courses/Cloud_Kernel_Series/kernel_module/step1.md @@ -0,0 +1,56 @@ +本节将介绍如何编写模块以及对应的Makefile文件 + +1. 在/root目录下新建工作文件夹test并进入 +[[ mkdir /root/test/ ]] +[[ cd /root/test/ ]] + +2. 在test文件夹内通过vim新建my_module.c文件 +[[ vim my_module.c ]] + +3. 将以下内容复制到my_module.c并保存退出 + +```s +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> + +MODULE_DESCRIPTION("My first kernel module"); +MODULE_AUTHOR("Me"); +MODULE_LICENSE("GPL"); + +static int dummy_init(void) +{ + pr_info("Hello World\n"); + return 0; +} + +static void dummy_exit(void) +{ + pr_info("Bye\n"); +} + +module_init(dummy_init); +module_exit(dummy_exit); +``` + +4. 编写Makefile文件 +[[ vim Makefile ]] +将以下内容复制到Makefile并保存退出(复制过程可能将tab转换为空格,请保证kbuild和clean的下行make前为tab而不是空格,空格会造成make行被红色长条覆盖) + +```s +obj-m:=my_module.o + +KDIR = /lib/modules/`uname -r`/build + +kbuild: + make -C $(KDIR) M=`pwd` + +clean: + make -C $(KDIR) M=`pwd` clean +``` + +错误的Makefile图示 +![](./assets/wrong_makefile.png) + +正确的Makefile图示 +![](./assets/true_makefile.png) \ No newline at end of file diff --git a/anolis-courses/Cloud_Kernel_Series/kernel_module/step2.md b/anolis-courses/Cloud_Kernel_Series/kernel_module/step2.md new file mode 100644 index 0000000000000000000000000000000000000000..269349a3977e27e70d2534d190bcb98710fb48ab --- /dev/null +++ b/anolis-courses/Cloud_Kernel_Series/kernel_module/step2.md @@ -0,0 +1,19 @@ +现在已经完成了模块和Makefile的编写,接下来将介绍如何编译和插入一个模块 + +1. 编译my_module模块 +[[ make ]] + +2. 将已编译的文件写入盘内,防止内核crash后数据没刷到盘 +[[ sync ]] + +3. 编译完目录下会生成my_module.ko,使用insmod将模块进行插入 +[[ insmod my_module.ko ]] + +4. 通过lsmod查看模块信息 +[[ lsmod | grep my_module ]] + +5. 
插入后通过dmesg查看模块插入时的输出 +[[ dmesg | tail ]] +此时能看到Hello World的输出 + +![](./assets/insmod.png) \ No newline at end of file diff --git a/anolis-courses/Cloud_Kernel_Series/kernel_module/step3.md b/anolis-courses/Cloud_Kernel_Series/kernel_module/step3.md new file mode 100644 index 0000000000000000000000000000000000000000..06335db009915520087055801f5175dc017217ae --- /dev/null +++ b/anolis-courses/Cloud_Kernel_Series/kernel_module/step3.md @@ -0,0 +1,10 @@ +现在已经完成了模块的编写和插入,最后再介绍如何卸载一个内核模块 + +1. 通过rmmod卸载内核模块 +[[ rmmod my_module ]] + +2. 卸载后通过dmesg查看模块卸载后的输出 +[[ dmesg | tail ]] +此时能看到Bye的输出 + +![](./assets/rmmod.png) \ No newline at end of file diff --git a/anolis-courses/Cloud_Kernel_Series/vmcore_debug/assets/RIP.png b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/assets/RIP.png new file mode 100644 index 0000000000000000000000000000000000000000..f3724c65f7fff36ba3b14f458e0a5b47c09c92ea Binary files /dev/null and b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/assets/RIP.png differ diff --git a/anolis-courses/Cloud_Kernel_Series/vmcore_debug/assets/true_makefile.png b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/assets/true_makefile.png new file mode 100644 index 0000000000000000000000000000000000000000..fa591e665c94a8125fa1ddc2447d935ec4d42a46 Binary files /dev/null and b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/assets/true_makefile.png differ diff --git a/anolis-courses/Cloud_Kernel_Series/vmcore_debug/assets/wrong_makefile.png b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/assets/wrong_makefile.png new file mode 100644 index 0000000000000000000000000000000000000000..23d4e208e18cdca91244f0fe9047d1e916e44af6 Binary files /dev/null and b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/assets/wrong_makefile.png differ diff --git a/anolis-courses/Cloud_Kernel_Series/vmcore_debug/finish.md b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/finish.md new file mode 100644 index 0000000000000000000000000000000000000000..389f43f53bd6bd3cee499aa20ae882e0ded897db 
--- /dev/null +++ b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/finish.md @@ -0,0 +1,5 @@ +恭喜你完成了内核panic分析 + +如果还有其他问题,欢迎加入**钉钉交流群**咨询(搜索群号:33311793) +或者访问Cloud Kernel Sig 主页 +链接:https://openanolis.cn/sig/Cloud-Kernel \ No newline at end of file diff --git a/anolis-courses/Cloud_Kernel_Series/vmcore_debug/index.yaml b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/index.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dd3256ccd74735245d61d7ce6e4ad8728f5c03de --- /dev/null +++ b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/index.yaml @@ -0,0 +1,15 @@ +name: 在 Anolis 上通过vmcore分析内核panic原因 +desc: 本课程将介绍如何在 Anolis 通过crash工具对vmcore进行分析,从而找出内核panic的原因 +image: "Anolis OS 8.4 ANCK 64位" +live_time: "60 min" +machine: x86_64-2c8g # cpu架构-机器规格,该字段为空,默认 x86_64-2c4g +details: + steps: + start: start.md + finish: finish.md + - name: 第一步:编写一个内核测试模块 + content: step1.md + - name: 第二步:做好分析vmcore之前的准备工作 + content: step2.md + - name: 第三步:使用crash、gdb工具分析vmcore + content: step3.md \ No newline at end of file diff --git a/anolis-courses/Cloud_Kernel_Series/vmcore_debug/start.md b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/start.md new file mode 100644 index 0000000000000000000000000000000000000000..abed805c4cb80cd5625de82459ec2f4546ac5616 --- /dev/null +++ b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/start.md @@ -0,0 +1,21 @@ +# 本课程将介绍如何通过vmcore分析系统panic原因 + +系统环境:龙蜥操作系统 (Anolis OS), kdump, crash, gdb + +## kdump介绍 +Linux内核发生panic时,kdump服务会生成包含系统RAM信息的内核转储文件vmcore,通过分析vmcore可以得到系统panic的原因 + +## crash介绍 +crash工具可用来分析kdump服务生成的vmcore文件 + +## gdb介绍 +gdb是GNU开源组织发布的Linux下的程序调试工具,使用gdb可以做以下四件事 +1. 启动程序,指定可能影响其行为的任何内容。 +2. 使程序在指定条件下停止。 +3. 当程序停止时,检查发生了什么。 +4. 更改程序中的内容,以便可以尝试纠正一个错误的影响并继续了解另一个错误。 + +## 课程结构 +1. 编写一个内核模块 +2. 将内核模块进行插入 +3. 
通过crash和gdb工具分析vmcore和对应模块 diff --git a/anolis-courses/Cloud_Kernel_Series/vmcore_debug/step1.md b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/step1.md new file mode 100644 index 0000000000000000000000000000000000000000..2947b862c12a7aa44e3682585c868f07f5af4ae2 --- /dev/null +++ b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/step1.md @@ -0,0 +1,68 @@ +利用上一节课程介绍的模块编写的方法,首先编写一个内核模块并插入 + +1. 在/root目录下新建工作文件夹test并进入 +[[ mkdir /root/test/ ]] +[[ cd /root/test/ ]] + +2. 在test文件夹内使用vim新建test_module.c文件 +[[ vim test_module.c ]] + +3. 将以下内容复制到test_module.c并保存退出 + +```s +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/compiler.h> + +MODULE_DESCRIPTION("test kernel module"); +MODULE_AUTHOR("Me"); +MODULE_LICENSE("GPL"); + +noinline void panic_func(void) +{ + int *addr = 0x0; + *addr = 0xf; +} +int test_module_init(void) +{ + panic_func(); + return 0; +} +void test_module_exit(void) +{ +} +module_init(test_module_init); +module_exit(test_module_exit); +``` + +这个模块在插入后会触发crash,原因是访问了0地址 + + +4. 编写Makefile文件 +[[ vim Makefile ]] +将以下内容复制到Makefile并保存退出(复制过程可能将tab转换为空格,请保证kbuild和clean的下行make前为tab而不是空格,空格会造成make行被红色长条覆盖) + +```s +obj-m:=test_module.o + +KDIR = /lib/modules/`uname -r`/build + +kbuild: + make -C $(KDIR) M=`pwd` + +clean: + make -C $(KDIR) M=`pwd` clean +``` + +错误的Makefile图示 +![](./assets/wrong_makefile.png) + +正确的Makefile图示 +![](./assets/true_makefile.png) + +5. 编译该内核模块并插入,sync用于将已编译的文件写入盘内,防止内核crash后数据没刷到盘 +[[ make && sync ]] +[[ insmod test_module.ko ]] + +6. 插入模块后内核会发生panic,终端连接会断开;等待系统重启后,输入任意键重新连接 \ No newline at end of file diff --git a/anolis-courses/Cloud_Kernel_Series/vmcore_debug/step2.md b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/step2.md new file mode 100644 index 0000000000000000000000000000000000000000..97b78ec5ad8dae615065e4ec9eca65c1c9ad7db6 --- /dev/null +++ b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/step2.md @@ -0,0 +1,17 @@ +为了分析vmcore,需要做好如下准备 + +1. 验证vmcore是否成功生成 +[[ ls /var/crash/127*/ ]] +如果有vmcore文件代表vmcore已经成功生成 + +2. 
使能yum debuginfo仓库 +[[ yum install -y yum-utils ]] +[[ yum-config-manager --enable Plus-debuginfo ]] + + +3. 安装内核版本对应的debuginfo +[[ yum install kernel-debuginfo-$(uname -r) -y ]] +rpm包较大,下载安装过程中请耐心等待 + +4. 安装crash和gdb工具 +[[ yum install crash gdb -y ]] diff --git a/anolis-courses/Cloud_Kernel_Series/vmcore_debug/step3.md b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/step3.md new file mode 100644 index 0000000000000000000000000000000000000000..1afa14ac52a9c2774dccfbc374ad9a292c414645 --- /dev/null +++ b/anolis-courses/Cloud_Kernel_Series/vmcore_debug/step3.md @@ -0,0 +1,35 @@ +接下来开始分析vmcore + +1. 进入vmcore所在目录 +[[ cd /var/crash/127* ]] + +2. 使用crash工具分析vmcore +[[ crash /usr/lib/debug/lib/modules/$(uname -r)/vmlinux vmcore ]] + +3. 进入crash工具,输入log查看发生panic时的log信息 +[[ log ]] +在log信息中使用 [[ /RIP ]] 搜索 RIP 相关信息,如下图所示 +![](./assets/RIP.png) + +```s +[ 1408.535985] RIP: 0010:panic_func+0x5/0x20 [test_module] +``` +代表panic发生在panic_func函数内偏移0x5处(该函数总长度为0x20字节) + +4. 按q退出log信息,并输入exit退出crash +[[ exit ]] + +5. 使用gdb工具对test_module.ko进行反汇编 +[[ gdb /root/test/test_module.ko ]] + +6. 通过disas命令打印panic_func函数的汇编指令 +[[ disas /s panic_func ]] +观察如下信息: +```s +12 int *addr = 0x0; +13 *addr = 0xf; + 0x0000000000000045 <+5>: movl $0xf,0x0 +``` +可以发现在panic_func的+5偏移处,将0xf赋给*addr,也就是对0地址(空指针)进行了写操作,造成内核panic + +7. 内核panic原因已查明,按ctrl+d退出gdb \ No newline at end of file