From 1afde0a1ef674f2bec844a66ca2051d38a66dc70 Mon Sep 17 00:00:00 2001
From: Jingqing3948 <2351290287@qq.com>
Date: Sat, 29 Jul 2023 22:35:12 +0800
Subject: [PATCH] add 20230729-riscv-klibc-str-function-analysis-1

---
 ...729-riscv-klibc-str-function-analysis-1.md | 685 ++++++++++++++++++
 1 file changed, 685 insertions(+)
 create mode 100644 articles/20230729-riscv-klibc-str-function-analysis-1.md
diff --git a/articles/20230729-riscv-klibc-str-function-analysis-1.md b/articles/20230729-riscv-klibc-str-function-analysis-1.md
new file mode 100644
index 0000000..82db121
--- /dev/null
+++ b/articles/20230729-riscv-klibc-str-function-analysis-1.md
@@ -0,0 +1,685 @@
+> Corrector: [TinyCorrect](https://gitee.com/tinylab/tinycorrect) v0.2-rc1 - [toc]<br/>
+> Author:    Jingqing3948 <2351290287@qq.com><br/>
+> Date:      2023/07/29<br/>
+> Revisor:   Falcon <falcon@tinylab.org><br/>
+> Project:   [RISC-V Linux 内核剖析](https://gitee.com/tinylab/riscv-linux)<br/>
+> Sponsor:   PLCT Lab, ISCAS
+
+# kernel libc 库分析之 str v0
+
+## 前言
+
+本文主要是展开分析 linux-lab/src/linux-stable/lib 里的 str 类函数的一部分，以及对其的一些展开测试。
+
+因为篇幅问题我截取了一部分文件在本文中展开分析，主要包括 string.c, test_string.c, string_helpers.c, test-string_helpers.c, kstrtox.c, test-kstrtox.c.
+
+## 列表
+
+```shell
+$ ls | grep str
+build_OID_registry
+fdt_strerror.c
+kstrtox.c
+kstrtox.h
+oid_registry.c
+string.c
+string_helpers.c
+strncpy_from_user.c
+strnlen_user.c
+test-kstrtox.c
+test_string.c
+test-string_helpers.c
+test_strscpy.c
+ucs2_string.c
+```
+
+## string.c
+
+### strcmp, memcmp
+
+字符串的比较。
+
+`int strncasecmp(const char *s1, const char *s2, size_t len)`：比较两个字符串，忽视大小写区别。len 是最长判断范围。返回值为 0 则表示一致。
+
+采用 unsigned char 存储每一位要比较的字符，更能确保比较时字符串行为一致且正确。
+
+`int strcasecmp(const char *s1, const char *s2)`：strncasecmp 不带长度限制的版本（C 语言没有函数重载，这是另一个独立的函数），因此比较过程中不需要再判断已比较的长度是否达到 len，一直比较到出现不同字符或字符串结束为止。
+
+`int strcmp(const char *cs, const char *ct)`：逐位比较两个字符串是否每一位完全一致。如果第一个不一致位 cs>ct 返回 1，否则返回 -1；一致返回 0.
+
+`int strncmp(const char *cs, const char *ct, size_t count)`：只比较指定长度。
+
+`__visible int memcmp(const void *cs, const void *ct, size_t count)`
+
+这个是有做优化的，优化部分代码如下：
+
+```c
+// src/linux-stable/lib/string.c:765
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (count >= sizeof(unsigned long)) {
+		const unsigned long *u1 = cs;
+		const unsigned long *u2 = ct;
+		do {
+			if (get_unaligned(u1) != get_unaligned(u2))
+				break;
+			u1++;
+			u2++;
+			count -= sizeof(unsigned long);
+		} while (count >= sizeof(unsigned long));
+		cs = u1;
+		ct = u2;
+	}
+#endif
+```
+
+如果定义了 CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS，就先按字（`unsigned long`）比较；未定义该选项时，以及剩余长度不足一个字的部分，则逐字节比较。
+
+`int bcmp(const void *a, const void *b, size_t len)`：就是调用了 memcmp，返回 0 表示一致，非 0 不一致。可以在 bcmp 中加一些操作比如对返回值的判断。
+
+### strcpy, memcpy
+
+复制 src 字符串到 dest 位置。
+
+`char *strcpy(char *dest, const char *src)`：就是一位一位赋值直到到达字符串结尾。后面会介绍优化版的 strcpy，可以按块复制。
+
+`char *strncpy(char *dest, const char *src, size_t count)`：复制指定长度。给 while 多加了一个判定结束的条件：count==0.
+
+`size_t strlcpy(char *dest, const char *src, size_t size)`：把字符串指定长度复制给**缓冲区**，先判断 size 和 strlen 谁小，用小的作为复制长度调用 memcpy.
+
+和 strncpy 的区别在于：strncpy 是我指定复制 count 位过去，不管缓冲区是否溢出；strlcpy 是传入了 dest 的 size，来判断复制多少位可以不溢出，更安全。
+
+`ssize_t strscpy(char *dest, const char *src, size_t count)`：这个就是优化版的 strcpy 了。成功时返回复制的字符数（不含结尾的空字符），参数不合法或字符串被截断时返回错误码 -E2BIG。
+
+1. count 不合法，即为 0 或超出 INT_MAX：返回错误码 -E2BIG。
+2. 先确定可以按字复制的最大字节数 max：如果启用了 `CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS` 而 src 未按字对齐，则限制 max 以免读取越过页边界；如果未启用该选项且 src 或 dest 未对齐，则 max 置 0，直接逐字节复制。每次复制的块大小是 `sizeof(unsigned long)`（RV64 上为 8 字节，RV32 上为 4 字节），只要 max 不小于这个大小就复制一块过去，直到 max 不足一块，这时候开始逐字节复制。
+
+```c
+// src/linux-stable/lib/string.c:200
+
+while (max >= sizeof(unsigned long)) {
+    unsigned long c, data;
+
+    c = read_word_at_a_time(src+res);
+    if (has_zero(c, &data, &constants)) {
+        data = prep_zero_mask(c, data, &constants);
+        data = create_zero_mask(data);
+        *(unsigned long *)(dest+res) = c & zero_bytemask(data);
+        return res + find_zero(data);
+    }
+    *(unsigned long *)(dest+res) = c;
+    res += sizeof(unsigned long);
+    count -= sizeof(unsigned long);
+    max -= sizeof(unsigned long);
+}
+```
+
+3. 读一个字长度出来，如果里面有 0（说明这个字中间终止了），则只复制到 0 位。截取 0 位以前的部分再补一个 0 位，复制给 dest。否则复制一整个 `unsigned long` 过去，然后开启下一轮复制循环。
+
+4. 跳出 3 的循环后，继续逐位复制到结尾。
+
+`char *stpcpy(char *__restrict__ dest, const char *__restrict__ src)`：复制完了，让 dest 指向自己这个字符串的结尾 \\0.
+
+`void *memcpy(void *dest, const void *src, size_t count)`：逐位复制。
+
+### strcat
+
+`char *strcat(char *dest, const char *src)`：src 拼接到 dest 结尾处，返回 dest 首指针。
+
+`char *strncat(char *dest, const char *src, size_t count)`：两个判定条件，1 是 src 终止，2 是复制长度达到 count。
+
+`size_t strlcat(char *dest, const char *src, size_t count)`：和 strlcpy strncpy 区别类似。
+
+### strchr, memchr
+
+`char *strchr(const char *s, int c)`：逐位搜索 s 字符串里面有没有 c 字符。返回对应位置的指针，没有返回空指针 NULL。也可以搜索空字符。
+
+`char *strchrnul(const char *s, int c)`：没找到也不返回 NULL，而是返回指向字符串结尾的 \0 的指针。我觉得这样我们可以借助 strchr 来找字符串的结尾位置。
+
+`char *strnchrnul(const char *s, size_t count, int c)`：限定一个长度范围内搜索 c。超出范围返回第一个超出的字符的指针。
+
+`char *strrchr(const char *s, int c)`：找到最后一个匹配 c 的指针。
+
+`char *strnchr(const char *s, size_t count, int c)`：限定在前 count 个字符的范围里面找。
+
+`void *memchr(const void *s, int c, size_t n)`：类似 strchr。
+
+`void *memchr_inv(const void *start, int c, size_t bytes)`：找第一个不匹配的字符位置。这里也有做一些优化。
+
+- 首先写了一个 `static void *check_bytes8(const u8 *start, u8 value, unsigned int bytes)`，这个就是在 bytes 范围内遍历查找，低效，用于处理尾部数据。
+- 如果要查找的长度 <= 16B，就直接调用 check_bytes8 处理即可。
+
+```c
+// src/linux-stable/lib/string.c:934
+
+	value64 = value;
+#if defined(CONFIG_ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64
+	value64 *= 0x0101010101010101ULL;
+#elif defined(CONFIG_ARCH_HAS_FAST_MULTIPLIER)
+	value64 *= 0x01010101;
+	value64 |= value64 << 32;
+#else
+	value64 |= value64 << 8;
+	value64 |= value64 << 16;
+	value64 |= value64 << 32;
+#endif
+```
+
+- 比如 value 是 0xEB，扩展为 64 位后就变为 0xEBEBEBEBEBEBEBEB，这样可以一次同时比较 8 个字节，提高效率。
+
+```c
+// src/linux-stable/lib/string.c:946
+
+prefix = (unsigned long)start % 8;
+if (prefix) {
+    u8 *r;
+
+    prefix = 8 - prefix;
+    r = check_bytes8(start, value, prefix);
+    if (r)
+        return r;
+    start += prefix;
+    bytes -= prefix;
+}
+```
+
+- 这里检查 start 地址是否按 8 字节对齐：prefix 是地址除以 8 的余数，如果不为 0（未对齐），就先用 check_bytes8 逐字节比较开头的 8 - prefix 个字节，使 start 对齐到 8 字节边界。
+
+> chatGPT: 让我们假设 start 是一个指向内存区域的指针，并且当前的 u8 类型是一个无符号 8 位整数。我们来举一个具体数字的例子来说明 prefix 是如何影响对齐的。
+> 假设 start 指针指向如下的内存区域，每个字节用十六进制表示：
+
+```
+  00   01   02   03   04   05   06   07   08   09   0A   0B   0C   0D
++----+----+----+----+----+----+----+----+----+----+----+----+----+----+
+| AB | CD | EF | 12 | 34 | 56 | 78 | 9A | BC | DE | F0 | 11 | 11 | 33 |
++----+----+----+----+----+----+----+----+----+----+----+----+----+----+
+  ^
+  |
+start
+```
+
+> 在这个例子中，start 指针指向内存的位置 start[0]，即 AB（十六进制）。我们假设每个字节都是不同的，因此在这个区域内没有重复的字节值。
+> 现在，我们来计算 prefix，即指针 start 相对于 8 字节对齐的偏移量：
+
+```c
+prefix = (unsigned long)start % 8
+       = (unsigned long)0x00 % 8
+       = 0
+```
+
+> 由于 start 指针的值是 0x00，它已经是 8 字节对齐的，prefix 的值是 0。这意味着在此例中，start 指针已经对齐了，没有前缀需要处理。
+> 但是，如果 start 指针的值不是 8 的倍数，那么 prefix 就会有一个非零值，表示指针在 8 字节对齐时，需要向前调整的字节数。通过调用 check_bytes8 函数来查找不等于给定值的字节时，我们会处理这个前缀部分。
+
+- start 想要比较完并跳转到下一个 8 字节对齐位置，需要往前比较并跳转 8-prefix 个长度。比较完后，start 指针 + 这个长度，剩余要比较的长度 bytes - 这个长度。处理完之后我们现在就对齐好了，可以开始更高效的查询方式了。
+
+```c
+// src/linux-stable/lib/string.c:958
+
+words = bytes / 8;
+
+	while (words) {
+		if (*(u64 *)start != value64)
+			return check_bytes8(start, value, 8);
+		start += 8;
+		words--;
+	}
+```
+
+- 每次比较 8 个字节 64 位，提高效率。words 部分比较完了，就剩下结尾不够 8 字节长度的待比较部分了。
+
+- 结尾部分再调用 `check_bytes8(start, value, bytes%8)` 即可完成比较。
+
+### strlen
+
+`size_t strlen(const char *s)`：逐位++到结尾为止。
+
+`size_t strnlen(const char *s, size_t count)`：限定一定长度范围内获取 len。
+
+### strspn
+
+`size_t strspn(const char *s, const char *accept)`：找找 s 字符串里面有多少个开头连续的字符是 accept 字符串里出现过的，比如 accept 是 abcd, s 是 aabcdefgaa, 那么就出现过 5 次。
+
+函数写的很简单，两层循环遍历扫描，对于每一个 s 中的字符，遍历 accept 看是否是 accept 中的一个字符，如果是 cnt++，如果不是直接返回 cnt，最终返回 cnt。但是这里是不是可以用一个缓冲数组存 *accept 对应字符，这样也不用每次到 accept 中访问指定地址中的内容？
+
+`size_t strcspn(const char *s, const char *reject)`：和 accept 相反，看开头多少个字符是未出现在 reject 中的并计数。
+
+### strpbrk
+
+`char *strpbrk(const char *cs, const char *ct)`：查找 cs 中第一次出现 ct 中字符的位置。
+
+### strsep
+
+`char *strsep(char **s, const char *ct)`：
+
+1. 借助 strpbrk 在 *s 指向的字符串中找到第一次出现 ct 中任一字符的位置；
+2. 把这个字符替换成空字符；
+3. 将 *s 更新为指向该字符之后的位置（找不到分隔符时置为 NULL），并返回原来的字符串起始指针，即分割出来的这一段。
+
+### memset
+
+`void *memset(void *s, int c, size_t count)`：从 s 开始，赋值 count 个字符 c。返回 s 头指针也就是最开始传入的时候所指向的地址。
+
+`void *memset16(uint16_t *s, uint16_t v, size_t count)`：半字长为单位进行赋值。
+
+`void *memset32(uint32_t *s, uint32_t v, size_t count)`：1 字长为单位进行赋值。
+
+`void *memset64(uint64_t *s, uint64_t v, size_t count)`：2 字长为单位进行赋值。
+
+### memmove
+
+`void *memmove(void *dest, const void *src, size_t count)`：如果 dest 在 src 左边（dest <= src），则采用和 memcpy 一样从头到尾的逐字节复制方法；否则采用从尾部倒过来复制的方法。这一点在之前分析优化的时候也分析过：当 dest 在 src 右边且两块区域有重叠时，从左到右复制会先覆盖掉 src 中尚未复制的部分，影响后面的复制结果。
+
+### memscan
+
+`void *memscan(void *addr, int c, size_t size)`：找到 c 第一次出现的地址，或者超出 size 的第一个字节的地址。
+
+### strstr
+
+`char *strstr(const char *s1, const char *s2)`：找到 s1 中第一次出现 s2 子串的位置。遍历 s1 进行 memcmp(s1, s2, length_of_s2) 查找。
+
+`char *strnstr(const char *s1, const char *s2, size_t len)`：限定搜索一定长度范围。
+
+### 编译包含
+
+```makefile
+# src/linux-stable/lib/Makefile:30
+
+lib-y := ctype.o string.o vsprintf.o cmdline.o \
+	 rbtree.o radix-tree.o timerqueue.o xarray.o \
+	 idr.o extable.o sha1.o irq_regs.o argv_split.o \
+	 flex_proportions.o ratelimit.o show_mem.o \
+	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
+	 earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
+	 nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o \
+	 buildid.o
+```
+
+这部分函数应该是无论如何都会包含在其中，编译时生成 lib.a 文件（我觉得应该是在对应开发板 lib 文件夹内生成的）。
+
+## test_string.c
+
+对 string 的展开测试。
+
+### memset_selftest
+
+__init 表示这个函数放在初始化段中，只在内核（或模块）初始化阶段使用，初始化完成后其占用的内存可以被释放；我们在 make boot 启动时执行一次自检正合适。
+
+`static __init int memset16_selftest(void)`：测试思路如下。
+
+1. 首先开辟 256 * 2 * 2 个字节空间。开辟失败返回 -1。
+
+2. 然后用 memset 把这片空间的每个字节都赋值为 0xa1，即每个半字为 0xa1a1，最终值为：a1a1a1a1a1……
+
+```
++------+----------+----------------+
+| a1a1 | b1b2b1b2 | a1a1a1a1a1a1a1 |
++------+----------+----------------+
+^      ^          ^                ^
+|      |          |                |
+p     p+i       p+i+j            p+512
+```
+
+3. 对于这开辟的一整片空间，i 从 0 到 255 遍历，j 从 0 到 255 遍历，每次调用 memset16 把 [p+i, p+i+j) 这 j 个半字赋值为 b1b2b1b2……，[p, p+i) 和 [p+i+j, p+512) 的部分仍为 a1a1a1a1……。然后循环检查全部 512 个半字的值是否如预期。
+
+4. 如果发生错误，执行 `return (i << 24) | (j << 16) | k | 0x8000;`，这一个返回值就可以同时表示出错时三个循环变量 i、j、k 的值。
+
+`static __init int memset32_selftest(void)`：类似，不过赋值格式为 0xb1b2b3b4 这样，且所需空间也加倍了。
+
+`static __init int memset64_selftest(void)`：类似。
+
+### strchr_selftest
+
+`static __init int strchr_selftest(void)`：
+
+测试案例：查找正常字符；查找 \\0；查找空字符串里的字符；查找未出现在非空字符串里的字符。
+
+1. 测试主要围绕两个字符集合的字符串展开：test_string: "abcdefghijkl" 和 empty_string: "". 返回值=0 说明测试成功。
+2. 利用 `strchr(test_string, test_string[i])` 遍历测试，得到的结果应该是 i，如果不是 i 则返回 i+'a' 可根据此结果推断出错时的值。
+3. 查找 empty_string 中出现 '\\0' 的位置，结果应为 empty_string，出错返回错误码 0x101.
+4. 查找 empty_string 中出现 'a' 的位置，结果应为 NULL，出错返回错误码 0x102.
+5. 查找 test_string 中出现 'z' 的位置，结果应为 NULL，出错返回错误码 0x103.
+
+`static __init int strnchr_selftest(void)`：主要比较和 `strchr_selftest()` 增加的一些测试区别。
+
+1. 遍历测试 test_string 的时候，也同时遍历长度变量 j。只有 j >i 的时候才应该查得到结果，否则应该结果为 NULL。发生错误返回错误码 `(i+'a')<<8|j` 能同时表示 i j 的信息。
+2. 查询 empty_string 中 '\\0' 的位置时令长度为 0（结果应为 NULL）和长度为 1（结果应为 empty_string），发生错误时错误码分别对应 0x10001 和 0x10002。
+3. 增加一个 `strnchr(NULL, 0, '\0');` 的测试用例，返回结果应为 NULL。
+
+### string_selftest_init
+
+`static __init int string_selftest_init(void)`：调用上述所有测试用例。如果执行到其中一个报错，则终止测试并返回错误的测试序号、返回错误码信息。
+
+最后通过 `module_init(string_selftest_init);` 加载模块时自动调用。
+
+### 编译包含
+
+Makefile:
+
+```makefile
+obj-$(CONFIG_STRING_SELFTEST) += test_string.o
+```
+
+### 使用模块
+
+在 Kconfig.debug 里可以看到，这个测试模块是在
+
+- menu Kernel Hacking
+  - menu "Kernel Testing and Coverage"
+    - if RUNTIME_TESTING_MENU ("Runtime Testing")
+      - config STRING_SELFTEST ("Test string functions at runtime") 里。
+
+编译运行后，输出信息里有这么一句，和 `test_string.c` 里相对应：
+
+```shell
+String selftests succeeded
+```
+
+## string_helpers.c
+
+主要包括一些字符串的辅助处理函数，如获取大小、编码解码等。这里基本没什么优化内容我觉得都是基础实现（理解如有误欢迎指正！）
+
+### string_get_size
+
+`void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, char *buf, int len)`：把形如 "1024" 这样的数字转为易读的 "1KB" 存储大小。
+
+### string_unescape
+
+`int string_unescape(char *src, char *dst, size_t size, unsigned int flags)`：字符串解码，将转义序列如 "\n"、"\t"、"\xHH"（十六进制）和 "\NNN"（八进制）替换为相应的字符。简单的字符替换。
+
+### string_escape_mem
+
+`int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, unsigned int flags, const char *only)`：字符串编码，转义一个内存缓冲区中的字符，使其在引号中安全记录。转义后的字符串可能长度增加因此缓冲区长度扩大为 4 倍。
+
+### kstrdup_quotable
+
+`char *kstrdup_quotable(const char *src, gfp_t gfp)`：返回一个新分配的、经过转义的字符串副本（函数本身并不添加引号），其中的特殊字符和双引号都会被转义，使其可以安全地放在引号中记录。
+
+`char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp)`：返回一个经过转义的字符串，其中包含命令行指令，比如其中将参数之间的空字符（NUL，即 `\0`，而不是空指针 NULL）替换为空格，并对其他特殊字符进行转义。
+
+`char *kstrdup_quotable_file(struct file *file, gfp_t gfp)`：返回一个经过转义的字符串，其中包含文件路径名。如果出错返回字符串以 < 开头（如"\<no_memory\>"）。
+
+### kfree_strarray
+
+`void kfree_strarray(char **array, size_t n)`：释放包含在数组中的多个动态分配的字符串以及数组本身。调用 kfree。
+
+### strscpy_pad
+
+`ssize_t strscpy_pad(char *dest, const char *src, size_t count)`：复制一个字符串到一个具有指定大小的缓冲区中，目标缓冲区多余部分用空字符填充确保其正确终止。
+
+### skip_spaces
+
+`char *skip_spaces(const char *str)`：跳过前导空白符，返回指向第一个非空白字符的指针（不修改原字符串）。
+
+### strim
+
+`char *strim(char *s)`：删除前导和尾随空格。
+
+### sysfs_streq
+
+`bool sysfs_streq(const char *s1, const char *s2)`：判断两个字符串是否相等。如果两个字符串是 'abc\\n\\0' 和 'abc\\0' 的形式，也算相等，即忽略结束符前的 \\n。
+
+### match_string
+
+`int match_string(const char * const *array, size_t n, const char *string)`：在字符串数组中匹配给定的字符串，并返回匹配字符串的索引。
+
+`int __sysfs_match_string(const char * const *array, size_t n, const char *str)`：调用 sysfs_streq 方法匹配。
+
+### strreplace
+
+`char *strreplace(char *s, char old, char new)`：将字符串中所有出现的一个字符替换为另一个字符。
+
+### memcpy_and_pad
+
+`void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, int pad)`：将一个缓冲区的内容复制到另一个缓冲区，目标缓冲区多余的部分填充指定字符。
+
+### fortify_panic
+
+`void fortify_panic(const char *name)`：在启用 CONFIG_FORTIFY_SOURCE 时检测缓冲区溢出。它打印错误消息并触发内核崩溃。
+
+## test-string_helpers.c
+
+### test_string_unescape
+
+主要测试字符串解码功能。
+
+`static void __init test_string_unescape(const char *name, unsigned int flags,bool inplace)`
+
+测试用例：
+
+- 空格；
+- 八进制转义字符；
+- 十六进制转义字符；
+- 特殊字符；
+- 空格，特殊字符，八进制转义字符，十六进制转义字符的一系列组合测试（互相转义，以及指定特定的转义方法如 ESCAPE_ANY（对所有支持的字符进行转义）、ESCAPE_NP（non-printable，只转义不可打印字符）和 ESCAPE_NA（non-ASCII，只转义非 ASCII 字符）进行转义）；
+
+### test_string_escape
+
+`static __init void test_string_escape(const char *name, const struct test_string_2 *s2, unsigned int flags, const char *esc)`
+
+主要测试 `string_escape_mem` 的字符串编码功能。也是尝试各种转义字符案例的组合以及规定不同字典的转义情况。
+
+### test_string_get_size
+
+`static __init void test_string_get_size(void)`：测试规定块数\*给定块大小的十进制 二进制大小计算。其中包含 0 块数，U64 最大块数和 U64 最大块大小的边界条件。
+
+### test_string_upper_lower
+
+`static void __init test_string_upper_lower(void)`：大小写转换的测试。这个是测试 string.c 里的内容了。
+
+### test_string_helpers_init
+
+`static int __init test_string_helpers_init(void)`：就是调用了以上所有测试模块。
+
+### 编译包含
+
+```makefile
+# src/linux-stable/lib/Makefile:53
+
+obj-y += string_helpers.o
+obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
+```
+
+### 使用模块
+
+- menu Kernel Hacking
+  - menu "Kernel Testing and Coverage"
+    - "Runtime Testing"
+      - "Test functions located in the string_helpers module at runtime"
+
+运行时输出：
+
+```shell
+test_string_helpers: Running tests...
+```
+
+只输出了这个而没有打印任何测试错误信息说明测试没问题。
+
+## kstrtox.c
+
+内核 str -> 整数。
+
+首先通过 `_parse_integer_fixup_radix` 函数判断字符串是否以 '0' 或 '0x' 开头，以此来确定基数。
+
+### parse_integer
+
+`unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned long long *p, size_t max_chars)`：转换为 unsigned long long 类型，存储到 p 里，转换方法是逐位乘以基数相加。同时返回一个 unsigned int 数值，可以存储转换的状态信息，其中包含：是否溢出；转换后字符长度信息。**不要直接用这个函数！**
+
+`unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)`：调用 `_parse_integer_limit` 函数。
+
+`static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)`：调用 `_parse_integer` 函数，如果发现结果溢出返回 `-ERANGE`，如果发现转换后长度为 0 或者未转换完成返回 `-EINVAL`。
+
+`int kstrtoull(const char *s, unsigned int base, unsigned long long *res)`：这个就是在 `_kstrtoull` 基础上处理开头可能存在的 + 号。
+
+`int kstrtoll(const char *s, unsigned int base, long long *res)`：在 `kstrtoull` 基础上增加了一步符号判断。
+
+以下函数类似，只是处理的数据类型不同：
+
+`int _kstrtoul(const char *s, unsigned int base, unsigned long *res)`
+
+`int _kstrtol(const char *s, unsigned int base, long *res)`
+
+`int kstrtouint(const char *s, unsigned int base, unsigned int *res)`
+
+`int kstrtoint(const char *s, unsigned int base, int *res)`
+
+`int kstrtou16(const char *s, unsigned int base, u16 *res)`
+
+`int kstrtos16(const char *s, unsigned int base, s16 *res)`
+
+`int kstrtou8(const char *s, unsigned int base, u8 *res)`
+
+`int kstrtos8(const char *s, unsigned int base, s8 *res)`
+
+下面这个宏定义是以上函数转为用户空间版本的。
+
+```c
+// src/linux-stable/lib/kstrtox.c:392
+
+#define kstrto_from_user(f, g, type)					\
+int f(const char __user *s, size_t count, unsigned int base, type *res)	\
+{									\
+	/* sign, base 2 representation, newline, terminator */		\
+	char buf[1 + sizeof(type) * 8 + 1 + 1];				\
+									\
+	count = min(count, sizeof(buf) - 1);				\
+	if (copy_from_user(buf, s, count))				\
+		return -EFAULT;						\
+	buf[count] = '\0';						\
+	return g(buf, base, res);					\
+}									\
+EXPORT_SYMBOL(f)
+```
+
+### parse_bool
+
+`int kstrtobool(const char *s, bool *res)`：把用户输入的 y, Y, 1, on 解析为 true；n, N, 0, off 解析为 false；其他输入报错 `-EINVAL`。
+
+`int kstrtobool_from_user(const char __user *s, size_t count, bool *res)`：处理用户空间的输入 bool 值。
+
+## test-kstrtox.c
+
+测试只包含了整数部分的测试。整数部分测试基本思路如下：
+
+- 正数、负数、0、最大值、最小值、超过范围的数值等情况。
+- 测试非法输入，如包含非数字字符或无效的基数。
+- 测试空字符串和只包含空格的字符串。
+
+期望使用 `ok` 的测试函数测试正常转换案例，`fail` 的测试函数测试非法输入。
+
+以下函数是对整数转换部分的测试函数：
+
+`static void __init test_kstrtoull_ok(void)`
+
+`static void __init test_kstrtoull_fail(void)`
+
+`static void __init test_kstrtoll_ok(void)`
+
+`static void __init test_kstrtoll_fail(void)`
+
+`static void __init test_kstrtou64_ok(void)`
+
+`static void __init test_kstrtou64_fail(void)`
+
+`static void __init test_kstrtos64_ok(void)`
+
+`static void __init test_kstrtos64_fail(void)`
+
+`static void __init test_kstrtou32_ok(void)`
+
+`static void __init test_kstrtou32_fail(void)`
+
+`static void __init test_kstrtos32_ok(void)`
+
+`static void __init test_kstrtos32_fail(void)`
+
+`static void __init test_kstrtou16_ok(void)`
+
+`static void __init test_kstrtou16_fail(void)`
+
+`static void __init test_kstrtos16_ok(void)`
+
+`static void __init test_kstrtos16_fail(void)`
+
+`static void __init test_kstrtou8_ok(void)`
+
+`static void __init test_kstrtou8_fail(void)`
+
+`static void __init test_kstrtos8_ok(void)`
+
+`static void __init test_kstrtos8_fail(void)`
+
+最后通过 `static int __init test_kstrtox_init(void)` 函数调用上述所有函数。
+
+### 编译包含
+
+```makefile
+# src/linux-stable/lib/Makefile:74
+
+obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
+```
+
+### 使用模块
+
+- menu Kernel Hacking
+  - menu "Kernel Testing and Coverage"
+    - if RUNTIME_TESTING_MENU ("Runtime Testing")
+      - "Test kstrto*() family of functions at runtime"
+
+测试的时候发现好像没有提示成功输出信息，于是我把测试案例故意改错了一个试了一下，然后发现多了这样一些报错信息：
+
+```shell
+------------[ cut here ]------------
+str '127', base 10, expected 126, got 127
+WARNING: CPU: 1 PID: 1 at lib/test-kstrtox.c:152 test_kstrtox_init+0x86/0x7ea
+Modules linked in:
+CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.16.0-dirty #8
+Hardware name: riscv-virtio,qemu (DT)
+epc : test_kstrtox_init+0x86/0x7ea
+ ra : test_kstrtox_init+0x86/0x7ea
+epc : ffffffff80816f9e ra : ffffffff80816f9e sp : ffffffd00021bd80
+ gp : ffffffff8137e670 tp : ffffffe001748000 t0 : ffffffff8138e348
+ t1 : ffffffff8138e348 t2 : 0000000000000000 s0 : ffffffd00021bdd0
+ s1 : ffffffff80a49f80 a0 : 0000000000000029 a1 : ffffffff812869a0
+ a2 : 0000000000000010 a3 : 000000000000005d a4 : 89abe87e0d71d600
+ a5 : 89abe87e0d71d600 a6 : 0000000000017fe8 a7 : c0000000ffffefff
+ s2 : ffffffff80da8658 s3 : 000000000000000a s4 : ffffffff80da8000
+ s5 : ffffffff80da8048 s6 : ffffffff80a4a670 s7 : ffffffff81381038
+ s8 : ffffffff80a53080 s9 : 0000000000000008 s10: ffffffff808000f8
+ s11: 0000000000000000 t3 : 00000000000f0000 t4 : 0000000000000001
+ t5 : 0000000000000009 t6 : ffffffd00021bad8
+status: 0000000000000120 badaddr: 0000000000000000 cause: 0000000000000003
+[<ffffffff80816f9e>] test_kstrtox_init+0x86/0x7ea
+[<ffffffff800025d6>] do_one_initcall+0x5c/0x1ce
+[<ffffffff808011a0>] kernel_init_freeable+0x1da/0x23e
+[<ffffffff807dc2d2>] kernel_init+0x32/0x14e
+[<ffffffff8000369a>] ret_from_exception+0x0/0xc
+---[ end trace 47ad246718a2c1d7 ]---
+```
+
+而这个位置成功运行的时候确实是没有输出 kstrtox.c 相关信息的，这里如果加一个“测试全部通过”的提示可能会更好一些。
+
+## 总结
+
+本文截取了一部分功能较为基础的 str 文件先进行分析。
+
+string.c: 包括一些基础的字符串处理函数。
+
+string_helpers.c: 主要是编解码，大小写转换等辅助处理函数。
+
+kstrtox.c: 主要是字符串转数字，布尔类型的处理函数。
+
+分析重点主要还是在其中的一些优化手段上。其中有一部分在我的第一次 pr 中有简单分析过其应用：[articles/20230617-riscv-klibc-opt-summary.md (gitee.com)][001]
+
+我认为其中的优化内容主要包括：
+
+1. `memcmp`：在支持高效非对齐访问（CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS）的前提下先按字比较，结尾部分逐字节比较，效率高于全程逐字节比较。
+2. `strscpy`：在支持高效非对齐访问，或 dest、src 地址均已对齐的前提下按字复制，结尾部分逐字节复制。
+3. `memchr_inv`：待比较的值扩展为 64 位，这样查找时更为高效。如果当前指针位置没有对齐，先逐字节比较几次直至对齐到 8 字节边界（地址 %8 == 0），然后一次比较 8 个字节；如果在这 8 个字节中发现了与给定值不同的字节，或剩余字节数不够 8 个了，再逐字节比较确定其位置。
+
+在下一篇文章中将继续展开分析 str 相关函数及其中所做优化。
+
+## 参考资料
+
+- [tinylab/riscv-linux/blob/master/articles/20230617-riscv-klibc-opt-summary.md][001]
+
+[001]: https://gitee.com/tinylab/riscv-linux/blob/master/articles/20230617-riscv-klibc-opt-summary.md
-- 
Gitee