diff --git a/articles/20220922-d1h-benchmark.md b/articles/20220922-d1h-benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..f4261a5b614333f34a7e4a76c8ad13d2084406cf
--- /dev/null
+++ b/articles/20220922-d1h-benchmark.md
@@ -0,0 +1,690 @@
+> Corrector: [TinyCorrect](https://gitee.com/tinylab/tinycorrect) v0.1-rc2 - [toc]
+> Author: LucasXu
+> CoAuthor: Reset
+> Revisor: Taotieren
+> Date: 2022/09/06
+> Project: [RISC-V Linux 内核剖析](https://gitee.com/tinylab/riscv-linux)
+> Sponsor: PLCT Lab, ISCAS
+
+# RISC-V 性能实测,以平头哥 C906 为例
+
+## 前言
+
+在上一篇文章中,我们简要概述了 Linux 下 benchmark 工具。本文将以平头哥 C906 为例,介绍如何在真实硬件上进行性能测试。
+
+## 测试环境
+
+### 硬件环境
+
+* Nezha-D1H 开发板,基于全志科技 Allwinner D1-H 芯片设计,512MiB 内存大小
+
+### 软件环境
+
+* Debian 11,Linux kernel version: 5.4
+
+### 交叉编译工具链
+
+* `riscv64-linux-gnu-gcc` (GCC) 11.2.0
+
+## 测试方法
+
+### 测试工具
+
+* 泰晓科技制作的 `microbench`,基于指令集的性能评测
+* 我自己编写的大规模矩阵计算程序,涉及到大规模整数和浮点数计算,可以与 amd64 平台机器做个对比进行参考,主要可用于测试单核心整数以及浮点数性能
+* `UnixBench`
+
+### 工具介绍与移植
+
+#### 大规模矩阵计算程序
+
+使用 C/C++ 重新改写之前一段 `MATLAB` 矩阵计算代码,矩阵库使用 `Eigen-3.4.0` 数学库进行矩阵运算。
+
+代码如下:
+
+```cpp
+#include <iostream>
+#include <Eigen/Dense>
+#include <cmath>
+#include <cstdlib>
+#include <ctime>
+#include <random>
+#define EIGEN_USE_BLAS
+#define PI 3.141592654
+
+using namespace Eigen;
+using namespace std;
+// Signum of a scalar: 1 when x > 0, 0 when x == 0, and -1 in every other
+// case (negative values, and NaN because both comparisons are false).
+int sign(double x)
+{
+    if (x > 0)
+    {
+        return 1;
+    }
+    return (x == 0) ? 0 : -1;
+}
+// Larger of two scalars; when a is not strictly greater than b, b is returned.
+double max(double a, double b)
+{
+    return (a > b) ? a : b;
+}
+// Largest coefficient of a dense matrix, found by scanning every entry.
+// The matrix is received by value, and x(0, 0) is read unconditionally,
+// so the argument must hold at least one element.
+double max(MatrixXd x)
+{
+    double largest = x(0, 0);
+    for (int r = 0; r < x.rows(); ++r)
+    {
+        for (int c = 0; c < x.cols(); ++c)
+        {
+            const double candidate = x(r, c);
+            largest = (candidate > largest) ? candidate : largest;
+        }
+    }
+    return largest;
+}
+
+// Element-wise signum: returns a matrix with the same shape as x whose
+// entries are the scalar sign() of the corresponding entries of x.
+MatrixXd sign(MatrixXd x)
+{
+    MatrixXd result(x.rows(), x.cols());
+    for (int r = 0; r < x.rows(); ++r)
+    {
+        for (int c = 0; c < x.cols(); ++c)
+        {
+            const double entry = x(r, c);
+            result(r, c) = sign(entry);
+        }
+    }
+    return result;
+}
+// Element-wise soft-thresholding, the C++ form of the MATLAB statement
+//     y = sign(x) .* max(abs(x) - tk * mu, 0);
+// max_step is accepted but not used by this function.
+MatrixXd proxlasso(MatrixXd x, double mu, double tk, int max_step)
+{
+    // Start from an all-zero matrix of the same shape as x.
+    MatrixXd Y = MatrixXd::Zero(x.rows(), x.cols());
+    const double shrink = tk * mu;
+    for (int r = 0; r < x.rows(); ++r)
+    {
+        for (int c = 0; c < x.cols(); ++c)
+        {
+            const double reduced = abs(x(r, c)) - shrink;
+            Y(r, c) = sign(x(r, c)) * max(reduced, 0);
+        }
+    }
+    return Y;
+}
+// Returns the real parts of the eigenvalues of the square matrix A as a
+// column vector; imaginary parts are discarded.
+// EigenSolver is a class template, so the matrix type has to be spelled out
+// (a bare `EigenSolver es(A)` does not compile).
+MatrixXd eig(MatrixXd A)
+{
+    EigenSolver<MatrixXd> es(A);
+    MatrixXd eigenvalues = es.eigenvalues().real();
+    return eigenvalues;
+}
+// Builds a rows x cols matrix of pseudo-random values with magnitude in
+// [0, 1) at a resolution of 1e-6 and a random sign. The C library generator
+// is re-seeded from the current time on every call.
+MatrixXd randomMatrix(int rows, int cols)
+{
+    srand(time(NULL));
+    cout << "开始生成随机数矩阵" << endl;
+    MatrixXd A;
+    A.resize(rows, cols);
+
+    for (int r = 0; r < rows; ++r)
+    {
+        for (int c = 0; c < cols; ++c)
+        {
+            // Two rand() draws per entry, in this order: magnitude, then sign.
+            const double magnitude = (rand() % 1000000) / 1000000.0;
+            const bool keepPositive = (rand() % 2 == 0);
+            A(r, c) = keepPositive ? magnitude : -magnitude;
+        }
+    }
+    cout << "随机数矩阵生成完毕" << endl;
+    return A;
+}
+// Draws one sample from a normal distribution N(mean, sigma) and guarantees
+// that the returned value is non-zero.
+//
+// The value that is tested against zero is the value that is returned; the
+// draw is simply repeated until it is non-zero. The engine is seeded once
+// from std::random_device and reused across calls instead of being rebuilt
+// for every sample.
+double generateGaussianNoise(double mean, double sigma)
+{
+    static std::mt19937 gen{std::random_device{}()};
+    std::normal_distribution<> d(mean, sigma);
+    double sample = d(gen);
+    while (sample == 0.0)
+    {
+        sample = d(gen);
+    }
+    return sample;
+}
+// Builds an m x n matrix that is zero everywhere except for a fixed number
+// of randomly placed entries drawn from N(0, 1).
+//
+// NOTE: `density` is currently ignored. The number of non-zero entries is
+// fixed at 97 (capped at m * n so the placement loop always terminates),
+// not derived from density * m * n.
+MatrixXd sprandnMatrix(int m, int n, int density)
+{
+    MatrixXd A = MatrixXd::Zero(m, n);
+
+    // Cap the target at the number of available cells; without the cap the
+    // loop below could never finish for matrices with fewer than 97 cells.
+    const long long cells = (m > 0 && n > 0) ? static_cast<long long>(m) * n : 0;
+    const int nonzero = (cells < 97) ? static_cast<int>(cells) : 97;
+    int count = 0;
+
+    cout << "开始生成稀疏矩阵" << endl;
+    while (count < nonzero)
+    {
+        const int i = rand() % m;
+        const int j = rand() % n;
+        if (A(i, j) != 0)
+        {
+            continue; // cell already filled, pick another one
+        }
+        const bool keepSign = (rand() % 2 == 0);
+        const double value = generateGaussianNoise(0, 1);
+        A(i, j) = keepSign ? value : -value;
+        ++count;
+    }
+    cout << "稀疏矩阵生成完毕" << endl;
+
+    return A;
+}
+// Benchmark driver: builds a random 512 x 1024 problem and runs a
+// proximal-gradient iteration on
+//     f(x) = mu * ||x||_1 + 0.5 * ||A x - b||_2^2
+// until the relative change of f drops to `threshold` or the iteration
+// budget is used up, then prints the result and the elapsed clock() time.
+int main()
+{
+    // clock() counts processor time used by the program, in clock ticks.
+    clock_t start, end;
+    start = clock();
+    cout << "Start time is: " << start << endl;
+    int max_step = 500;
+    double r = 0.1;
+    double mu = 0.001;
+    int m = 512;
+    int n = 1024;
+    // Dense random m x n matrix.
+    MatrixXd A = randomMatrix(m, n);
+    // Sparse n x 1 vector with normally distributed non-zero entries.
+    // sprandnMatrix takes an int density, so r is truncated on the way in.
+    MatrixXd u = sprandnMatrix(n, 1, r);
+    // MatrixXd u = MatrixXd::Random(n, 1);
+    MatrixXd b = A * u;
+    double threshold = 0.0001;
+    // Setup done. Eigenvalues of A^T * A (this result is not used below).
+    MatrixXd eigenvalues = eig(A.transpose() * A);
+    // Determinant of the n x n matrix A^T * A (this result is not used below).
+    double det = (A.transpose() * A).determinant();
+    // Step size: reciprocal of the largest eigenvalue of A^T * A.
+    double tk = 1.0 / max(eig(A.transpose() * A));
+    int k = 0;
+    // One column per iteration; the row count follows n instead of
+    // repeating the literal 1024.
+    MatrixXd gradg = MatrixXd::Zero(n, max_step);
+    MatrixXd x = MatrixXd::Zero(n, max_step);
+    // Objective value at the initial iterate (first column of x).
+    MatrixXd f = MatrixXd::Zero(1, max_step);
+    f(0) = mu * x.col(0).lpNorm<1>() + 0.5 * ((A * x.col(0) - b).lpNorm<2>() * (A * x.col(0) - b).lpNorm<2>());
+    MatrixXd err = MatrixXd::Zero(1, max_step);
+    while (true)
+    {
+        gradg.col(k) = A.transpose() * (A * x.col(k) - b);
+        x.col(k + 1) = proxlasso((x.col(k) - tk * gradg.col(k)), mu, tk, max_step);
+        f(k + 1) = mu * x.col(k + 1).lpNorm<1>() + 0.5 * (A * x.col(k + 1) - b).lpNorm<2>() * (A * x.col(k + 1) - b).lpNorm<2>();
+        err(k) = abs((f(k + 1) - f(k)) / f(k));
+        // Stop on convergence, or before k + 1 would run past the last column.
+        if (err(k) <= threshold || k == max_step - 2)
+        {
+            break;
+        }
+        k++;
+    }
+    cout << "求得最小的函数值为:" << f(k) << endl;
+    cout << "迭代次数为:" << k << endl;
+    end = clock();
+    cout << "End time is: " << end << endl;
+    // NOTE(review): where clock_t is an integer type this division
+    // truncates, so the figure printed is whole seconds.
+    cout << "Time consumption is: " << (end - start) / CLOCKS_PER_SEC << endl;
+    return 0;
+}
+```
+
+#### microbench
+
+`microbench` 的详细介绍可参见之前的文章,这里不再赘述。引用仓库中对 RV64 平台的移植说明如下,可以看到,`microbench` 的移植过程并不复杂,只需要修改一些路径即可。
+
+```shell
+$ make clean
+$ make ARCH=riscv64 clean
+$ make ARCH=riscv64
+```
+
+#### UnixBench
+
+`UnixBench` 是一个用于测试类 Unix 系统性能的工具,它可以测试 CPU 整数与浮点运算、文件拷贝、管道通信、进程创建、Shell 脚本执行、系统调用开销等方面的性能。`UnixBench` 的移植过程也比较简单,只需要修改一下 Makefile 即可。修改后的 Makefile 如下:
+
+```makefile
+##############################################################################
+# UnixBench v5.1.3
+# Based on The BYTE UNIX Benchmarks - Release 3
+# Module: Makefile SID: 3.9 5/15/91 19:30:15
+#
+##############################################################################
+# Bug reports, patches, comments, suggestions should be sent to:
+# David C Niemi
+#
+# Original Contacts at Byte Magazine:
+# Ben Smith or Tom Yager at BYTE Magazine
+# bensmith@bytepb.byte.com tyager@bytepb.byte.com
+#
+##############################################################################
+# Modification Log: 7/28/89 cleaned out workload files
+# 4/17/90 added routines for installing from shar mess
+# 7/23/90 added compile for dhrystone version 2.1
+# (this is not part of Run file. still use old)
+# removed HZ from everything but dhry.
+# HZ is read from the environment, if not
+# there, you must define it in this file
+# 10/30/90 moved new dhrystone into standard set
+# new pgms (dhry included) run for a specified
+# time rather than specified number of loops
+# 4/5/91 cleaned out files not needed for
+# release 3 -- added release 3 files -ben
+# 10/22/97 added compiler options for strict ANSI C
+# checking for gcc and DEC's cc on
+# Digital Unix 4.x (kahn@zk3.dec.com)
+# 09/26/07 changes for UnixBench 5.0
+# 09/30/07 adding ubgears, GRAPHIC_TESTS switch
+# 10/14/07 adding large.txt
+# 01/13/11 added support for parallel compilation
+# 01/07/16 [refer to version control commit messages and
+# cease using two-digit years in date formats]
+##############################################################################
+
+##############################################################################
+# CONFIGURATION
+##############################################################################
+
+SHELL = /bin/sh
+
+# GRAPHIC TESTS: Uncomment the definition of "GRAPHIC_TESTS" to enable
+# the building of the graphics benchmarks. This will require the
+# X11 libraries on your system. (e.g. libX11-devel mesa-libGL-devel)
+#
+# Comment the line out to disable these tests.
+# GRAPHIC_TESTS = defined
+
+# Set "GL_LIBS" to the libraries needed to link a GL program.
+GL_LIBS = -lGL -lXext -lX11
+
+# COMPILER CONFIGURATION: Set "CC" to the name of the compiler to use
+# to build the binary benchmarks. You should also set "$cCompiler" in the
+# Run script to the name of the compiler you want to test.
+# Here CC is the RISC-V cross compiler, so the binaries are built on the
+# host and run on the target board.
+CC=riscv64-linux-gnu-gcc
+
+# OPTIMISATION SETTINGS:
+# Use gcc option if defined UB_GCC_OPTIONS via "Environment variable" or "Command-line arguments".
+ifdef UB_GCC_OPTIONS
+  OPTON = $(UB_GCC_OPTIONS)
+
+else
+  ## Very generic
+  #OPTON = -O
+
+  ## For Linux 486/Pentium, GCC 2.7.x and 2.8.x
+  #OPTON = -O2 -fomit-frame-pointer -fforce-addr -fforce-mem -ffast-math \
+  # -m486 -malign-loops=2 -malign-jumps=2 -malign-functions=2
+
+  ## For Linux, GCC previous to 2.7.0
+  #OPTON = -O2 -fomit-frame-pointer -fforce-addr -fforce-mem -ffast-math -m486
+
+  #OPTON = -O2 -fomit-frame-pointer -fforce-addr -fforce-mem -ffast-math \
+  # -m386 -malign-loops=1 -malign-jumps=1 -malign-functions=1
+
+  ## For Solaris 2, or general-purpose GCC 2.7.x
+  #OPTON = -O2 -fomit-frame-pointer -fforce-addr -ffast-math -Wall
+
+  ## For Digital Unix v4.x, with DEC cc v5.x
+  #OPTON = -O4
+  #CFLAGS = -DTIME -std1 -verbose -w0
+
+  ## gcc optimization flags
+  ## (-ffast-math) disables strict IEEE or ISO rules/specifications for math funcs
+  OPTON = -O3 -ffast-math
+
+  ## OS detection. Comment out if gmake syntax not supported by other 'make'.
+  ## Note: uname runs on the build host, not on the cross-compilation target.
+  OSNAME:=$(shell uname -s)
+  ARCH := $(shell uname -p)
+  ifeq ($(OSNAME),Linux)
+    # Not all CPU architectures support "-march" or "-march=native".
+    # - Supported : x86, x86_64, ARM, AARCH64, etc..
+    # - Not Supported: RISC-V, IBM Power, etc...
+    # For the RISC-V cross build the ISA is therefore named explicitly.
+    # The flag has to be appended to OPTON: a bare "-march = ..." line only
+    # defines an unused make variable called "-march" and never reaches the
+    # compiler. rv64gc (RV64IMAFDC) is used so that the ISA string includes
+    # the D extension, which the usual lp64d ABI of riscv64-linux-gnu needs.
+    OPTON += -march=rv64gc
+  endif
+
+  ifeq ($(OSNAME),Darwin)
+    # (adjust flags or comment out this section for older versions of XCode or OS X)
+    # (-mmacosx-version-min= requires at least that version of SDK be installed)
+    ifneq ($(ARCH),$(filter $(ARCH),ppc64 ppc64le))
+      OPTON += -march=native -mmacosx-version-min=10.10
+    else
+      OPTON += -mcpu=native
+    endif
+    #http://stackoverflow.com/questions/9840207/how-to-use-avx-pclmulqdq-on-mac-os-x-lion/19342603#19342603
+    CFLAGS += -Wa,-q
+  endif
+
+endif
+
+## generic gcc CFLAGS. -DTIME must be included.
+CFLAGS += -Wall -pedantic $(OPTON) -I $(SRCDIR) -DTIME
+
+##############################################################################
+# END CONFIGURATION
+##############################################################################
+
+# local directories
+# PROGDIR, SRCDIR, TESTDIR, TMPDIR and RESULTDIR are the directories that
+# the `distr` target creates when they are missing.
+PROGDIR = ./pgms
+SRCDIR = ./src
+TESTDIR = ./testdir
+RESULTDIR = ./results
+TMPDIR = ./tmp
+# other directories
+INCLDIR = /usr/include
+LIBDIR = /lib
+# File lists that `distr` moves into PROGDIR (SCRIPTS), SRCDIR (SOURCES)
+# and TESTDIR (TESTS) right after creating the respective directory.
+SCRIPTS = unixbench.logo multi.sh tst.sh index.base
+SOURCES = arith.c big.c context1.c \
+ dummy.c execl.c \
+ fstime.c hanoi.c \
+ pipe.c spawn.c \
+ syscall.c looper.c timeit.c time-polling.c \
+ dhry_1.c dhry_2.c dhry.h whets.c ubgears.c
+TESTS = sort.src cctest.c dc.dat large.txt
+
+# ubgears is added to the build only when GRAPHIC_TESTS is non-empty.
+ifneq (,$(GRAPHIC_TESTS))
+GRAPHIC_BINS = $(PROGDIR)/ubgears
+else
+GRAPHIC_BINS =
+endif
+
+# Program binaries.
+BINS = $(PROGDIR)/arithoh $(PROGDIR)/register $(PROGDIR)/short \
+ $(PROGDIR)/int $(PROGDIR)/long $(PROGDIR)/float $(PROGDIR)/double \
+ $(PROGDIR)/hanoi $(PROGDIR)/syscall $(PROGDIR)/context1 \
+ $(PROGDIR)/pipe $(PROGDIR)/spawn $(PROGDIR)/execl \
+ $(PROGDIR)/dhry2 $(PROGDIR)/dhry2reg $(PROGDIR)/looper \
+ $(PROGDIR)/fstime $(PROGDIR)/whetstone-double $(GRAPHIC_BINS)
+## These compile only on some platforms...
+# $(PROGDIR)/poll $(PROGDIR)/poll2 $(PROGDIR)/select
+
+# Required non-binary files.
+# (REQD also starts with $(BINS); the `check` target depends on the whole list.)
+REQD = $(BINS) $(PROGDIR)/unixbench.logo \
+ $(PROGDIR)/multi.sh $(PROGDIR)/tst.sh $(PROGDIR)/index.base \
+ $(PROGDIR)/gfx-x11 \
+ $(TESTDIR)/sort.src $(TESTDIR)/cctest.c $(TESTDIR)/dc.dat \
+ $(TESTDIR)/large.txt
+
+# ######################### the big ALL ############################
+# `all` is the first target in the file: it runs `distr` and then
+# `programs`, each through a separate $(MAKE) invocation.
+all:
+## Ick!!! What is this about??? How about let's not chmod everything bogusly.
+# @chmod 744 * $(SRCDIR)/* $(PROGDIR)/* $(TESTDIR)/* $(DOCDIR)/*
+ $(MAKE) distr
+ $(MAKE) programs
+
+# ####################### a check for Run ######################
+# `check` has every file in $(REQD) as a prerequisite and then runs `all`.
+check: $(REQD)
+ $(MAKE) all
+# ##############################################################
+# distribute the files out to subdirectories if they are in this one
+# Each directory is created only when it does not exist yet; SCRIPTS,
+# SOURCES and TESTS are moved only at that moment, so an existing
+# directory is left untouched.
+distr:
+ @echo "Checking distribution of files"
+# scripts
+ @if test ! -d $(PROGDIR) \
+ ; then \
+ mkdir $(PROGDIR) \
+ ; mv $(SCRIPTS) $(PROGDIR) \
+ ; else \
+ echo "$(PROGDIR) exists" \
+ ; fi
+# C sources
+ @if test ! -d $(SRCDIR) \
+ ; then \
+ mkdir $(SRCDIR) \
+ ; mv $(SOURCES) $(SRCDIR) \
+ ; else \
+ echo "$(SRCDIR) exists" \
+ ; fi
+# test data
+ @if test ! -d $(TESTDIR) \
+ ; then \
+ mkdir $(TESTDIR) \
+ ; mv $(TESTS) $(TESTDIR) \
+ ; else \
+ echo "$(TESTDIR) exists" \
+ ; fi
+# temporary work directory
+ @if test ! -d $(TMPDIR) \
+ ; then \
+ mkdir $(TMPDIR) \
+ ; else \
+ echo "$(TMPDIR) exists" \
+ ; fi
+# directory for results
+ @if test ! -d $(RESULTDIR) \
+ ; then \
+ mkdir $(RESULTDIR) \
+ ; else \
+ echo "$(RESULTDIR) exists" \
+ ; fi
+
+.PHONY: all check distr programs run clean spotless
+
+# `programs` builds every binary listed in $(BINS).
+programs: $(BINS)
+
+# (use $< to link only the first dependency, instead of $^,
+# since the programs matching this pattern have only
+# one input file, and others are #include "xxx.c"
+# within the first. (not condoning, just documenting))
+# (dependencies could be generated by modern compilers,
+# but let's not assume modern compilers are present)
+# The pattern rule itself lists no prerequisites; the per-program lines
+# below supply them, and $< is the first one listed there.
+$(PROGDIR)/%:
+ $(CC) -o $@ $(CFLAGS) $< $(LDFLAGS)
+
+# Individual programs
+# Sometimes the same source file is compiled in different ways.
+# This limits the 'make' patterns that can usefully be applied.
+# The "target: CFLAGS += ..." / "target: LDFLAGS += ..." lines are
+# target-specific additions used when building that one program.
+
+$(PROGDIR)/arithoh: $(SRCDIR)/arith.c $(SRCDIR)/timeit.c
+$(PROGDIR)/arithoh: CFLAGS += -Darithoh
+$(PROGDIR)/register: $(SRCDIR)/arith.c $(SRCDIR)/timeit.c
+$(PROGDIR)/register: CFLAGS += -Ddatum='register int'
+$(PROGDIR)/short: $(SRCDIR)/arith.c $(SRCDIR)/timeit.c
+$(PROGDIR)/short: CFLAGS += -Ddatum=short
+$(PROGDIR)/int: $(SRCDIR)/arith.c $(SRCDIR)/timeit.c
+$(PROGDIR)/int: CFLAGS += -Ddatum=int
+$(PROGDIR)/long: $(SRCDIR)/arith.c $(SRCDIR)/timeit.c
+$(PROGDIR)/long: CFLAGS += -Ddatum=long
+$(PROGDIR)/float: $(SRCDIR)/arith.c $(SRCDIR)/timeit.c
+$(PROGDIR)/float: CFLAGS += -Ddatum=float
+$(PROGDIR)/double: $(SRCDIR)/arith.c $(SRCDIR)/timeit.c
+$(PROGDIR)/double: CFLAGS += -Ddatum=double
+
+$(PROGDIR)/poll: $(SRCDIR)/time-polling.c
+$(PROGDIR)/poll: CFLAGS += -DUNIXBENCH -DHAS_POLL
+$(PROGDIR)/poll2: $(SRCDIR)/time-polling.c
+$(PROGDIR)/poll2: CFLAGS += -DUNIXBENCH -DHAS_POLL2
+$(PROGDIR)/select: $(SRCDIR)/time-polling.c
+$(PROGDIR)/select: CFLAGS += -DUNIXBENCH -DHAS_SELECT
+
+$(PROGDIR)/whetstone-double: $(SRCDIR)/whets.c
+$(PROGDIR)/whetstone-double: CFLAGS += -DDP -DGTODay -DUNIXBENCH
+$(PROGDIR)/whetstone-double: LDFLAGS += -lm
+
+$(PROGDIR)/pipe: $(SRCDIR)/pipe.c $(SRCDIR)/timeit.c
+
+$(PROGDIR)/execl: $(SRCDIR)/execl.c $(SRCDIR)/big.c
+
+$(PROGDIR)/spawn: $(SRCDIR)/spawn.c $(SRCDIR)/timeit.c
+
+$(PROGDIR)/hanoi: $(SRCDIR)/hanoi.c $(SRCDIR)/timeit.c
+
+$(PROGDIR)/fstime: $(SRCDIR)/fstime.c
+
+$(PROGDIR)/syscall: $(SRCDIR)/syscall.c $(SRCDIR)/timeit.c
+
+$(PROGDIR)/context1: $(SRCDIR)/context1.c $(SRCDIR)/timeit.c
+
+$(PROGDIR)/looper: $(SRCDIR)/looper.c $(SRCDIR)/timeit.c
+
+$(PROGDIR)/ubgears: $(SRCDIR)/ubgears.c
+$(PROGDIR)/ubgears: LDFLAGS += -lm $(GL_LIBS)
+
+# dhry2 and dhry2reg have explicit recipes that compile both Dhrystone
+# sources; unlike the pattern rule, these recipes do not pass $(LDFLAGS).
+# ${HZ} is not assigned in this Makefile (the header notes that HZ is read
+# from the environment).
+$(PROGDIR)/dhry2: CFLAGS += -DHZ=${HZ}
+$(PROGDIR)/dhry2: $(SRCDIR)/dhry_1.c $(SRCDIR)/dhry_2.c \
+ $(SRCDIR)/dhry.h $(SRCDIR)/timeit.c
+ $(CC) -o $@ ${CFLAGS} $(SRCDIR)/dhry_1.c $(SRCDIR)/dhry_2.c
+
+$(PROGDIR)/dhry2reg: CFLAGS += -DHZ=${HZ} -DREG=register
+$(PROGDIR)/dhry2reg: $(SRCDIR)/dhry_1.c $(SRCDIR)/dhry_2.c \
+ $(SRCDIR)/dhry.h $(SRCDIR)/timeit.c
+ $(CC) -o $@ ${CFLAGS} $(SRCDIR)/dhry_1.c $(SRCDIR)/dhry_2.c
+
+# Run the benchmarks and create the reports
+# `run` executes the ./Run script with sh.
+run:
+ sh ./Run
+
+# `clean` removes the built binaries, `core`, and files ending in "~"
+# in this directory and one level below.
+clean:
+ $(RM) $(BINS) core *~ */*~
+
+# `spotless` runs `clean` first, then empties RESULTDIR and TMPDIR.
+spotless: clean
+ $(RM) $(RESULTDIR)/* $(TMPDIR)/*
+
+## END ##
+```
+
+### 测试方法
+
+* `microbench` 和 `UnixBench` 在 amd64 平台进行移植与交叉编译,并在 RV64 平台上面进行测试,查看测试所得的分数。
+* 大规模矩阵计算程序在 amd64 平台上交叉编译为 RV64 平台的可执行程序,然后在 RV64 平台上运行,查看运行时间。
+
+## 测试结果
+
+### 测试结果截图
+
+`microbench` 测试结果截图
+
+![microbench 测试结果](images/20220922-d1h-benchmark/rv64_microbench.png)
+
+`UnixBench` 测试结果截图
+
+![UnixBench 测试结果](images/20220922-d1h-benchmark/UnixBench_D1.png)
+
+大规模矩阵计算程序测试结果截图
+
+![大规模矩阵计算程序测试结果](images/20220922-d1h-benchmark/Matrix_D1.png)
+
+## 与现阶段 amd64 平台机器进行性能对比
+
+### 与 Intel Core i7-4770HQ 进行性能对比
+
+性能对比的测试程序为 `UnixBench` 和大规模矩阵计算程序,测试结果如下:
+
+```
+Benchmark Run: Wed Sep 07 2022 19:59:17 - 20:23:52
+8 CPUs in system; running 1 parallel copy of tests
+
+Dhrystone 2 using register variables 32343533.0 lps (10.0 s, 7 samples)
+Double-Precision Whetstone 4533.0 MWIPS (10.5 s, 7 samples)
+Execl Throughput 3263.5 lps (29.9 s, 2 samples)
+File Copy 1024 bufsize 2000 maxblocks 635715.1 KBps (30.0 s, 2 samples)
+File Copy 256 bufsize 500 maxblocks 168269.6 KBps (30.0 s, 2 samples)
+File Copy 4096 bufsize 8000 maxblocks 2003899.3 KBps (30.0 s, 2 samples)
+Pipe Throughput 677305.6 lps (10.0 s, 7 samples)
+Pipe-based Context Switching 110291.0 lps (10.0 s, 7 samples)
+Process Creation 8915.2 lps (30.0 s, 2 samples)
+Shell Scripts (1 concurrent) 9925.1 lpm (60.0 s, 2 samples)
+Shell Scripts (8 concurrent) 2371.0 lpm (60.0 s, 2 samples)
+System Call Overhead 554765.5 lps (10.0 s, 7 samples)
+
+System Benchmarks Index Values BASELINE RESULT INDEX
+Dhrystone 2 using register variables 116700.0 32343533.0 2771.5
+Double-Precision Whetstone 55.0 4533.0 824.2
+Execl Throughput 43.0 3263.5 759.0
+File Copy 1024 bufsize 2000 maxblocks 3960.0 635715.1 1605.3
+File Copy 256 bufsize 500 maxblocks 1655.0 168269.6 1016.7
+File Copy 4096 bufsize 8000 maxblocks 5800.0 2003899.3 3455.0
+Pipe Throughput 12440.0 677305.6 544.5
+Pipe-based Context Switching 4000.0 110291.0 275.7
+Process Creation 126.0 8915.2 707.6
+Shell Scripts (1 concurrent) 42.4 9925.1 2340.8
+Shell Scripts (8 concurrent) 6.0 2371.0 3951.7
+System Call Overhead 15000.0 554765.5 369.8
+ ========
+System Benchmarks Index Score 1111.4
+
+------------------------------------------------------------------------
+Benchmark Run: Wed Sep 07 2022 20:23:52 - 20:48:38
+8 CPUs in system; running 8 parallel copies of tests
+
+Dhrystone 2 using register variables 107735436.2 lps (10.0 s, 7 samples)
+Double-Precision Whetstone 25460.0 MWIPS (9.3 s, 7 samples)
+Execl Throughput 10155.8 lps (29.9 s, 2 samples)
+File Copy 1024 bufsize 2000 maxblocks 1254565.5 KBps (30.0 s, 2 samples)
+File Copy 256 bufsize 500 maxblocks 326444.0 KBps (30.0 s, 2 samples)
+File Copy 4096 bufsize 8000 maxblocks 3831671.6 KBps (30.0 s, 2 samples)
+Pipe Throughput 2871966.9 lps (10.0 s, 7 samples)
+Pipe-based Context Switching 280889.3 lps (10.0 s, 7 samples)
+Process Creation 24198.2 lps (30.0 s, 2 samples)
+Shell Scripts (1 concurrent) 24486.0 lpm (60.0 s, 2 samples)
+Shell Scripts (8 concurrent) 3283.5 lpm (60.0 s, 2 samples)
+System Call Overhead 2258288.6 lps (10.0 s, 7 samples)
+
+System Benchmarks Index Values BASELINE RESULT INDEX
+Dhrystone 2 using register variables 116700.0 107735436.2 9231.8
+Double-Precision Whetstone 55.0 25460.0 4629.1
+Execl Throughput 43.0 10155.8 2361.8
+File Copy 1024 bufsize 2000 maxblocks 3960.0 1254565.5 3168.1
+File Copy 256 bufsize 500 maxblocks 1655.0 326444.0 1972.5
+File Copy 4096 bufsize 8000 maxblocks 5800.0 3831671.6 6606.3
+Pipe Throughput 12440.0 2871966.9 2308.7
+Pipe-based Context Switching 4000.0 280889.3 702.2
+Process Creation 126.0 24198.2 1920.5
+Shell Scripts (1 concurrent) 42.4 24486.0 5775.0
+Shell Scripts (8 concurrent) 6.0 3283.5 5472.5
+System Call Overhead 15000.0 2258288.6 1505.5
+ ========
+System Benchmarks Index Score 3037.7
+```
+
+```
+Start time is: 1097
+开始生成随机数矩阵
+随机数矩阵生成完毕
+开始生成稀疏矩阵
+稀疏矩阵生成完毕
+求得最小的函数值为:0.151914
+迭代次数为:170
+End time is: 20455667
+Time consumption is: 20
+```
+
+## 总结
+
+* 由于是第一次接触 RISC-V 开发板,之前上面搭载的是未完全移植的 ArchLinux 系统,设备驱动还十分不完善,导致在进行移植时遇到了很多问题,比如无法使用 HDMI 接口等,这些问题都是由于设备驱动不完善导致的,后来通过移植相对完整的 Debian 系统解决了这些问题。
+* 移植的 Debian 系统中没有 `glibc` 库,导致无法使用 `gcc` 进行交叉编译,后来通过移植 `glibc` 库解决了这个问题。
+* 在移植大规模矩阵计算程序时,由于原始代码编译出的程序运算量过大,在开发板上运行时间过长(跑了三天三夜都没结束),导致无法进行测试,后来通过开启 `-O2` 编译优化解决了这个问题。
+* 由上面的测试结果显而易见,即便仅进行单核性能对比,该 RISC-V 平台处理器相对于目前已经成熟的 amd64 平台处理器也有着明显的性能劣势。这种性能差距不仅仅是频率所导致的,还与 RISC-V 平台处理器和 amd64 平台处理器在指令集设计上的差异有关。该 RISC-V 芯片的基础指令性能跟对比的 amd64 芯片差异较大(4-10 倍),即使换算到同等主频,差异(2-5 倍)也较为明显。当然,也不排除 amd64 平台睿频的影响,但是由于 Apple Macintosh 机器固件原因,即便在实体机上安装 Linux 系统,也还没能关闭睿频,所以在测试中,我们没有考虑睿频的情况。
+
+## 参考资料
+
+1. [Eigen][001]
+2. [microbench][002]
+3. [UnixBench][003]
+
+[001]: https://eigen.tuxfamily.org/
+[002]: https://gitee.com/tinylab/riscv-linux/tree/master/test/microbench
+[003]: https://github.com/kdlucas/byte-unixbench
diff --git a/articles/images/20220922-d1h-benchmark/Matrix_D1.png b/articles/images/20220922-d1h-benchmark/Matrix_D1.png
new file mode 100644
index 0000000000000000000000000000000000000000..36b19a85c78fd29ccf0673db47220086be1a8561
Binary files /dev/null and b/articles/images/20220922-d1h-benchmark/Matrix_D1.png differ
diff --git a/articles/images/20220922-d1h-benchmark/UnixBench_D1.png b/articles/images/20220922-d1h-benchmark/UnixBench_D1.png
new file mode 100644
index 0000000000000000000000000000000000000000..a3cd925622cecc8a600d3ccf8a8a3cd8c9186de3
Binary files /dev/null and b/articles/images/20220922-d1h-benchmark/UnixBench_D1.png differ
diff --git a/articles/images/20220922-d1h-benchmark/amd64_microbench.png b/articles/images/20220922-d1h-benchmark/amd64_microbench.png
new file mode 100644
index 0000000000000000000000000000000000000000..74e36d87b6b3da263e9c4688511233d654f34e92
Binary files /dev/null and b/articles/images/20220922-d1h-benchmark/amd64_microbench.png differ
diff --git a/articles/images/20220922-d1h-benchmark/rv64_microbench.png b/articles/images/20220922-d1h-benchmark/rv64_microbench.png
new file mode 100644
index 0000000000000000000000000000000000000000..b0e23f01f8bea90583002353128db2c482b9367e
Binary files /dev/null and b/articles/images/20220922-d1h-benchmark/rv64_microbench.png differ