diff --git "a/.gitee/\345\233\276\347\211\207.png" "b/.gitee/\345\233\276\347\211\207.png" new file mode 100644 index 0000000000000000000000000000000000000000..3128f8133ffc7fe37b91026abcf4241c2b9794f2 Binary files /dev/null and "b/.gitee/\345\233\276\347\211\207.png" differ diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000000000000000000000000000000000000..95becc00cae37a4c6f9727b247ebd6c3e8961e82 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,18 @@ +{ + "editor.tokenColorCustomizations": { + "textMateRules": [ + { + "scope": "kunpeng.func", + "settings": { + "foreground": "#28a745" + } + }, + { + "scope": "kunpeng.intrinsics", + "settings": { + "foreground": "#28a745" + } + } + ] + } +} \ No newline at end of file diff --git a/1.png b/1.png new file mode 100644 index 0000000000000000000000000000000000000000..3745da17ee9e23fa06b1770c076cf5d991248226 Binary files /dev/null and b/1.png differ diff --git a/11.png b/11.png new file mode 100644 index 0000000000000000000000000000000000000000..415cf0aaf4bacf521253cb1742bcf0e5d973d5ad Binary files /dev/null and b/11.png differ diff --git a/12.png b/12.png new file mode 100644 index 0000000000000000000000000000000000000000..d288739094e124cdeca7666c1e30b7d9edb9ecd3 Binary files /dev/null and b/12.png differ diff --git a/13.png b/13.png new file mode 100644 index 0000000000000000000000000000000000000000..54c347b1c5e0264eb6a4626a86b3a3ae4c20ee16 Binary files /dev/null and b/13.png differ diff --git a/14.png b/14.png new file mode 100644 index 0000000000000000000000000000000000000000..18b31a2d4c74ca0bc8a82c9098ad1f2516a2c22d Binary files /dev/null and b/14.png differ diff --git a/15.png b/15.png new file mode 100644 index 0000000000000000000000000000000000000000..db7aadb85613efdaf2d1ec8ad25e52d5f2d9e783 Binary files /dev/null and b/15.png differ diff --git a/16.png b/16.png new file mode 100644 index 0000000000000000000000000000000000000000..400886ed968acfa6d13ea6980ed1610d7c22d285 Binary files /dev/null and b/16.png differ diff --git a/17.png b/17.png new file mode 100644 index 0000000000000000000000000000000000000000..191afa2b661dcbf318be88b85fe085e896def28c Binary files /dev/null and b/17.png differ diff --git a/2.png b/2.png new file mode 100644 index 0000000000000000000000000000000000000000..4a49abb5c2ae24e7d5ce666c5b644a504e458efe Binary files /dev/null and b/2.png differ diff --git a/3.png b/3.png new file mode 100644 index 0000000000000000000000000000000000000000..13bf54b9fbe1bfbf1a4e7e39786f805a90891f99 Binary files /dev/null and b/3.png differ diff --git a/4.png b/4.png new file mode 100644 index 0000000000000000000000000000000000000000..a96c0bb3c00131ee09b64fb3cc22f28def3f0224 Binary files /dev/null and b/4.png differ diff --git a/5.png b/5.png new file mode 100644 index 0000000000000000000000000000000000000000..d2ac619298ed987860fbda3e3c59a502ad496987 Binary files /dev/null and b/5.png differ diff --git a/6.png b/6.png new file mode 100644 index 0000000000000000000000000000000000000000..e9dc77e2a5bbfc2d402e9366ee90b64d1353f160 Binary files /dev/null and b/6.png differ diff --git a/Dockerfile b/Dockerfile index 7ac805cb871821c6362d394d8201b9d5830e51a0..5d6a2b9b85fea931493710f9e7126fbac3147255 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM nginx +FROM nginx:1.20.0 RUN apt-get update && \ apt install curl -y && \ diff --git a/a9eb1fea46a14a049b2fc36116dbe43f.png b/a9eb1fea46a14a049b2fc36116dbe43f.png new file mode 100644 index 
0000000000000000000000000000000000000000..71e8b4b8c428c22b4ad9efe1a702b238d9db1b0b Binary files /dev/null and b/a9eb1fea46a14a049b2fc36116dbe43f.png differ

diff --git a/competition feelings .md b/competition feelings .md
new file mode 100644
index 0000000000000000000000000000000000000000..23f0970e6fb78a0bf35888f8e08b880592aea092
--- /dev/null
+++ b/competition feelings .md
@@ -0,0 +1,36 @@
+++
title="**[openGauss Article Collection] Deploying openGauss on a Local Virtual Machine**"

date="2022/10/19"

tags={"openGauss"}

archives="2022-10"

author="Lu Junfeng"

summary="Local deployment of openGauss"

+++

**We took part in the openGauss track, working on content based on the openGauss fully-encrypted database.**

We gained a great deal: from deploying openGauss locally, to studying the openGauss source code, to learning about its fully-encrypted capability. Our understanding of openGauss keeps deepening, and we increasingly appreciate its rich set of features.

1. For the local deployment of openGauss, I followed this tutorial: https://mp.weixin.qq.com/s/fRZ-p9TJOLfiEaGYZIAs6w

Being unfamiliar with Linux commands at first, I ran into many problems using openGauss's one-click deployment script. Every failure left the Linux environment in a state that blocked further progress, so the only way out was to reinstall the operating system. After countless rounds of reinstalling the system, rerunning the script, and adjusting the configuration, I finally deployed the openGauss database locally.

The main problems were as follows:

`(1) The virtual machine's disk allocation was too small, which made the openGauss installation fail. At first I allocated only a 20 GB disk and 1 GB of memory, and the final step (the Python script that installs and deploys openGauss) reported errors. After switching to a 60 GB disk and 4 GB of memory, the deployment succeeded.`

`(2) The local virtual machine ran CentOS 7.6, whose default Python version is Python 2, while the openGauss deployment script requires Python 3, so Python had to be reinstalled. That step of the deployment can also leave both the python and yum commands unusable, so the fix is simply the following.`

**Run yum install python3 and use python3 to execute the final Python script. Do not modify the .bak configuration content; changing it only makes errors more likely.**

(3) `Make sure the virtual machine can access the network: the tutorial downloads content online, so installation fails without a connection. Also remember to change the IP address in the tutorial's script.`

2. For the openGauss source code, we focused on the security-feature code, including encrypted equality and ordering comparison.

We are still in the middle of this study, including the fully-encrypted database feature itself, and will post updates as we learn more.

diff --git a/config.toml b/config.toml
index 10f9cbf0319a15918dfdc1f90e59a5ec0a9d9a39..8eee2db4c1963e04135a43645e5e7ce64abf8481 100644
--- a/config.toml
+++ b/config.toml
@@ -1,46 +1,46 @@
-title = "openGauss Blog"
-baseurl = "http://blog.openGauss.org"
-author = "@JeffProd"
-copyright = "Copyright © 2008–2018, @JeffProd"
-canonifyurls = true
-paginate = 5
-
-theme = "hugo-blog-jeffprod"
-
-languageCode = "en-us"
-defaultContentLanguage = "en"
-defaultContentLanguageInSubdir = true
-
-
-
-[taxonomies]
-  tag = "tags"
-  archive = "archives"
-
-
-# Valine.
-  # You can get your appid and appkey from https://leancloud.cn
-  # more info please open https://valine.js.org
-  [params.valine]
-    enable = true
-    appId = '6wfgavgIRqmpC3hjHqQVtFWF-gzGzoHsz'
-    appKey = 'QRqrBDBB0p0YhrGe9IJ169ip'
-    notify = false # mail notifier , https://github.com/xCss/Valine/wiki
-    verify = false # Verification code
-    avatar = 'mm'
-    placeholder = 'Please leave a message...'
-    visitor = true
-
-[languages]
-
-[languages.en]
-  languageCode = "en-us"
-  contentDir = "content/en"
-
-[languages.zh]
-  languageCode = "zh-cn"
-  contentDir = "content/zh"
-
-# Build parameters
-#uglyURLs = true
-
+title = "openGauss Blog"
+baseurl = "https://blog.openGauss.org"
+author = "@JeffProd"
+copyright = "Copyright © 2008–2018, @JeffProd"
+canonifyurls = true
+paginate = 5
+
+theme = "hugo-blog-jeffprod"
+
+languageCode = "en-us"
+defaultContentLanguage = "en"
+defaultContentLanguageInSubdir = true
+
+
+
+[taxonomies]
+  tag = "tags"
+  archive = "archives"
+
+
+# Valine.
+  # You can get your appid and appkey from https://leancloud.cn
+  # more info please open https://valine.js.org
+  [params.valine]
+    enable = true
+    appId = '6wfgavgIRqmpC3hjHqQVtFWF-gzGzoHsz'
+    appKey = 'QRqrBDBB0p0YhrGe9IJ169ip'
+    notify = false # mail notifier , https://github.com/xCss/Valine/wiki
+    verify = false # Verification code
+    avatar = 'mm'
+    placeholder = 'Please leave a message...'
+    visitor = true
+
+[languages]
+
+[languages.en]
+  languageCode = "en-us"
+  contentDir = "content/en"
+
+[languages.zh]
+  languageCode = "zh-cn"
+  contentDir = "content/zh"
+
+# Build parameters
+#uglyURLs = true
+

diff --git a/content/en/guidance/index_.md b/content/en/guidance/index_.md
index 0b1f584075bdeda1230bb982dbd90af6ec32d9cb..a1354efaff51efbd47334e06597288be46c42975 100644
--- a/content/en/guidance/index_.md
+++ b/content/en/guidance/index_.md
@@ -6,13 +6,13 @@ title = "Guidance to Post a Blog"

## Preparation

-1. Refer to http://git.mydoc.io/?t=179267 to register Gitee account.
+1. Refer to https://gitee.com/help/articles/4113 to register a Gitee account.

-2. Set your primary mail box in gitee settings https://gitee.com/profile/emails.
+2. Set your primary mailbox in Gitee settings: https://gitee.com/profile/emails.

-3. Sign your CLA in .
+3. Sign your CLA in .

-4. Prepare your git environment refering to http://git.mydoc.io/?t=180692.
+4. Prepare your git environment referring to https://gitee.com/help/articles/4107.

## Understand blog format

Tips: you can copy content/_example/2020-03-03-sample-post.md to your folder and edit it.

The blog posting follows the pull request of Gitee.

-1. Fork openGauss blog project to your own gitee. Refer to for detailed guidance.
+1. Fork the openGauss blog project to your own Gitee. Refer to for detailed guidance.

2. Clone the code to your local environment.

@@ -82,6 +82,6 @@ git commit -m ""
git push origin :
```

-7. Refer to http://git.mydoc.io/?t=153749 to submit your Pull Request
+7. Refer to https://gitee.com/help/articles/4122 to submit your Pull Request.

8. Wait for reviewing and merging.

diff --git a/content/en/post/2022/A-Leap-of-openGauss-from-Standalone-System-to-Distributed-System.md b/content/en/post/2022/A-Leap-of-openGauss-from-Standalone-System-to-Distributed-System.md
new file mode 100644
index 0000000000000000000000000000000000000000..45fcf70ee4864a52a1423e5dc8f503773b8d1415
--- /dev/null
+++ b/content/en/post/2022/A-Leap-of-openGauss-from-Standalone-System-to-Distributed-System.md
@@ -0,0 +1,74 @@
+++

title = "A Leap of openGauss from Standalone System to Distributed System"

date = "2021-09-14"

tags = [ "A Leap of openGauss from Standalone System to Distributed System"]

archives = "2021-09"

author = "Bin Zhou"

summary = "A Leap of openGauss from Standalone System to Distributed System"

img = "/en/post/2022/title/img2.png"

times = "12:30"

+++

# A Leap of openGauss from Standalone System to Distributed System

Since June 2020, the openGauss standalone database has received wide attention from the industry and attracted many partners and developers to build a prosperous database ecosystem. In the face of massive data and ultra-high concurrency scenarios, openGauss turns to distributed solutions and focuses on solving problems in different aspects, such as massive data storage, ultra-high concurrent throughput, and large table bottlenecks, achieving another breakthrough from standalone system to distributed system.

## **Distributed solution**

![](../figures/21.png)

Figure 1 Overall architecture of the distributed solution

openGauss integrates many open-source components to build a full-stack open-source distributed solution that integrates data scale-out, distributed transactions, and governance. Figure 1 shows the overall architecture of openGauss.
ShardingSphere-Proxy is an open-source distributed solution that provides sharding, table partitioning, distributed transactions, auto scaling, and read/write splitting capabilities. HAProxy, working with the Patroni REST APIs, can always identify the database primary node, ensuring HA and implementing load balancing. Each Patroni HA cluster consists of one primary node and multiple standby nodes, and uses the Paxos protocol to ensure data consistency. Nodes can be deployed in the same or different regions to ensure data security across multiple regions and data centers. The solution combines the distributed capability of ShardingSphere-Proxy with cluster management through Kubernetes and cluster monitoring through Prometheus. In this way, a full-stack open-source distributed solution is built. (A minimal connection sketch is given at the end of this post.)

## **Product advantages**

1. **Ultimate scalability and flexible scaling**

   The computing and storage capabilities can be linearly expanded through horizontal sharding to a maximum of 6400 shards. The performance increases quasi-linearly with the expansion, which effectively solves the problem of data volume expansion in a single table. Depending on service traffic, data nodes can be flexibly and smoothly scaled in or out, and read and write operations are intelligently separated to implement automatic load balancing of distributed databases.

2. **Abundant enterprise-class features**

   Provides abundant enterprise-class features, such as distributed stored procedures, triggers, distributed transactions, fully-encrypted data, and Workload Diagnosis Reports \(WDRs\).

3. **One-click deployment, shielding underlying dependencies**

   Standard images ensure consistent delivery in multiple environments, container-based deployment, and physical resource pooling, reducing dependency on the platform and implementing second-level application deployment.

4. **Ultra-high availability, implementing remote disaster recovery**

   Provides powerful cluster management and O&M capabilities, and flexible deployment of multiple data centers in the same city, different cities, and multiple regions, ensuring data security and consistency based on the Paxos protocol, and providing multiple DR capabilities with "RPO = 0".

5. **Open source, building a full-stack ecosystem**

   The standalone and distributed open-source openGauss solutions encourage more partners and developers to jointly build a prosperous database ecosystem and a full-stack open-source ecosystem.

## **Application scenarios**

1. **Banking system**

   Based on the consensus protocol Paxos, the distributed strong-consistency transaction capability ensures strong data consistency in the distributed environment. Multiple data centers in multiple regions provide the "RPO = 0" capability to ensure bank-grade reliability.

2. **Government/Enterprise office**

   Provides fully-encrypted data processing, which is secure and reliable, and supports software and hardware ecosystems to ensure HA and safeguard government and enterprise office work.

3. **Smart grid**

   Supports flexible deployment of multiple data centers in the same city, in different cities, and in multiple regions, and flexible scaling based on service tidal characteristics, helping build smart grids.

   Currently, the distributed openGauss solution is in the crowdtesting phase. Developers are welcome to participate in this phase and work together to build a powerful distributed solution.
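As a minimal sketch of the access path described above — the host, port, database name, and user are placeholder values, not fixed parts of the solution:

```
# The application talks only to ShardingSphere-Proxy, as if it were a single
# openGauss instance (hypothetical endpoint and credentials).
gsql -h proxy.example.com -p 3307 -d sharding_db -U app_user -r

-- Issued through the proxy, this query is routed to whichever shard(s)
-- hold the matching rows; the application still sees one logical table.
select * from warehouse where w_no = 1;
```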
diff --git a/content/en/post/2022/Automatic-Test-Framework-YAT.md b/content/en/post/2022/Automatic-Test-Framework-YAT.md
new file mode 100644
index 0000000000000000000000000000000000000000..b98994c13018ba1b097c85cde977548d8ee1b526
--- /dev/null
+++ b/content/en/post/2022/Automatic-Test-Framework-YAT.md
@@ -0,0 +1,195 @@
+++

title = "Automatic Test Framework YAT"

date = "2021-11-19"

tags = [ "Automatic Test Framework YAT"]

archives = "2021-11"

author = "Yansong LI"

summary = "Automatic Test Framework YAT"

img = "/en/post/2022/title/img1.png"

times = "12:30"

+++

# Automatic Test Framework — YAT

The Yet another test \(YAT\) framework is an automatic test framework based on Python3. The core of the framework is implemented in the Kotlin language. The framework is encapsulated and glued together through Python to provide command line interfaces \(CLIs\). Figure 1 shows the overall framework. The YAT framework is continuously evolving to become a more efficient and advanced automatic test framework.

YAT is the automatic test framework used for openGauss database specification monitoring. openGauss 2.1.0 not only releases YAT as open source, but also contributes over 30,000 automatic test cases to enhance the test capability of the openGauss community, enrich the openGauss ecosystem, and attract more developers to participate in community construction. YAT code repository: [https://gitee.com/opengauss/Yat](https://gitee.com/opengauss/Yat)

![](../figures/zh-cn_image_0000001206146876.jpg)

Figure 1 Overall YAT framework

## Product Advantages

- It is friendly to database tests. Users can directly write SQL code and organize the code into a test suite for tests without additional configuration. Test cases are executed through the JDBC API and can adapt to various databases.
- It supports multiple languages and is extensible.

  Currently, languages such as SQL, Shell, Python \(unittest\), and Groovy \(JUnit/Spock\) are supported. New languages and frameworks can be added through adapters. SQL statements supported by YAT are supersets of standard SQL statements. That is, YAT is extended based on standard SQL statements. Users can run shell commands, control connections, execute loops, bind SQL statements, control multiple sessions, and concurrently execute SQL statements in SQL scripts. For example:

  ```
  @conn user/passwd@127.0.0.1:9090; -- Reconnect to the database as the new user.
  drop table if exists tbl_x; -- Execute SQL statements.

  create table tbl_x (id int, age int, xb int);

  insert into tbl_x values(1, 2, 4);
  insert into tbl_x values(3, 4, 5);

  -- Perform the binding operation.
  insert into tbl_x values(?, ?, ?);
  @bind {
      int 3
      int 5
      int 7
  }
  -- Perform the binding operation in batches.
  insert into tbl_x values(?, ?, ?);
  @batch {
      int 3 int 4 int 0
      int 3 int 4 int 9
      int 3 int 4 int 8
      int 3 int 4 int 7
  }
  -- Run the shell commands.
  @sh zctl.py -t stop;
  @sh zctl.py -t start;

  -- Define sessions.
  @session(name: s1)
  {
      @set autocommit false;
      update table tbl_x set par1 = 2 par2 = 2;
      insert into tbl_x values(1, 3, 4);
      commit;
  }

  @session(name: s2, user: abc, password: 'gauss@123')
  {
      @set autocommit false;
      update table tbl_x set par1 = 2 par2 = 2;
      insert into tbl_x values(1, 3, 4);
      @step
      {
          select * from tbl_x for update;
      }
      commit;
  }
  -- Execute SQL statements in sessions.
  @steps s1.0 s2.0 s1.1 s1.2 s2.2 s2.1;
  -- Execute loops.
  @for (count: 10)
  {
      insert into abc values(1,1,3,4);
  }
  -- Concurrently execute SQL statements.
  @parallel {
      @session {
          select * from abc for update;
          commit;
      }

      @session {
          select * from abc for update;
          commit;
      }
  }
  ```

  The Python language test script must be a unittest test script. YAT provides a public library to facilitate database and remote SSH operations in the Python unittest test script. For example:

  ```
  class TestPrimaryStandby(TestCase):
      node = None
      node_standby = None

      test_table_drop = 'drop table if exists tbl_test';
      test_table_create = '''create table tbl_test (
          id int,
          name char(20),
          address varchar(1024)
      )
      '''

      @classmethod
      def setUpClass(cls):
          # Initialize the connection object.
          cls.node = Node(node='primary')
          cls.node_standby = Node(node='standby')
          cls.node.sql(cls.test_table_drop)
          cls.node.sql(cls.test_table_create)

      @classmethod
      def tearDownClass(cls) -> None:
          cls.node.close()
          cls.node_standby.close()  # Close the connection object.

      def test_abc_001(self):
          # Run the shell command through SSH and check the result.
          self.node.sh('echo "success"').expect('success')

          # Run the SQL statement and check the result.
          self.node.sql('select * from tbl_test').expect(
              (1, 'xxx', 'xxxxxxxxxxxxxxxxxxx'),
              (2, 'xxx', 'xxxxxxxxxxxxxxxxxxx'))

      def test_abc_003(self):
          # Run the shell command and determine the result through regular expression matching.
          self.node.sh('cm ctl query').regex(r'.*success.*')
  ```

  Groovy scripts can be used to write JUnit cases or Spock cases. For more details, visit the official website.

- It defines a set of standard test case writing specifications, test execution processes, and test report presentation modes.

  Run the **yat init** command to create a test suite template. After the **yat init** command is executed, modify the content in the specified test suite directory. Then, install the JDBC driver: add a **lib** directory to the root directory of the test suite and copy the driver into it. Run the **yat suite run** command to run the test suite. You can add different parameters to set whether to print the report and the report format. Figure 2 shows the directory structure of the test suite. Configure the following directories before running the test suite:

  - **conf** directory, which stores node configuration files.
  - **except** directory, which stores the expected output files of the test cases.
  - **schedule** directory, which stores the schedule files.
  - **testcase** directory, which stores test case files.

  ![](../figures/zh-cn_image_0000001206626828.jpg)

  Figure 2 Directory structure of the test suite

- Multi-suite Schedule

  YAT Schedule is a scheduler provided by YAT to schedule multiple YAT test suites at the same time. You can schedule multiple YAT test suites in parallel or serial mode by customizing a schedule file. When there are a large number of test suites, you need to determine the combination and sequence of the test suites. YAT provides a convenient method to organize multiple test suites into a large test suite, as shown in the following example.

  ```
  # File name: all.ys
  serial { # Serial execution of test suites or test suite sets
      suite '../suite1';
      suite '../suite2';
      ...

      parallel { # Parallel execution of test suites or test suite sets
          suite 'parallel/suite1';
          suite 'parallel/suite2';
          ...

          serial { ... }
      }

      suite 'suite3';
      ...
  }
  ```

  Run the following command to execute all test suites in one-click mode:

  ```
  yat schedule -s all.ys
  ```

diff --git a/content/en/post/2022/Basic-Principles-of-the-Patroni-for-openGauss-HA-Solution.md b/content/en/post/2022/Basic-Principles-of-the-Patroni-for-openGauss-HA-Solution.md
new file mode 100644
index 0000000000000000000000000000000000000000..cfef9bc63754a99e85b3a8fb0f67b9c0107ed8f2
--- /dev/null
+++ b/content/en/post/2022/Basic-Principles-of-the-Patroni-for-openGauss-HA-Solution.md
@@ -0,0 +1,45 @@
+++

title = "Basic Principles of the Patroni for openGauss HA Solution"

date = "2021-9-17"

tags = [ "Basic Principles of the Patroni for openGauss HA Solution"]

archives = "2021-9"

author = "Mengen Xue"

summary = "Basic Principles of the Patroni for openGauss HA Solution"

img = "/en/post/2022/title/img5.png"

times = "12:30"

+++

# Basic Principles of the Patroni for openGauss HA Solution

## 1. Introduction to Patroni

Patroni is an open-source product developed in Python by Zalando. It uses a distributed configuration store \(DCS\) to keep the status and configuration of each node in the database cluster, and performs automatic management and failover for the database cluster.

## 2. Working Principles of Patroni

An HA cluster consists of Patroni, the DCS, and the databases. This solution uses ETCD as the DCS and openGauss as the database.

ETCD is a distributed key-value store. It is designed to reliably and quickly store key data and provide access services. It uses distributed locks, leader election, and write barriers to implement reliable distributed collaboration. ETCD clusters are used for HA and persistent data storage and retrieval.

Patroni connects to ETCD through an API and inserts key-value pairs to record Patroni parameters, database parameters, primary/standby information, and connection information \(a short inspection sketch follows at the end of this post\). Generally, ETCD is used to detect heartbeats of other nodes. The primary/standby information stored in the key-value pairs is obtained from ETCD to determine the status of each node and automatically manage the cluster. The following figure shows the basic principle.

![](../figures/zh-cn_image_0000001208491336.png)

As shown in the preceding figure, only one Patroni node can become the leader at a time. That is, only one Patroni node can hold the leader lock. This prevents split-brain. Currently, patroni-for-openGauss can rectify the following faults:

1. If the primary database stops unexpectedly but can be recovered by restarting, it is automatically started immediately.
2. If the primary database is faulty and cannot be started, the current primary database releases the leader lock and is demoted to standby. Then, the system automatically selects the healthiest standby database, that is, the standby database whose synchronization status is closest to that of the primary database, and promotes it to primary.
3. If a standby database stops unexpectedly and can reconnect to the primary database after being restarted, it is restarted immediately.
4. If a standby database is faulty and, after it is restarted, lags behind the primary database, it is rebuilt to restore its status.
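To see this state concretely, the key-value pairs Patroni maintains can be inspected directly. A minimal sketch follows, assuming a hypothetical cluster scope named opengauss1 and a typical configuration file path; both are placeholders:

```
# Patroni keeps its keys under /service/<scope>/ by default (etcd v3 API;
# the scope name "opengauss1" is hypothetical).
etcdctl get --prefix /service/opengauss1/

# The leader key records which node currently holds the leader lock.
etcdctl get /service/opengauss1/leader --print-value-only

# patronictl reads the same DCS state and prints the cluster topology
# (assumed configuration file path).
patronictl -c /etc/patroni/patroni.yaml list
```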
diff --git a/content/en/post/2022/Basic-Use-of-the-Fully-encrypted-Database.md b/content/en/post/2022/Basic-Use-of-the-Fully-encrypted-Database.md
new file mode 100644
index 0000000000000000000000000000000000000000..4ba44aa39dbf5c7fbc56975d483ab6a2583a5a8e
--- /dev/null
+++ b/content/en/post/2022/Basic-Use-of-the-Fully-encrypted-Database.md
@@ -0,0 +1,288 @@
+++

title = "Basic Use of the Fully-encrypted Database"

date = "2021-10-16"

tags = [ "Basic Use of the Fully-encrypted Database"]

archives = "2021-10"

author = "Jinxiang Xiao"

summary = "Basic Use of the Fully-encrypted Database"

img = "/en/post/2022/title/img10.png"

times = "12:30"

+++

# Basic Use of the Fully-encrypted Database

## 1. Introduction to the Fully-encrypted Database Features

A fully-encrypted database aims to protect privacy throughout the data lifecycle. Data is always encrypted during transmission, computing, and storage regardless of the service scenario or environment. After the data owner encrypts data on the client and sends the encrypted data to the server, even if an attacker manages to exploit some system vulnerability and steal user data, they cannot obtain valuable information. Data privacy is protected.

## 2. Customer Benefits of the Fully-encrypted Database

The entire service data flow is encrypted during processing. A fully-encrypted database:

1. Protects data privacy and security throughout the lifecycle on the cloud. Attackers cannot obtain information from the database server regardless of the data status.
2. Helps cloud service providers earn the trust of third-party users. Users, including service administrators and O&M administrators in enterprise service scenarios and application developers in consumer cloud services, can keep the encryption keys themselves so that even users with high permissions cannot access unencrypted data.
3. Enables cloud databases to better comply with personal privacy protection laws and regulations.

## 3. Use of the Fully-encrypted Database

Currently, the fully-encrypted database supports two connection modes: gsql and JDBC. This chapter describes how to use the database in the two connection modes.

### 3.1 Connecting to a Fully-encrypted Database

1. Run the **gsql -p PORT -d postgres -r -C** command to enable the encryption function.

Parameter description:

**-p** indicates the port number. **-d** indicates the database name. **-C** indicates that the encryption function is enabled.

2. To support JDBC operations on a fully-encrypted database, set **enable\_ce** to **1** \(a short JDBC connection sketch is shown at the end of section 3.2\).

### 3.2 Creating a User Key

A fully-encrypted database has two types of keys: client master key \(CMK\) and column encryption key \(CEK\).

The CMK is used to encrypt the CEK. The CEK is used to encrypt user data.

Before creating a key, use gs\_ktool to create a key ID for creating a CMK.

openGauss=\# **\\! gs\_ktool -g**

The sequence and dependency of creating a key are as follows: creating a key ID \> creating a CMK \> creating a CEK.

- **1. Creating a CMK and a CEK in the GSQL Environment**

- \[Creating a CMK\]

  CREATE CLIENT MASTER KEY client\_master\_key\_name WITH \(KEY\_STORE = key\_store\_name, KEY\_PATH = "key\_path\_value", ALGORITHM = algorithm\_type\);

  Parameter description:

  - client\_master\_key\_name

    This parameter is used as the name of a key object. In the same namespace, the value of this parameter must be unique.

    Value range: a string. It must comply with the naming convention.
  - KEY\_STORE

    Tool or service that independently manages keys. Currently, only the key management tool gs\_ktool provided by GaussDB Kernel and the online key management service huawei\_kms provided by Huawei Cloud are supported. Value range: **gs\_ktool** and **huawei\_kms**

  - KEY\_PATH

    A key in the key management tool or service. The **KEY\_STORE** and **KEY\_PATH** parameters can be used to uniquely identify a key entity. When **KEY\_STORE** is set to **gs\_ktool**, the value has the form **gs\_ktool/KEY\_ID**. When **KEY\_STORE** is set to **huawei\_kms**, the value is a 36-byte key ID.

  - ALGORITHM

    This parameter specifies the encryption algorithm used by the key entity. When **KEY\_STORE** is set to **gs\_ktool**, the value can be **AES\_256\_CBC** or **SM4**. When **KEY\_STORE** is set to **huawei\_kms**, the value is **AES\_256**.

- \[Creating a CEK\]

  CREATE COLUMN ENCRYPTION KEY column\_encryption\_key\_name WITH\(CLIENT\_MASTER\_KEY = client\_master\_key\_name, ALGORITHM = algorithm\_type, ENCRYPTED\_VALUE = encrypted\_value\);

  Parameter description:

  - column\_encryption\_key\_name

    This parameter is used as the name of a key object. In the same namespace, the value of this parameter must be unique.

    Value range: a string. It must comply with the naming convention.

  - CLIENT\_MASTER\_KEY

    Specifies the CMK used to encrypt the CEK. The value is the CMK object name, which is created using the **CREATE CLIENT MASTER KEY** syntax.

  - ALGORITHM

    Encryption algorithm to be used by the CEK. The value can be **AEAD\_AES\_256\_CBC\_HMAC\_SHA256**, **AEAD\_AES\_128\_CBC\_HMAC\_SHA256**, or **SM4\_SM3**.

  - **ENCRYPTED\_VALUE \(optional\)**

    A key password specified by a user. The key password length ranges from 28 to 256 bits. The derived 28-bit key meets the AES128 security requirements. If the user needs to use AES256, the key password length must be 39 bits. If the user does not specify the key password length, a 256-bit key is automatically generated.

  \[Example in the GSQL environment\]

  ```
  -- (1) Use the key management tool gs_ktool to create a key. The tool returns the ID of the newly generated key.
  [cmd] gs_ktool -g

  -- (2) Use a privileged account to create a common user named alice.
  openGauss=# CREATE USER alice PASSWORD '********';

  -- (3) Use the account of common user alice to connect to the encrypted database and execute the syntax.
  gsql -p 57101 postgres -U alice -r -C

  -- Create a CMK object.
  openGauss=> CREATE CLIENT MASTER KEY alice_cmk WITH ( KEY_STORE = gs_ktool , KEY_PATH = "gs_ktool/1" , ALGORITHM = AES_256_CBC);

  -- Create CEK objects (both reference the CMK created above).
  openGauss=> CREATE COLUMN ENCRYPTION KEY a_cek WITH VALUES (CLIENT_MASTER_KEY = alice_cmk, ALGORITHM = AEAD_AES_256_CBC_HMAC_SHA256);
  openGauss=> CREATE COLUMN ENCRYPTION KEY another_cek WITH VALUES (CLIENT_MASTER_KEY = alice_cmk, ALGORITHM = SM4_SM3);
  ```

- **2. Creating a CMK and a CEK in the JDBC Environment**

  ```
  // Create a CMK.
  Connection conn = DriverManager.getConnection("url","user","password");
  Statement stmt = conn.createStatement();
  int rc = stmt.executeUpdate("CREATE CLIENT MASTER KEY ImgCMK1 WITH ( KEY_STORE = gs_ktool , KEY_PATH = \"gs_ktool/1\" , ALGORITHM = AES_256_CBC);");

  // Create a CEK.
  int rc2 = stmt.executeUpdate("CREATE COLUMN ENCRYPTION KEY ImgCEK1 WITH VALUES (CLIENT_MASTER_KEY = ImgCMK1, ALGORITHM = AEAD_AES_256_CBC_HMAC_SHA256);");
  ```
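Section 3.1 noted that JDBC operations on a fully-encrypted database require **enable\_ce** to be set to **1**; the example above assumes such a connection. A minimal sketch of building one follows — the endpoint and credentials are placeholders:

```
// Hypothetical host, port, and credentials; enable_ce=1 switches on the
// client-side encryption logic for this connection, mirroring gsql's -C option.
String url = "jdbc:postgresql://127.0.0.1:5432/postgres?enable_ce=1";
Connection conn = DriverManager.getConnection(url, "alice", "********");
Statement stmt = conn.createStatement();
```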
### 3.3 Creating an Encrypted Table

After creating the CMK and CEK, you can use the CEK to create an encrypted table.

An encrypted table can be created in two modes: randomized encryption and deterministic encryption.

- **Creating an Encrypted Table in the GSQL Environment**

\[Example\]

  ```
  openGauss=# CREATE TABLE creditcard_info (id_number int,
  name text encrypted with (column_encryption_key = ImgCEK, encryption_type = DETERMINISTIC),
  credit_card varchar(19) encrypted with (column_encryption_key = ImgCEK1, encryption_type = DETERMINISTIC));
  ```
Parameter description:

**ENCRYPTION\_TYPE** indicates the encryption type in the ENCRYPTED WITH constraint. The value of **encryption\_type\_value** can be **DETERMINISTIC** or **RANDOMIZED**.

- **Creating an Encrypted Table in the JDBC Environment**

  ```
  int rc3 = stmt.executeUpdate("CREATE TABLE creditcard_info (id_number int, name varchar(50) encrypted with (column_encryption_key = ImgCEK1, encryption_type = DETERMINISTIC), credit_card varchar(19) encrypted with (column_encryption_key = ImgCEK1, encryption_type = DETERMINISTIC));");
  ```
### 3.4 Inserting Data into the Encrypted Table and Querying the Data

After an encrypted table is created, you can insert and view data in the encrypted table in encrypted database mode \(enabling the connection parameter **-C**\). When the common environment \(disabling the connection parameter **-C**\) is used, operations cannot be performed on the encrypted table, and only ciphertext data can be viewed in it.

- **Inserting Data into the Encrypted Table and Viewing the Data in the GSQL Environment**

  ```
  openGauss=# INSERT INTO creditcard_info VALUES (1,'joe','6217986500001288393');
  INSERT 0 1
  openGauss=# INSERT INTO creditcard_info VALUES (2, 'joy','6219985678349800033');
  INSERT 0 1
  openGauss=# select * from creditcard_info where name = 'joe';
   id_number | name |     credit_card
  -----------+------+---------------------
           1 | joe  | 6217986500001288393
  (1 row)
  ```
  Note: The data in the encrypted table is displayed in ciphertext when you use a non-encrypted client to view the data.

  ```
  openGauss=# select id_number,name from creditcard_info;
   id_number |                   name
  -----------+-------------------------------------------
           1 | \x011aefabd754ded0a536a96664790622487c4d36
           2 | \x011aefabd76853108eb406c0f90e7c773b71648f
  (2 rows)
  ```
- **Inserting Data into the Encrypted Table and Viewing the Data in the JDBC Environment**

  ```
  // Insert data.
  int rc4 = stmt.executeUpdate("INSERT INTO creditcard_info VALUES (1,'joe','6217986500001288393');");

  // Query the encrypted table.
  ResultSet rs = null;
  rs = stmt.executeQuery("select * from creditcard_info where name = 'joe';");

  // Close the statement object.
  stmt.close();
  ```
+ + The preceding describes how to use the fully-encrypted database features. For details, see the corresponding sections in the official document. However, for a common user, the functions described above are sufficient to ensure smooth implementation of daily work. In the future, fully-encrypted databases will evolve to be easier to use and provide higher performance. Stay tuned! + + + diff --git a/content/en/post/2022/Compilation-Guide-for-openGauss-Under-Heterogeneous-Conditions.md b/content/en/post/2022/Compilation-Guide-for-openGauss-Under-Heterogeneous-Conditions.md new file mode 100644 index 0000000000000000000000000000000000000000..40e1a02e2a298895429efe3d493b00d091e6f4dd --- /dev/null +++ b/content/en/post/2022/Compilation-Guide-for-openGauss-Under-Heterogeneous-Conditions.md @@ -0,0 +1,539 @@ ++++ + +title = "Compilation Guide for openGauss Under Heterogeneous Conditions" + +date = "2021-07-07" + +tags = [ "Compilation Guide for openGauss Under Heterogeneous Conditions"] + +archives = "2021-07" + +author = "Yansong LI" + +summary = "Compilation Guide for openGauss Under Heterogeneous Conditions" + +img = "/en/post/2022/title/img1.png" + +times = "12:30" + ++++ + +# Compilation Guide for openGauss Under Heterogeneous Conditions + +## **Download** + +opengauss-openGauss-server-v2.0.0.tar.gz \(openGauss source code package\) + +**Download link:** + +https://gitee.com/opengauss/openGauss-server/repository/archive/v2.0.0?ref=v2.0.0&sha=78689da92cdc811cad2458dc213d007e96864062&format=tar.gz&captcha\_type=yunpian + +opengauss-openGauss-third\_party-2.0.0.zip \(source code package of all third-party libraries on which openGauss-server depends\) + +**Download link:** + +https://gitee.com/opengauss/openGauss-third\_party/repository/archive/v2.0.0.zip?ref=v2.0.0&sha=3a38c6c134e5b2e39d0557d575ec04302a83584a&format=zip&captcha\_type=yunpian + +openGauss-third\_party\_binarylibs.tar.gz \(medium packages of all third-party libraries on which openGauss-server depends\) + +**Download link:** + +https://opengauss.obs.cn-south-1.myhuaweicloud.com/2.0.0/openGauss-third\_party\_binarylibs.tar.gz + +Note: This document is compiled by referring to the following official compilation document: + +https://opengauss.org/en/docs/2.0.0/docs/Quickstart/Quickstart.html + +## **Precautions** + +Currently, 64-bit executable programs are generated. + +## **Environment Information** + +The compilation is performed on Phytium Kirin V10. The detailed information is as follows: + +``` +Linux localhost.localdomain 4.19.90-vhulk2001.1.0.0026.ns7.15.aarch64 #1 SMP Sun Apr 19 22:34:29 CST 2020 aarch64 aarch64 aarch64 GNU/Linux + + +NAME="Kylin Linux Advanced Server" + +VERSION="V10 (Azalea)" +ID="kylin" + +ID_LIKE="fedora" +VARIANT="Server" + +VARIANT_ID="server" +VERSION_ID="V10" + +PRETTY_NAME="Kylin Linux Advanced Server V10 (Azalea)" +ANSI_COLOR="0;31" + +CPE_NAME="cpe:/o:kylin:enterprise_linux:V10:GA:server" +HOME_URL="https://www.kylinos.cn/" + +BUG_REPORT_URL="https://bugzilla.kylinos.cn/bugzilla/" +KYLIN_BUGZILLA_PRODUCT="Kylin Linux Advanced Server 10" + +KYLIN_BUGZILLA_PRODUCT_VERSION=V10 +KYLIN_SUPPORT_PRODUCT="Kylin Linux Advanced Server" + +KYLIN_SUPPORT_PRODUCT_VERSION="V10" +``` + +## **Compilation** + +- **openGauss-third\_party** + + This repository is used to compile all open-source third-party software on which the openGauss depends. + + There are four directories. + + a. The **build** directory contains all third-party scripts on which the compilation depends. + + b. 
The **buildtools** directory contains build tools for compiling these opensources and openGauss servers. + + c. The **dependency** directory contains all open-source components on which the openGauss server depends. + + d. The **platform** directory contains open-source software such as OpenJDK. + +- **Dependencies** + + The following lists the software requirements for compiling openGauss. + + \(CMake, GCC, and Python must be installed. You are advised to install other components. You can try to compile the code even if the installation is not complete.\) + + ``` + libaio-devel + ncurses-devel + + pam-devel + libffi-devel + + libtool + libtool-devel + + libtool-ltdl + python-devel + + openssl-devel + lsb_release (The medium package name in Phytium-Kylin is neokylin-lsb.) + + bison + cmake + + gcc + ``` + + Note: CMake and GCC have strict version requirements. The CMake version must be later than 3.16.5, and the GCC version must be 7.3.0. + + Compile Python 3 and set environment variables. The installation of CMake and GCC will be described later. Run the **yum install** command to install other dependencies. For network limitations, mount the Kylin ISO file for installation. + + Before installing CMake and GCC, install Python 3 and the preceding software. + + +- **Installing CMake** + + The version of CMake is cmake-3.17.1, and the installation path is **/home/opengauss/3rd/cmake**. \(Related dependencies are installed in **/home/opengauss/3rd**.\) To install the CMake, run the following commands: + + ``` + tar –zxvf cmake-3.17.1.tar.gz + ./bootstrap --prefix=/home/opengauss/3rd/cmake-3.17.1 + + make –j4 + make install + ``` + +- **Installing GCC** + + The GCC installation depends on GMP 6.2.0, MPFR 4.0.2, and MPC 1.1.0. To install the GCC, run the following commands: + + ``` + gmp + tar –zxvf gmp-6.2.0.tar.gz + + cd gmp-6.2.0 + ./configure --prefix=/home/opengauss/3rd/gmp-6.2.0 --build=x86 CFLAGS=-fPIC + + make –j4 + make install + + + mpfr + + tar –zxvf mpfr-4.0.2.tar.gz + cd mpfr-4.0.2 + + ./configure --prefix=/home/opengauss/3rd/mpfr-4.0.2 --with-gmp=/home/opengauss/3rd/gmp-6.2.0 + make –j4 + + make install + + + mpc + tar –zxvf mpc-1.1.0.tar.gz + + cd mpc-1.1.0 + ./configure --prefix=/home/opengauss/3rd/mpc-1.1.0 --with-gmp=/home/opengauss/3rd/gmp-6.2.0 --with-mpfr=/home/opengauss/3rd/mpfr-4.0.2 + + make –j4 + make install + + + gcc + + tar –zxvf gcc-7.3.0 + cd gcc-7.3.0 + + export LD_LIBRARY_PATH=/home/opengauss/3rd/gmp-6.2.0/lib:/home/opengauss/3rd/mpfr-4.0.2/lib:/home/opengauss/3rd/mpc-1.1.0/lib:$ LD_LIBRARY_PATH + ./configure --prefix=/home/opengauss/3rd/gcc-7.3.0 --with-gmp=/home/opengauss/3rd/gmp-6.2.0 --with-mpfr=/home/opengauss/3rd/mpfr-4.0.2 --with-mpc=/home/opengauss/3rd/mpc-1.1.0 + + make –j4 + make install + ``` + +- **Compiling openGauss-third\_party** + + Save the **opengauss-openGauss-third\_party-2.0.0.zip** package to **/home/opengauss** and decompress it. + + ``` + cd openGauss-third_party + ``` + + By default, compilation commands are executed concurrently, which occupies a large amount of memory. If the memory is insufficient, run the **find. -name "\*.sh" | xargs grep "make" | grep j** command to find all MAKE statements, delete **-sj**, **-sj$\{cpus\_num\}**, or **–sj 8**, and run the statements in single-thread mode. You can also change the value based on the number of cores and memory size of the host. + + Set the environment variables. 
+ + ``` + export CMAKEROOT=/home/opengauss/3rd/cmake-3.17.1 + export GCC_PATH=/home/opengauss/3rd/gcc-7.3.0 + + export CC=$GCC_PATH/bin/gcc + export CXX=$GCC_PATH/bin/g++ + + export LD_LIBRARY_PATH=$GCC_PATH/lib64:/home/opengauss/3rd/mpc-1.1.0/lib:/home/opengauss/3rd/mpfr-4.0.2/lib:/home/opengauss/3rd/gmp-6.2.0/lib:$CMAKEROOT/lib:$LD_LIBRARY_PATH + export PATH=$CMAKEROOT/bin:$PATH + ``` + + The compilation procedure is as follows: + + ``` + 1. Run cd /home/opengauss/openGauss-third_party/build. + 2. Run sh build_all.sh. + ``` + + After the compilation is complete, the result is exported to **/home/opengauss/openGauss-third\_party/output**. + + ``` + /home/opengauss/openGauss-third_party/output is the third-party medium directory on which openGauss-server depends. + ``` + + +## **openGauss-server** + +This repository is used to compile GaussDB binary executable files. + +- **Dependencies** + + The following lists the software requirements for compiling openGauss. + + You are advised to use the default installation packages of the following dependency software obtained from the operating system installation CD-ROM or installation source. If the following software does not exist, refer to the recommended software versions. \(You can try compilation even if the installation is not complete.\) + + ``` + libaio-devel 0.3.109-13 + flex 2.5.31 or later + bison 2.7-4 + ncurses-devel 5.9-13.20130511 + glibc-devel 2.17-111 + patch 2.7.1-10 + readline-devel 7.0-13 + ``` + + +- **Compiling openGauss-server** + + Save the **opengauss-openGauss-server-v2.0.0.tar.gz** package to **/home/opengauss** and decompress it. + + ``` + cd openGauss-server + ``` + + By default, compilation commands are executed concurrently, which occupies a large amount of memory. If the memory is insufficient, run the **find. -name "\*.sh" | xargs grep "make" | grep j** command to find all MAKE statements, delete **-sj**, **-sj$\{cpus\_num\}**, or **–sj 8**, and run the statements in single-thread mode. You can also change the value based on the number of cores and memory size of the host. + + Set the environment variables. + + ``` + export CODE_BASE=`pwd` + export BINARYLIBS=`pwd`/../openGauss-third_party/output + export GAUSSHOME=$CODE_BASE/dest + export GCC_PATH=/home/opengauss/3rd/gcc-7.3.0 + export CC=$GCC_PATH/bin/gcc + export CXX=$GCC_PATH/bin/g++ + export LD_LIBRARY_PATH=$GCC_PATH/lib64:/home/opengauss/3rd/mpc-1.1.0/lib:/home/opengauss/3rd/mpfr-4.0.2/lib:/home/opengauss/3rd/gmp-6.2.0/lib:$LD_LIBRARY_PATH + export PATH=$GCC_PATH/bin:$PATH + ``` + + The compilation procedure is as follows: + + ``` + Run the following commands: ./configure --gcc-version=7.3.0 CC=g++ CFLAGS="-O2 -g3" --prefix=$GAUSSHOME --3rd=$BINARYLIBS --enable-thread-safety --with-readline --without-zlib + make –j4 + make install + Errors may be reported during the compilation. After the errors are rectified, the compilation result is exported to the $GAUSSHOME directory. + ``` + +- **Rectifying Errors** + + Save **openGauss-third\_party\_binarylibs.tar.gz** to **/home/opengauss**. + + ``` + cd /home/opengauss + tar –zxvf openGauss-third_party_binarylibs.tar.gz + ``` + + Most compilation errors are caused by the lack of some dynamic libraries and header files in the third-party software compiled in section 4.2.4. The following solutions are available: + + - 1. Run **cd openGauss-third\_party/dependency** to go to the corresponding library directory, and perform compilation by referring to **README.md**. 
The compilation result is output to **openGauss-third\_party/output/dependency/kylin\_aarch64**. + - 2. Copy the corresponding library in the **openGauss-third\_party\_binarylibs/dependency/openeuler\_aarch64** directory to the **openGauss-third\_party/output/dependency/kylin\_aarch64** directory. \(In this method, some libraries cannot be used after being copied, because some function symbols are missing.\) + + Note: Select the first solution if possible, unless the first solution is busy and the second solution can solve the problem. + + The following analyzes and rectifies possible errors one by one: + + - ../../../../../src/include/access/obs/obs\_am.h:33:10: fatal error: eSDKOBS.h: The file or directory does not exist. + + Cause: The **libobs** library is missing in **openGauss-third\_party/output**. + + Solution: If the **libobs** source code does not exist in **openGauss-third\_party/dependency**, use the second solution. + + cp -r openGauss-third\_party\_binarylibs/dependency/openeuler\_aarch64/libobs openGauss-third\_party/output/dependency/kylin\_aarch64 + + - ../../../../src/include/gs\_policy/gs\_string.h:32:10: fatal error: boost/functional/hash.hpp: The file or directory does not exist. + + Cause: The **boost** library is missing in **openGauss-third\_party/output**. + + Solution: Add the **boost** source code to **openGauss-third\_party/dependency**. However, the compilation process is complex. In addition, it is verified that the **boost** library in **openGauss-third\_party\_binarylibs** can be used normally. Therefore, the second solution is selected. + + cp -r openGauss-third\_party\_binarylibs/dependency/openeuler\_aarch64/boost openGauss-third\_party/output/dependency/kylin\_aarch64 + + - Cipherfn.cpp:1231:5: error: 'krb5\_set\_profile\_path' has not been declared in this scope. + + Cause: The **kerberos** library is missing in **openGauss-third\_party/output**. + + Solution: If the **kerberos** library in **openGauss-third\_party\_binarylibs/dependency** is unavailable, select the first solution. + + cd openGauss-third\_party/dependency/kerberos + + python build.py -m all -f krb5-1.17.1.tar.gz -t "comm|llt" + + Compilation error: + + /home/opengauss/3rd/gcc-7.3.0/lib/gcc/aarch64-unknown-linux-gnu/7.3.0/include-fixed/openssl/bn.h:138:11: fatal error: openssl/e\_os2.h: The file or directory does not exist. + + Solution: + + export C\_INCLUDE\_PATH=/home/opengauss/openGauss-third\_party/output/dependency/kylin\_aarch64/openssl/comm/include + + Run the **python build.py -m all -f krb5-1.17.1.tar.gz -t "comm|llt"** command. + + Continue to report the following error: + + make\[2\]: \*\*\* There is no rule to create the target libcom\_err\_gauss.exports required by binutils.versions. \(Several similar errors occur. 
Rectify the errors before continuing the compilation.\) + + Solution: + + cd /home /opengauss/openGauss-third\_party/dependency/kerberos/krb5-1.17.1/src/util/et/ + + cp –r libcom\_err.exports libcom\_err\_gauss.exports + + cd /home /opengauss/openGauss-third\_party/dependency/kerberos/krb5-1.17.1/src/lib/krb5 + + cp –r libkrb5.exports libkrb5\_gauss.exports + + cd /home /opengauss/openGauss-third\_party/dependency/kerberos/krb5-1.17.1/src/lib/crypto + + cp –r libk5crypto.exports libk5crypto\_gauss.exports + + cd /home /opengauss/openGauss-third\_party/dependency/kerberos/krb5-1.17.1/src/lib/rpc + + cp –r libgssrpc.exports libgssrpc\_gauss.exports + + cd /home /opengauss/openGauss-third\_party/dependency/kerberos/krb5-1.17.1/src/lib/gssapi + + cp –r libgssapi\_krb5.exports libgssapi\_krb5\_gauss.exports + + Run the **python build.py -m all -f krb5-1.17.1.tar.gz -t "comm|llt"** command. + + Continue to report the following error: + + openssl.so: In the 'unmarshal\_w' function: + + openssl.c:\(.text+0x330\): undefined reference to'BN\_set\_flags' + + openssl.so: In the 'ossl\_hash' function: + + openssl.c:\(.text+0x8b8\): undefined reference to 'EVP\_MD\_CTX\_new' + + openssl.c:\(.text+0x9ac\): undefined reference to 'EVP\_MD\_CTX\_free' + + Solution: + + cp /home/opengauss/openGauss-third\_party\_binarylibs/dependency/kylin\_aarch64/openssl/comm/lib/libcrypto.so /home/opengauss/openGauss-third\_party/dependency/kerberos/krb5-1.17.1/src/lib + + cp /home/opengauss/openGauss-third\_party\_binarylibs/dependency/kylin\_aarch64/openssl/comm/lib/libssl.so /home/opengauss/openGauss-third\_party/dependency/kerberos/krb5-1.17.1/src/lib + + Run the **python build.py -m all -f krb5-1.17.1.tar.gz -t "comm|llt"** command. + + Note: After a problem is solved, a message similar to the following is displayed: + + The next patch would create the file src/lib/crypto/libk5crypto\_gauss.exports,which already exists! Assume -R? \[n\] + + If you enter **y**, the system automatically deletes the **libcom\_err\_gauss.exports**, **ibkrb5\_gauss.exports**, **libk5crypto\_gauss.exports**, **libgssrpc\_gauss.exports** and **libgssapi\_krb5\_gauss.exports** files. Therefore, you need to copy the five files immediately after you enter **y**. + + Suggestion: Solve the preceding problems before continuing the compilation. + + - ../../../../src/include/gs\_policy/curl\_utils.h:17:10: fatal error: curl/curl.h: The file or directory does not exist. + + Cause: The **libcurl** library is missing in **openGauss-third\_party/output**. + + Solution: The **libcurl** source code exists in **openGauss-third\_party/dependency**, but the compilation process is complex. In addition, it is verified that the **libcurl** library in **openGauss-third\_party\_binarylibs** can be used normally. Therefore, the second solution is selected. + + cp -r openGauss-third\_party\_binarylibs/dependency/openeuler\_aarch64/libcurl openGauss-third\_party/output/dependency/kylin\_aarch64 + + - client\_logic.cpp:50:10: fatal error: MurmurHash3.h: The file or directory does not exist. + + costsize.cpp:94:10: fatal error: hll.h: The file or directory does not exist. + + Cause: The **postgresql-hll** library is missing in **openGauss-third\_party/output**. + + Solution: If the **postgresql-hll** source code exists in **openGauss-third\_party/dependency**, use the first solution. 
+ + cd openGauss-third\_party/dependency/postgresql-hll + + sh build.sh –m all + + After the compilation is complete, only the **lib** folder exists and the **include** folder is missing. Copy the **lib** folder from **openGauss-third\_party\_binarylibs**. + + cp -r openGauss-third\_party\_binarylibs/dependency/openeuler\_aarch64/postgresql-hll /comm/include openGauss-third\_party/output/dependency/kylin\_aarch64/postgresql-hll/comm + + - ../../../../src/include/access/dfs/dfs\_query.h:29:10: fatal error: orc/Exceptions.hh: The file or directory does not exist. + + Cause: The **liborc** library is missing in **openGauss-third\_party/output**. + + Solution: If the **liborc** source code does not exist in **openGauss-third\_party/dependency**, use the second solution. + + cp -r openGauss-third\_party\_binarylibs/dependency/openeuler\_aarch64/liborc openGauss-third\_party/output/dependency/kylin\_aarch64 + + - remote\_read.pb.h:10:10: fatal error: google/protobuf/port\_def.inc: The file or directory does not exist. + + Cause: The **protobuf** library is missing in **openGauss-third\_party/output**. + + Solution: If the **protobuf** source code exists in **openGauss-third\_party/dependency**, use the first solution. + + cd openGauss-third\_party/dependency/protobuf + + python build.py -m all -f protobuf-3.11.3.zip -t "comm|llt" + + - remote\_read.grpc.pb.h:10:10: fatal error: grpc/impl/codegen/port\_platform.h: The file or directory does not exist. + + Cause: The **grpc** library is missing in **openGauss-third\_party/output**. + + Solution: The **grpc** source code exists in **openGauss-third\_party/dependency**, but the compilation process is complex. In addition, it is verified that the **grpc** library in **openGauss-third\_party\_binarylibs** can be used properly. Therefore, the second solution is selected. + + cp -r openGauss-third\_party\_binarylibs/dependency/openeuler\_aarch64/grpc openGauss-third\_party/output/dependency/kylin\_aarch64 + + - parquet\_file\_reader.h:27:10: fatal error: parquet/api/reader.h: The file or directory does not exist. + + Cause: The **libparquet** library is missing in **openGauss-third\_party/output**. + + Solution: If the **libparquet** source code does not exist in **openGauss-third\_party/dependency**, use the second solution. + + cp -r openGauss-third\_party\_binarylibs/dependency/openeuler\_aarch64/libparquet openGauss-third\_party/output/dependency/kylin\_aarch64 + + - /usr/bin/ld: Cannot find **–lthrift**. + + Cause: The **libthrift** library is missing in **openGauss-third\_party/output**. + + Solution: If the **libthrift** source code exists in **openGauss-third\_party/dependency**, use the first solution. + + cd openGauss-third\_party/dependency/libthrift + + sh ./build.sh + + - /usr/bin/ld: Cannot find **-lsnappy**. + + Cause: The **snappy** library is missing in **openGauss-third\_party/output**. + + Solution: If the **snappy** source code exists in **openGauss-third\_party/dependency**, use the first solution. + + cd openGauss-third\_party/dependency/snappy + + sh ./build.sh + + - /usr/bin/ld: Cannot find **-lzstd**. + + Cause: The **zstd** library is missing in **openGauss-third\_party/output**. + + Solution: If the **zstd** source code exists in **openGauss-third\_party/dependency**, use the first solution. 
+ + cd openGauss-third\_party/dependency/zstd + + sh ./build.sh + + - /home/opengauss/openGauss-server/../openGauss-third\_party/output/dependency/kylin\_aarch64/libobs/comm/lib/libxml2.so: undefined reference to 'fcntl64@GLIBC\_2.28' + + Cause: During the compilation, the **libxml2.so** file in the **openGauss-third\_party/output/dependency/ kylin\_aarch64/libobs** directory is found, which lacks 'fcntl64@GLIBC\_2.28'. + + Solution: The **libxml2** source code exists in **openGauss-third\_party/dependency**. However, during the compilation, the **libxml2-2.9.9.tar.gz** package cannot be decompressed. In addition, **libobs** is copied from **openGauss-third\_party\_binarylibs**. Therefore, neither the first solution nor the second solution can solve this problem. + + Run the **find / -name "libxml2\*"** command. You can find the **libxm12.so\*** library in **/usr/lib64**. + + cp –r /usr/lib64/libxml2.so.2.9.1 openGauss-third\_party/output/dependency/kylin\_aarch64/libobs/comm/lib + + cd openGauss-third\_party/output/dependency/kylin\_aarch64/libobs/comm/lib + + ln –s libxml2.so.2.9.1 libxml2.so.2 + + ln –s libxml2.so.2.9.1 libxml2.so + + If the **libxml2.so\*** file already exists in **openGauss-third\_party/output/dependency/kylin\_aarch64/libobs/comm/lib**, back up the file. + + The following error information is displayed when you run the **make install** command: + + - ./zic: error while loading shared libraries: libssl.so.1.1: cannot open shared object file: No such file or directory + + Cause: **libssl.so.1.1** cannot be found. + + Solution: Run the **find / -name "libssl.so.1.1"** command. + + You can find it in **/home/opengauss/openGauss-third\_party/output/dependency/kylin\_aarch64/openssl/comm/lib**. Set the environment variable. + + export LD\_LIBRARY\_PATH=/home/opengauss/openGauss-third\_party/output/dependency/kylin\_aarch64/openssl/comm/lib:$LD\_LIBRARY\_PATH + + - cp: Failed to obtain the file status \(stat\) of "/home/opengauss/openGauss-server/../openGauss-third\_party/output/buildtools/kylin\_aarch64/gcc7.3/gcc/lib64/libstdc++.so.6": The file or directory does not exist. + + Cause: The **gcc** folder is missing in **openGauss-third\_party/output/buildtools**. + + Solution: Copy the compiled **gcc** folder to the directory. + + cd openGauss-third\_party/output/buildtools + + mkdir -p kylin\_aarch64/gcc7.3 + + cd kylin\_aarch64/gcc7.3 + + cp –r /home/opengauss/3rd/gcc-7.3.0 . + + mv gcc-7.3.0 gcc + + - cp: Failed to obtain the file status \(stat\) of "/home/opengauss/openGauss-server/../openGauss-third\_party/output/dependency/kylin\_aarch64/pljava/comm/lib/\*": The file or directory does not exist. + + Cause: The **pljava** library is missing in **openGauss-third\_party/output**. + + Solution: The **pljava** source code exists in **openGauss-third\_party/dependency**, but the compilation process is complex. In addition, it is verified that the **pljava** library in **openGauss-third\_party\_binarylibs** can be used properly. Therefore, the second solution is selected. 
+ + cp -r openGauss-third\_party\_binarylibs/dependency/openeuler\_aarch64/pljava openGauss-third\_party/output/dependency/kylin\_aarch64 + + + diff --git a/content/en/post/2022/Core-Technologies-of-openGauss-Database-(I).md b/content/en/post/2022/Core-Technologies-of-openGauss-Database-(I).md new file mode 100644 index 0000000000000000000000000000000000000000..fbeb69b4dabeb1b529eef3d72060ec937b90df70 --- /dev/null +++ b/content/en/post/2022/Core-Technologies-of-openGauss-Database-(I).md @@ -0,0 +1,244 @@ ++++ + +title = "Core Technologies of openGauss Database I" + +date = "2020-07-23" + +tags = [ "Core Technologies of openGauss Database (I)"] + +archives = "2020-07" + +author = "Shujie Zhang" + +summary = "Core Technologies of openGauss Database (I)" + +img = "/en/post/2022/title/img5.png" + +times = "12:30" + ++++ + +# Core Technologies of openGauss Database \(I\) + +## Overview + +An SQL engine is one of the important subsystems of the database. It receives SQL statements sent by applications and directs executors to execute execution plans. As an important and complex module in an SQL engine, the optimizer is regarded as the "brain" of a database. An execution plan generated by the optimizer directly determines the database performance. The following describes the modules of the SQL engine. + +## **01** SQL Engine Overview + +An SQL engine is an important part of a database system. It is responsible for generating efficient execution plans based on the SQL statements entered by applications in the current load scenario. It plays an important role in efficient SQL execution. The following figure shows the SQL execution process in the SQL engine. + +SQL execution process + +![](../figures/61.png) + +As shown in the preceding figure, the SQL statement needs to be parsed to generate a logical execution plan, and the physical execution plan needs to be generated through query optimization. Then, the physical execution plan is transferred to the query execution engine for executing the physical operators. + +## **02** SQL Parsing + +The compilation of SQL statements in a DBMS follows the routine process of compiler implementation and requires lexical analysis, syntax analysis, and semantic analysis. + +The SQL standard determines the keywords and syntax rules of the SQL language. During lexical analysis, the SQL parser divides an SQL statement into independent atomic units according to keywords and interval information, and each unit is presented as a word. For example: + +◾ Lexical analysis: Identify keywords, identifiers, operators, and terminators supported by the system from query statements. Each word determines its own part-of-speech \(POS\). + +◾ Syntax analysis: Define syntax rules based on SQL language standards, and use words generated during lexical analysis to match syntax rules. If an SQL statement can match a syntax rule, an abstract syntax tree \(AST\) is generated. + +◾ Semantic analysis: Check the validity of the AST, check whether the tables, columns, functions, and expressions in the syntax tree have corresponding metadata, and convert the AST into a logical execution plan \(relational algebra expression\). + + ``` + SELECT w_name FROM warehouse WHERE w_no = 1; + ``` + +The following table lists atomic units such as keywords, identifiers, operators, and constants.

| POS | Content |
| --- | --- |
| Keywords | SELECT, FROM, WHERE |
| Identifiers | w\_name, warehouse, w\_no |
| Operators | = |
| Constants | 1 |
+ + During syntax analysis, words obtained through lexical analysis are matched with syntax rules, and an AST is generated. Each word is displayed as a leaf node of the syntax tree, as shown in the following figure. + + Abstract syntax tree + + ![](../figures/zh-cn_image_0000001251458611.jpg) + + The AST only guarantees that the SQL statement complies with the SQL standard specifications; the validity of the internal meaning of the statement still needs to be checked. + + ◾ Check the use of relations: A relation in the FROM clause must be a table or view in the schema corresponding to the query. + + ◾ Check and parse the use of attributes: Each attribute in the SELECT statement or WHERE clause must be an attribute of a relation or view in the FROM clause. + + ◾ Check data types: The data types of all attributes must match. + + This validity check is, in essence, also a semantic binding process. Through the semantic analysis check, the AST is converted into a logical execution plan. The logical execution plan can be represented by a relational algebra expression, as shown in the following figure. + + Relational algebra expression + + ![](../figures/zh-cn_image_0000001207138590.jpg) + + ## **03** Query Optimization + + Based on different optimization methods, the optimization technologies of the optimizer can be classified into the following types: + + - Rule-based optimization \(RBO\): optimizes SQL statements based on predefined heuristic rules. - Cost-based query optimization \(CBO\): estimates the cost of each candidate execution path corresponding to the SQL statement and selects the path with the lowest cost as the final execution plan. - AI-based optimization \(ABO\): collects feature information of execution plans, obtains experience information by using a machine learning model, and then optimizes the execution plan to obtain an optimal execution plan. + + In recent years, AI technologies, especially in the deep learning field, have developed rapidly. ABO has great advantages in modeling efficiency, estimation accuracy, and adaptability, and it is expected to break the restrictions of RBO and CBO, which are based on static models. By continuously learning from historical experience, the pattern of the target scenario is abstracted into a dynamic model, which adapts to the user's actual scenario. openGauss uses the CBO technology and is actively exploring ABO. + + - **3.1 Query Rewriting** + + Query rewriting converts the SQL statements entered by users into more efficient equivalent SQL statements. It has two basic principles. + + Equivalence: The output of the original statement is the same as that of the rewritten statement. + + Efficiency: The rewritten statement is more efficient in execution time and resource usage than the original statement. + + - **3.2 Common Query Rewriting Technologies** + + Key query rewriting technologies of openGauss: constant expression simplification, subquery optimization, selection pushdown, and equivalence inference. + + - Simplification of Constant Expressions + + A constant expression is an expression whose calculation result is a constant in the SQL statement entered by a user. Constant expressions are classified into arithmetic expressions, logical operation expressions, and function expressions. Query rewriting can pre-calculate constant expressions to improve efficiency.
+ + **Example 1:** This statement is a typical arithmetic expression query rewriting. After the rewriting, the 1+1 calculation does not need to be repeated for each data record during execution. + + ``` + SELECT * FROM t1 WHERE c1 = 1+1; + SELECT * FROM t1 WHERE c1 = 2; + ``` + + **Example 2:** This statement is a typical logical operation expression. After rewriting, the condition is always false, and no result is returned. This avoids the execution of the entire statement. + + ``` + SELECT * FROM t1 WHERE 1=0 AND a=1; + SELECT * FROM t1 WHERE false; + ``` + + **Example 3:** This statement contains a function expression. The input parameters of the function are constants. After rewriting, the function result is calculated directly in the optimization phase, avoiding a function call for each data record during execution. + + ``` + SELECT * FROM t1 WHERE c1 = ADD(1,1); + SELECT * FROM t1 WHERE c1 = 2; + ``` + + - Subquery Optimization + + The subquery structure is clearer and complies with the reading and understanding habits of users. Therefore, the SQL statements entered by users usually contain a large number of subqueries. Subqueries are classified into correlated subqueries and non-correlated subqueries based on whether they can be solved independently. + + Correlated subquery: A correlated subquery contains conditions that depend on its parent query. For example: + + ``` + SELECT * FROM t1 WHERE EXISTS (SELECT t2.c1 FROM t2 WHERE t1.c1=t2.c1); + ``` + + In the statement, the subquery depends on the value of t1.c1 transferred by the parent query. + + Non-correlated subquery: A non-correlated subquery is a subquery that does not depend on a parent query and can be solved independently. For example: + + ``` + SELECT * FROM t1 WHERE EXISTS (SELECT t2.c1 FROM t2); + ``` + + In the statement, the subquery does not depend on the conditions of its parent query. + + A correlated subquery requires the parent query to produce a result first, which then drives the execution of the subquery. The execution efficiency of this nested-loop manner is relatively low. If the subquery can be promoted to the same level as the parent query, the table in the subquery can be directly joined with the table in the parent query. Because the join operation can be implemented in multiple ways, the optimizer can select the optimal one, improving query execution efficiency. In addition, the optimizer can use the Join Reorder optimization rule to exchange the join order of different tables to generate a better execution plan. + + **Example:** This statement is a typical subquery rewriting. After rewriting, hash join can be used to improve the query performance. + + ``` + SELECT * FROM t1 WHERE t1.c1 IN (SELECT t2.c1 FROM t2); + SELECT * FROM t1 Semi Join t2 ON t1.c1 = t2.c1; + ``` + + - Selection Pushdown and Equivalence Inference + + Selection pushdown can greatly reduce the calculation workload of upper-layer operators. If the selection condition contains equivalence operations, equivalence inference can exploit them to derive new selection conditions.
+ + For example, if two tables t1 and t2 each contain 100 rows of data \[1, 2, 3, ..., 100\], the query statement is as follows: + + ``` + SELECT t1.c1, t2.c1 FROM t1 JOIN t2 ON t1.c1=t2.c1 WHERE t1.c1=1; + ``` + + Comparison before and after query rewriting + + ![](../figures/62-0.png) + + - Outer Join Elimination + + The main difference between an outer join and an inner join is that NULL values need to be supplemented for tuples that cannot be joined. If a filter condition in the SQL statement meets the null value rejection condition \(that is, the supplemented NULL values are filtered out\), the outer join can be directly eliminated. + + **Example:** After an outer join is converted into an inner join, the optimizer can apply more optimization rules to improve execution efficiency. + + ``` + SELECT * FROM t1 FULL JOIN t2 ON t1.c1 = t2.c1 WHERE t1.c2 > 5 AND t2.c3 < 10; + SELECT * FROM t1 INNER JOIN t2 ON t1.c1 = t2.c1 WHERE t1.c2 > 5 AND t2.c3 < 10; + ``` + + - DISTINCT Elimination + + If the DISTINCT column has a primary key constraint, the column cannot be NULL and has no duplicate values. Therefore, the DISTINCT operation is not required, which reduces the calculation workload. + + **Example:** The primary key constraint on column c1 guarantees uniqueness, so the DISTINCT operation is not required. The statement is as follows: + + ``` + CREATE TABLE t1(c1 INT PRIMARY KEY, c2 INT); + SELECT DISTINCT(c1) FROM t1; + SELECT c1 FROM t1; + ``` + + - Expanding IN Predicate + + **Example:** The IN operator is changed to an equivalent filter condition so that indexes can be used to reduce the calculation workload. The statement is as follows: + + ``` + SELECT * FROM t1 WHERE c1 IN (10,20,30); + SELECT * FROM t1 WHERE c1=10 OR c1=20 OR c1=30; + ``` + + - Expanding View + + A view can logically simplify SQL writing and improve query usability. A view is virtual, so the view needs to be expanded during query rewriting. + + Example: You can rewrite the view query into a subquery and then simplify the subquery. The statement is as follows: + + ``` + CREATE VIEW v1 AS (SELECT * FROM t1,t2 WHERE t1.c1=t2.c2); + SELECT * FROM v1; + SELECT * FROM (SELECT * FROM t1,t2 WHERE t1.c1=t2.c2) as v1; + SELECT * FROM t1,t2 WHERE t1.c1=t2.c2; + ``` + + + diff --git a/content/en/post/2022/Core-Technologies-of-openGauss-Database-(II).md b/content/en/post/2022/Core-Technologies-of-openGauss-Database-(II).md new file mode 100644 index 0000000000000000000000000000000000000000..b061750b4d575a5531e7689092bd2ba01087ae9b --- /dev/null +++ b/content/en/post/2022/Core-Technologies-of-openGauss-Database-(II).md @@ -0,0 +1,196 @@ ++++ + +title = "Core Technologies of openGauss Database II" + +date = "2020-07-30" + +tags = [ "Core Technologies of openGauss Database (II)"] + +archives = "2020-07" + +author = "Shujie Zhang" + +summary = "Core Technologies of openGauss Database (II)" + +img = "/en/post/2022/title/img6.png" + +times = "12:30" + ++++ + +# Core Technologies of openGauss Database \(II\) + +## Overview + +Previously, we introduced the principles of SQL parsing and query rewriting in query optimization. Now, we will introduce the technical principles of path search and cost estimation. + +## Path Search + +The core problem of the optimizer is to obtain the optimal solution for an SQL statement. In this process, the solution space corresponding to the SQL statement needs to be enumerated, that is, different candidate execution paths need to be enumerated.
These execution paths are equivalent to each other, but their execution efficiency differs. The execution costs of the paths in the solution space are calculated, and finally an optimal execution path is obtained. Based on the method used to search for candidate execution paths, the structure of the optimizer falls into the following modes: + + **Bottom-up Mode** + + ![](../figures/zh-cn_image_0000001251754525.gif) + + As shown in the preceding figure, in bottom-up mode, the logical execution plan is split. Table scan operators are created first, and join operators are then built on top of them until a physical execution plan is formed. In this process, there are multiple types of physical scan operators and physical join operators, so multiple physical execution paths are generated. The optimizer selects the execution plan with the lowest estimated cost and transfers it to the executor for execution. + + **Top-down Mode** + + ![](../figures/zh-cn_image_0000001251954519.gif) + + As shown in the preceding figure, this mode uses the object-oriented idea to objectify the core functions of the optimizer and generates a logical plan after lexical analysis, syntax analysis, and semantic analysis. Based on the logical plan, object-based optimization rules are applied to generate multiple candidate logical plans. The logical plans are traversed top-down, and an optimal execution path is obtained by combining dynamic programming, cost estimation, and branch-and-bound techniques. + + - Random Search Mode + + Regardless of the bottom-up or top-down mode, the enumeration time becomes too long when a large number of tables are joined. When there are many tables, some optimizers therefore search for paths through random enumeration, attempting to obtain a suboptimal execution plan in a random solution space. + + Currently, the optimizers of databases such as MySQL and PostgreSQL use the bottom-up mode, and the optimizers of SQL Server and the open-source Calcite and ORCA use the top-down mode. Calcite is widely used in other open-source projects, such as Apache Storm, Apache Flink, Apache Kylin, Apache Drill, and SQL-Gremlin, due to its good scalability. openGauss uses a combination of the bottom-up mode and the random search mode. + + Regardless of the top-down or bottom-up mode, a search process is also a process of transforming a logical execution plan into a physical execution plan. For example, there may be different scan operators for each table, and a logical join operator may be converted into several different physical join operators. The following describes the specific physical operators. + + - Single-table Scan Path Search + + openGauss uses the bottom-up path search method, so path generation always starts from the single-table access paths. There are two types of single-table access paths: + + ◾ Full table scan: Data in a table is accessed row by row. + + ◾ Index scan: Indexes are used to access data in tables. Generally, indexes are used together with predicates. + + The optimizer first estimates the cost of different scan paths based on the data volume, filter conditions, and available indexes of the table, together with the cost model.
For example, if **CREATE TABLE t1\(c1 int\)** is defined for a table, the data in the table is consecutive integers ranging from 1 to 100000000, and there is a B+ tree index on the c1 column, **SELECT \* FROM t1 WHERE c1=1;** can obtain the data by reading one index page and one table page. In contrast, a full table scan needs to read 100 million data records to obtain the same result. In this case, the index scan path wins. + + However, an index scan is not superior to a full table scan in all cases. Their relative merits depend on how much data the predicate filters out. Generally, a database management system uses a B+ tree to create an index. If a predicate matches a large fraction of the table, the B+ tree index causes a large number of random I/Os, which reduces the access efficiency of the index scan operator. For example, for the **SELECT \* FROM t1 WHERE c1\>0** statement, an index scan needs to access all data in the index and all data in the table, causing a large number of random I/Os. A full table scan, by contrast, only needs to access all data in the table sequentially, so its cost is lower. + + - Multi-table Join Path Search + + The difficulty in generating multi-table paths lies in how to enumerate all table join orders and join algorithms. Assume that two tables t1 and t2 are joined. According to the commutative law in relational algebra, the join order can be t1 x t2 or t2 x t1, and the physical join operator can be hash join, nested loop join, or merge join. In this way, there are six paths available for selection, and this number increases exponentially as the number of tables increases. Therefore, an efficient search algorithm is very important. + + openGauss usually uses the bottom-up path search mode. It first generates the scan path of each table. These scan paths are at the bottom layer \(first layer\) of the execution plan. At the second layer, the optimal path for joining two tables is considered, that is, the possibility of joining every two tables is calculated through enumeration. At the third layer, the optimal path of the three-table join is considered, that is, the possibility of the three-table join is calculated through enumeration. This continues until the top layer, where the globally optimal execution plan is generated. Assume that the JOIN operation is performed on four tables. The join path generation process is as follows: + + ◾ Optimal path of a single table: The optimal paths of the \{1\}, \{2\}, \{3\}, and \{4\} tables are generated in sequence. + + ◾ Optimal path of two tables: The optimal paths of the \{1 2\}, \{1 3\}, \{1 4\}, \{2 3\}, \{2 4\}, and \{3 4\} table sets are generated in sequence. + + ◾ Optimal path of three tables: The optimal paths of the \{1 2 3\}, \{1 2 4\}, \{1 3 4\}, and \{2 3 4\} table sets are generated in sequence. + + ◾ Optimal path of four tables: The optimal path of \{1 2 3 4\} is the final path. + + The core of the multi-table path problem is the join order, which is an NP-hard problem. To find an optimal path for a join of multiple relations, a commonly used algorithm is cost-based dynamic programming. As the number of joined tables increases, the expansion of the search space degrades the path selection efficiency of the optimizer. Cost-based genetic algorithms and other random search algorithms can be used to solve this problem.
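The join order and physical join operators that the optimizer finally settles on can be inspected with EXPLAIN. A minimal sketch follows \(hypothetical tables; the printed plan depends on the available statistics\):

```
CREATE TABLE t1(c1 INT);
CREATE TABLE t2(c1 INT);
CREATE TABLE t3(c1 INT);
-- The plan tree shows which pair of tables is joined first and whether
-- a hash join, nested loop join, or merge join was chosen at each step.
EXPLAIN SELECT * FROM t1, t2, t3 WHERE t1.c1 = t2.c1 AND t2.c1 = t3.c1;
```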
+ + In addition, to prevent the search space from becoming too large, the following pruning policies can be used: + + ◾ Consider the paths with join conditions first and delay Cartesian products as much as possible. + + ◾ During the search, perform lower-bound pruning on execution paths based on cost estimation, and abandon execution paths with relatively high costs. + + ◾ Retain execution paths with special physical attributes. For example, the results of some execution paths are ordered, and these paths may avoid re-sorting in a subsequent optimization process. + + Optimization Based on Physical Attributes + + A relation can be regarded as a set or a bag \(multiset\), and neither structure imposes any particular order or distribution on its data. To improve computing performance, data structures or algorithms are used to preprocess the data distribution. These preprocessing methods either exploit the physical attributes \(such as order\) of a physical execution path or create physical attributes for it, which often play a significant role in query optimization. + + ## B+ Tree + + The simplest way to query data in a table is to traverse all the data in the table. However, as the data volume increases, so does the cost of traversal, and the B+ tree becomes a powerful weapon for efficient data query. + + In 1970, R. Bayer and E. McCreight proposed a balanced tree for external search, namely the B-tree. The B-tree creates a directory over the table data, similar to the table of contents of a book, so the target data can be located quickly. + + As a data structure, the B+ tree is not directly related to the query optimizer. However, the database management system usually builds indexes on B+ trees, and during query optimization, query efficiency can be improved through index scans and bitmap scans, which rely on B+ tree indexes. + + ## Hash Table + + A hash table is also a method of preprocessing data. The openGauss database uses hash tables in multiple places, or borrows the idea of hash tables, to improve query efficiency. + + ◾ A hash table can be used to implement the grouping operation because it classifies data. + + ◾ A hash index can be created by using the hash algorithm. This kind of index is applicable to equality constraints. + + ◾ Hash join is an important physical join path. + + ## Sorting + + Sorting is also a method of preprocessing data. It is mainly used in the following aspects: + + ◾ Sorting can be used to group data because equal values are adjacent after sorting. + + ◾ The B-tree index needs to be created through sorting. + + ◾ The physical join path merge join needs to be implemented through sorting. + + ◾ The ORDER BY operation in the SQL language needs to be implemented through sorting. + + ◾ When the data volume is small, all data can be loaded into memory and internal sorting can be used. When the data volume is large, external sorting is required. Therefore, the sorting cost needs to be determined based on the data volume and the available memory size. + + ## Materialization + + Materialization saves the intermediate result of a scan or join operation. If the intermediate result is large, it may need to be written to external storage, which incurs I/O costs, so saving the result can be expensive.
+ + The advantage of materialization is that an intermediate result computed once can be reused many times when the inner table is read repeatedly. For example, suppose tables t1 and t2 are joined, t2 is used as the inner table, and after the scan only 5% of its data remains as the intermediate result while the other 95% is filtered out. You can then materialize this 5% of the data so that each tuple in t1 is joined only against it. Whether an intermediate result is materialized depends on the cost estimation model. Generally, when a physical path is generated through physical optimization, costs are estimated for both the materialized and non-materialized paths, and the path with the lower cost is selected. + + ## Cost Estimation + + The optimizer enumerates candidate execution paths based on the generated logical execution plan. To ensure efficient execution, the optimizer needs to select the path with the lowest cost and highest execution efficiency from these paths. How to evaluate the execution cost of these plan paths therefore becomes critical, and cost estimation completes this task. Based on the collected data statistics, cost estimation models are established for the different plan paths to evaluate their costs and provide input for path search. + + - Statistics + + Statistics are a cornerstone for estimating the cost of plan paths. Their accuracy plays an important role in the row count estimation and cost estimation of the cost estimation model and directly affects the quality of the query plan. openGauss allows you to use the ANALYZE statement to collect statistics on the entire database, a single table, a column, and multiple correlated columns. + + Statistics directly affect the accuracy of cost estimation, so the frequency of collecting statistics is a sensitive parameter. If statistics are collected too infrequently, they become stale; conversely, if they are collected too frequently, query performance is indirectly affected. + + Generally, the database management system provides a method for manually collecting statistics. openGauss supports statistics collection by running the ANALYZE command. The database management system can also automatically determine whether to re-collect statistics based on data changes; for example, when the number of updates on a table exceeds a threshold, the statistics of the table should be updated automatically. During query optimization, if the optimizer finds that the statistics are severely stale, it can also initiate statistics collection. + + Table-level statistics include the number of tuples \(N\) and the number of pages occupied by the table \(B\). Column-level statistics include the attribute width \(W\), maximum value \(Max\), minimum value \(Min\), and most common values \(MCV\). Generally, a histogram \(H\) is created for each column to present the distribution of the column data by range, which facilitates the calculation of the selectivity. + + Histograms, such as the height-balanced histogram, frequency histogram, and multi-dimensional histogram, present data distribution from different angles. openGauss uses height-balanced histograms, in which each bucket covers the same frequency.
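The ANALYZE statement mentioned above can be issued at several granularities. A minimal sketch \(table and column names are hypothetical\):

```
-- Collect statistics for the entire current database
ANALYZE;
-- Collect statistics for a single table
ANALYZE t1;
-- Collect statistics for one column of a table
ANALYZE t1 (c1);
```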
+ + - Selectivity + + Based on the statistics, the cost estimation system knows how many rows of data exist in a table, how many data pages the table uses, and the frequency of a value. From this, it can calculate how much data a constraint \(for example, the WHERE condition in an SQL statement\) filters out. The ratio of the number of tuples remaining after the constraint is applied to the number of tuples before it is applied is called the selectivity: Selectivity = Number of tuples after the constraint is applied / Number of tuples before the constraint is applied. A constraint may be an independent expression, or a conjunction or disjunction of multiple expressions. For an independent expression, the selectivity is calculated from the statistics; for conjunctions and disjunctions, the selectivity is obtained through probability calculation. + + Assuming the individual predicates are independent: conjunction: P\(A AND B\) = P\(A\) × P\(B\); disjunction: P\(A OR B\) = P\(A\) + P\(B\) - P\(A\) × P\(B\) + + Assume that the selectivity needs to be calculated for the predicates A \> 5 and B < 3. Because the statistics of columns A and B are available, the proportion of data whose value is greater than 5 in column A can be calculated from the statistics, and likewise for column B. Assume that the selectivity of A \> 5 is 0.3 and that of B < 3 is 0.5. The selectivity of the conjunction A \> 5 AND B < 3 is: + + P\(A\>5 AND B<3\) = P\(A\>5\) × P\(B<3\) = 0.3 × 0.5 = 0.15 + + The selectivity of the disjunction A \> 5 OR B < 3 is: + + P\(A\>5 OR B<3\) + + = P\(A\>5\) + P\(B<3\) - P\(A\>5\)×P\(B<3\) + + = 0.3 + 0.5 - 0.3×0.5 + + = 0.65 + + Due to the diversity of constraints, the calculation of the selectivity usually encounters some difficulties. For example, the calculation usually assumes that multiple expressions are "independent" of each other, but in practice a functional dependency may exist between different columns. In such cases, the selectivity may be inaccurate. + + - Cost Estimation Method + + The optimizer of openGauss is a cost-based optimizer. For each SQL statement, openGauss generates multiple candidate plans, calculates an execution cost for each plan, and selects the plan with the lowest cost. After a constraint determines the selectivity, the number of rows to be processed by each plan path can be determined, and the number of pages to be processed can be calculated from the number of rows. When a plan path processes a page, an I/O cost is generated. When a plan path processes a tuple \(for example, evaluates an expression on the tuple\), a CPU cost is generated. Therefore, the overall cost of a plan may be expressed as follows: + + Total cost = I/O cost + CPU cost + + openGauss defines the cost of sequentially scanning one page as 1 and normalizes the costs of all other operations against this unit. For example, if the cost of scanning a random page is defined as 4, scanning a random page is considered four times as expensive as scanning a page sequentially. Similarly, if the cost of processing a tuple by the CPU is 0.01, the CPU cost of processing one tuple is 1% of the cost of sequentially scanning a page. From another perspective, openGauss divides costs into startup costs and execution costs.
+ + Total cost = Startup cost + Execution cost + + Startup cost: indicates the cost required from the start of statement execution to the time when the operator outputs its first tuple. Some operators have low startup costs. For example, a base-table scan operator can output tuples as soon as it starts reading data pages, so its startup cost is 0. Other operators have relatively high startup costs. For example, the sorting operator needs to read all the output of the lower-layer operators and sort these tuples before outputting the first tuple, so its startup cost is relatively high. + + Execution cost: indicates the cost required from the output of the first tuple to the end of the query. This cost may in turn include a CPU cost, an I/O cost, and a communication cost. The execution cost depends on the amount of data the operator needs to process and on the function the operator performs: the larger the amount of data and the heavier the operator's task, the higher the execution cost. + + Total cost: Cost estimation is a bottom-up process. The cost of the scan operators is estimated first, and then the costs of the join operators and the non-SPJ operators are estimated based on the costs of the scan operators. + + - Notes + + 1. Selection-Projection-Join \(SPJ\): The three basic operators in relational algebra are SELECTION, PROJECTION, and JOIN. + + SELECTION: In **SELECT XXX FROM T WHERE XX = 5**, the WHERE filter condition indicates a selection operation. + + PROJECTION: In **SELECT c FROM t**, selecting column c indicates a projection operation. + + JOIN: In **SELECT xx FROM t1, t2 WHERE t1.c = t2.c**, tables t1 and t2 are joined. + + 2. Non-SPJ: Operators other than the SPJ operators, such as SORT, AGGREGATION, and UNION/EXCEPT. + + ## Summary + + This document describes the basic functions and principles of each module of the SQL engine in terms of SQL parsing, query rewriting, cost estimation, and path search. You can further understand the optimizer's optimization technologies through the analysis of specific SQL optimization cases. + diff --git a/content/en/post/2022/DB4AI-Enabling-Database-Native-AI-Computing-and-Facilitating-Service-Success-in-the-Data-Lake-Sce.md b/content/en/post/2022/DB4AI-Enabling-Database-Native-AI-Computing-and-Facilitating-Service-Success-in-the-Data-Lake-Sce.md new file mode 100644 index 0000000000000000000000000000000000000000..cf47b717b520e3bb0232231cab3ac21d0f878ecd --- /dev/null +++ b/content/en/post/2022/DB4AI-Enabling-Database-Native-AI-Computing-and-Facilitating-Service-Success-in-the-Data-Lake-Sce.md @@ -0,0 +1,162 @@ ++++ + +title = "DB4AI: Enabling Database Native AI Computing and Facilitating Service Success in the Data Lake Scenario" + +date = "2021-09-27" + +tags = [ "DB4AI: Enabling Database Native AI Computing and Facilitating Service Success in the Data Lake Scenario"] + +archives = "2021-09" + +author = "Wen Nie" + +summary = "DB4AI" + +img = "/en/post/2022/title/img6.png" + +times = "12:30" + ++++ + +# DB4AI: Enabling Database Native AI Computing and Facilitating Service Success in the Data Lake Scenario + +DB4AI tries to embed AI computing capabilities into databases to help users get rid of tedious data migration, export, and management. Using a database to store massive data sounds natural.
However, when using a traditional database, users who are algorithm engineers or AI beginners have to export data from a dataset and then import it into the AI computing framework for their computing tasks. Data migration is troublesome and costly. The most direct method is to write the exported data to a file; before an AI computing task is executed, the program reads data from the file and feeds it to the model for training. + + Here are some obvious challenges: + + - 1. Data security: + + Data carriers separated from the database have no protection measures such as permission restriction and privacy protection, so the risk of data deletion and tampering greatly increases. In some fields, such as finance and healthcare, data involves sensitive information. During data migration, data needs to be masked to degrade sensitive information. + + - 2. Data migration cost: + + In AI computing, analysts and algorithm engineers need to focus on model design and model verification instead of spending effort on data migration and sharing. However, the time and computing costs of exporting massive amounts of data are inevitable. + + - 3. Data version management: + + Data is added, deleted, modified, and queried in both AP and TP databases. For online learning, how do we capture new data in real time? For offline learning, how do we detect data distribution changes in a dataset in time? To cope with these two questions, traditional processing methods require more data management and control. When data drift occurs, users need to update the dataset to maintain data validity, which increases the cost. In addition, users need to store datasets of different versions produced by different data processing methods and filter criteria, which further increases storage costs. + + The preceding problems do not exist in databases with DB4AI. The database is equipped with an AI framework, so all computing processes are completed inside the database. By eliminating the data migration process, DB4AI avoids the preceding problems at the root. + + The following describes how to use the openGauss native AI framework: + + - 1. DB4AI-snapshot: data version control. + + DB4AI-Snapshots is a DB4AI feature used to manage dataset versions. Datasets are fixed by using snapshots, in either the materialized snapshot \(MSS\) mode, which uses the materialization algorithm to store the data entities of the original datasets, or the computed snapshot \(CSS\) mode, which uses the relative calculation algorithm to store incremental data information. Compared with the MSS mode, the CSS mode greatly reduces the space usage. + + This function involves the CREATE, PREPARE, SAMPLE, PUBLISH, and PURGE operations. Examples of some operations are as follows: + + - \(1\) Create a snapshot. + + ``` + openGauss=# create snapshot s1@1.0 comment is 'first version' as select * from t1; + schema | name + --------+-------- + public | s1@1.0 + (1 row) + ``` + + - \(2\) Sample a snapshot. + + Here 0.3 is used as the sampling rate, sampling is performed on the basis of the snapshot s1@1.0, and the suffix '\_sample1' is appended to the generated sub-snapshot. + + ``` + openGauss=# SAMPLE SNAPSHOT s1@1.0 STRATIFY BY id AS _sample1 AT RATIO .3; + schema | name + --------+---------------- + public | s1_sample1@1.0 + (1 row) + ``` + + This function can be used to generate a test set and a training set for AI computing. For example, in the following syntax, sampling is performed in the ratio of 2:8.
+ + ``` + openGauss=# SAMPLE SNAPSHOT s1@1.0 STRATIFY BY id AS _test AT RATIO .2, AS _train AT RATIO .8; + schema | name + --------+-------------- + public | s1_test@1.0 + public | s1_train@1.0 + (2 rows) + ``` + + - \(3\) Publish a snapshot. + + In the snapshot feature, only snapshots in the published state can be involved in AI computing. If the data in the current snapshot is ready, you can publish the snapshot to change its state. You can view the state of the snapshot in the **db4ai.snapshot** system catalog. + + ``` + openGauss=# select * from db4ai.snapshot; + id | parent_id | matrix_id | root_id | schema | name | owner | commands | comment | published | archived | c + reated | row_count + ----+-----------+-----------+---------+--------+----------------+-------+-----------------------------+---------------+-----------+----------+----------- + -----------------+----------- + 0 | | | 0 | public | s1@1.0 | owner | {"select *","from t1",NULL} | first version | t | f | 2021-09-16 + 17:15:52.460933 | 5 + 1 | 0 | | 0 | public | s1_sample1@1.0 | owner | {"SAMPLE _sample1 .3 {id}"} | | f | f | 2021-09-16 + 17:19:12.832676 | 1 + 2 | 0 | | 0 | public | s1_test@1.0 | owner | {"SAMPLE _test .2 {id}"} | | f | f | 2021-09-16 + 17:20:46.778663 | 1 + 3 | 0 | | 0 | public | s1_train@1.0 | owner | {"SAMPLE _train .8 {id}"} | | f | f | 2021-09-16 + 17:20:46.833184 | 3 + (4 rows) + ``` + + - \(4\) Purge a snapshot. + + ``` + openGauss=# PURGE SNAPSHOT s1_sample1@1.0; + schema | name + --------+---------------- + public | s1_sample1@1.0 + (1 row) + ``` + +- 2. DB4AI native AI syntax: used for model training and inference + + This function uses query syntax to complete AI computing tasks. AI operators have been added to the openGauss database and are inserted into the execution plan, fully utilizing the computing capability of the database to complete model training and inference tasks. + + Currently, the DB4AI engine in openGauss supports four algorithms: logistic regression, linear regression, support vector machine classification, and K-means clustering. + + The CREATE MODEL and PREDICT BY syntaxes are used for model training and inference. + + CREATE MODEL: used for model training. After a model training task is complete, this syntax saves the trained model information to the **gs\_model\_warehouse** system catalog in the database, where you can view the model information at any time. The system catalog stores not only the model description information but also the model training information. + + +PREDICT BY: used for inference. The database searches the system catalog for the model based on the model name and loads it into memory. The database feeds the test data into the in-memory model for inference and returns the result as a temporary result set. + +The following is a simple example: + +- \(1\) Run **CREATE MODEL** for training. + + The K-means clustering algorithm is used as an example. + + ![](../figures/zh-cn_image_0000001251917015.jpg) + + The training syntax consists of four parts: model name, algorithm type, training set, and hyperparameter settings. + + The training set supports the input of tables, views, and subqueries. You only need to run one query statement to set the model hyperparameters and specify the training set. The subsequent steps, including data input and model saving, are automatically completed by the database.
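As a textual counterpart to the screenshot above, the training statement has roughly the following shape \(a sketch only: the model name, table name, feature column, and hyperparameter value are hypothetical\):

```
-- Model name: customer_km; algorithm: kmeans; training set: customers;
-- hyperparameter: num_centroids. The feature column is assumed to hold
-- coordinate data suitable for clustering.
CREATE MODEL customer_km USING kmeans
    FEATURES position
    FROM customers
    WITH num_centroids = 3;
```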
+ + When the training task is complete, the database prints a success message. + + The model has been written into the **gs\_model\_warehouse** system catalog. You can view the model information by querying the table. + + ![](../figures/zh-cn_image_0000001252197021.jpg) + +- \(2\) Run **PREDICT BY** for inference. + + Use the saved model to perform inference tasks. An example is provided as follows: + + ![](../figures/zh-cn_image_0000001207677032.jpg) + + In the PREDICT BY syntax, you only need to specify the model name, test set, and feature name to complete the inference task. + +- Summary and Prospect + + DB4AI has always been a popular topic in the database field. By making databases intelligent, you can lower the threshold and cost in the AI computing process, and further release the computing resources of the database. Big data and AI computing are good partners, so databases for big data storage should not be independent of this system. The effective combination of the two not only facilitates the AI computing process, but also increases the possibility of optimizing the database performance. + + The native AI framework of the open-source openGauss database is evolving, and there must be many shortcomings. However, the vision of "all things intelligent" inspires countless R&D engineers to move forward. + + When you pursue your goal, do not stop. There is a long way to go. + + diff --git a/content/en/post/2022/Discussion-on-openGauss-Memory-Management.md b/content/en/post/2022/Discussion-on-openGauss-Memory-Management.md new file mode 100644 index 0000000000000000000000000000000000000000..88c49eba38cae13ea6db21d17e3ef4af7d2394ee --- /dev/null +++ b/content/en/post/2022/Discussion-on-openGauss-Memory-Management.md @@ -0,0 +1,114 @@ ++++ + +title = "Discussion on openGauss Memory Management" + +date = "2021-09-13" + +tags = [ "Discussion on openGauss Memory Management"] + +archives = "2021-09" + +author = "Shifu Li" + +summary = "Discussion on openGauss Memory Management" + +img = "/en/post/2022/title/img4.png" + +times = "12:30" + ++++ + +# Discussion on openGauss Memory Management + +Recently, a friend from the technical exchange group of openGauss asked how to allocate memory during code development. This article provides a preliminary answer to this question. The memory management of openGauss has been extended and reconstructed in many aspects to adapt to the multi-thread architecture and better meet enterprise application requirements. The openGauss memory management has been optimized in the following aspects: + +- Introduced the **jemalloc** open-source library to replace **glibc** for memory allocation and release, reducing memory fragments. +- Introduced the logical memory management mechanism to control the memory usage of processes, preventing the OOM problem. +- Introduced multiple memory contexts, such as shared memory context, stack memory context, and aligned memory context, to meet code development requirements in different scenarios. +- Introduced the AddressSanitizer \(ASan\) open-source library, helping locate memory leakage and memory overwriting problems in the debug version. Various memory query views are provided, helping users observe memory usage and locate potential memory problems. + +Based on the preceding functions and features, the following describes how to use the memory during coding and how to quickly locate problems from the perspectives of developers and users. + +- **1. 
Precautions for openGauss Memory Management Development** + + For the memory allocation and release interfaces in openGauss, the data structure and algorithm used by the general memory context have not changed much; the new memory context types are implemented with new data structures. + + By default, a memory context is created with the AllocSetContextCreate function. If no memory context type is specified, which is the default, the STANDARD\_CONTEXT identifier is used and a general memory context is created. Such a memory context is used only within a single thread. When the thread exits or the job is reset, the memory context must be cleared to prevent memory accumulation. The root node of the memory contexts in a thread is TopMemoryContext \(that is, t\_thrd.top\_mem\_cxt in the code\). Allocating memory directly from TopMemoryContext is generally forbidden in the code. Instead, subnodes are created under the appropriate memory context node based on the memory scope; both the parent and child nodes are general memory contexts. + + Because openGauss has a multi-thread architecture, it usually uses shared memory to store key information that multiple threads access and update. When creating a memory context for this purpose, you need to specify the SHARED\_CONTEXT identifier and ensure that the parent node is a shared memory context. The root node of the shared memory contexts is ProcessMemory \(that is, g\_instance.instance\_context in the code\); by default, no memory is allocated from it. The memory that can be allocated from a shared memory context is generally limited because memory is mainly consumed during job execution, so developers need to bound the memory allocated from a shared memory context \(by limiting the number of members or using an eviction mechanism\). It is recommended that the size be less than or equal to 200 MB. Allocating or releasing memory in a shared memory context requires no extra lock; you can directly invoke palloc or pfree. However, based on the invoking logic, you need to determine whether subsequent operations on the returned pointer require lock protection. + + The implementation mechanism of the stack memory context is simple. Unlike the traditional memory context, it does not use the buddy algorithm to align sizes to a power of 2; only 8-byte alignment is required during memory allocation, which saves a large amount of memory space. The stack memory context applies to scenarios where only palloc is called to allocate memory and no pfree operation is required; when the memory context is no longer used, MemoryContextDelete or MemoryContextReset is performed once. For details, see how the hashjoin operator uses memory. The aligned memory context is used to align memory pages and applies to the ADIO scenario. It is seldom used in the current code. + + In addition to creating a memory context explicitly by specifying MemoryContextCreate, a memory context can also be created implicitly when the hash\_create function is used to create a hash table. Hash tables created by hash\_create are therefore classified into common hash tables \(used within a single thread\) and shared hash tables \(shared by the entire process\). When creating a shared hash table, you need to specify the **HASH\_SHRCTX** parameter, and the parent memory context specified by the parameter must be a shared memory context.
+ + The preceding describes the basic methods of creating and using the memory context. The requirements for allocating and releasing memory contexts are as follows: + + Memory contexts are classified into thread-level contexts \(such as TopMemoryContext\), session-level contexts \(such as MessageMemoryContext\), job-level contexts \(such as ExecutorState\), and operator-level contexts \(such as HashJoin\). Memory must not be allocated from a higher-level memory context during job execution. + + Do not frequently allocate and release the same memory context. Even for temporary memory contexts, ensure that each operator allocates and releases the memory context only once. + + Release unused memory and memory contexts in a timely manner. After an operator finishes execution, release the operator's memory context promptly. + + In principle, an operator other than the recognized memory-intensive operators \(hashjoin/hashagg/setop/material/windowsagg\) must not consume more than 10 MB of memory. If more memory is consumed, evaluation criteria must be provided. + + The total size of the shared memory contexts must be controlled. In principle, the memory usage must not exceed 200 MB; if it does, an evaluation is required. + + Set a global variable pointer to null after its memory is released, that is, invoke the pfree\_ext function to set the pointer to null. + + When array memory is allocated in one batch and the memory at an array subscript is accessed and written, apply an Assert check to the array subscript to prevent out-of-bounds access. + + - **2. Locating openGauss Memory Faults** + - 1 \> The error message "memory is temporarily unavailable" is displayed. + + Check whether the log contains "reaching the database memory limitation". If yes, the fault is caused by the logical memory management mechanism of the database, and you need to analyze the database views. Check whether the log contains "reaching the OS memory limitation". If yes, the fault is caused by a memory allocation failure in the operating system, and you need to check the parameter configuration of the operating system and the memory hardware. + + To analyze the logical memory protection of the database, check the following views: + + - Query the **pg\_total\_memory\_detail** view to check the memory usage of the internal modules of the database. When the value of **dynamic\_used\_memory** is greater than that of **max\_dynamic\_memory**, a message is displayed indicating that the memory is insufficient. If the value of **dynamic\_used\_memory** is smaller than that of **max\_dynamic\_memory** but the value of **dynamic\_peak\_memory** is greater than that of **max\_dynamic\_memory**, the memory was insufficient at some earlier point. If the value of **other\_used\_memory** is large, use the debug version to further locate the fault. The SQL statement used is **Select \* from pg\_total\_memory\_detail**. + + - If the value of **dynamic\_used\_shrctx** is large, query the **gs\_shared\_memory\_detail** view to check which memory context uses much memory. The SQL statement used is **Select \* from gs\_shared\_memory\_detail**. + + - If the value of **dynamic\_used\_shrctx** is not large, query the **gs\_session\_memory\_detail** view to check which memory context uses much memory. The SQL statement used is **Select \* from gs\_session\_memory\_detail order by totalsize desc limit 20;**
+ + - If any fault is found in the memory context but it is difficult to locate, use **memory\_tracking\_mode** in the debug version to further locate the file and line number. + + - If no fault is found in the memory context, check whether the number of threads is large. The possible cause is CacheMemoryContext. + + - In the debug version, run the **gdb** script to print the allocation information in the memory context. + + - 2 \> The RES of the database node is high or the node breaks down, and the message "Out of Memory" is displayed. + + Read the information in **/var/log/messages** to check which process causes the fault. Generally, the fault is caused by the GaussDB process. If the fault is caused by the GaussDB process memory, check whether the **max\_process\_memory** parameter is correctly configured. + + If the configuration is proper, check whether the memory usage of **Other** in the **pg\_total\_memory\_detail** view is too high. + + If the memory usage increases rapidly and is mainly used by memory contexts, you can use jemalloc profiling to quickly locate where the memory is allocated. + + High **Other** memory usage may be caused by memory allocated with malloc by a third-party component or libpq. In this case, use the ASan tool to further locate the fault. If the fault still cannot be located, disable parameters \(such as **ssl** and **llvm**\) one by one to narrow it down. + + - **3 Appendix** + - 1 \> Usage of jemalloc: + + In the debug version, run the following command to set the environment variable: + + In **export MALLOC\_CONF=prof:true,prof\_final:false,prof\_gdump:true,lg\_prof\_sample:20**, the trailing **20** indicates that a heap file is generated every 2^20 bytes \(1 MB\). The value can be changed; increasing it reduces the number of heap files but loses some memory allocation information. + + Run the **source** command to apply the environment variable and start the cluster. + + Use **jeprof** to process the heap files and generate PDF files. You can obtain the **jeprof** binary from the open-source third-party binary directory **binarylibs/**_$\{platForm\}_**/jemalloc/debug/bin**. To use it, you need to run the **yum install graphviz** command to install graphviz. + + To generate a PDF file, run the following command: + + Full: jeprof --show\_bytes --pdf gaussdb \*.heap \> out.pdf + + Incremental: jeprof --pdf gaussdb --base=start.heap end.heap \> out.pdf + + - 2 \> Usage of ASan: + + Check the operating system configuration: **ulimit -v** is set to **unlimited**, and **vm.overcommit\_memory** is not **0**. + + Stop the cluster and, in standalone deployment, add the following environment variable to the .bashrc file: **export ASAN\_OPTIONS=halt\_on\_error=0:alloc\_dealloc\_mismatch=0:log\_path=/tmp/memcheck/memcheck**. In the environment variable, **log\_path** specifies the error information output location: the directory is **/tmp/memcheck/**, and the file name prefix is **memcheck**.
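For quick reference, the diagnostic queries from Section 2 can be gathered into one first-pass check, sketched below \(assuming, as in the example above, that the context views expose a **totalsize** column\):

```
-- Overall memory usage by module; compare dynamic_used_memory
-- against max_dynamic_memory and check other_used_memory.
SELECT * FROM pg_total_memory_detail;

-- Largest shared memory contexts (when dynamic_used_shrctx is large)
SELECT * FROM gs_shared_memory_detail ORDER BY totalsize DESC LIMIT 20;

-- Largest session memory contexts (when dynamic_used_shrctx is small)
SELECT * FROM gs_session_memory_detail ORDER BY totalsize DESC LIMIT 20;
```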
diff --git a/content/en/post/2022/Dynamic-Data-Masking-of-openGauss.md b/content/en/post/2022/Dynamic-Data-Masking-of-openGauss.md new file mode 100644 index 0000000000000000000000000000000000000000..ca166a15e5fd00f2ad83f556374b9a8b0c4fce79 --- /dev/null +++ b/content/en/post/2022/Dynamic-Data-Masking-of-openGauss.md @@ -0,0 +1,156 @@ ++++ + +title = "Dynamic Data Masking of openGauss" + +date = "2021-03-24" + +tags = [ "Dynamic Data Masking of openGauss"] + +archives = "2021-03" + +author = "Meiting Xu" + +summary = "Dynamic Data Masking of openGauss" + +img = "/en/post/2022/title/img11.png" + +times = "12:30" + ++++ + +# Dynamic Data Masking of openGauss + +## 1 Background and Introduction + +- 1.1 Background of Data Masking + + With the rapid development and popularization of emerging technologies such as Internet big data and cloud services, new data application modes such as service cloudification, centralized data storage, and data sharing have become the trend of future development. Cloud databases and cloud computing services feature easy deployment, low cost, high efficiency, and high reliability, so more and more consumers tend to store their personal data in the cloud rather than on personal portable hard disks. However, as data application scenarios become more complex, the risk of personal privacy data leakage and theft is increasing. In recent years, several major database information leakage events have occurred, which poses higher requirements for database security. + + Data masking processes sensitive data by transforming, shielding, or otherwise obscuring it, aiming to protect private data and prevent data leakage and malicious snooping. When an enterprise or organization collects sensitive data, such as personal identity data, mobile phone numbers, and bank card numbers, and then exports the data \(in a non-production environment\) or directly queries it \(in a production environment\), the data must be masked according to privacy protection laws and regulations. + +- 1.2 Introduction to Data Masking + + Data masking is classified into static data masking and dynamic data masking. In static data masking, data is masked before being distributed: data in the production environment is generally copied to the test environment or development library, with the masked export replacing the original data, so that the masked data becomes the source data for test and development. Dynamic data masking is closely tied to the production environment and is mainly used in scenarios where production data is accessed directly. In dynamic data masking, sensitive data is masked in real time when it is accessed, and the consistency and validity of the source data are guaranteed. + + **Figure 1-1** Static and dynamic data masking + + ![](../figures/110.png) + + Dynamic data masking and static data masking are applicable to different scenarios. You can select one based on the application scenario. The latest openGauss officially supports dynamic data masking. The following sections describe the dynamic data masking mechanism of openGauss. + + Currently, mainstream dynamic data masking technology follows one of two approaches: result set parsing and statement rewriting. + + - Result set parsing: The statements sent to the database are not rewritten, and the data table structure needs to be obtained in advance.
After the database returns the result, the data to be masked in the set is identified based on the table structure and the result data records are modified one by one. + - Statement rewriting: The query statements that contain sensitive columns are rewritten. Outer nested functions are used to rewrite the sensitive columns involved in the query so that the database returns result sets that do not contain sensitive data when running query statements. + + In terms of performance, result set parsing requires that columns be parsed, rules be matched, and data be masked after the database returns the result set. Each row of data records in the result set needs to be modified one by one. Therefore, the masking time is linearly related to the result set capacity, and the overall performance loss is large. In contrast, if you choose statement rewriting, short query statements can be parsed and rewritten, and a masking function can be embedded outside sensitive columns in the statements. When executing commands, the database automatically executes the masking function to mask data, and then the returned result set is the masked data. In this method, only one query statement is rewritten and the result set is not parsed. As such, the performance loss can be greatly reduced. openGauss adopts the statement rewriting method. The performance loss of masking 100,000 pieces of sensitive data records is less than 5%. + + In addition, for complex commands, query columns generally contain a large number of columns with the same name, table aliases, and nested queries. If you use result set parsing, you need to map the result set to the actual query column to identify whether a column needs to be masked. The more complex the query is, the more difficult the identification is, causing lower matching accuracy. In contrast, if you use statement rewriting, you can accurately nest masking functions for columns involved in complex queries. + + Based on the preceding analysis, data masking based on statement rewriting is a desirable solution in terms of both performance and accuracy. By using statement rewriting, openGauss identifies the target node of the query tree based on the user-defined masking policy after the query tree is obtained through query parsing. In addition, openGauss rewrites the node to be masked, constructs the masking query tree, and sends the query tree to the database kernel for execution. Then, the masked data is returned. + + +## 2 Dynamic Data Masking Solution of openGauss + +In the industry, the dynamic data masking function is usually loaded as a middleware plug-in or a data masking system. It is used to mask data by intercepting commands or result sets between the database on the client and that on the server. openGauss has the built-in dynamic data masking feature, so that the database can mask data without using external plug-ins, effectively reducing the risk of sensitive data leakage during data transmission. + +openGauss has defined a complete built-in security policy model from version 1.1.0. Based on the model, users can define resource labels to identify sensitive data and define related security policy mechanisms for different resource label types and content. Dynamic data masking is one of the security policy mechanisms. + +- 2.1 Built-in Security Policy + + The built-in security policy model identifies and protects user behaviors by configuring a series of security policies, providing the capabilities of protecting sensitive user data. 
+ + Resource labels are the basis of the security policy model. It is a collection of database resources in essence. To manage database resources in a unified manner, a data manager can add multiple database resources to the same resource label and configure policies for the resource label to manage database resources in batches. + + For example, if multiple data tables contain sensitive data columns such as **creditcard** which indicates a bank card number, these columns can be classified into the **creditcard\_label** resource label in a unified manner, and then the administrator can configure data masking policies for the **creditcard\_label** resource label to implement batch configurations for all related sensitive columns. + + Dynamic data masking is a security policy supported by the security policy model. After identifying sensitive data in user tables \(sensitive data discovery and identification are not within this scope\), the data controller configures data masking policies for resource labels that contain sensitive columns and restrict users' data access and information extraction behaviors based on different application scenarios to protect sensitive data. + + In general, resource labels are used to classify database resources and put these resources into various security policies for management. The dynamic data masking feature uses resource labels to identify sensitive data and matches masking policies to mask sensitive data. + +- 2.2 Core Idea of Dynamic Data Masking + + The dynamic data masking feature of openGauss is deployed together with the database as a built-in security plug-in. No additional adaptation is required for services. The security policy module of openGauss is responsible for parsing SQL statements and matching masking policies, and the service takes effect after masking policies are configured. + + **Configuring Masking Policies** + + The configuration of a masking policy involves masking functions, resource labels, and masking filters. + + - Masking functions indicate the methods used by a masking policy to mask target columns. Currently, openGauss provides seven masking functions, namely, **creditcardmasking**, **basicemailmasking**, **fullemailmasking**, **alldigitsmasking**, **shufflemasking**, **randommasking**, and **maskall**. They are applicable to different masking scenarios. + - Resource labels are a set of labels on which a masking policy takes effect. If a target column in a query exists in a label, sensitive data of the column will be masked based on the masking policy. Please note that the dynamic data masking feature of openGauss can mask labels that contain only data columns. + - Masking filters specify the user scenarios where a masking policy takes effect and mainly involve usernames, login clients, and IP addresses of users. The data masking policy takes effect only when a query user meets the threshold specified by a masking filter. + + The following example shows how to create a dynamic data masking policy. + + **Data Preparation** + + Check whether the built-in security policy is enabled. + + ![](../figures/zh-cn_image_0000001206967370.png) + + Prepare two tables containing the sensitive columns **creditcard** and **customername**. + + ![](../figures/111.png) + + **Policy Configuration** + + Log in to the database as a policy administrator \(with the **poladmin** permission\) and add the sensitive columns in the two data tables to the resource labels **creditcard\_label** and **customer\_label** for management. 
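+
+ In gsql, these configuration steps correspond roughly to the following DDL. This is a hedged sketch based on the openGauss resource label and masking policy syntax; the table, schema, and user names are illustrative and may differ from those shown in the figures below.
+
+ ```sql
+ -- Group the sensitive columns into resource labels (illustrative names).
+ CREATE RESOURCE LABEL creditcard_label ADD COLUMN(public.order_tbl.creditcard);
+ CREATE RESOURCE LABEL customer_label ADD COLUMN(public.order_tbl.customername);
+
+ -- Mask creditcard_label only for user1 connecting through gsql from 10.11.12.13.
+ CREATE MASKING POLICY mask_card_pol creditcardmasking ON LABEL(creditcard_label)
+     FILTER ON ROLES(user1), APP(gsql), IP('10.11.12.13');
+
+ -- Mask customer_label with maskall for all query users by default.
+ CREATE MASKING POLICY mask_name_pol maskall ON LABEL(customer_label);
+ ```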
+ + ![](../figures/zh-cn_image_0000001252127325.png) + + Create the following two masking policies: + + - **mask\_card\_pol**: Columns in the **creditcard\_label** label are masked by using the **creditcardmasking** function only when the **user1** user uses gsql to access the tables using the IP address 10.11.12.13. + - **mask\_name\_pol**: By default, columns in the **customer\_label** label are masked by using the **maskall** function for all query users. + + ![](../figures/zh-cn_image_0000001206807380.png) + + **Triggering Data Masking Policies** + + When the system receives a query command, **security\_plugin** intercepts the query tree generated by the semantic analysis in the parser and selects the masking policy that is applicable to the user scenario based on the user login information \(username, client, and IP address\). The masking policy is configured based on resource labels \(containing only table columns\). Therefore, you need to identify whether the target node of the query tree belongs to a resource label, match the identified resource label with the masking policy, and rewrite the target node of the query tree based on the policy content. Then, the query tree is returned to the parser. + + Due to the built-in masking function of the query tree in the **security\_plugin** module, data visitors are unaware of the process of rewriting the query tree by using the built-in security policy. They access data in the same way as executing a common query with data privacy protected. + + **Figure 2-1** Dynamic data masking architecture of openGauss + + ![](../figures/112.png) + + Based on the cases described in section "Configuring Masking Policies", you can query the data table to trigger the masking policy. + + **Trigger the data masking policy.** + + If the **user1** user uses gsql to log in to the database and query sensitive data in compliance with the **mask\_card\_pol** policy, the system returns the masked data. However, the **user2** user does not comply with the policy. Therefore, the data queried by this user is not masked. + + ![](../figures/113.png) + + When the **user1** user or the **user2** user queries the **order** table, the **mask\_name\_pol** masking policy is triggered. Therefore, the **customername** column is masked. + + ![](../figures/zh-cn_image_0000001251847329.png) + + +## 3 Advantages of openGauss Dynamic Data Masking + +The dynamic data masking feature of openGauss focuses on identifying users who access data. Masking filters are configured based on a specified user, client tool, and login IP address. The policy administrator can flexibly formulate different masking policies based on different services and user scenarios and is granted with different levels of sensitive data access capabilities to adapt to various complex production environments. For example, in the finance and healthcare industries, counter service personnel can view only some information about ID cards and bank cards, while O&M administrators can query and maintain all user information. On the premise of ensuring the diversity of masking scenarios, the system performs a strict mutual exclusion check when a masking filter is specified. This prevents ambiguity in selecting policies when a user complies with multiple masking filters at the same time. + +**Figure 3-1** Data masking of openGauss based on filter criteria + +![](../figures/114.png) + +The dynamic data masking feature of openGauss focuses more on batch management of database resources. 
In the security policy model, database resources to be managed and controlled are classified into labels. Operations on labels are operations on a specified cluster of resources, which greatly simplifies the management process and improves management efficiency. The dynamic data masking feature of other databases is based on a single column or table. The masking policy corresponds to the database resource. Even if you want to use the same masking function, you need to configure multiple masking policies for different database resources, which increases the policy configuration cost as well as the difficulty in subsequent O&M and batch resource policy management. Therefore, allocating database resources to be managed in batches to resource labels is the basis and one of the advantages of the dynamic data masking feature of openGauss. + +**Figure 3-2** Batch policy configuration for openGauss resource labels + +![](../figures/115.png) + +The openGauss kernel is equipped with dynamic data masking, which ensures the security of data transmission paths to some extent. However, external plug-ins may be bypassed. After an external plug-in rewrites the SQL statements sent from the client or the result set returned by the server, attackers can bypass the plug-in to directly send SQL statements to the database or intercept the source data result set returned by the database. As a result, the masking plug-in becomes invalid. Therefore, compared with masking by using external plug-ins, the dynamic data masking feature of openGauss can reduce the risk of sensitive data leakage on the transmission path to some extent. + +Combined with the production environment, the purpose of dynamic data masking is to mask sensitive data in the result set. The service side provides query interfaces, and then the interfaces trigger data masking. To ensure the security of sensitive data, openGauss is adapted to masking policies in most scenarios, including addition, deletion, and modification operations with returned values, MERGE INTO statements, common table expressions \(CTEs\), and subqueries. In this way, the interfaces for the service side to perform operations on sensitive data are enriched, instead of providing only data query interfaces. + +To improve usability, openGauss provides a set of simple policy configuration syntaxes, covering the addition, deletion, and modification of resource labels and masking policies. You can use the definition syntax to easily configure masking policies, simplifying the operation process for administrators. + +## 4 Prospect of openGauss Dynamic Data Masking + +The dynamic data masking feature of openGauss provides a simple and flexible policy configuration solution to prevent user privacy data from being disclosed to some extent. It is an indispensable part of the multi-layer security defense architecture of openGauss. + +In the future, the dynamic data masking feature of openGauss will provide more flexible policy configuration methods, such as user-defined function \(UDF\) masking and conditional masking, to support more flexible and rich privacy protection scenarios. 
+ diff --git a/content/en/post/2022/Everything-You-Want-to-Know-About-the-openGauss-Ledger-Database.md b/content/en/post/2022/Everything-You-Want-to-Know-About-the-openGauss-Ledger-Database.md new file mode 100644 index 0000000000000000000000000000000000000000..b2b84e9b168e25a1e63413b7082e6d4b8c296509 --- /dev/null +++ b/content/en/post/2022/Everything-You-Want-to-Know-About-the-openGauss-Ledger-Database.md @@ -0,0 +1,133 @@ ++++ + +title = "Everything You Want to Know About the openGauss Ledger Database" + +date = "2021-10-22" + +tags = [ "Everything You Want to Know About the openGauss Ledger Databases"] + +archives = "2021-10" + +author = "Rui He" + +summary = "Everything You Want to Know About the openGauss Ledger Database" + +img = "/en/post/2022/title/img16.png" + +times = "12:30" + ++++ + +# Everything You Want to Know About the openGauss Ledger Database + +## 1 What Is a Ledger Database? + +Coins such as bitcoin, ethereum, and dogecoin, as synonyms of the blockchain, not only affect the profit and loss of some people's accounts, but also affect the prices of graphics cards and hard disks. However, as database-related technical personnel or enthusiasts, we are more concerned about the core technology. + +As a distributed ledger technology, blockchain overcomes the disadvantages of traditional centralized ledgers, such as low storage efficiency, low reliability, and vulnerability to single-point attacks, and technically ensures that the blockchain features distributed sharing, multi-party consensus, tamper resistance, and traceability. + +Then, can we use blockchains to replace databases? The answer is no. The blockchain has many disadvantages, such as low transaction performance and inconvenient query. The bitcoin system can process only seven transactions per second; if it were used to process major bank transactions, the efficiency would be very low. The openGauss database features high efficiency, high reliability, and high security. We can start from openGauss and integrate blockchain technologies such as cryptographic tamper-proofing and multi-party consensus to improve the tamper-proof and traceability capabilities of the database. This is how the idea of the ledger database came into being. + +A blockchain is usually divided into seven layers in terms of the architecture model: application layer, query layer, contract layer, actuator layer, consensus layer, network layer, and data layer. The following figure shows the technical points of each layer. + +![](../figures/311.png) + +Figure 1 Blockchain infrastructure model + +For a database to absorb the tamper-proof capability of the blockchain, the natural idea is to start from the bottom layer of the blockchain technology. At the data layer, the database provides the capability of recording data verification information and verifying data tampering. This ensures that the database can faithfully record data changes caused by each transaction when processing sensitive information, forming a faithful and complete data change ledger. The openGauss ledger database introduced here records data change operations whenever data is modified in the openGauss kernel, ensuring that the entire data link can be queried and traced. In addition, an efficient tampering check API is provided for the upper-layer application system or multiple parties to verify data consistency. In the next chapter, we will introduce the implementation principle of the ledger database and the corresponding changes to openGauss.
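+
+ Before diving into the internals, a minimal gsql session may help fix ideas. This is a hedged sketch assuming the ledger feature is available in your build; the schema keyword and check function follow the openGauss ledger documentation, while the table and data are illustrative.
+
+ ```sql
+ -- A schema created WITH BLOCKCHAIN holds tamper-proof tables.
+ CREATE SCHEMA ledgernsp WITH BLOCKCHAIN;
+ CREATE TABLE ledgernsp.usertable (id int, city text, balance int);
+
+ -- Ordinary DML; the kernel records verification information as a side effect.
+ INSERT INTO ledgernsp.usertable VALUES (1, 'hangzhou', 100);
+ UPDATE ledgernsp.usertable SET balance = 150 WHERE id = 1;
+
+ -- Consistency check between the user table and its history table.
+ SELECT pg_catalog.ledger_hist_check('ledgernsp', 'usertable');
+ ```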
+ +## 2 Principles of the openGauss Ledger Database + +![](../figures/312.png) + +Figure 2 New modules in the ledger database + +When a client sends an SQL statement that modifies data, the communication module receives it, the parsing module converts it into a parse tree, and the optimizer then generates an execution plan. After obtaining the execution plan, the execution module calls the storage layer API to modify the data. As shown in the preceding figure, modification verification information is recorded during data modification, and a modification verification module is provided so that users can call APIs to perform verification. Both the recording and the checking rely on tampering check information designed for database insert, delete, and update operations. The following describes this new tampering check information. + +- 2.1 Tamper-proof User Table + + ![](../figures/zh-cn_image_0000001207772870.png) + + Figure 3 Structure of the tamper-proof user table + + In the ledger database feature, schema-level isolation of tamper-proof tables from common tables is adopted. Tables in a tamper-proof schema have verification information and record each data change operation \(add, delete, and modify\). These tables are called tamper-proof tables. A table in a common schema is called a common table. + +  The tamper-proof table has the structure shown in Figure 3. When a tamper-proof table is created, the system adds a hash column. When data is inserted or modified, the digest of the row data is calculated in real time and stored in this column. Data and digests are stored in the same tuple and are inseparable. Based on the unidirectionality of the hash function, the digest of each row is used as the logical representation of the data in the digest space. + +- 2.2 User History Table + + ![](../figures/zh-cn_image_0000001252412855.png) + + Figure 4 Structure of the user history table + + As shown in the preceding figure, the user history table contains four columns: xid, hash\_ins, hash\_del, and pre\_hash. Each row in the user history table corresponds to one row-level data change in the user table. **xid** records the XID when data is changed, indicating the logical time sequence of operations. **hash\_ins** records the hash values of data rows inserted using INSERT or UPDATE. **hash\_del** records the hash values of data rows deleted using DELETE or UPDATE. Whether **hash\_ins** and **hash\_del** are empty distinguishes the INSERT, DELETE, and UPDATE operations, as listed in the following table.
+ |  | hash_ins | hash_del |
+ | --- | --- | --- |
+ | Insert | √ (hash of the inserted data) | -- |
+ | Delete | -- | √ (hash of the deleted data) |
+ | Update | √ (hash of the new data) | √ (hash of the previous data) |
+ + **pre\_hash** combines the data of the current row and the pre\_hash value of the previous row in the history table to generate the data summary of the current user history table. The calculation formula is as follows: + + ![](../figures/zh-cn_image_0000001252700965.gif) + + **i** indicates the _i_th row in the user history table, and **rowdata\_i** indicates the data concatenated by **xid || hash\_ins || hash\_del** in the _i_th row. + + When verifying the integrity of a user history table, the system uses the row data to calculate the **pre\_hash** values in sequence and compares them with the **pre\_hash** values in the table. If they are inconsistent, the integrity of the user history table has been damaged. + +- 2.3 Structure of the Global Blockchain Table + + ![](../figures/313.png) + + Figure 5 Structure of the global blockchain table + + The preceding figure shows the structure of the global blockchain table. Each row in the table corresponds to one modification of a tamper-proof table and is saved as a block. The global blockchain table mainly includes three parts: block information, which stores identification information related to a block, such as the block number and timestamp; operation information, which records the user operation on a tamper-proof data table, including identifiers such as the database name, username, and table name, and the corresponding SQL statement; and verification information, which stores the hash values used for consistency or integrity verification, including the table-level hash \(rel\_hash\) and global hash \(global\_hash\). + +- 2.4 Tampering Check Algorithm + + ![](../figures/314.png) + + Figure 6 Generation of tamper-proof user table verification information + + When a user calls the tampering check API, the system can concurrently use the tamper-proof user table to generate table-level verification information and use the records in the corresponding history table to generate the overall verification information of the change records. The two pieces of verification information are then compared to determine whether the data is consistent with the recorded operations. If they are inconsistent, some data modification has bypassed the system's records, that is, tampering has occurred. + + The process of generating table-level verification information from row-level verification information in a tamper-proof user table is shown in Figure 6. During verification, the system scans the data in the table, obtains the verification information of each row, and uses it to verify the row data. While scanning the row-level verification information, the system continuously aggregates the verification information of the data scanned so far by using a built-in commutative verification information aggregation algorithm. Because the aggregation algorithm is commutative, this process can be executed fully in parallel. + + Figure 7 shows how the overall verification information of the change records is generated from the user history table. According to the structure of the user history table, the non-null elements in the **hash\_ins** column represent the verification information added by all operations, and the non-null elements in the **hash\_del** column represent the verification information removed. A set of remaining verification information is obtained by taking the difference between the two columns of elements.
Then, the commutative verification information aggregation algorithm is used to obtain the overall verification information of the change records in the user history table. Because the aggregation algorithm is commutative, **hash\_ins – hash\_del** may be computed for each row first, and the results can then be accumulated continuously during scanning. Here, too, the generation of the overall verification information of the change records can be fully parallel. + + ![](../figures/315.png) + + Figure 7 Generating the verification information of the user history table + + +## 3 Development Prospect of the openGauss Ledger Database + +The ledger database is the basis of tamper-proof data in openGauss. Currently, it records verification information in the database and provides a high-performance verification API, covering some functions of the storage layer in the blockchain technology. To implement a complete tamper-proof function, we need to add high-performance remote execution capabilities between multiple databases and provide pluggable, high-performance multi-party consensus protocols. In this way, the tamper-proof capability of openGauss will be complete and trusted by multiple parties. In the convergence of databases and blockchains, openGauss will continuously evolve to provide more easy-to-use and efficient tamper-proof databases. + diff --git a/content/en/post/2022/Full-encryption-Upgrade-and-Unaware-Encryption-Decryption-Principle-Analysis.md b/content/en/post/2022/Full-encryption-Upgrade-and-Unaware-Encryption-Decryption-Principle-Analysis.md new file mode 100644 index 0000000000000000000000000000000000000000..32e21f38084479bf96ae6eb41de179e6cb0257e8 --- /dev/null +++ b/content/en/post/2022/Full-encryption-Upgrade-and-Unaware-Encryption-Decryption-Principle-Analysis.md @@ -0,0 +1,107 @@ ++++ + +title = "Full-encryption Upgrade and Unaware Encryption/Decryption Principle Analysis" + +date = "2021-10-13" + +tags = [ "Full-encryption Upgrade and Unaware Encryption/Decryption Principle Analysis"] + +archives = "2021-10" + +author = "Jinxiang Xiao" + +summary = "Full-encryption Upgrade and Unaware Encryption/Decryption Principle Analysis" + +img = "/en/post/2022/title/img10.png" + +times = "12:30" + ++++ + +# Full-encryption Upgrade and Unaware Encryption/Decryption Principle Analysis + +To implement encryption and decryption on the client, a large number of maintenance and management operations need to be performed on the client, including data key management, sensitive data encryption, and SQL statement parsing and modification. openGauss encapsulates these complex operations in the client encryption driver to implement automatic encryption and replacement of sensitive information. All encryption-related metadata is stored in the database so that the database can identify and process encrypted data. openGauss also encrypts the parameters related to sensitive information in SQL statements so that queries do not disclose users' query intent; this reduces the complexity of security management and operations on the client and leaves application development unaffected. Furthermore, openGauss provides a series of configuration APIs to meet users' requirements for encrypted fields, encryption algorithms, and secure key storage. The transparency of the openGauss fully-encrypted database makes task migration very convenient for users.
+ +The most secure protection for data confidentiality and personal privacy is encryption. The full-encryption technology can encrypt and decrypt data in specific applications and process data in the encrypted state in the database to implement full-lifecycle data protection. However, mainstream application encryption and decryption technologies in the industry generally involve a large number of operations such as key management, algorithm selection, SQL statement change, and data type conversion. Therefore, when data needs to be encrypted, a large amount of adaptation and migration work is required, and risks may be caused due to human negligence. + +The core of openGauss full-encryption is to parse all input and output statements of users on the client, identify defined sensitive data, and perform automatic encryption and decryption. The whole process of using a fully-encrypted database is as follows: A user inputs the syntax. A client sends the input to a server. The server executes and returns the result to the client. During the process, the only two steps that the user can perceive are inputting syntax and obtaining the result. The technical core of the fully-encrypted database is divided into several modules such as key management, parsing layer, encryption and decryption driver, implicit conversion layer, and data cache. Figure 1 shows the architecture of the fully-encrypted database. The following describes the modules related to user perception. + +![](../figures/28.png) + +Figure 1 Fully-encrypted database architecture + +## Automatic Syntax Parsing + +In the openGauss fully-encrypted database, a lightweight parser is added to the client. The lightweight parser reuses the original parser on the server. After a user enters the syntax, such as INSERT and SELECT statements, the client parser parses the lexicon and syntax to obtain the plaintext value and its location. The encryption and decryption driver automatically replaces the plaintext with the encrypted ciphertext and sends the query statement to the server. In this case, the data transmitted over the network and stored in the database is encrypted. After the server returns the ciphertext execution result to the client, the client encryption and decryption driver automatically decrypts the returned ciphertext data and returns it to the user. The only two steps that the user can perceive are inputting the syntax and obtaining the result. The user is not aware of the entire encryption process. In addition, the syntax is the same as that for non-encrypted data. + +In the upgraded openGauss fully-encrypted database, the client parses the syntax in functions and provides an API for decrypting the record data returned by functions. When functions are created, after the syntax of function bodies is parsed at the parsing layer, values to be encrypted in the function bodies are encrypted in the function processors by using the encryption driver. When functions are executed, after the syntax is parsed at the parsing layer, the syntax enters different processors according to the called functions, and parameters are encrypted by using an encryption driver. Users are unaware of the entire parsing and encryption process. The entire process is fully automated, and users do not need to perform other operations. 
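+
+ As a concrete illustration, creating an encrypted column and inserting into it looks like ordinary SQL on the client. This is a hedged sketch following the openGauss encrypted equality query documentation \(gsql must be started with full encryption enabled, for example with the -C option\); the key names, key path, and data are illustrative.
+
+ ```sql
+ -- Client master key (CMK) and column encryption key (CEK).
+ CREATE CLIENT MASTER KEY cmk1 WITH (
+     KEY_STORE = localkms, KEY_PATH = "key_path_value", ALGORITHM = RSA_2048);
+ CREATE COLUMN ENCRYPTION KEY cek1 WITH VALUES (
+     CLIENT_MASTER_KEY = cmk1, ALGORITHM = AEAD_AES_256_CBC_HMAC_SHA_256);
+
+ -- Only the sensitive column is declared ENCRYPTED; everything else is unchanged.
+ CREATE TABLE creditcard_info (
+     id_number   int,
+     name        text,
+     credit_card varchar(19) ENCRYPTED WITH (
+         COLUMN_ENCRYPTION_KEY = cek1, ENCRYPTION_TYPE = DETERMINISTIC));
+
+ -- The client driver encrypts the sensitive parameter before it leaves the client.
+ INSERT INTO creditcard_info VALUES (1, 'joe', '6217986500001288393');
+ ```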
+ + ![](../figures/282.png) + + Figure 2 Creating a function/procedure by using a function or stored procedure in an encrypted equality query + + ![](../figures/283.png) + + Figure 3 Executing a function/procedure by using a function or stored procedure in an encrypted equality query + + ## Implicit Data Conversion + + The encrypted columns and their original data types are stored in the database, and the data type displayed to users is the original data type. Although the ciphertext stored in the database is in binary format, users are unaware of data encryption, decryption, and type conversion. The openGauss fully-encrypted database checks whether the returned data is encrypted. If it is, the database calls the encryption driver to decrypt the data based on the original data type. + + In the upgraded openGauss fully-encrypted database, when a user creates an encrypted function, the server verifies the parameter types in the function parsing module. If the data or the column is encrypted, the server converts the input, output, and returned parameters in the optimization module: the parameter types of the function are converted to the encrypted column type \(binary type\), and the original data types of the function parameters are saved in the database. Users are unaware of the parsing, encryption, and implicit data type conversion in the entire process. They do not even need to modify the function syntax. + + ![](../figures/284.png) + + Figure 4 New modules for supporting functions in a fully-encrypted database + + ## Data Caching + + Performance has always been a challenge for fully-encrypted databases. In the openGauss fully-encrypted database, the main performance loss lies in obtaining keys, encrypting and decrypting keys, obtaining encrypted column information, and encrypting and decrypting data. If the waiting time during statement execution is too long, user experience deteriorates. Therefore, the data cache module caches the client key information and encrypted column information on the client. When a user initializes a connection, the client master key path, column encryption key ciphertext, and encrypted column information are automatically obtained from the database, and the key is automatically cached when the user first uses the client. After the user disconnects, the key and encrypted column information are automatically destroyed. Data caching improves query performance without affecting user experience. + + ## JDBC API + + Considering the migration of tasks between different databases, more users prefer unified access APIs, such as JDBC. In earlier versions, openGauss allowed only gsql to use a fully-encrypted database. JDBC is supported in the upgraded openGauss fully-encrypted database. The JDBC client reuses the original encryption driver through the Java Native Interface \(JNI\). The encryption driver has been reconstructed so that it can obtain data from the server through a set of interfaces compatible with libpq/JDBC. The encryption and decryption process when users call JDBC is the same as that of gsql. Users are unaware of data encryption and decryption in the encryption driver during query. + + ![](../figures/285.png) + + Figure 5 New JDBC modules supported by the encrypted equality query + + ## State Cryptography Administration \(SCA\) Algorithms + + Chinese cryptographic algorithms are algorithms issued by the Office of Security Commercial Code Administration \(OSCCA\) under China's State Cryptography Administration.
Common algorithms include SM1, SM2, SM3, and SM4. SM1 is a symmetric encryption algorithm whose implementation is not open to the public, and it is not supported here. SM2 is an asymmetric encryption algorithm based on ECC. SM3 is a message digest algorithm. SM4 is a symmetric block cipher that originated as a standard algorithm for WLANs. Chinese cryptographic algorithms are used in many user scenarios in China. To provide users with unaware migration and expand the application scenarios of fully-encrypted databases, the client master key \(CMK\) on the upgraded openGauss client supports the asymmetric encryption algorithm SM2 when encrypting a column encryption key \(CEK\). When the CEK is used to encrypt or decrypt user data, the SM4\_SM3 suite is supported, in which SM4 performs symmetric encryption and SM3 performs the integrity check. Currently, the openGauss fully-encrypted database supports the algorithms listed in Table 1.
+ | Fully-encrypted Database | KeyStore | Key Encryption Algorithm | Data Encryption Algorithm |
+ | --- | --- | --- | --- |
+ | openGauss | localkms | RSA_2048 | AEAD_AES_256_CBC_HMAC_SHA_256, AEAD_AES_128_CBC_HMAC_SHA_256 |
+ | openGauss | localkms | SM2 | SM4_SM3 |
+ +Table 1 Encryption algorithms supported by a fully-encrypted database + +Currently, the openGauss fully-encrypted database supports only the encrypted equality query. It provides a pure software solution in the full-encryption technology and has advantages of high security and high performance. In the future, more scenario capabilities will be opened, such as range query and fuzzy query. In addition, the TEE software and hardware integration solution will be used to form complete encrypted query and computing capabilities. In the fully-encrypted database field, openGauss will continuously evolve to provide more secure, easy-to-use, and efficient fully-encrypted databases. + diff --git a/content/en/post/2022/Guide-to-Adapting-HAProxy-to-openGauss.md b/content/en/post/2022/Guide-to-Adapting-HAProxy-to-openGauss.md new file mode 100644 index 0000000000000000000000000000000000000000..e47ab2c6201fc19f5c61d35727e8010f6459a52a --- /dev/null +++ b/content/en/post/2022/Guide-to-Adapting-HAProxy-to-openGauss.md @@ -0,0 +1,160 @@ ++++ + +title = "Guide to Adapting HAProxy to openGauss" + +date = "2021-09-18" + +tags = [ "Guide to Adapting HAProxy to openGauss"] + +archives = "2021-09" + +author = "Xin Dou" + +summary = "Guide to Adapting HAProxy to openGauss" + +img = "/en/post/2022/title/img5.png" + +times = "12:30" + ++++ + +# Guide to Adapting HAProxy to openGauss + +## 1. Introduction to HAProxy + +HAProxy is an open-source project and its code is hosted on GitHub. + +Code link: https://github.com/haproxy/haproxy + +HAProxy is a free, quick and reliable proxy, which provides HA, load balancing, and TCP- and HTTP-based proxy services. It supports virtual hosts. + +HAProxy implements an event-driven, single-process model that supports a large number of concurrent connections. + +## 2. Read/Write Isolation and Load Balancing Based on HAProxy + +HAProxy implements read/write isolation and load balancing for the openGauss cluster. The prerequisite is that Patroni manages the openGauss database cluster. The key is the configuration file. + +The HAProxy configuration consists of five parts: + +- **global**: sets global configuration parameters, which are related to processes and the operating system. + +- **defaults**: sets default parameters. These parameters can be used by the frontend, backend, and listen components. + +- **frontend**: frontend virtual node that receives requests. The frontend can specify the backend to be used based on the ACL rule. + +- **backend**: backend service cluster, which is a real server. One backend corresponds to one or more entity servers. + +- **listen**: combination of the frontend and backend. + +In the HAProxy configuration file, two listen modules are defined: **opengauss** and **opengauss\_balance**, which correspond to the write operations on the primary node and the read operations and load balancing on the standby node, respectively. In the listen modules, you can use the **server** keyword to set the backend server, that is, set the IP address and port number of each database node in the openGauss cluster managed by Patroni. Then, the database node information can be added to the HAProxy management. + +- 2.1 Write Configuration for the Primary Node + + ``` + listen opengauss # Used for monitoring the primary node. + bind *:5000 # One of the open ports, used to connect to the primary node. + option httpchk + # Enable health check for backend servers, supporting health monitoring [check]. 
+ http-check expect status 200 + default-server inter 3s fall 3 rise 2 on-marked-down shutdown-sessions + # Monitoring interval [inter 3s], number of monitoring failures after which the backend server is considered unavailable [fall 3], number of monitoring successes after which the backend server is considered available [rise 2]; when the server is marked as down, disable the connection between HAProxy and the backend server [on-marked-down shutdown-sessions]. + server opengauss_ip1_port1 ip1:port1 maxconn 100 check port 8008 + server opengauss_ip2_port2 ip2:port2 maxconn 100 check port 8008 + server opengauss_ip3_port3 ip3:port3 maxconn 100 check port 8008 + server opengauss_ip4_port4 ip4:port4 maxconn 100 check port 8008 + # Use the server keyword to set the backend server, which is the internal name [opengauss_i] of the backend server. + ``` + + **Theoretical analysis:** + + HAProxy invokes the health monitoring representational state transfer \(REST\) application programming interface \(API\) endpoint to obtain information about the primary and standby nodes in the cluster through Patroni. + + Patroni has a rich set of REST APIs, which are the best practice of separating the frontend from the backend and are a set of development standards or specifications. Their features are summarized as follows: + + - \(1\) Each uniform resource identifier \(URI\) represents a resource. + - \(2\) A presentation layer exists between the client and the server for transferring resources. + - \(3\) The client uses four HTTP verbs to perform operations on server resources to implement REST. + + In the HTTP protocol, four verbs indicating operation modes are GET, POST, PUT, and DELETE, which correspond to four basic operations: GET is used to obtain resources, POST is used to create or update resources, PUT is used to update resources, and DELETE is used to delete resources. + + REST APIs in Patroni are used in the following scenarios: + + Reference: [https://patroni.readthedocs.io/en/latest/rest\_api.html](https://patroni.readthedocs.io/en/latest/rest_api.html) + + - \(1\) Used by Patroni for leader election. + - \(2\) Used by the patronictl tool to perform failover, switchover, reinitialization, restart, and reloading. + - \(3\) Used by the HAProxy or other load balancers to perform HTTP health check or monitoring. + + In this document, HAProxy uses the Patroni REST API to monitor the health status of the primary node, standby node, and other nodes in the cluster. + + For GET requests in health monitoring, Patroni returns a JSON document containing the node status and HTTP status code. If no complex JSON document is required and only some key information is retained, OPTIONS can be used to replace GET. + + For the following requests, when the Patroni node has the leader lock and is running as the primary node, the Patroni REST API returns the HTTP status code 200. + + _\(1\) GET /_ + + _\(2\) GET /master_ + + _\(3\) GET /primary_ + + _\(4\) GET /read-write_ + + In the preceding configuration, **option httpchk** is equivalent to invoking the GET/ request, and **http-check expect status 200** is equivalent to filtering out nodes returning status code 200 during health monitoring. When a database is configured as the primary node, and it returns status code 200 in response to the preceding configuration, it is selected as the primary node. In this way, the IP address of HAProxy and port 5000 can be used as the proxy of the primary node in the cluster. 
In the openGauss cluster, you can run the **gsql** command to connect to the primary node in the cluster. + + ``` + gsql -d postgres -h HAProxy_ip -p 5000 -U user -W password + ``` + +- 2.2 Read and Load Balancing Configuration for the Standby Node + + ``` + listen opengauss_balance # Used for monitoring the standby node. + bind *:5001 # One of the open ports, used to connect to the standby node. + mode tcp + option tcplog + balance roundrobin # balance defines the load balancing algorithm. roundrobin indicates that polling is performed based on weights. This is the most balanced and fair algorithm when the processing time of servers is evenly distributed. This algorithm is dynamic, which means that a weight can be adjusted at run time. + option httpchk OPTIONS /replica + http-check expect status 200 + default-server inter 3s fall 3 rise 2 on-marked-down shutdown-sessions + server opengauss_ip1_port1 ip1:port1 maxconn 100 check port 8008 inter 5000 rise 2 fall 2 + server opengauss_ip2_port2 ip2:port2 maxconn 100 check port 8008 inter 5000 rise 2 fall 2 + server opengauss_ip3_port3 ip3:port3 maxconn 100 check port 8008 inter 5000 rise 2 fall 2 + server opengauss_ip4_port4 ip4:port4 maxconn 100 check port 8008 inter 5000 rise 2 fall 2 + ``` + + **Theoretical analysis:** + + **For the GET/replica request, when the Patroni node is in the running state, the role is replica, and the noloadbalance tag is not set, the HTTP returns status code 200.** + + **In the preceding configuration, option httpchk OPTIONS/replica invokes the OPTIONS/replica request and replaces GET with OPTIONS to simplify the returned information; http-check expect status 200 is equivalent to filtering out nodes returning status code 200 during health monitoring. When a database is configured as a standby node in the cluster and it returns status code 200 in response to the preceding configuration, it is selected as a standby node. balance roundrobin configures the load balancing algorithm, so that read requests are sent to each running standby node in polling mode. In this way, the IP address of HAProxy and port 5001 can be used as the proxy of the standby nodes in the cluster to implement load balancing.** + + In the openGauss cluster, you can run the **gsql** command to connect to a standby node in the cluster. + + ``` + gsql -d postgres -h HAProxy_ip -p 5001 -U user -W password + ``` + +- 2.3 Monitoring Page + + In addition, an HAProxy monitoring page is configured. You can access this page to view the status of each node in the cluster. + + ``` + listen stats # Define a part named stats. + mode http + # Set the mode to HTTP. + bind *:7000 # One of the open ports, used for monitoring. + # Define the listening socket. + stats enable + # stats is the socket of a statistics page of HAProxy. + stats uri / + # Set the URI of the statistics page to /. + ``` + + In the preceding configuration, you can access http://ip:7000/ to view the monitoring page. _ip_ indicates the IP address of the host where HAProxy is deployed. The following figure shows the page information. + + ![](../figures/zh-cn_image_0000001252065761.gif) + + In the preceding figure, the cluster consists of one primary node and three standby nodes. The first module **opengauss** corresponds to the write operation and the green column indicates the primary node in the cluster. The second module **opengauss\_balance** corresponds to the read operation, and the green columns indicate the standby nodes in the cluster. 
+ + +In this way, HAProxy implements read/write isolation and load balancing for the openGauss cluster through Patroni. diff --git a/content/en/post/2022/Internal-Mechanism-of-the-openGauss-DB4AI-Framework.md b/content/en/post/2022/Internal-Mechanism-of-the-openGauss-DB4AI-Framework.md new file mode 100644 index 0000000000000000000000000000000000000000..4d50de71f7b575f785da35f9500ba6f1991327f3 --- /dev/null +++ b/content/en/post/2022/Internal-Mechanism-of-the-openGauss-DB4AI-Framework.md @@ -0,0 +1,100 @@ ++++ + +title = "Internal Mechanism of the openGauss DB4AI Framework" + +date = "2021-09-26" + +tags = [ "Internal Mechanism of the openGauss DB4AI Framework"] + +archives = "2021-09" + +author = "Wen Nie" + +summary = "Internal Mechanism of the openGauss DB4AI Framework" + +img = "/en/post/2022/title/img6.png" + +times = "12:30" + ++++ + +# Internal Mechanism of the openGauss DB4AI Framework + + + +## 1. Features of the openGauss AI Framework + +In the DB4AI direction, the database integrates AI capabilities to avoid problems caused by data migration during AI computing. Different from other DB4AI frameworks, the open-source inherent framework of openGauss completes AI computing in the database by adding AI operators. + +In addition to avoiding the problems caused by data migration, the AI framework of openGauss has the following advantages: + +- 1\) Extremely low learning threshold + + Currently, most mainstream computing frameworks, such as TensorFlow, PyTorch, and Keras, rely on Python as the script language. Although it is easy to learn Python, it still requires a certain learning cost. The current framework provides the CREATE MODEL and PREDICT BY syntax to complete AI training and inference tasks. Compared with Python, this syntax is more similar to the natural language and complies with people's intuition. + + ``` + CREATE MODEL point_kmeans USING kmeans FEATURES position FROM kmeans_2d WITH num_centroids=3; + SELECT id, PREDICT BY point_kmeans (FEATURES position) as pos FROM (select * from kmeans_2d_test limit 10); + ``` + +- 2\) Simplified data version management + + The snapshot function is added to the DB4AI feature. The database uses snapshots to fix data in a dataset at a specific time point. It can also save processed and filtered data. Data can be saved in full or incremental mode. In incremental mode, only data changes are stored each time. Therefore, the space occupied by snapshots is greatly reduced. You can directly obtain the corresponding data by using the snapshots of different versions. + +- 3\) Excellent performance experience + + Compared with many AIinDB projects, openGauss embeds model computing into the database by adding AI operators. Taking algorithm training as an example, data reading, model calculation and update, and final model storage are completed in an executor of the database. In this way, the computing capability of the database is fully utilized and released. The technical roadmap deep into the kernel makes our features faster than other higher-level invocation methods. + + ![](../figures/24.png) + + +Figure 1 Performance comparison with MADlib + +## Technical Principles and Advantages + +- 1\) DB4AI-Snapshot + + ![](../figures/zh-cn_image_0000001207516746.png) + + The DB4AI.snapshot feature requires users to specify the data to be filled in the SQL query statement for operating data storage, so as to create a snapshot. 
The initial snapshot is always created as a real and reusable copy of the operated data, making the specific state of the data immutable. Therefore, the initial snapshot serves as the starting point for subsequent data collation, but it always allows backtracking to the exact state of the original data at the time when the initial snapshot was created. + + Because a created snapshot cannot be changed, you must prepare the snapshot before starting data collation. The prepared snapshot data can be modified collaboratively to prepare for model training, especially for data management. In addition, snapshots automatically track all changes by recording each operation as metadata in the DB4AI system directory, providing a complete integration history for the data. + + After the snapshot is prepared, you can publish it. Published snapshots are immutable, and the DB4AI system specifies that only published snapshots can be used for model training. This ensures data consistency among training tasks. + + Outdated snapshots are archived for data backup. In this state, the data remains unchanged but cannot be used to train a new model. At last, clear the snapshot, delete the data tables and views in the schema, and restore the storage space. It is important to note that, for the purpose of strict model source management, dependent snapshots cannot be deleted. + + By using GUC parameters, snapshots can be stored in materialized view mode or incremental mode. In incremental mode, the view and data table corresponding to a new snapshot store only the modified content compared with the parent snapshot, which greatly reduces the storage space. + +- 2\) DB4AI-Query + + The inherent AI framework is deeply embedded in the database kernel. It builds an execution plan that contains AI operators through query optimization and query execution. After the computing is complete, the storage module of the framework saves the model information. The AI framework is divided into three parts: query optimization, computing execution, and model storage. + + - **Query optimization:** + + The lexical rules and syntax rules CREATE MODEL and PREDICT BY are added to the framework as the AI computing entry. During query optimization, this module is responsible for simple input verification, including the validity of attribute names, whether algorithms are supported, and whether model names conflict with each other. After the verification is complete, this module generates a query plan based on the training and inference tasks. + + - **Computing execution:** + + The query execution module adds corresponding AI operators to the execution plan based on the required algorithm type and executes computing, including data reading and model calculation and update. Algorithms are highly cohesive and loosely coupled, have good algorithm scalability, and are friendly for developers to add algorithms. + + - Model storage: + + After the model training is complete, the executor transfers the model data in the form of tuples to the storage module and saves the model to the gs\_model\_warehouse system catalog. + + ![](../figures/241.png) + + The following uses CREATE MODEL as an example to describe how to implement the query statement used for model training. + + ![](../figures/zh-cn_image_0000001253422853.png) + + - Step 1: Perform lexical and syntax analysis \(Lex and Yacc\) on the query. Generate an analysis tree by identifying pattern categories and pattern combinations to check whether syntax errors exist in statements. 
+ - Step 2: The database performs semantic analysis and rewriting on each obtained analysis tree. In the process of generating a query tree through semantic analysis, for a CreateModelStmt command, the database first checks the algorithm type to determine whether the algorithm belongs to supervised or unsupervised learning. Then, based on the result, the system further checks whether the attributes, hyperparameters, and model names entered in the query statement are valid. After the verification is complete, the semantic analysis generates a query tree and transfers it to the database executor. + - Step 3: The executor adds the corresponding algorithm operators to the execution plan based on the algorithm type and places the AI operator above the scan operator. During the calculation, the scanned data is fed into the algorithm model for calculation and update. Finally, the operator execution ends according to the iteration conditions set by the hyperparameters. + - Step 4: The executor transfers the trained model to the storage engine in the form of tuples. The model structure converted from the received tuples is verified and saved to the gs\_model\_warehouse system catalog. You can view model information in the system catalog. + + As a native advanced feature, DB4AI consolidates the new AI practices of openGauss and further expands its application fields. The out-of-the-box DB4AI function provided by openGauss effectively solves data migration problems in data warehouses and data lakes and improves information security during data migration. In the future, with the multi-mode and parallel computing advantages of openGauss, a unified data management platform will be formed to reduce O&M and usage difficulties caused by heterogeneous and fragmented data storage. The release of the DB4AI feature is a key step in making openGauss a cutting-edge tool. + diff --git a/content/en/post/2022/Introduction-to-Multi-Core-Optimization-of-openGauss-on-Kunpeng-Servers.md b/content/en/post/2022/Introduction-to-Multi-Core-Optimization-of-openGauss-on-Kunpeng-Servers.md new file mode 100644 index 0000000000000000000000000000000000000000..205e13df93af3a09f501c33b08a2613658f9b2f2 --- /dev/null +++ b/content/en/post/2022/Introduction-to-Multi-Core-Optimization-of-openGauss-on-Kunpeng-Servers.md @@ -0,0 +1,96 @@ ++++ + +title = "Introduction to Multi-Core Optimization of openGauss on Kunpeng Servers" + +date = "2021-03-03" + +tags = [ "Introduction to Multi-Core Optimization of openGauss on Kunpeng Servers"] + +archives = "2021-03" + +author = "Wengang Tian" + +summary = "Introduction to Multi-Core Optimization of openGauss on Kunpeng Servers" + +img = "/en/post/2022/title/img10.png" + +times = "12:30" + ++++ + +# Introduction to Multi-Core Optimization of openGauss on Kunpeng Servers + +Since the birth of integrated circuits, CPUs have experienced three development phases, the first of which was to increase the CPU clock frequency. Six years after integrated circuits were invented, Gordon Moore proposed Moore's law, predicting that the number of transistors on a chip doubles every two years. Moore's law is not a law of nature, but the development of semiconductor chips has proved that Moore's predictions are correct. Technology advances in chips bring benefits mainly in two aspects: smaller manufacturing geometries and larger silicon dies.
However, when it goes to the 7 nm process or smaller, a quantum tunneling effect occurs, and mass production of chips becomes challenging, resulting in a sharp increase in manufacturing costs. + +The second phase is to increase the number of CPU cores. If the frequency of a single-core CPU cannot be increased, the number of CPU cores can be increased to improve computing power. However, the CPU is only a logical computing unit. The programs and data in the memory must be loaded to the CPU for computing. All CPU cores share a northbridge to read memory. As the number of cores increases rapidly, the performance bottleneck of the northbridge in response time becomes more and more obvious. + +The third phase is to achieve non-uniform memory access \(NUMA\) for CPU cores. To resolve the bottleneck of the memory controller that reads memory in the northbridge, the memory may be evenly allocated to each die. However, this causes asymmetric delays when different CPU cores access different memory. The reason is that although the memory is directly attached to the CPU, the response time is short when the CPU accesses the local address corresponding to the attached memory, while to access the memory data attached to other CPUs, which is called remote access, you need to access the memory data through the inter-connect channel, and the response time is relatively long. This is the origin of NUMA. In the NUMA architecture, the physical distance between the processor and the memory block of a NUMA node is called NUMA distance. You can use the numactl tool to query the CPU access distance. A Kunpeng server is used as an example, as shown in the following figure. + +![](../figures/zh-cn_image_0000001206801884.png) + +A NUMA-based CPU brings not only surging computing power to servers, but also great challenges to software development. From the perspective of the entire IT software stack, the first thing to support NUMA is the operating system. Currently, most enterprises use Linux. After NUMA appears, Linux also provides targeted optimization solutions to preferentially attempt to allocate space in the local memory of the CPU where the request thread is located. If the local memory is insufficient, useless pages in the local memory are eliminated first. However, NUMA provided by Linux is not suitable for databases because a database is a data-intensive and high-concurrency application and has many kernel data structures inside. These data structures are accessed by both the local CPU core and the remote CPU core. To improve data access performance, the database has its own shared data buffers, which are randomly accessed by service threads on each CPU core. From the perspective of the IT software stack, databases are the core of enterprise applications, and many applications have a database in the background. The database performance determines the overall throughput of many applications. As such, if the database performance cannot be maximized in NUMA and is not in a linear ratio to the number of cores, no enterprise is willing to pay for NUMA-based CPUs though they provide rich computing power. + +![](../figures/10.png) + +Nevertheless, NUMA is an inevitable trend in CPU development. If an enterprise-level database cannot adapt to hardware development, this database would be eliminated in enterprise database selection. + +openGauss is an open-source relational database management system. 
+It optimizes the concurrency control algorithm, kernel data structures, and data access according to the hardware development trend of NUMA-based CPUs, so as to release the multi-core computing power of processors, achieving 1.5 million tpmC on 2-socket 128-core Kunpeng servers. This document describes the NUMA-based multi-core optimization technology of openGauss on Kunpeng servers and provides a reference for other databases optimizing performance on Kunpeng servers. It is intended for database developers working on database performance optimization.
+
+## 1 Introduction to Multi-Core Optimization of openGauss on Kunpeng Servers
+
+![](../figures/zh-cn_image_0000001207121854.png)
+
+A database is a software system with high concurrency and severe data access conflicts. _Staring into the Abyss: An Evaluation of Concurrency Control with One Thousand Cores_, co-authored by Michael Stonebraker, the 2014 Turing Award winner in the database field, shows that the transaction processing mechanism of a traditional database cannot effectively use the processing capabilities of dozens to hundreds of cores. A more in-depth analysis of the database shows that the causes lie in both the concurrency control algorithm and the implementation mechanism. To implement concurrency, the database uses many locks internally, such as Clog, WALInsert, WALWrite, ProcArray, and XidGen in openGauss. These locks are performance bottlenecks, and their essence is to protect kernel data structures. Therefore, openGauss needs to adjust and optimize these data structures to cope with multi-core concurrency in the NUMA architecture on Kunpeng servers. The main goals are to implement nearby CPU access, eliminate single-point bottlenecks, and evenly allocate and access shared data.
+
+- 1.1 Binding Threads to Cores to Prevent Thread Migration Between Cores
+
+  ![](../figures/101.png)
+
+  To implement nearby access to a CPU core, a thread needs to be fixed to a specific core first. The GUC parameter **numa\_distribute\_mode** in openGauss is used to control CPU core affinity. By setting this parameter, the service processing threads can be bound to specific NUMA nodes. openGauss adopts a client-server structure, and the client and server interact frequently through the network. To prevent network interrupts and service processing from interfering with each other, core binding is also required for network interrupts, and the cores bound to network interrupts must be separated from those bound to background service threads.
+
+- 1.2 Reconstructing NUMA-based Data to Reduce Cross-Core Access
+
+  ![](../figures/102.png)
+
+  WALInsertLock protects concurrent WAL insert operations. Multiple WALInsertLocks can be configured, for example, 16. There are two types of access: \(1\) an Xlog insert, each of which requires one insert lock; \(2\) traversal of all WALInsertLocks, which is used to check whether unacknowledged information exists during Xlog flushing.
+
+  In the original implementation, all WALInsertLocks are in the same global array in shared memory. This results in fierce contention among WALInsertLocks, and remote memory access is highly likely, that is, cross-node and cross-package contention among multiple threads. Actually, WALInsertLock has multiple instances, and most operations require only one WALInsertLock at a time, so WALInsertLocks can be allocated by NUMA node.
+
+  In the optimized solution, the global WALInsertLock array is divided into multiple subarrays based on the number of NUMA nodes, and memory is allocated by NUMA node. Each transaction thread selects the WALInsertLock corresponding to the NUMA node to which the thread belongs. A WALInsertLock originally references an LWLock in shared memory; to minimize cross-node contention, the LWLock is directly embedded into the WALInsertLock. In this way, the LWLocks can be distributed to NUMA nodes, and cache line accesses are reduced.
+
+- 1.3 Partitioning Data to Reduce Thread Access Conflicts
+
+  ![](../figures/zh-cn_image_0000001207121858.png)
+
+  As an auxiliary of Xlog, Clog records the final state of transactions and is used to accelerate the process of determining transaction states based on logs.
+
+  There are four transaction states: **IN\_PROGRESS**, **COMMITTED**, **ABORTED**, and **SUB\_COMMITTED**. Each record occupies 2 bits. Clog needs to be stored on disks: a page \(8 KB\) can contain 2^15 records, each log file \(segment = 2048 x 8 KB\) contains 2^26 records, and the log ID has 32 bits, so 256 Clog files may exist. The Clog files are stored in the **PGDATA/pg\_clog** directory. To accelerate access to these disk files, the access to Clog is implemented through a buffer pool, and a unified SLRU buffer pool is used in the code.
+
+  Before optimization, the log buffer pool of Clog is stored in the same shared memory and is globally unique under the name **CLOG Ctl**. Each worker thread uses the thread-local variable ClogCtl to point to the resource. In high-concurrency scenarios, resource contention becomes a performance bottleneck. After optimization, logs are evenly distributed to the buffer pools of multiple shared-memory segments based on **PageNo** and are recorded in the thread-local object array ClogCtlData. The buffer pools are named **CLOG Ctl** _i_. Buffer pool objects and the corresponding global locks are added to the shared memory synchronously.
+
+  Similarly, other key internal shared data structures are also partitioned.
+
+  ![](../figures/zh-cn_image_0000001206961884.png)
+
+- 1.4 Adjusting Concurrency Control Algorithms to Reduce Single-Point Bottlenecks
+
+  ![](../figures/zh-cn_image_0000001251841849.png)
+
+  Before optimization, ProcArrayLock is required both for obtaining a transaction snapshot when a transaction starts and for clearing the snapshot when the transaction ends. As the number of concurrent connections increases, the snapshots obtained by the global transaction manager keep growing.
+
+  After optimization, snapshots are committed by transaction, and each non-read-only transaction is assigned a transaction ID \(XID\) during running. When a transaction is committed, the commit sequence number \(CSN\) is advanced, and the mapping between the current CSN and the XID of the transaction is saved. The red vertical line indicates the time when the snapshot is captured. If the CSN is not used, the snapshot set corresponding to the red vertical line is \{2,4,6\}. If the CSN is used, the CSN 3 is used. In other words, the modifications made by TX2, TX4, TX6, TX7, and TX8, with the CSNs 4, 5, 6, 7, and 8 respectively, are invisible to the snapshot.
+
+- 1.5 Using ARM Atomic Instructions to Reduce the Computing Overhead
+
+  ![](../figures/zh-cn_image_0000001206801888.png)
+
+  The atomic operation of a traditional compiler uses the load-linked/store-conditional \(LL/SC\) atomic instructions by default.
+  To obtain the write permission on a shared variable, each core must acquire ownership of the variable in an exclusive manner; that is, the modification can be performed only after the latest data has been loaded into the L1 cache of that core. With multiple CPUs, fierce contention makes system performance deteriorate.
+
+  In ARMv8.1, the large-system extensions \(LSE\), which provide atomic operations, are introduced to perform computing operations on the storage side, improving computing performance. Theoretically, in a multi-core system, the performance of LSE is better than that of LL/SC. Test results show that the performance of LSE 6.4.0 is three to five times that of LL/SC in high-concurrency scenarios.
+
+## 2 Multi-Core Optimization Result of openGauss on Kunpeng Servers
+
+![](../figures/zh-cn_image_0000001206801890.png)
+
+The running of a database system involves multiple resources, including the CPU, memory, network I/O, and disk I/O. The ultimate goal of performance optimization is for every resource to reach its bottleneck at the same time. In practice, however, environments consist of different hardware, so the optimization objectives may differ, while the basic objective of system optimization is to fully utilize the CPU capabilities. After the NUMA-oriented optimization, openGauss running on the Kunpeng 920 processor reaches 1,500,000 tpmC in the TPC-C test, with CPU efficiency close to 95%. The data shows that openGauss fully utilizes the multi-core computing capabilities of the CPUs.
+
diff --git a/content/en/post/2022/New-Feature-of-openGauss-3-0-0-Parallel-Decoding.md b/content/en/post/2022/New-Feature-of-openGauss-3-0-0-Parallel-Decoding.md
new file mode 100644
index 0000000000000000000000000000000000000000..f98804e114f33d459ee95293bc715bd5eb211cd8
--- /dev/null
+++ b/content/en/post/2022/New-Feature-of-openGauss-3-0-0-Parallel-Decoding.md
@@ -0,0 +1,147 @@
++++
+
+title = "New Feature of openGauss 3.0.0: Parallel Decoding"
+
+date = "2022-03-15"
+
+tags = [" Parallel Decoding"]
+
+archives = "2022-03"
+
+author = "Tianqing Wang"
+
+summary = "New Feature of openGauss 3.0.0: Parallel Decoding"
+
+img = "/en/post/2022/title/img16.png"
+
+times = "17:30"
+
++++
+
+# New Feature of openGauss 3.0.0: Parallel Decoding
+
+## Introduction
+
+With the rapid development of information technology, various types of databases emerge one after another, and logical replication, with which data can be synchronized between heterogeneous databases, is increasingly important. Currently, the average serial decoding performance of logical replication in openGauss is only 3 to 5 Mbit/s, which cannot meet the real-time synchronization requirements of heavy service pressure scenarios. As a result, logs pile up, affecting services in the production cluster. Therefore, the parallel decoding feature is designed to let multiple threads decode in parallel, improving the decoding performance; in basic scenarios, the decoding performance can reach 100 Mbit/s.
+
+## Design Idea: Why Consider Parallel Decoding?
+
+In the original serial decoding logic, a single thread reads logs, decodes them, and combines and sends the results. The following figure shows the main process and its time consumption.
+
+![](../figures/zh-cn_image_0000001279474617.png)
+
+As the figure shows, most of the time is consumed in the decoding step, which needs to be optimized through multi-thread decoding.
+The sending step consumes the second-largest share of time and needs to be optimized through batch sending.
+
+## Working Process: Parallel Decoding Message Sequence Diagram
+
+As shown in the following figure, in parallel decoding, worker threads on an openGauss DN are classified into three types:
+
+1. Sender/Collector, which receives decoding requests from a client, collects the results of each decoder, and sends the results to the client. Only one sender/collector is created for each decoding request.
+2. Reader/Dispatcher, which reads WALs and distributes them to the decoders. Only one reader/dispatcher is created for each decoding request.
+3. Decoder, which decodes the logs sent by the reader/dispatcher \(logs waiting to be decoded are temporarily stored in the read change queue\) and sends the decoding results to the sender/collector \(results that cannot be sent yet, because earlier committed logs have not been decoded, are temporarily stored in the decode change queue\). Multiple decoders can be created for a decoding request.
+
+![](../figures/zh-cn_image_0000001234914846.png)
+
+The message sequence is described as follows:
+
+1. A client sends a logical replication request to a primary or standby DN. In the logical replication options, you can set parameters to connect only to the standby node to prevent the primary node from being overloaded.
+
+2. In addition to the sender that receives requests from the client, the DN needs to create a reader/dispatcher and several decoders.
+
+3. The reader reads and preprocesses Xlogs. If the logs contain TOAST columns, the TOAST columns are combined.
+
+4. The dispatcher dispatches the preprocessed logs to the decoders.
+
+5. Each decoder performs decoding independently. You can set the decoding format \(.json, .txt, or .bin\) through configuration options.
+
+6. Each decoder sends its decoding results to the collector.
+
+7. The collector collects the decoding results by transaction.
+
+8. To reduce the number of sending operations and the impact of network I/O on decoding performance, when batch sending is enabled \(that is, **sending-batch** is set to **1**\), the sender accumulates a certain number of logs \(the threshold is set to 1 MB\) and returns the decoding results to the client in batches.
+
+9. To stop the logical replication process, disconnect the logical replication connection to the DN.
+
+10. The sender sends the exit signal to the reader/dispatcher and decoders.
+
+11. After receiving the exit signal, each thread releases its resources, cleans up the environment, and exits.
+
+## Technical Details 1: Visibility Reconstruction
+
+Logical decoding parses historical logs, so it is important to determine the visibility of tuples in those logs. The original serial decoding logic uses the active transaction linked list mechanism to determine visibility. For parallel decoding, however, it would be costly for each decoder to maintain an active transaction linked list, which would hurt decoding performance. Therefore, visibility is reconstructed, and the commit sequence number \(CSN\) is used to determine tuple visibility. For each XID, the visibility process is as follows:
+
+![](../figures/zh-cn_image_0000001279274373.png)
+
+The main process is as follows:
+
+1. Obtain the CSN used to determine visibility based on the XID. A CSN can be obtained for any XID; if the XID is abnormal, a CSN indicating a specific status is returned.
+Such a CSN can also be used to determine visibility.
+
+2. If the CSN has been committed, it is compared with the CSN in the snapshot. If the CSN of the transaction is smaller, the transaction is visible; otherwise, it is invisible.
+3. If the CSN has not been committed, the transaction is invisible.
+
+Based on the foregoing logic, in parallel decoding, the logic for determining tuple snapshot visibility sequentially determines the snapshot visibility of the tuple's **Xmin** \(XID during insertion\) and **Xmax** \(XID during deletion/update\). The overall idea is that if Xmin is invisible/uncommitted or Xmax is visible, the tuple is invisible; if Xmin is visible and Xmax is invisible/uncommitted, the tuple is visible. Each flag bit in the tuple keeps its original meaning and participates in visibility determination.
+
+## Technical Details 2: Batch Sending
+
+With parallel decoding, the time occupied by the decoding process is significantly reduced, but the sender then becomes a bottleneck: the cost of performing a complete sending process for each individual decoding result is excessively high. Therefore, the batch sending mode is used: decoding results are collected temporarily and sent to the client once a threshold is exceeded. During batch sending, the length of each decoding result and the specified separator need to be recorded so that users of the parallel decoding function can split the logs that are sent in batches.
+
+## Usage Mode
+
+The following optional configuration items are added for parallel decoding:
+
+1. Decoder concurrency
+
+Configure **parallel-decode-num** to specify the number of decoders for parallel decoding. The value is an integer ranging from 1 to 20. The value **1** indicates that decoding is performed based on the original serial logic and the code logic of this feature is not used. The default value is **1**. When this item is set to **1**, the decoding format **decode-style** cannot be configured.
+
+2. Decoding whitelist
+
+Configure **white-table-list** to specify the tables to be decoded. The value is a character string of the text type that contains the table names in the whitelist, separated by commas \(,\). Example: **select \* from pg\_logical\_slot\_peek\_changes\('slot1', NULL, 4096, 'white-table-list', 'public.t1,public.t2'\);**
+
+3. Decoding only on the standby node
+
+Configure the **standby-connection** parameter to specify whether to perform decoding only on the standby node. The value is of the Boolean type. If the value is **true**, only the standby node can be connected for decoding; when the primary node is connected for decoding, an error is reported and the decoding exits. If the value is **false**, there is no restriction. The default value is **false**.
+
+4. Decoding format
+
+Configure **decode-style** to specify the decoding format. The value can be **'j'**, **'t'**, or **'b'** of the char type, indicating the JSON, text, or binary format, respectively. The default value is **'b'**, indicating binary decoding.
+
+5. Batch sending
+
+Configure the **sending-batch** parameter to determine whether to send decoding results in batches. The value is **0** or **1**. The default value **0** indicates that batch sending is disabled. The value **1** indicates that batch sending is enabled when the accumulated size of decoding results reaches or just exceeds 1 MB.
+
+The following uses JDBC as an example to describe how to perform parallel decoding.
+Perform the following configurations when establishing a connection:
+
+```
+PGReplicationStream stream = conn
+        .getReplicationAPI()
+        .replicationStream()
+        .logical()
+        .withSlotName(replSlotName)
+        .withSlotOption("include-xids", true)
+        .withSlotOption("skip-empty-xacts", true)
+        .withSlotOption("parallel-decode-num", 10)
+        .withSlotOption("white-table-list", "public.t1,public.t2")
+        .withSlotOption("standby-connection", true)
+        .withSlotOption("decode-style", "t")
+        .withSlotOption("sending-batch", 1)
+        .start();
+```
+
+The added logic consists of the five **withSlotOption** lines from **parallel-decode-num** to **sending-batch**: 10 decoders work concurrently, only the **public.t1** and **public.t2** tables are decoded, the standby-node connection is enabled, the decoding format is text, and batch sending is enabled. If a parameter value is out of range, an error is reported and the allowed value range is displayed.
+
+## Auxiliary Functions: Monitoring Function
+
+The **gs\_get\_parallel\_decode\_status\(\)** function is added to help locate the decoding performance bottleneck when the decoding speed is low. For each decoder on the current DN, it checks the length of the read change queue, which stores logs that have not been decoded, and the length of the decode change queue, which stores decoding results that have not been sent.
+
+This function has no input parameter. The return result contains four columns: **slot\_name**, **parallel\_decode\_num**, **read\_change\_queue\_length**, and **decode\_change\_queue\_length**.
+
+**slot\_name** indicates the replication slot name and its type is text. **parallel\_decode\_num** indicates the number of parallel decoding threads and its type is integer. **read\_change\_queue\_length** records the read change queue length of each decoder and its type is text. **decode\_change\_queue\_length** records the decode change queue length of each decoder and its type is text. The usage is as follows:
+
+![](../figures/zh-cn_image_0000001235074794.png)
+
+If decoding stalls, execute the function on the decoding DN. Check the value of **read\_change\_queue\_length** in the query result, which records the length of the log reading queue of each decoder. If the value is too small, log reading is blocked; in this case, check whether the disk I/O is insufficient. Then check the value of **decode\_change\_queue\_length**, which indicates the length of the decoding queue of each decoder. If the value is too small, decoding is too slow, and you can increase the number of decoders. If both **read\_change\_queue\_length** and **decode\_change\_queue\_length** are large, the sending of decoded logs is blocked; in this case, check the log replay speed of the parallel decoding user in the target database. Generally, a decoding stall is caused by insufficient CPU, I/O, or memory resources, and it can be avoided by decoding on the standby node to ensure sufficient resources.
+
+## Conclusion
+
+Parallel decoding can greatly improve the decoding performance of logical replication, a benefit that justifies the extra service pressure it puts on decoding instances. As a key technology for data replication between heterogeneous databases, parallel decoding plays an important role in openGauss.
+
diff --git a/content/en/post/2022/Permission-Management-Model-of-the-openGauss-Database-(Continued).md b/content/en/post/2022/Permission-Management-Model-of-the-openGauss-Database-(Continued).md
new file mode 100644
index 0000000000000000000000000000000000000000..cc80a580d8876c5c76998da66dcc7bb2cb833c63
--- /dev/null
+++ b/content/en/post/2022/Permission-Management-Model-of-the-openGauss-Database-(Continued).md
@@ -0,0 +1,84 @@
++++
+
+title = "Permission Management Model of the openGauss Database Continued"
+
+date = "2021-08-02"
+
+tags = [ "Permission Management Model of the openGauss Database"]
+
+archives = "2021-08"
+
+author = "Rongrong Song"
+
+summary = "Permission Management Model of the openGauss Database (Continued)"
+
+img = "/en/post/2022/title/img3.png"
+
+times = "12:30"
+
++++
+
+# Permission Management Model of the openGauss Database \(Continued\)
+
+The openGauss database uses a role-based access control model. In addition to the classification of system permissions and object permissions described in the _Permission Management Model of the openGauss Database_, it offers some advanced permission management mechanisms to meet customers' service requirements.
+
+## **1. Separation of Duties**
+
+Separation of duties is a supplement to the system permission management mechanism. The core idea is to separate the permissions for managing database objects, users, and audit logs, so as to prevent the high risks caused by excessively centralized administrator rights. You can set the GUC parameter **enableSeparationOfDuty** to **on** to enable this function.
+
+After separation of duty is enabled, the permission scope of **SYSADMIN** is narrowed down: **SYSADMIN** no longer has the permissions to create users or roles, or to view or delete database audit logs. The **SYSADMIN**, **CREATEROLE**, and **AUDITADMIN** permissions are isolated from one another and do not affect one another, and a user can be assigned only one of these attributes.
+
+After separation of duty is enabled, the permissions are divided as follows:

+| System Permission | Permission Description |
+| --- | --- |
+| SYSADMIN | Allows users to create databases and tablespaces. |
+| CREATEROLE | Allows users to create users and roles. |
+| AUDITADMIN | Allows users to view and delete audit logs. |
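+
+As an illustration, the three duties can be held by three different users. The following is a minimal sketch, assuming separation of duty is already enabled and the statements are executed by the initial installation user; the user names and passwords are hypothetical placeholders:
+
+```
+-- Hypothetical example: one administrator per duty.
+CREATE USER sys_mgr WITH SYSADMIN PASSWORD 'Sys@1234';       -- databases and tablespaces
+CREATE USER role_mgr WITH CREATEROLE PASSWORD 'Role@1234';   -- users and roles
+CREATE USER audit_mgr WITH AUDITADMIN PASSWORD 'Audit@1234'; -- audit logs
+```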
+
+## **2. Column-Level Access Control**
+
+In some service scenarios, certain columns in a data table store important information and must be invisible to users, while the data in other columns still needs to be viewed or operated by those users. In this case, access control needs to be applied to specific columns of the table, implementing column-level access control.
+
+openGauss provides the GRANT and REVOKE statements to grant and revoke permissions on column objects.
+
+```
+-- Example 1: Grant the SELECT permission on the first column fir and the UPDATE
+-- permission on the second column sec of the tbl table to user1.
+openGauss=# GRANT select(fir),update(sec) ON TABLE tbl TO user1;
+GRANT
+-- After the grant, user1 can perform SELECT on the first column of tbl and UPDATE on the second column.
+
+-- Example 2: Revoke the SELECT permission on the first column fir of the tbl table from user1.
+openGauss=# REVOKE select(fir) ON tbl FROM user1;
+REVOKE
+-- After the revocation, user1 no longer has the permission to view data in the first column fir of tbl.
+```
+
+## **3. Row-Level Access Control**
+
+In actual services, users may be allowed to view only the rows of a data table that meet specific conditions. In this case, row-level access control is required so that different users read different results when performing the same SQL query, update, or delete operation.
+
+You can create a row-level security policy for a data table. The policy defines an expression that takes effect only for specific database users and SQL operations. When a database user accesses the table, rows that satisfy the policy conditions are visible to the user, and rows that do not are invisible. In this way, row-level access control is implemented.
+
+![](../figures/zh-cn_image_0000001251894929.jpg)
+
+openGauss provides the CREATE, ALTER, and DROP ROW LEVEL SECURITY statements to create, modify, and delete row-level access control policies.
+
diff --git a/content/en/post/2022/Permission-Management-Model-of-the-openGauss-Database.md b/content/en/post/2022/Permission-Management-Model-of-the-openGauss-Database.md
new file mode 100644
index 0000000000000000000000000000000000000000..99b6fedde9a7e564a93e5cd644027ebfd7874b35
--- /dev/null
+++ b/content/en/post/2022/Permission-Management-Model-of-the-openGauss-Database.md
@@ -0,0 +1,328 @@
++++
+
+title = "Permission Management Model of the openGauss Database"
+
+date = "2021-07-26"
+
+tags = [ "Permission Management Model of the openGauss Database"]
+
+archives = "2021-07"
+
+author = "Rongrong Song"
+
+summary = "Permission Management Model of the openGauss Database"
+
+img = "/en/post/2022/title/img3.png"
+
+times = "12:30"
+
++++
+
+# Permission Management Model of the openGauss Database
+
+The database stores a large amount of important data and sensitive information and provides data sharing services for authorized users with different permissions. The database must therefore have a complete security defense mechanism against internal and external malicious attacks, ensuring that data is not lost, privacy is not disclosed, and data is not tampered with. Currently, the openGauss database has built an in-depth defense security system to enhance database security in applications, and a complete permission management mechanism can effectively block unauthorized operations by malicious users.
+This document focuses on the permission management mechanism of the openGauss database.
+
+## 1 Common Permission Management Models
+
+There are three common permission management models: the policy-based access control model, the role-based access control model, and the session- and role-based access control model. The openGauss database inherits the permission management mechanism of PostgreSQL and adopts the role-based access control model, using roles to organize and manage permissions, which greatly simplifies authorization management. With the role mechanism, to grant permissions to a group of users who need the same permissions, you only need to grant the permissions to a role and then grant the role to that group of users, instead of granting permissions to the users one by one. In addition, the separation of roles and permissions can be used to assign different permissions to different users and achieve mutual restriction and balance.
+
+With the development of databases and the expansion of service scenarios, higher requirements are placed on database permission separation and fine-grained permission management. The native permission division of PostgreSQL cannot meet diversified service security requirements; therefore, the openGauss database divides permissions at a finer granularity on top of these permission models so that users can flexibly assign and manage permissions based on actual services.
+
+## 2 openGauss Database Permission Levels
+
+In the logical structure of the object layout in the openGauss database system, multiple databases can be created under each instance, multiple schemas can be created under each database, and multiple objects, such as tables, functions, views, and indexes, can be created under each schema; each table can be further divided by rows and columns, forming the following logical levels:
+
+![](../figures/3.png)
+
+The permission system of the openGauss database is constructed based on the preceding logical layout, as shown in the following figure. Each layer has its own permission control.
+
+![](../figures/31.png)
+
+For example, if a user wants to view data in a row of a data table, the user must have the LOGIN permission for logging in to the database, the CONNECT permission for connecting to the database where the table is stored, the USAGE permission for using the schema of the table, and the SELECT permission for viewing the table; in addition, the row-level security requirements for that row of data must be met. For details about permission concepts and classification, see the next section.
+
+## 3 Classification of openGauss Database Permissions
+
+In the openGauss database, users and roles are basically the same concept. The only difference is that a role does not have the LOGIN permission by default when being created, and a schema with the same name as the role is not automatically created. That is, a role with the LOGIN permission can be considered a user. In the following sections, users are used to connect to and access the database and execute SQL statements, and roles are used to organize and manage permissions. Different permissions are packaged into a role and assigned to a user so that the user obtains all permissions of the role; moreover, after the permissions of a role are changed, the permissions of all members of the role change automatically.
+
+In the openGauss database system, permissions are classified into system permissions and object permissions.
+
+- System permissions refer to a user's permissions to use the database, such as logging in to the database, creating a database, creating a user or role, and creating a security policy.
+- Object permissions refer to the permissions to perform specific operations on database objects, such as databases, schemas, tables, views, and functions. Different objects are associated with different permissions, such as the permission to connect to a database, the permissions to view, update, and insert data into tables, and the permission to execute functions. Object permissions are only meaningful with respect to a specific object.
+
+**3.1 System Permissions**
+
+System permissions are also called user attributes. Users with a specific attribute obtain the permissions corresponding to that attribute. System permissions cannot be inherited through roles. When creating a user or role, you can run the **CREATE ROLE/USER** SQL statement to specify attributes for the user or role, or run the **ALTER ROLE/USER** statement to add or cancel user attributes.
+
+The openGauss database supports granting and revoking the following system permissions:

+| System Permission | Permission Description |
+| --- | --- |
+| SYSADMIN | Allows users to create databases and tablespaces.<br>Allows users to create users and roles.<br>Allows users to view and delete audit logs.<br>Allows users to view data of other users. |
+| MONADMIN | Allows users to view and manage permissions for the dbe_perf schema and the monitoring views or functions in this schema. |
+| OPRADMIN | Allows users to use Roach to back up and restore databases. |
+| POLADMIN | Allows users to create resource tags, dynamic data masking policies, and unified audit policies. |
+| AUDITADMIN | Allows users to view and delete audit logs. |
+| CREATEDB | Allows users to create databases. |
+| USEFT | Allows users to create foreign tables. |
+| CREATEROLE | Allows users to create users and roles. |
+| INHERIT | Allows a user to inherit the permissions of the roles of the group to which the user belongs. |
+| LOGIN | Allows users to log in to the database. |
+| REPLICATION | Allows users to perform streaming replication operations. |
+| VCADMIN | Allows users to create resource pools in associated logical clusters and manage permissions on the associated logical clusters. |
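+
+openGauss provides the CREATE ROLE/USER and ALTER ROLE/USER statements to grant and revoke system permissions. The following is a minimal sketch; the user name and password are hypothetical placeholders, and the sketch assumes the PostgreSQL-style NO* forms that cancel an attribute:
+
+```
+-- Grant a system permission at creation time (hypothetical user and password).
+CREATE USER u1 WITH CREATEDB PASSWORD 'Test@1234';
+-- Add another system permission to the existing user.
+ALTER USER u1 WITH CREATEROLE;
+-- Cancel the attribute again with the corresponding NO* form.
+ALTER USER u1 WITH NOCREATEROLE;
+```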
+
+**3.2 Object Permissions**
+
+By default, an object owner has all operation permissions on the object, such as modifying, deleting, and viewing the object, granting operation permissions on the object to other users, and revoking granted permissions. The ALTER, DROP, COMMENT, INDEX, VACUUM, and regrant permissions on an object are inherent permissions of the owner and are held implicitly. Object owners can also remove their own common permissions, for example, making a table read-only to themselves or to others.
+
+Object permissions can be inherited through roles, so users can package these individual permissions into a role for permission management. The openGauss database supports the following object permissions for each type of database object:

+| Object | Permission | Description |
+| --- | --- | --- |
+| TABLESPACE | CREATE | Allows users to create tables in specified tablespaces. |
+| | ALTER | Allows users to run the ALTER statement to modify the attributes of a specified tablespace. |
+| | DROP | Allows users to delete specified tablespaces. |
+| | COMMENT | Allows users to define or modify comments for a specified tablespace. |
+| DATABASE | CONNECT | Allows users to connect to a specified database. |
+| | TEMP | Allows users to create temporary tables in a specified database. |
+| | CREATE | Allows users to create schemas in a specified database. |
+| | ALTER | Allows users to run the ALTER statement to modify the attributes of a specified database. |
+| | DROP | Allows users to delete a specified database. |
+| | COMMENT | Allows users to define or modify comments for a specified database. |
+| SCHEMA | CREATE | Allows users to create new objects in a specified schema. |
+| | USAGE | Allows users to access objects contained in a specified schema. |
+| | ALTER | Allows users to run the ALTER statement to modify the attributes of a specified schema. |
+| | DROP | Allows users to delete a specified schema. |
+| | COMMENT | Allows users to define or modify comments for a specified schema. |
+| FUNCTION | EXECUTE | Allows users to use a specified function. |
+| | ALTER | Allows users to run the ALTER statement to modify the attributes of a specified function. |
+| | DROP | Allows users to delete a specified function. |
+| | COMMENT | Allows users to define or modify comments for a specified function. |
+| TABLE | INSERT | Allows users to run the INSERT statement to insert data into a specified table. |
+| | DELETE | Allows users to run the DELETE statement to delete data from a specified table. |
+| | UPDATE | Allows users to run the UPDATE statement on a specified table. |
+| | SELECT | Allows users to run the SELECT statement on a specified table. |
+| | TRUNCATE | Allows users to run the TRUNCATE statement on a specified table. |
+| | REFERENCES | Allows users to create a foreign key constraint on a specified table. |
+| | TRIGGER | Allows users to create a trigger on a specified table. |
+| | ALTER | Allows users to run the ALTER statement to modify the attributes of a specified table. |
+| | DROP | Allows users to delete a specified table. |
+| | COMMENT | Allows users to define or modify comments for a specified table. |
+| | INDEX | Allows users to create indexes on a specified table and manage the indexes on the specified table. |
+| | VACUUM | Allows users to perform ANALYZE and VACUUM operations on a specified table. |
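+
+openGauss provides the GRANT and REVOKE statements to grant and revoke object permissions. A minimal sketch follows; the table, schema, and user names are hypothetical placeholders:
+
+```
+-- Grant table-level permissions to user1 (hypothetical objects).
+GRANT SELECT, UPDATE ON TABLE tbl TO user1;
+-- Allow user1 to access objects in schema sch.
+GRANT USAGE ON SCHEMA sch TO user1;
+-- Revoke the UPDATE permission on the table.
+REVOKE UPDATE ON TABLE tbl FROM user1;
+```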
+
+**3.3 User Permission Set**
+
+Under the permission management mechanism of the openGauss database, a user holds the union of the following types of permissions:
+
+![](../figures/32.png)
+
+In actual service applications, you are advised to configure accounts based on the least privilege principle, assigning users the minimum permissions that still meet service requirements.
+
+## 4 openGauss Database Permission Evolution
+
+The openGauss database provides a series of system permissions and object permissions, and you can combine permissions into roles based on actual services. Based on user feedback from various application scenarios, the openGauss database will also provide a series of built-in roles in the future, packaging frequently used permission combinations so that users can manage permissions with the built-in roles directly.
+
diff --git a/content/en/post/2022/SQL-Engine-Source-Parsing.md b/content/en/post/2022/SQL-Engine-Source-Parsing.md
new file mode 100644
index 0000000000000000000000000000000000000000..02fbbc49ef65755e431e998b71fdb4b1a08bb09d
--- /dev/null
+++ b/content/en/post/2022/SQL-Engine-Source-Parsing.md
@@ -0,0 +1,2121 @@
++++
+
+title = "SQL Engine Source Parsing"
+
+date = "2021-08-27"
+
+tags = [ "SQL Engine Source Parsing"]
+
+archives = "2021-08"
+
+author = "Shujie Zhang"
+
+summary = "SQL Engine Source Parsing"
+
+img = "/en/post/2022/title/img4.png"
+
+times = "12:30"
+
++++
+
+# SQL Engine Source Parsing
+
+## Query Optimization
+
+The query optimization process of the openGauss database has a clear structure. From the perspective of source code organization, the related code is distributed in different directories, as shown in Table 1.
+
+Table 1 Description of the query optimization module

+| Module | Directory | Description |
+| --- | --- | --- |
+| Query rewriting | src/gausskernel/optimizer/prep | Includes subquery optimization, predicate simplification and regularization, predicate transitive closure, and other query rewriting optimization technologies. |
+| Statistics | src/gausskernel/optimizer/commands/analyze.cpp | Generates various types of statistics, which can be used for selectivity estimation, row count estimation, and cost estimation. |
+| Cost estimation | src/common/backend/utils/adt/selfuncs.cpp<br>src/gausskernel/optimizer/path/costsize.cpp | Includes selectivity estimation, row count estimation, and cost estimation. |
+| Physical path | src/gausskernel/optimizer/path | Generates physical paths. |
+| Dynamic programming | src/gausskernel/optimizer/plan | Searches for physical paths using the dynamic programming method. |
+| Genetic algorithm | src/gausskernel/optimizer/geqo | Searches for physical paths using the genetic algorithm. |
+## Query Rewriting
+
+Because the SQL language is diversified and flexible, the SQL statements written by different developers vary with their experience, and SQL statements can also be generated automatically by tools. SQL is a descriptive language: a database user only describes the desired result and does not care how the data is obtained. The SQL statements entered into the database are usually not in their optimal form and may include redundant information. Mining this information could improve execution efficiency. Query rewriting converts the SQL statements entered by users into more efficient, equivalent SQL statements. It has two basic principles.
+
+- \(1\) Equivalence: The output of the original statement is the same as that of the rewritten statement.
+- \(2\) Efficiency: The rewritten statement is more efficient than the original statement in execution time and resource usage.
+
+Query rewriting is mainly equivalent transformation based on relational algebra. This transformation usually follows the laws of commutativity, associativity, distributivity, and decomposition, as shown in Table 2.
+
+Table 2 Equivalent transformation based on relational algebra

+| Equivalent Transformation | Content |
+| --- | --- |
+| Law of commutativity | A x B == B x A |
+| | A ⨝ B == B ⨝ A |
+| | A ⨝F B == B ⨝F A, where F is the join condition |
+| | Π p(σF (B)) == σF (Π p(B)), where F∈p |
+| Law of associativity | (A x B) x C == A x (B x C) |
+| | (A ⨝ B) ⨝ C == A ⨝ (B ⨝ C) |
+| | (A ⨝F1 B) ⨝F2 C == A ⨝F1 (B ⨝F2 C), where F1 and F2 are join conditions |
+| Law of distributivity | σF(A x B) == σF(A) x B, where F ∈ A |
+| | σF(A x B) == σF1(A) x σF2(B), where F = F1 ∪ F2, F1∈A, F2∈B |
+| | σF(A x B) == σFX (σF1(A) x σF2(B)), where F = F1 ∪ F2 ∪ FX, F1∈A, F2∈B |
+| | Π p,q(A x B) == Π p(A) x Π q(B), where p∈A, q∈B |
+| Law of decomposition | Π P=p1,p2,…pn(Π Q=q1,q2,…qn(A)) == Π P=p1,p2,…pn(A), where P ⊆ Q |
+| | σF1(σF2(A)) == σF1∧F2(A) |
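+
+To see what one of these identities means at the SQL level, consider the law of commutativity for joins: the two statements below are equivalent and must return the same result set, so the optimizer is free to pick either join order \(the table and column names here are illustrative\):
+
+```
+-- Equivalent under the law of commutativity (A ⨝ B == B ⨝ A).
+SELECT * FROM a JOIN b ON a.id = b.id;
+SELECT * FROM b JOIN a ON b.id = a.id;
+```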
+
+Query rewriting can achieve optimization based on relational algebra theories, such as predicate pushdown and subquery optimization, or based on heuristic rules, such as outer join elimination and table join elimination. There are also optimizations tied to a specific optimization rule and the actual execution process; for example, based on parallel scanning, an aggregation operator can be executed in phases, dividing aggregation into different phases to improve execution efficiency.
+
+From another perspective, query rewriting is equivalent transformation based on optimization rules and belongs to logical optimization, which can also be called rule-based optimization. How do we measure the performance improvement of an SQL statement after query rewriting? It is very important to evaluate query rewriting based on costs. Therefore, query rewriting can be based not only on experience, but also on costs.
+
+Take predicate transitive closure and predicate pushdown as examples. Predicate pushdown can greatly reduce the calculation workload of upper-layer operators. If the predicate condition contains equality operations, equivalence inference can be applied to those operations to obtain a new selection condition.
+
+For example, if tables t1 and t2 each contain 100 rows of data \[1, 2, 3, … 100\], the query statement **SELECT t1.c1, t2.c1 FROM t1 JOIN t2 ON t1.c1=t2.c1 WHERE t1.c1=1** may be optimized by selection pushdown and equivalence inference, as shown in Figure 1.
+
+![](../figures/62.png)
+
+Figure 1 Comparison before and after query rewriting
+
+As shown in Figure 1 \(1\), the 100 rows of tables t1 and t2 are scanned and joined to generate the intermediate result, and then the selection operation is performed; the final result contains only one row of data. With equivalence inference, the values in \{t1.c1, t2.c1, 1\} are found to be equivalent to each other, so a new selection condition, t2.c1 = 1, is deduced and pushed down to t2. In this way, the rewritten logical plan in Figure 1 \(4\) is obtained. The rewritten logical plan only needs to read one row from each base table: during the join, there is only one row in each of the inner and outer tables, and the filter on the final result is no longer required, which greatly improves performance.
+
+At the code level, the architecture of query rewriting is roughly shown in Figure 2.
+
+![](../figures/41.png)
+
+Figure 2 Architecture of query rewriting
+
+- \(1\) Pulling up a subquery: When a subquery appears in RangeTableEntry, it is stored as a subquery tree. If the subquery is not pulled up, a separate subquery execution plan is formed after query optimization, and the upper-layer execution plan and the subquery plan run nested loops to obtain the final result; in this process, the query optimization module has few optimization choices for the subquery. If the subquery is pulled up, it is joined with the tables at the upper layer.
+
+- \(2\) Constant replacement: Because referencing a constant is faster, a variable can be replaced by its calculated constant. The implementation function is preprocess\_const\_params.
+
+- \(3\) Replacing common table expressions \(CTEs\) with subqueries: Theoretically, CTEs have the same performance as subqueries. However, subqueries can be further pulled up, rewritten, and optimized.
+Therefore, subqueries are used to replace CTEs. The implementation function is substitute\_ctes\_with\_subqueries.
+
+- \(4\) Replacing multi count \(distinct\) with multiple subqueries: If this type of query occurs, multiple count \(distinct\) expressions are replaced with multiple subqueries, each of which contains one count \(distinct\) expression. The implementation function is convert\_multi\_count\_distinct.
+
+- \(5\) Pulling up sublinks: Sublinks appear in constraints such as WHERE and ON, and are usually used together with predicates such as ANY, ALL, IN, EXISTS, and SOME. Although sublinks are clear at the logical level of the statement, their efficiency varies. For example, the execution result of a correlated sublink depends on the parent query: each tuple of the parent query triggers a re-evaluation of the sublink. In this case, pulling up the sublink improves efficiency. In this part, ANY and EXISTS sublinks are pulled up to SemiJoin or Anti-SemiJoin. The implementation function is pull\_up\_sublinks.
+
+- \(6\) Reducing ORDER BY: In the parent query, database records may need to be reordered, so reducing the number of ORDER BY statements in the subquery can improve efficiency. The implementation function is reduce\_orderby.
+
+- \(7\) Deleting NotNullTest: Deleting unnecessary non-null tests can improve efficiency. The implementation function is removeNotNullTest.
+
+- \(8\) Lazy Agg rewriting: Lazy aggregation is used to reduce the number of aggregation times. The implementation function is lazyagg\_main.
+
+- \(9\) Optimizing the join operation: A lot of work has been done to optimize the join operation to obtain a better execution plan. The implementation function is pull\_up\_subqueries.
+
+- \(10\) UNION ALL optimization: The UNION ALL operation at the top layer is processed to convert the UNION ALL set operation to the AppendRelInfo operation. The implementation function is flatten\_simple\_union\_all.
+
+- \(11\) Expanding an inherited table: If an inherited table is used during the execution of a query statement, the inherited table exists as a parent table and needs to be expanded into multiple inherited tables. The implementation function is expand\_inherited\_tables.
+
+- \(12\) Expression preprocessing: This module standardizes the expressions in the query tree, including replacing the alias Var generated by links, evaluating constant expressions, flattening constraints, and generating execution plans for sublinks. The implementation function is preprocess\_expression.
+
+- \(13\) Processing the HAVING clause: In the HAVING clause, some constraints can be converted into filter conditions \(corresponding to WHERE\). The constraints in the HAVING clause are split to improve efficiency.
+
+- \(14\) Outer join elimination: The purpose is to convert an outer join to an inner join to simplify the query optimization process. The implementation function is reduce\_outer\_join.
+
+- \(15\) Full join rewriting: Rewrites a full join to improve its functionality. For example, the statement **SELECT \* FROM t1 FULL JOIN t2 ON TRUE** can be converted to **SELECT \* FROM t1 LEFT JOIN t2 ON TRUE UNION ALL \(SELECT \* FROM t1 RIGHT ANTI FULL JOIN t2 ON TRUE\)**. The implementation function is reduce\_inequality\_fulljoins.
+
+  The following uses pulling up sublinks as an example to describe the most important subquery optimization in openGauss. A sublink is a special subquery.
+  It appears in constraints such as WHERE and ON, and is often accompanied by predicates such as ANY, EXISTS, ALL, IN, and SOME. The openGauss database defines different sublink types for different predicates. The code is as follows:
+
+  ```
+  typedef enum SubLinkType {
+      EXISTS_SUBLINK,
+      ALL_SUBLINK,
+      ANY_SUBLINK,
+      ROWCOMPARE_SUBLINK,
+      EXPR_SUBLINK,
+      ARRAY_SUBLINK,
+      CTE_SUBLINK
+  } SubLinkType;
+  ```
+
+  The openGauss database defines an independent structure, SubLink, for sublinks, which describes the sublink type and operators. The code is as follows:
+
+  ```
+  typedef struct SubLink {
+      Expr xpr;
+      SubLinkType subLinkType;
+      Node* testexpr;
+      List* operName;
+      Node* subselect;
+      int location;
+  } SubLink;
+  ```
+
+  Figure 3 shows the interface functions related to pulling up sublinks.
+
+  ![](../figures/42.png)
+
+  Figure 3 Interface functions related to sublinks
+
+  The main process of pulling up sublinks is implemented in the pull\_up\_sublinks function, which invokes pull\_up\_sublinks\_jointree\_recurse to recursively process the nodes in Query-\>jointree. Table 3 lists the input parameters of the function.
+
+  Table 3 Input parameters of the function

+| Parameter | Type | Description |
+| --- | --- | --- |
+| root | PlannerInfo* | Input parameter, which is used to query the context information about the optimization module. |
+| jnode | Node* | Input parameter, which indicates the node to be recursively processed. The value can be RangeTblRef, FromExpr, or JoinExpr. |
+| relids | Relids* | Output parameter, which is a set of tables involved in the jnode parameter. |
+| Return value | Node* | Node after the sublink is pulled up. |
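+
+Before looking at the jnode types, it may help to see the kind of statement this logic targets. The sketch below \(with illustrative table names\) shows a correlated EXISTS sublink in a WHERE constraint, which pull\_up\_sublinks can convert into a semi-join with the parent query instead of re-evaluating the subquery for every parent tuple:
+
+```
+-- A correlated EXISTS sublink; a candidate for pull-up into a semi-join.
+SELECT * FROM t1
+WHERE EXISTS (SELECT 1 FROM t2 WHERE t2.c1 = t1.c1);
+```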
+
+  There are three types of jnodes: RangeTblRef, FromExpr, and JoinExpr, which are processed separately by the pull\_up\_sublinks\_jointree\_recurse function.
+
+- RangeTblRef
+
+  RangeTblRef is a leaf node of Query-\>jointree and is the condition for ending the function recursion. When the program reaches this branch, there are two cases:
+
+  - \(1\) If the current statement is a single-table query without joins, the recursion ends, and the program checks whether the sublink meets the other pull-up conditions.
+  - \(2\) If the query statement has joins, during the recursion of From-\>fromlist, JoinExpr-\>larg, or JoinExpr-\>rarg, when a RangeTblRef leaf node is traversed, the relids \(the set of tables\) of the RangeTblRef node is returned to the upper layer to determine whether the sublink can be pulled up.
+
+- FromExpr
+
+  - \(1\) Traverse the nodes in From-\>fromlist recursively, invoking the pull\_up\_sublinks\_jointree\_recurse function for each node until reaching a RangeTblRef leaf node.
+  - \(2\) Invoke the pull\_up\_sublinks\_qual\_recurse function to process From-\>qual and handle any ANY\_SUBLINK or EXISTS\_SUBLINK that may occur.
+
+- JoinExpr
+
+  \(1\) Invoke the pull\_up\_sublinks\_jointree\_recurse function to recursively process JoinExpr-\>larg and JoinExpr-\>rarg until reaching a RangeTblRef leaf node. In addition, check whether the sublink can be pulled up based on the join type.
+
+  \(2\) Invoke the pull\_up\_sublinks\_qual\_recurse function to process JoinExpr-\>quals and handle any ANY\_SUBLINK or EXISTS\_SUBLINK that may occur. The **available\_rels1** parameter of the pull\_up\_sublinks\_qual\_recurse function varies with the join type.
+
+  In addition to ANY\_SUBLINK and EXISTS\_SUBLINK, the pull\_up\_sublinks\_qual\_recurse function also performs query rewriting for OR clauses and EXPR-type sublinks. The code logic of pulling up sublinks of the Expr type is as follows:
+
+  - \(1\) Use the safe\_convert\_EXPR function to check whether the sublink can be pulled up. The code is as follows:
+
+    ```
+    // Check whether the current SQL statement meets the conditions for pulling up the sublink.
+    if (subQuery->cteList ||
+        subQuery->hasWindowFuncs ||
+        subQuery->hasModifyingCTE ||
+        subQuery->havingQual ||
+        subQuery->groupingSets ||
+        subQuery->groupClause ||
+        subQuery->limitOffset ||
+        subQuery->rowMarks ||
+        subQuery->distinctClause ||
+        subQuery->windowClause) {
+        ereport(DEBUG2,
+            (errmodule(MOD_OPT_REWRITE),
+            (errmsg("[Expr sublink pull up failure reason]: Subquery includes cte, windowFun, havingQual, group, "
+            "limitoffset, distinct or rowMark."))));
+        return false;
+    }
+    ```
+
+  - \(2\) Use the push\_down\_qual function to extract related conditions from the sublink. The code is as follows:
+
+    ```
+    static Node* push_down_qual(PlannerInfo* root, Node* all_quals, List* pullUpEqualExpr)
+    {
+        if (all_quals == NULL) {
+            return NULL;
+        }
+
+        List* pullUpExprList = (List*)copyObject(pullUpEqualExpr);
+        Node* all_quals_list = (Node*)copyObject(all_quals);
+
+        set_varno_attno(root->parse, (Node*)pullUpExprList, true);
+        set_varno_attno(root->parse, (Node*)all_quals_list, false);
+
+        Relids varnos = pull_varnos((Node*)pullUpExprList, 1);
+        push_qual_context qual_list;
+        SubLink* any_sublink = NULL;
+        Node* push_quals = NULL;
+        int attnum = 0;
+
+        while ((attnum = bms_first_member(varnos)) >= 0) {
+            RangeTblEntry* r_table = (RangeTblEntry*)rt_fetch(attnum, root->parse->rtable);
+
+            // This table must be a base table; otherwise, it cannot be processed.
+            if (r_table->rtekind == RTE_RELATION) {
+                qual_list.varno = attnum;
+                qual_list.qual_list = NIL;
+
+                // Obtain the conditions that contain this varno.
+                get_varnode_qual(all_quals_list, &qual_list);
+
+                if (qual_list.qual_list != NIL && !contain_volatile_functions((Node*)qual_list.qual_list)) {
+                    any_sublink = build_any_sublink(root, qual_list.qual_list, attnum, pullUpExprList);
+                    push_quals = make_and_qual(push_quals, (Node*)any_sublink);
+                }
+
+                list_free_ext(qual_list.qual_list);
+            }
+        }
+
+        list_free_deep(pullUpExprList);
+        pfree_ext(all_quals_list);
+
+        return push_quals;
+    }
+    ```
+
+  - \(3\) Use the transform\_equal\_expr function to construct the subquery to be pulled up \(add a GROUP BY clause and delete related conditions\). The code is as follows:
+
+    ```
+    // Add GROUP BY and windowClause for SubQuery.
+    if (isLimit) {
+        append_target_and_windowClause(root, subQuery, (Node*)copyObject(node), false);
+    } else {
+        append_target_and_group(root, subQuery, (Node*)copyObject(node));
+    }
+    // Delete related conditions.
+    subQuery->jointree = (FromExpr*)replace_node_clause((Node*)subQuery->jointree,
+        (Node*)pullUpEqualExpr,
+        (Node*)constList,
+        RNC_RECURSE_AGGREF | RNC_COPY_NON_LEAF_NODES);
+    ```
+
+  - \(4\) Construct the conditions that need to be pulled up. The code is as follows:
+
+    ```
+    // Construct the conditions to be pulled up.
+    joinQual = make_and_qual((Node*)joinQual, (Node*)pullUpExpr);
+    ...
+    return joinQual;
+    ```
+
+  - \(5\) Generate a join expression. The code is as follows:
+
+    ```
+    // Generate a join expression.
+    if (IsA(*currJoinLink, JoinExpr)) {
+        ((JoinExpr*)*currJoinLink)->quals = replace_node_clause(((JoinExpr*)*currJoinLink)->quals,
+            tmpExprQual,
+            makeBoolConst(true, false),
+            RNC_RECURSE_AGGREF | RNC_COPY_NON_LEAF_NODES);
+
+    } else if (IsA(*currJoinLink, FromExpr)) {
+        ((FromExpr*)*currJoinLink)->quals = replace_node_clause(((FromExpr*)*currJoinLink)->quals,
+            tmpExprQual,
+            makeBoolConst(true, false),
+            RNC_RECURSE_AGGREF | RNC_COPY_NON_LEAF_NODES);
+    }
+
+    rtr = (RangeTblRef *) makeNode(RangeTblRef);
+    rtr->rtindex = list_length(root->parse->rtable);
+
+    // Construct the JoinExpr of the left join.
+    JoinExpr *result = NULL;
+    result = (JoinExpr *) makeNode(JoinExpr);
+    result->jointype = JOIN_LEFT;
+    result->quals = joinQual;
+    result->larg = *currJoinLink;
+    result->rarg = (Node *) rtr;
+
+    // Add the JoinExpr to the range table entries. In subsequent processing, the left outer join can be converted to an inner join.
+    rte = addRangeTableEntryForJoin(NULL,
+        NIL,
+        result->jointype,
+        NIL,
+        result->alias,
+        true);
+    root->parse->rtable = lappend(root->parse->rtable, rte);
+    ```
+
+## Statistics and Cost Estimation
+
+Under different data distributions, the execution efficiency of the same query plan may differ significantly. Therefore, the impact of data distribution on the plan should also be considered during plan selection. Unlike common logical optimization, physical optimization builds plan optimization on data and improves performance by minimizing the cost of data operations. In terms of function, the physical optimization of openGauss involves the following three key steps:
+
+\(1\) Data distribution generation: mines the data distribution from data tables and stores it.
+
+\(2\) Plan cost estimation: based on the data distribution, establishes a cost model to estimate the actual execution time of a plan.
\(3\) Optimal plan selection: based on the cost estimates, searches the candidate plans for the plan with the minimum cost.

This section first introduces the concepts related to data distribution and how the database stores it internally.

### 1. Data Distribution Storage

The distribution of a dataset _D_ consists of the frequencies of the distinct values in _D_. Assume that _D_ is the projection of the **Grade** column of Table 4. This column has three distinct values 1, 2, and 3; the resulting frequency distribution is shown in Table 5. The count of distinct **Grade** values is referred to as the number of distinct values \(NDV\).

Table 4 Grade attribute distribution
| Sno | Name | Gender | Grade |
| --- | --- | --- | --- |
| 001 | Xiao Zhang | Male | 1 |
| 002 | Xiao Li | Male | 2 |
| 003 | Xiao Wang | Male | 3 |
| 004 | Xiao Zhou | Female | 1 |
| 005 | Xiao Chen | Female | 1 |

Table 5 Grade frequency distribution
| Grade | 1 | 2 | 3 |
| --- | --- | --- | --- |
| Frequency | 3 | 1 | 1 |
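Reading Table 5 against Table 4 as a quick worked check of these definitions: the **Grade** column has NDV = 3, and the relative frequency of each value is its count divided by the total number of rows:

$$
f(1) = \frac{3}{5} = 0.6, \qquad f(2) = \frac{1}{5} = 0.2, \qquad f(3) = \frac{1}{5} = 0.2
$$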
_D_ may involve multiple attributes, and the distribution over several attributes together is referred to as their joint distribution. The value space of a joint distribution can be very large, so for performance reasons the database does not store the joint distribution of _D_; it stores the distribution of each attribute of _D_ separately. For example, the database stores the frequencies of \{Gender='Male'\} and \{Grade='1'\}, but not of \{Gender='Male', Grade='1'\}. This practice loses much of the distribution information of _D_. As described in the subsequent section on selectivity and data distribution, openGauss uses prediction techniques to infer the joint distribution when the system requires it, although in some cases the inferred result may differ significantly from the actual situation.

The data structure used for data distribution is critical to understanding how the database stores this information. Generally, a key-value \(KV\) pair is the most common structure for describing a distribution, where the key is a value and the value is its frequency. However, when the NDV is large, the growth of the key set degrades the storage and read performance of the KV structure. To improve efficiency, openGauss represents an attribute distribution as a "KV vector + histogram".

**Logical structure of data distribution**: The frequencies of high-frequency values are stored as KV pairs, in a structure referred to as the most common values \(MCV\); the frequencies of the remaining values are described by an equal-bin-count histogram \(EH\). In the implementation, openGauss puts the k \(k = 100\) keys with the highest frequencies into the MCV and the other keys into the EH.

Note that the EH merges the frequencies of multiple values, which significantly improves access efficiency but also blurs the distribution. However, as the following sections show, high-frequency values are more critical than low-frequency values when estimating plan costs. This hybrid strategy, which trades the accuracy of low-frequency values for high performance, is therefore a fairly cost-effective approach.
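As a minimal C sketch of this "KV vector + histogram" idea \(the struct and field names below are illustrative only, not openGauss's actual catalog layout\):

```
typedef struct ColumnDistribution {
    /* MCV part: the k highest-frequency keys and their relative frequencies. */
    int    mcv_count;        /* number of MCV entries actually stored (k <= 100) */
    long   mcv_value[100];   /* the high-frequency keys themselves */
    double mcv_freq[100];    /* relative frequency of each key */

    /* Histogram part: an equal-bin-count histogram over the remaining values.
     * bounds[i] .. bounds[i+1] delimit bucket i; every bucket covers roughly
     * the same number of rows, so only the bucket bounds need to be stored. */
    int    num_bounds;
    long   bounds[101];
} ColumnDistribution;
```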
**Storage location of data distribution**: In openGauss, information such as the MCV and EH is stored in the PG\_STATISTIC system catalog. Table 6 shows two example tuples.

Table 6 Definitions of the PG\_STATISTIC system catalog

| starelid | staattnum | stanullfrac | stakind1 | stanumbers1 | stavalues1 | stakind2 | ... |
| --- | --- | --- | --- | --- | --- | --- | --- |
| 0001 | 1 | 0 | 1 | {0.2851, 0.1345} | {1, 2} | 2 |   |
| 0001 | 2 | 0 | 1 | {0.1955, 0.1741} | {mathematics, language} | 2 |   |
Each tuple in Table 6 stores the statistics of one attribute. The meanings of the tuple attributes are as follows:

\(1\) The **starelid/staattnum** attributes indicate the table OID and the attribute ID.

\(2\) The **stanullfrac** attribute indicates the percentage of NULL values in the attribute. The value **0** indicates that the column has no NULL values.

\(3\) The attribute group **\{stakind1, stanumbers1, stavalues1\}** forms a slot in the PG\_STATISTIC system catalog and stores one of the data structure types in Table 7. There are five slots in the PG\_STATISTIC system catalog. Generally, the first slot stores the MCV information and the second slot stores the EH information. Take the MCV slot as an example: the **stakind1** attribute indicates that the slot type is MCV, where **1** is the enumerated value of STATISTIC\_KIND\_MCV; the **stanumbers1** and **stavalues1** attributes record the MCV content, with **stavalues1** recording the keys and **stanumbers1** recording the frequency of each key. In the preceding example, the frequency ratio of value **1** is 0.2851 and that of value **2** is 0.1345.

Table 7 Description of PG\_STATISTIC
| Type | Description |
| --- | --- |
| STATISTIC\_KIND\_MCV | Most common values (MCVs): the values that appear most frequently in a column, sorted by occurrence frequency, together with a corresponding frequency array. From this, you can know which values in a column are high-frequency values and what their frequencies are. |
| STATISTIC\_KIND\_HISTOGRAM | Histogram: openGauss uses an equal-bin-count histogram to describe the distribution of the data in a column. High-frequency values are not included in the histogram, which keeps the described distribution relatively flat. |
| STATISTIC\_KIND\_CORRELATION | Correlation coefficient: records the correlation between the unsorted (physical) data order and the sorted data order of the current column, which is used to estimate the cost of an index scan. For example, if the correlation between the two orders of a column is 0, that is, the data is uncorrelated, the cost of an index scan is higher. |
| STATISTIC\_KIND\_MCELEM | Most common elements (a form of MCV for the array type and other types): openGauss provides the ts\_typanalyze system function to generate statistics of this type. |
| STATISTIC\_KIND\_DECHIST | Array histogram, used to generate histograms for array types: openGauss provides the array\_typanalyze system function to generate statistics of this type. |
Note that the data distribution and the content of the PG\_STATISTIC system catalog are not generated automatically when a table is created; they are generated when the ANALYZE operation is performed on the table.

### 2. Data Distribution Extraction

The preceding section describes the logical structure and storage of data distribution in openGauss. How can the distribution information be obtained from the data? The following describes the distribution extraction process in openGauss. To better motivate the method, let's begin by analyzing the challenges.

The most direct way to obtain the distribution is to traverse all the data and generate the MCV and EH information directly by counting. In practice, however, the amount of data may be very large, and the I/O cost of a full traversal is usually unacceptable. For example, the bill data of a bank may involve hundreds of billions of records and TB-level storage. Besides the I/O cost, the memory consumed by counting may exceed its upper limit, which makes such an algorithm especially difficult to implement. A more realistic approach is therefore to reduce the scale of data analysis and estimate the overall distribution from a small sample, which makes the quality of the selected sample particularly important.

Currently, the sample generation process of openGauss is implemented in the acquire\_sample\_rows function, which uses a two-phase sampling algorithm to estimate the data distribution. In the first phase, the S algorithm is used to randomly sample physical pages, generating a sample S1. In the second phase, the Z \(Vitter\) algorithm performs reservoir sampling on the tuples contained in S1, finally generating a sample S2 of 30,000 tuples. The two-phase algorithm ensures that S2 is an unbiased sample of the original data, so the original data distribution can be inferred by analyzing S2 and recorded in the PG\_STATISTIC system catalog.

openGauss divides sample generation into two steps to improve sampling efficiency. The theoretical basis of the method is the following practical condition: the number _M_ of physical pages occupied by the data can be obtained accurately, but the number _n_ of tuples on each physical page is unknown. Because _M_ is known, the S algorithm can sample pages uniformly with probability 1/_M_ and generate a small sample S1 of the original data. Since a tuple belongs to any given physical page with equal probability, S1 is an unbiased sample. Meanwhile, the total number of tuples in S1 is far smaller than that of the original data, so the cost of a second round of sampling on S1 is greatly reduced. The main reason why the S algorithm is not reused in the second phase is that the total number _N_ of tuples in S1 is unknown \(because _n_ is unknown\), so the sampling probability \(1/_N_\) required by the S algorithm cannot be computed. The Z \(Vitter\) algorithm is a reservoir sampling algorithm, which guarantees uniform sampling when the total amount of data is unknown. The principle of reservoir sampling is not the focus of this section; refer to the related literature for details.
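For intuition, here is a minimal C sketch of reservoir sampling in its simplest form \(Algorithm R; Vitter's Algorithm Z used by openGauss is an optimized variant that skips over records in batches instead of drawing a random number for every tuple\):

```
#include <stdlib.h>

/* Keep a uniform random sample of k items from a stream of unknown length.
 * After processing i+1 items, each of them sits in the reservoir with
 * probability k/(i+1). (The modulo bias of rand() is ignored for brevity.) */
void reservoir_sample(const int *stream, int n, int *reservoir, int k)
{
    for (int i = 0; i < n; i++) {
        if (i < k) {
            reservoir[i] = stream[i];      /* fill the reservoir first */
        } else {
            int j = rand() % (i + 1);      /* uniform index in [0, i] */
            if (j < k)
                reservoir[j] = stream[i];  /* replace an old item */
        }
    }
}
```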
### 3. Selectivity and Data Distribution

SQL queries often contain WHERE constraints \(filter conditions\), for example, **SELECT \* FROM student WHERE gender = 'male';** and **SELECT \* FROM student WHERE grade \> '1'**. What is the actual effect of such a constraint on the query result? To measure the effectiveness of constraints, the concept of selectivity is introduced first.

**Selectivity**: Given a query dataset _C_ \(which can be a data table or any intermediate result set\) and a constraint expression _x_, the selectivity of _x_ relative to _C_ is defined as:

$$
selec(x \mid C) = \frac{|C|_x}{|C|}
$$

where |C| indicates the total number of records in _C_, and |C|_x_ indicates the number of records in _C_ that meet the constraint _x_. For example, in Table 8, when _x_ is **"grade = 1"**, the selectivity is 3/5.

Table 8 Selectivity of dataset C
| Sno | Name | Gender | Grade |
| --- | --- | --- | --- |
| 001 | Xiao Zhang | Male | 1 |
| 002 | Xiao Li | Male | 2 |
| 003 | Xiao Wang | Male | 3 |
| 004 | Xiao Zhou | Female | 1 |
| 005 | Xiao Chen | Female | 1 |
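Applying the definition to Table 8:

$$
selec(\text{grade} = 1 \mid C) = \frac{|C|_{\text{grade}=1}}{|C|} = \frac{3}{5} = 0.6
$$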
The data distribution of _C_ is denoted as π. From the definition, selec\(x|C\) is actually a description of π under the semantics of _x_. The data distribution makes it possible to calculate the selectivity without traversing the original data. As the cost estimation section shows, the selectivity plays a significant role in estimating the cost of a plan.

Based on this idea, this section focuses on how openGauss calculates selectivity. Because selectivity calculation under simple constraints is representative, simple constraints are used as the example for illustration. Here, a simple constraint is defined as a non-range constraint that involves only a single attribute of a base table.

For the selectivity calculation of non-simple constraints, refer to the source code of this chapter.

- Selectivity Calculation Under Simple Constraints

  Assume that _x_ is a simple constraint and that the distribution information of the attribute involved in _x_ already exists in tuple _r_ of the PG\_STATISTIC system catalog \(see the Data Distribution Storage section\). openGauss calls the clause\_selectivity function to convert the tuple _r_ into a selectivity according to the requirements of _x_.

  The second parameter **clause** of clause\_selectivity is the constraint statement _x_. For different SQL queries, clauses of multiple types may be passed to clause\_selectivity. Table 9 lists the typical types.

  Table 9 Simple constraint types
  | Simple Constraint Type | Example |
  | --- | --- |
  | Var | SELECT * FROM PRODUCT WHERE ISSOLD; |
  | Const | SELECT * FROM PRODUCT WHERE TRUE; |
  | Param | SELECT * FROM PRODUCT WHERE $1; |
  | OpExpr | SELECT * FROM PRODUCT WHERE PRIZE = '100'; |
  | AND | SELECT * FROM PRODUCT WHERE PRIZE = '100' AND TYPE = 'HAT'; |
  | OR | SELECT * FROM PRODUCT WHERE PRIZE = '100' OR TYPE = 'HAT'; |
  | NOT | SELECT * FROM PRODUCT WHERE NOT (TYPE = 'HAT'); |
  | Other |   |
  \{Var, Const, Param, OpExpr\} are basic constraints and \{AND, OR, NOT\} are SET constraints. Obviously, the constraints \{Var, Const, Param\} can be considered special cases of the OpExpr constraint; for example, **SELECT \* FROM PRODUCT WHERE ISSOLD** is equivalent to **SELECT \* FROM PRODUCT WHERE ISSOLD = TRUE**. Due to space limitations, this section describes the selectivity calculation for the OpExpr constraint in detail and only briefly introduces the key logic of the selectivity calculation for SET-type constraints.

  \(1\) Selectivity calculation based on the OpExpr constraint

  The query statement **SELECT \* FROM PRODUCT WHERE PRIZE = '100'** is used as an example. The clause\_selectivity function takes the OpExpr branch based on the clause type \(PRIZE = '100'\). It then calls the treat\_as\_join\_clause function to determine whether the clause is a join constraint; if the result is false, the clause is a filter condition \(OP\), and clause\_selectivity calls the restriction\_selectivity function to estimate the selectivity of the **clause** parameter. The code is as follows:

  ```
  Selectivity
  clause_selectivity(PlannerInfo *root,
                     Node *clause,
                     int varRelid,
                     JoinType jointype,
                     SpecialJoinInfo *sjinfo)
  {
      Selectivity s1 = 0.5;    /* default for any unhandled clause type */
      RestrictInfo *rinfo = NULL;

      if (clause == NULL)      /* can this still happen? */
          return s1;
      if (IsA(clause, Var))
          ...
      else if (IsA(clause, Const))
          ...
      else if (IsA(clause, Param))
          ...

      // Processing branch of the NOT clause
      else if (not_clause(clause))
      {
          /* inverse of the selectivity of the underlying clause */
          s1 = 1.0 - clause_selectivity(root,
                                        (Node *) get_notclausearg((Expr *) clause),
                                        varRelid,
                                        jointype,
                                        sjinfo);
      }

      // Processing branch of the AND clause
      else if (and_clause(clause))
      {
          /* share code with clauselist_selectivity() */
          s1 = clauselist_selectivity(root,
                                      ((BoolExpr *) clause)->args,
                                      varRelid,
                                      jointype,
                                      sjinfo);
      }

      // Processing branch of the OR clause
      else if (or_clause(clause))
      {
          ListCell *arg;

          s1 = 0.0;
          foreach(arg, ((BoolExpr *) clause)->args)
          {
              Selectivity s2 = clause_selectivity(root,
                                                  (Node *) lfirst(arg),
                                                  varRelid,
                                                  jointype,
                                                  sjinfo);

              s1 = s1 + s2 - s1 * s2;
          }
      }

      // Processing branch of the join or OP clause
      else if (is_opclause(clause) || IsA(clause, DistinctExpr))
      {
          OpExpr *opclause = (OpExpr *) clause;
          Oid opno = opclause->opno;

          // Process the join clause.
          if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
          {
              /* Estimate selectivity for a join clause. */
              s1 = join_selectivity(root, opno,
                                    opclause->args,
                                    opclause->inputcollid,
                                    jointype,
                                    sjinfo);
          }

          // Process the OP clause.
          else
          {
              /* Estimate selectivity for a restriction clause. */
              s1 = restriction_selectivity(root, opno,
                                           opclause->args,
                                           opclause->inputcollid,
                                           varRelid);
          }
      }
      ...
      return s1;
  }
  ```

  The restriction\_selectivity function identifies that PRIZE = '100' is an equality constraint of the form Var = Const. It indirectly calls the var\_eq\_const function through the eqsel function to estimate the selectivity. In this process, the var\_eq\_const function reads the distribution information of the PRIZE column from the PG\_STATISTIC system catalog and attempts to use its MCV to calculate the selectivity.
  The get\_attstatsslot function is called first to check whether 100 exists in the MCV, which leads to the following cases:

  - Case 1: If it does, the proportion of '100' recorded in the MCV is returned directly as the selectivity.

  - Case 2: If it does not, calculate the total proportion **sumcommon** of the high-frequency values and return \(1.0 – **sumcommon** – **nullfrac**\)/**otherdistinct** as the selectivity, where **nullfrac** is the proportion of NULL values and **otherdistinct** is the NDV of the low-frequency values.

  If the constraint in the query is changed to PRIZE \< '100', the restriction\_selectivity function instead calls the scalarltsel function based on the operator type and attempts to calculate the selectivity using the information in the PG\_STATISTIC system catalog. Values satisfying the condition \< '100' may exist in both the MCV and the EH, so the qualifying values must be collected from both structures. Compared with the MCV, collecting the qualifying values in the EH is more complex. Using the ordering of keys in the EH, openGauss performs a binary search to quickly find the qualifying values, sums up their total proportion, and records the sum as **selec\_histogram**. Note that the EH does not record the frequency of '100' separately; instead, it merges '100' and its adjacent values into one bucket \(denoted as bucket _B_\) and records only the total frequency \(_Fb_\) of the values in bucket _B_. To resolve this, openGauss assumes that all values in a bucket occur with the same frequency and estimates the proportion of qualifying values in _B_ by linear interpolation:

  $$
  selec_B = F_b \times \frac{v - B_{min}}{B_{max} - B_{min}}
  $$

  where _v_ is the constant in the condition \(here, '100'\) and \[_B_<sub>min</sub>, _B_<sub>max</sub>\] are the bounds of bucket _B_. The specific code of this process is implemented in the ineq\_histogram\_selectivity function. Finally, the selectivity returned by the restriction\_selectivity function is **selec** = **selec\_mcv** + **selec\_histogram**, where **selec\_mcv** is the proportion of the MCVs that satisfy the condition.

- Selectivity calculation based on constraints of the SET type

  For a SET constraint, the clause\_selectivity function recursively calculates the selectivity of its basic constraints, and the final selectivity is then combined according to the semantics of the SET type, as listed in Table 10.

  Table 10 Selectivity of the SET type
  | SET Type | Description |
  | --- | --- |
  | NOT | selec(B) = 1 − selec(A), where B = NOT A |
  | AND | selec(A AND B) = selec(A) × selec(B) |
  | OR | selec(A OR B) = selec(A) + selec(B) − selec(A) × selec(B) |
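  As a worked example of the AND rule, using dataset _C_ from Table 8:

  $$
  selec(\text{gender} = \text{'Male'} \mid C) = \frac{3}{5}, \qquad selec(\text{grade} = 1 \mid C) = \frac{3}{5}
  $$

  $$
  selec(\text{gender} = \text{'Male'} \text{ AND } \text{grade} = 1 \mid C) \approx \frac{3}{5} \times \frac{3}{5} = 0.36
  $$

  The true value in Table 8 is 1/5 = 0.2 \(only student 001 qualifies\), which illustrates the estimation error that the independence assumption, discussed next, can introduce.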
  By referring to the Data Distribution Storage section, you may notice that openGauss does not store multi-attribute joint distributions. As shown in Table 10, openGauss calculates the joint selectivity under the assumption that the values of different columns are independent of each other. In scenarios where the columns are not independent, this prediction often deviates from reality. For example, in the student table, gender is correlated with major, so the number of students in a computer department cannot be calculated by multiplying the proportion of male students by the number of students in the department. Nevertheless, the independence assumption generally leads to reasonably accurate results.

- Default selectivity parameters

  When the data distribution is unknown, the selectivity cannot be estimated in the conventional way, for example, when the ANALYZE operation has not been performed on the data table or when the filter condition is an uncertain parameter. To provide a reasonable reference value for the optimizer, openGauss defines a series of empirical selectivity parameters, as shown in Table 11.

  Table 11 Selectivity parameters
  | Variable | Value | Description |
  | --- | --- | --- |
  | DEFAULT\_EQ\_SEL | 0.005 | Default selectivity for an equality constraint, for example, A = b. |
  | DEFAULT\_INEQ\_SEL | 0.3333333333333333 | Default selectivity for an inequality constraint, for example, A < b. |
  | DEFAULT\_RANGE\_INEQ\_SEL | 0.005 | Default selectivity for a range constraint on the same attribute (column), for example, A > b AND A < c. |
  | DEFAULT\_MATCH\_SEL | 0.005 | Default selectivity for constraints based on pattern matching, for example, LIKE. |
  | DEFAULT\_NUM\_DISTINCT | 200 | Default number of distinct values of an attribute after deduplication. Generally, DEFAULT\_NUM\_DISTINCT and DEFAULT\_EQ\_SEL are reciprocals of each other. |
  | DEFAULT\_UNK\_SEL | 0.005 | Default selectivity for BoolTest or NullTest constraints, for example, IS TRUE or IS NULL. |
  | DEFAULT\_NOT\_UNK\_SEL | (1.0 - DEFAULT\_UNK\_SEL) | Default selectivity for BoolTest or NullTest constraints, for example, IS NOT TRUE or IS NOT NULL. |
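  A minimal sketch of how such a default is typically applied \(the type and helper below are hypothetical, for illustration only; the actual behavior lives in functions such as var\_eq\_const\): when no statistics are available for a column, the estimator simply falls back to the default constant.

  ```
  #define DEFAULT_EQ_SEL 0.005

  /* Hypothetical per-column statistics; NULL means the table was never ANALYZEd. */
  typedef struct ColumnStats {
      int    mcv_count;
      long   mcv_value[100];
      double mcv_freq[100];
  } ColumnStats;

  /* Estimate the selectivity of "column = constval". */
  static double eq_selectivity(const ColumnStats *stats, long constval)
  {
      if (stats == NULL)
          return DEFAULT_EQ_SEL;         /* no distribution known: use the default */
      for (int i = 0; i < stats->mcv_count; i++) {
          if (stats->mcv_value[i] == constval)
              return stats->mcv_freq[i]; /* Case 1: constant found in the MCV */
      }
      return DEFAULT_EQ_SEL;             /* simplified stand-in for Case 2 */
  }
  ```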
### 4. Cost Estimation

Query execution costs are classified into I/O costs and CPU costs. Both are positively correlated with the number of tuples processed during the query, so the selectivity makes it possible to estimate the total cost of a query plan fairly accurately. However, because hardware environments differ, the cost model of openGauss outputs only a generic indicator for measuring plan quality rather than an execution time. To describe the measurement process, the following introduces the I/O and CPU cost estimation methods from the perspective of cost model parameters.

- \(1\) I/O cost estimation

  On disk, tuples are organized into data pages, which can be accessed by sequential read or random read. Restricted by the performance of the storage medium, sequential reads are much more efficient than random reads; for example, when an HDD serves a large number of random access requests, head seek time dominates the data read time. In openGauss, the I/O costs of the two access modes are as follows:

  ```
  DEFAULT_SEQ_PAGE_COST 1.0
  DEFAULT_RANDOM_PAGE_COST 4.0
  ```

  By default, the ratio of the sequential-read cost to the random-read cost of a data page is 1:4.

  This setting is reasonable for HDDs. However, for SSDs, which have excellent random addressing capabilities, the parameters should be adjusted as needed. In practice, database deployments are complex, and one system may use several different storage media at the same time. To enable the cost model to cope with the I/O performance of different storage media, openGauss lets users set the unit cost of file I/O per tablespace:

  ```
  CREATE TABLESPACE TEST_SPC LOCATION '...' WITH (SEQ_PAGE_COST=2, RANDOM_PAGE_COST=3);
  ```

  Based on the I/O cost parameters and the selectivity, the I/O overhead of a candidate plan can be estimated easily. The following uses sequential scan \(SeqScan\) and index scan \(IndexScan\) as examples to describe the cost estimation process.

  - 1. SeqScan: traverses the table data from beginning to end, which is a sequential read. Therefore, the I/O cost of SeqScan is **DEFAULT\_SEQ\_PAGE\_COST** multiplied by the total number of data pages in the table.

  - 2. IndexScan: uses an index to find the table data that meets the constraint, which is a random read. Therefore, the I/O cost of IndexScan is _P_ x **DEFAULT\_RANDOM\_PAGE\_COST**, where _P_ is the number of data pages that meet the constraint.

  _P_ is positively correlated with _R_ \(the number of tuples that meet the constraint\), and _R_ = total number of tuples in the table x selectivity. After openGauss calculates _R_, it invokes the index\_pages\_fetched\(R, ...\) function to estimate _P_. This function is implemented in the **costsize.c** file; for details, see the paper _Index Scans Using a Finite LRU Buffer: A Validated I/O Model_ by Mackert L F and Lohman G M.

  From this cost model we can see that when the selectivity exceeds a certain threshold, _P_ becomes relatively large and the cost of IndexScan exceeds that of SeqScan. Therefore, IndexScan is not always more efficient than SeqScan.
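  To make this concrete, here is a simplified back-of-the-envelope comparison \(hypothetical numbers; it ignores CPU costs, the index's own pages, and the dampening applied by index\_pages\_fetched\). Suppose a table occupies 10,000 pages:

  $$
  cost_{SeqScan} = 10000 \times 1.0 = 10000, \qquad cost_{IndexScan} \approx P \times 4.0
  $$

  With a very selective constraint touching at most _P_ = 1,000 pages, the IndexScan cost is about 4,000 and IndexScan wins; with a looser constraint giving _P_ = 8,000, the IndexScan cost is about 32,000 and SeqScan wins.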
- \(2\) CPU cost estimation

  The database consumes CPU resources in the data addressing and data processing phases, for example, in tuple projection and selection and in index lookups. Obviously, different operations have different CPU costs. openGauss divides the CPU cost into the tuple processing cost and the data operation cost.

  ① Tuple processing cost: the cost of converting a piece of disk data into a tuple. For ordinary table data and index data, the cost parameters are as follows:

  ```
  #define DEFAULT_CPU_TUPLE_COST 0.01
  #define DEFAULT_CPU_INDEX_TUPLE_COST 0.005
  ```

  Among the default parameters, the index cost is lower, because index data typically involves fewer columns than table data and therefore requires less CPU.

  ② Data operation cost: the cost of projecting a tuple or of evaluating a constraint expression to determine whether a tuple qualifies. The cost parameter is as follows:

  ```
  #define DEFAULT_CPU_OPERATOR_COST 0.0025
  ```

  Given the above parameters, the estimated CPU cost is proportional to the computation scale of the problem, which in turn depends on the selectivity; the relationship is analogous to that between an algorithm's complexity and its input size _n_. Due to limited space, this section does not go into further details.

## Physical Path

In the database, paths are represented by the Path structure, which is derived from the Node structure. Path is also a base structure, similar to a base class in C++: each concrete path is derived from it. For example, the IndexPath structure used by the index scan path is derived from the Path structure.

```
typedef struct Path
{
    NodeTag type;
    NodeTag pathtype;          /* Path type, such as T_IndexPath and T_NestPath. */
    RelOptInfo *parent;        /* Intermediate result generated after the current path is executed. */
    PathTarget *pathtarget;    /* Projection of the path; the expression cost is also saved here. */
                               /* Pay attention to expression indexes. */
    ParamPathInfo *param_info; /* Parameters used during execution. In the executor, subqueries or some special */
                               /* joins need to obtain the current value of another table in real time. */
    bool parallel_aware;       /* Parallel parameter, which distinguishes parallel from non-parallel. */
    bool parallel_safe;        /* Parallel parameter, determined by the set_rel_consider_parallel function. */
    int parallel_workers;      /* Parallel parameter, indicating the number of parallel threads. */
    double rows;               /* Estimated amount of data in the intermediate result generated by the current path. */
    Cost startup_cost;         /* Startup cost, that is, the cost from the start of execution to obtaining the first result. */
    Cost total_cost;           /* Overall execution cost of the current path. */
    List *pathkeys;            /* Sort keys of the intermediate result generated by the current path; NULL if the result is unordered. */
} Path;
```

## Dynamic Programming

At this point, openGauss has completed rule-based query rewriting and logical decomposition and has generated the physical paths of each base table. The physical paths of the base tables are only a small part of the optimizer's planning; openGauss now enters another important task of the optimizer, generating join paths. openGauss uses bottom-up optimization: for multi-table join paths, it uses either dynamic programming or the genetic algorithm. This section mainly introduces dynamic programming, which is the default method and is used in common scenarios. When there are very many tables, the genetic algorithm is used instead, because it avoids the search-space explosion that join path search suffers from when too many tables are involved.
After logical decomposition and optimization, the tables in the statement are flattened; that is, the original tree structure is turned into a flat array structure. The join relationships between tables are recorded in the SpecialJoinInfo structures attached to **root** \(the PlannerInfo structure\), and these are the basis of dynamic programming for joins.

### 1. Dynamic Programming Method

First, the dynamic programming method applies to optimization problems that contain a large number of repeated sub-problems. By memoizing the optimal solution to each sub-problem, each distinct sub-problem is solved only once, and the recorded solution is reused whenever the same sub-problem appears again. This requires that the optimal solutions of the sub-problems can be composed into the optimal solution of the whole problem, that is, the problem must have the optimal substructure property. For join optimization of a statement, the optimal solution of the whole join is composed of the optimal solutions of its parts: during planning, a local optimal solution is computed only once and then reused directly.

![](../figures/zh-cn_image_0000001257142943.jpg)![](../figures/zh-cn_image_0000001211903080.jpg)

Figure 1 Optimal solution to a repeated sub-problem

For example, the join A x B in the two join trees in Figure 1 is a repeated sub-problem: regardless of whether the A x B x C x D join path or the A x B x C join path is generated, the A x B join path must be generated first. A multi-table join may have a huge number of possible join orders, and as more levels are stacked, the join trees share a large number of repeated sub-problems, which can therefore be solved once and reused many times. That is, for the join A x B, the optimal solution needs to be generated only once.

The code of the multi-table join dynamic programming algorithm starts from the make\_rel\_from\_joinlist function, as shown in Figure 2.

![](../figures/zh-cn_image_0000001256862995.jpg)

Figure 2 Multi-table join dynamic programming algorithm

- 1\) make\_rel\_from\_joinlist function

  The main entry of the dynamic programming implementation is the make\_rel\_from\_joinlist function. Its input parameter is the RangeTblRef linked list produced by the flattening in the deconstruct\_jointree function. Each RangeTblRef represents a table, and the RelOptInfo structure of each base table can be found through this linked list. The found RelOptInfo structures make up layer 1 of the dynamic programming algorithm, and subsequent layers are "accumulated" on top of the layer-1 RelOptInfo structures. The code is as follows:

  ```
  // Traverse the flattened joinlist, which is a linked list of RangeTblRef nodes.
  foreach(jl, joinlist)
  {
      Node *jlnode = (Node *) lfirst(jl);
      RelOptInfo *thisrel;

      // In most cases, the node is a RangeTblRef. Use the subscript (rtindex) stored in it
      // to look up the corresponding RelOptInfo structure.
      if (IsA(jlnode, RangeTblRef))
      {
          int varno = ((RangeTblRef *) jlnode)->rtindex;
          thisrel = find_base_rel(root, varno);
      }
      // Affected by the from_collapse_limit and join_collapse_limit parameters, some nodes
      // are not flattened. In this case, the make_rel_from_joinlist function is invoked recursively.
      else if (IsA(jlnode, List))
          thisrel = make_rel_from_joinlist(root, (List *) jlnode);
      else
          ereport (......);

      // Generate the first initial linked list, that is, the linked list of base tables.
      // This linked list is the basis of the dynamic programming method.
      initial_rels = lappend(initial_rels, thisrel);
  }
  ```

- 2\) standard\_join\_search function

  In the dynamic programming method, each layer adds one more table to the accumulated join; when all tables have been added, the final join tree is complete. The number of layers therefore equals the number of tables: with _N_ tables, the accumulation is performed _N_ times. The accumulation at each layer is done by the join\_search\_one\_level function, and standard\_join\_search mainly prepares for it, including allocating the memory occupied by the RelOptInfos of each layer and keeping the information accumulated at each layer.

  It creates a "join array" similar to the structure \[LIST1, LIST2, LIST3\], where each linked list in the array stores all RelOptInfo structures of one layer of the dynamic programming method. For example, the first linked list in the array stores the linked lists related to all base-table paths. The code is as follows:

  ```
  // Allocate the RelOptInfo linked lists of all layers used during accumulation.
  root->join_rel_level = (List**)palloc0((levels_needed + 1) * sizeof(List*));
  // Initialize layer 1 with the base-table RelOptInfos.
  root->join_rel_level[1] = initial_rels;
  ```

  After completing the initialization, openGauss starts trying to build the RelOptInfos of each layer. The code is as follows:

  ```
  for (lev = 2; lev <= levels_needed; lev++) {
      ListCell* lc = NULL;
      // Generate all RelOptInfo structures of the corresponding layer in the join_search_one_level function.
      join_search_one_level(root, lev);

      ...
  }
  ```

- 3\) join\_search\_one\_level function

  The join\_search\_one\_level function is mainly used to generate all RelOptInfos of one layer, as shown in Figure 3. To generate the RelOptInfos of the _N_th layer, there are three main manners: attempting to generate a left-deep tree or a right-deep tree, attempting to generate a bushy tree, and attempting to generate a join path as a Cartesian product \(commonly, an exhaustive attempt\).

  ![](../figures/43.png)

  Figure 3 Manners of generating RelOptInfo of the Nth layer

  - \(1\) Left-deep tree and right-deep tree

    The generation principle of the left-deep tree is the same as that of the right-deep tree; the only difference is that the positions of the two RelOptInfos to be joined are exchanged in the make\_join\_rel function. That is, each RelOptInfo gets a chance to serve as the inner table or the outer table, so that more candidate joins are created, which helps generate the optimal path.

    As shown in Figure 4, to generate A x B x C, the two candidate RelOptInfos A x B and C are joined, and the two tree shapes come from exchanging their positions: using A x B as the outer table forms a left-deep tree, and using A x B as the inner table forms a right-deep tree.
    ![](../figures/44.png)

    Figure 4 Schematic diagram of a left-deep tree and a right-deep tree

    The code is as follows:

    ```
    // Traverse the previous layer: for example, to generate the RelOptInfos of layer 4,
    // try to join the RelOptInfos of layer 3 with the base tables of layer 1.
    foreach(r, joinrels[level - 1])
    {
        RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
        // If there is a join relationship or a join order restriction between two RelOptInfos,
        // a join is preferentially generated for them.
        // The has_join_restriction function may give a false positive; more refined filtering is done later.
        if (old_rel->joininfo != NIL || old_rel->has_eclass_joins ||
            has_join_restriction(root, old_rel))
        {
            ListCell *other_rels;
            // To generate the RelOptInfos of the Nth layer, the RelOptInfos of the (N - 1)th layer
            // are joined with the base-table set of the first layer. That is, the RelOptInfos of
            // layer 2 are generated by joining layer 1 with the base-table set of layer 1.
            // Special handling is thus needed at layer 2 to prevent a table from joining with itself.
            if (level == 2)
                other_rels = lnext(r);
            else
                other_rels = list_head(joinrels[1]);
            // old_rel "may" have join constraints or join order restrictions with other tables;
            // other_rels "may" contain such tables. The make_rels_by_clause_joins function
            // performs the accurate judgment.
            make_rels_by_clause_joins(root, old_rel, other_rels);
        }
        else
        {
            // Try to generate join paths for tables without join relationships
            // or with join order restrictions.
            make_rels_by_clauseless_joins(root, old_rel, list_head(joinrels[1]));
        }
    }
    ```

  - \(2\) Bushy tree

    To generate the RelOptInfos of the _N_th layer, the left-deep tree or right-deep tree joins the RelOptInfos of the \(N – 1\)th layer with the base tables of the first layer; either way, the current layer is built by referencing base-table RelOptInfos. A bushy tree is generated without using the base tables: it attempts to join the RelOptInfos of any two layers. For example, a RelOptInfo of the \(_N_ – 2\)th layer is joined with one of the second layer, and the combinations \(2, _N_ – 2\), \(3, _N_ – 3\), \(4, _N_ – 4\), and so on follow in sequence. Establishing a bushy tree must meet two conditions: the two RelOptInfos have a related constraint or a join order restriction, and the two RelOptInfos have no table in common. The code is as follows:

    ```
    for (k = 2;; k++)
    {
        int other_level = level - k;
        foreach(r, joinrels[k])
        {
            RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
            // There are join constraints or join order restrictions.
            if (old_rel->joininfo == NIL && !old_rel->has_eclass_joins &&
                !has_join_restriction(root, old_rel))
                continue;
            ...
            for_each_cell(r2, other_rels)
            {
                RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2);
                // The two sides must not overlap.
                if (!bms_overlap(old_rel->relids, new_rel->relids))
                {
                    // There are related join constraints or join order restrictions.
                    if (have_relevant_joinclause(root, old_rel, new_rel) ||
                        have_join_order_restriction(root, old_rel, new_rel))
                    {
                        (void) make_join_rel(root, old_rel, new_rel);
                    }
                }
            }
        }
    }
    ```

  - \(3\) Cartesian product

    After the attempts at the left-deep tree, right-deep tree, and bushy tree, if no legal join has been generated, a final attempt is made on the RelOptInfos of layer _N_ – 1 and layer 1: each RelOptInfo of the \(_N_ – 1\)th layer is joined with each RelOptInfo of the first layer.

### 2. Path Generation

The previous section describes the dynamic programming method used in path generation and how the RelOptInfos of the current layer are generated during accumulation. Generating the RelOptInfo of the current layer raises two problems: determining whether two RelOptInfos can be joined, and generating the physical join paths. Currently, a physical join path mainly has three implementations: NestLoopJoin, HashJoin, and MergeJoin, and establishing join paths is a process of continually attempting to generate these three kinds of paths.

- Feasibility check

  In the dynamic programming method, each RelOptInfo of layer _N_ – 1 must be joined with each RelOptInfo of the first layer, and each new joined RelOptInfo is stored in the current \(_N_th\) layer. The time complexity of this step is about O\(M x N\); if there are many RelOptInfos at both the \(_N_ – 1\)th layer and the first layer, the search space expands greatly. However, some joins can be ruled out in advance, which is the purpose of a timely check: detecting and skipping infeasible joins between two RelOptInfos early saves unnecessary overhead and improves the optimizer's efficiency.

  - \(1\) Preliminary check

    The following conditions are the main factors measured in the preliminary check:

    - The **joininfo** field of the RelOptInfo is not **NULL**, which means this RelOptInfo has related constraints with other RelOptInfos; that is, the current RelOptInfo may be associated with other tables.

    - The **has\_eclass\_joins** field of the RelOptInfo is **true**, indicating that the current RelOptInfo may have equivalent join conditions with other RelOptInfos recorded in the equivalence classes.

    - The has\_join\_restriction function returns **true**, indicating that the join order between the current RelOptInfo and other RelOptInfos is restricted.

    The preliminary check uses the RelOptInfo information to judge the possibility of a join, that is, whether there are join conditions or join order restrictions. The code is as follows:

    ```
    static bool has_join_restriction(PlannerInfo* root, RelOptInfo* rel)
    {
        ListCell* l = NULL;

        // If the current RelOptInfo involves Lateral semantics, there must be join order restrictions.
        foreach(l, root->lateral_info_list)
        {
            LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);

            if (bms_is_member(ljinfo->lateral_rhs, rel->relids) ||
                bms_overlap(ljinfo->lateral_lhs, rel->relids))
                return true;
        }

        // Process only the conditions other than inner joins.
        foreach (l, root->join_info_list) {
            SpecialJoinInfo* sjinfo = (SpecialJoinInfo*)lfirst(l);

            // Skip full joins; other mechanisms ensure their join order.
            if (sjinfo->jointype == JOIN_FULL)
                continue;

            // If the SpecialJoinInfo is already contained in the RelOptInfo, skip it.
            if (bms_is_subset(sjinfo->min_lefthand, rel->relids) &&
                bms_is_subset(sjinfo->min_righthand, rel->relids))
                continue;

            // If relids of the RelOptInfo overlaps min_lefthand or min_righthand,
            // there may be restrictions on the join order.
            if (bms_overlap(sjinfo->min_lefthand, rel->relids) ||
                bms_overlap(sjinfo->min_righthand, rel->relids))
                return true;
        }

        return false;
    }
    ```

  - \(2\) Precise check

    After the preliminary check, if it is determined that the two RelOptInfos have no join condition or join order restriction, the make\_rels\_by\_clauseless\_joins function is entered, and all possible paths in the RelOptInfo are joined with the RelOptInfos of layer 1. If the current RelOptInfo may have join constraints or join order restrictions, the make\_rels\_by\_clause\_joins function is invoked to further check the current RelOptInfo against the other RelOptInfos of layer 1 and determine whether each join can be performed.

    The have\_join\_order\_restriction function determines whether two RelOptInfos have join order restrictions from two aspects: whether the two RelOptInfos have a Lateral semantic ordering restriction, and whether min\_lefthand and min\_righthand in any SpecialJoinInfo impose a join order restriction on them.

    The analysis of the have\_join\_order\_restriction source code is as follows:

    ```
    bool have_join_order_restriction(PlannerInfo* root, RelOptInfo* rel1, RelOptInfo* rel2)
    {
        bool result = false;
        ListCell* l = NULL;

        // If a dependency with Lateral semantics exists, the join order must be restricted.
        foreach(l, root->lateral_info_list)
        {
            LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);

            if (bms_is_member(ljinfo->lateral_rhs, rel2->relids) &&
                bms_overlap(ljinfo->lateral_lhs, rel1->relids))
                return true;
            if (bms_is_member(ljinfo->lateral_rhs, rel1->relids) &&
                bms_overlap(ljinfo->lateral_lhs, rel2->relids))
                return true;
        }

        // Traverse all SpecialJoinInfos in root and check whether the two RelOptInfos have join constraints.
        foreach (l, root->join_info_list) {
            SpecialJoinInfo* sjinfo = (SpecialJoinInfo*)lfirst(l);

            if (sjinfo->jointype == JOIN_FULL)
                continue;

            // The minimum sets are subsets of the two tables, so the two tables must be joined in the specified order.
            if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
                bms_is_subset(sjinfo->min_righthand, rel2->relids)) {
                result = true;
                break;
            }
            // Conversely, the minimum sets are subsets of the two tables in the opposite order.
            if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
                bms_is_subset(sjinfo->min_righthand, rel1->relids)) {
                result = true;
                break;
            }

            // If both tables intersect one end of the minimum set, they should be joined at that end,
            // so let them join first.
            if (bms_overlap(sjinfo->min_righthand, rel1->relids) && bms_overlap(sjinfo->min_righthand, rel2->relids)) {
                result = true;
                break;
            }
            // The reverse case is the same as above.
            if (bms_overlap(sjinfo->min_lefthand, rel1->relids) && bms_overlap(sjinfo->min_lefthand, rel2->relids)) {
                result = true;
                break;
            }
        }

        // If the two tables have join relationships with other tables,
        // they can first be joined with those related tables.
        if (result) {
            if (has_legal_joinclause(root, rel1) || has_legal_joinclause(root, rel2)) {
                result = false;
            }
        }

        return result;
    }
    ```

  - \(3\) Legal join

    Because the search space expands rapidly with the number of RelOptInfo structures, running the full legality check on every pair of RelOptInfo structures would take too long. This is why the preliminary check and the precise check are performed first: they reduce the search time and achieve a pruning effect.

    For legal joins, the main code is in the join\_is\_legal function, which determines whether two RelOptInfo structures can be joined to generate a physical path; its input parameters are the two RelOptInfo structures. The logical join between the two candidate RelOptInfo structures may be InnerJoin, LeftJoin, or SemiJoin, or no legal logical join may exist. The determination proceeds in two steps:

    Step 1: Traverse the SpecialJoinInfos in the join\_info\_list linked list in **root** to check whether a legal SpecialJoinInfo can be found. A SpecialJoinInfo is generated for every logical join relationship except InnerJoin, and the legal join order is recorded in it.

    Step 2: Check the Lateral relationships in the RelOptInfos and determine whether the found SpecialJoinInfo meets the join order requirements of the Lateral semantics.

- Create a join path

  So far, pairs of RelOptInfo structures that meet the conditions have been filtered out; the next step is to establish physical join paths between the paths of the two RelOptInfo structures. The common physical join paths are NestLoop, MergeJoin, and HashJoin, implemented by the sort\_inner\_and\_outer, match\_unsorted\_outer, and hash\_inner\_and\_outer functions respectively.

  The sort\_inner\_and\_outer function generates MergeJoin paths under the assumption that the paths of the inner and outer tables are unordered, so both must be sorted explicitly; the path with the lowest total cost is selected for each of the inner and outer tables. The match\_unsorted\_outer function assumes that the outer table is already ordered, so only the inner table needs sorting to generate a MergeJoin path, a NestLoop path, or a parameterized path. Finally, the hash\_inner\_and\_outer function establishes a HashJoin path to join the two tables, that is, it builds a hash table.

  To facilitate the creation of MergeJoin paths, the constraints are processed first, and those applicable to MergeJoin are filtered out \(by the select\_mergejoin\_clauses function\); the merge-joinable join constraints can then be used in both the sort\_inner\_and\_outer and match\_unsorted\_outer functions. The code is as follows:

  ```
  // Extract the conditions usable for MergeJoin.
  foreach (l, restrictlist) {
      RestrictInfo* restrictinfo = (RestrictInfo*)lfirst(l);

      // If the current join is an outer join and this is a filter condition, ignore it.
      if (isouterjoin && restrictinfo->is_pushed_down)
          continue;

      // Preliminarily determine whether the join constraint can be used for MergeJoin.
      // restrictinfo->can_join and restrictinfo->mergeopfamilies are generated in distribute_qual_to_rels.
      if (!restrictinfo->can_join || restrictinfo->mergeopfamilies == NIL) {
          // Ignore FULL JOIN ON FALSE.
          if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
              have_nonmergeable_joinclause = true;
          continue; /* not mergejoinable */
      }

      // Check whether the constraint has the form "outer op inner" or "inner op outer".
      if (!clause_sides_match_join(restrictinfo, outerrel, innerrel)) {
          have_nonmergeable_joinclause = true;
          continue; /* no good for these input relations */
      }

      // Update and use the final equivalence classes.
      // Normalize the pathkeys so that the constraints can match the pathkeys.
      update_mergeclause_eclasses(root, restrictinfo);

      if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) || EC_MUST_BE_REDUNDANT(restrictinfo->right_ec)) {
          have_nonmergeable_joinclause = true;
          continue; /* can't handle redundant eclasses */
      }

      result_list = lappend(result_list, restrictinfo);
  }
  ```

  - \(1\) sort\_inner\_and\_outer function

    The sort\_inner\_and\_outer function is mainly used to generate MergeJoin paths. It sorts the two child RelOptInfo structures explicitly, so only the paths in cheapest\_total\_path of each child RelOptInfo need to be considered. It generates pathkeys from the merge-joinable join constraints and then keeps adjusting the order of the pathkeys to obtain different pathkey variants; the innerkeys of the inner table and the outerkeys of the outer table are then determined from the differently ordered pathkeys. The code is as follows:

    ```
    // Try joins: traverse each path of the outer table and the inner table.
    foreach (lc1, outerrel->cheapest_total_path) {
        Path* outer_path_orig = (Path*)lfirst(lc1);
        Path* outer_path = NULL;
        j = 0;
        foreach (lc2, innerrel->cheapest_total_path) {
            Path* inner_path = (Path*)lfirst(lc2);
            outer_path = outer_path_orig;

            // A parameterized path cannot be used to generate a MergeJoin path.
            if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
                PATH_PARAM_BY_REL(inner_path, outerrel))
                return;

            // The lowest-cost paths of the outer table and the inner table must be considered.
            if (outer_path != linitial(outerrel->cheapest_total_path) &&
                inner_path != linitial(innerrel->cheapest_total_path)) {
                if (!join_used[(i - 1) * num_inner + j - 1]) {
                    j++;
                    continue;
                }
            }

            // Generate a unique path.
            jointype = save_jointype;
            if (jointype == JOIN_UNIQUE_OUTER) {
                outer_path = (Path*)create_unique_path(root, outerrel, outer_path, sjinfo);
                jointype = JOIN_INNER;
            } else if (jointype == JOIN_UNIQUE_INNER) {
                inner_path = (Path*)create_unique_path(root, innerrel, inner_path, sjinfo);
                jointype = JOIN_INNER;
            }
            // Determine the set of pathkeys that the MergeJoin paths can use, based on the extracted conditions.
            all_pathkeys = select_outer_pathkeys_for_merge(root, mergeclause_list, joinrel);
            // Process each pathkey in the set and try to generate a MergeJoin path for it.
            foreach (l, all_pathkeys) {
                ...
                // Generate the pathkeys of the inner table.
                innerkeys = make_inner_pathkeys_for_merge(root, cur_mergeclauses, outerkeys);

                // Generate the pathkeys of the join result.
                merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, outerkeys);

                // Generate the MergeJoin path based on the pathkeys and the inner and outer table paths.
                try_mergejoin_path(root, ......, innerkeys);
            }
            j++;
        }
        i++;
    }
    ```

  - \(2\) match\_unsorted\_outer function

    The overall code structure of the match\_unsorted\_outer function is similar to that of the sort\_inner\_and\_outer function, except that sort\_inner\_and\_outer infers the pathkeys of the inner and outer tables from the conditions, whereas match\_unsorted\_outer assumes that the outer table path is already ordered.
    It sorts the join constraints according to the pathkeys of the outer table; that is, the pathkeys of the outer table serve directly as the outerkeys, so it checks which join constraints match the current pathkeys, filters out the matching constraints, and from them generates the innerkeys that require explicit sorting.

  - \(3\) hash\_inner\_and\_outer function

    The hash\_inner\_and\_outer function is used to create a HashJoin path. The distribute\_restrictinfo\_to\_rels function has already determined whether each constraint is applicable to HashJoin. Because a hash table must be built, HashJoin can be used only when at least one hash-joinable join constraint exists; otherwise, the hash table cannot be constructed.

- Filter paths

  So far, the physical join paths HashJoin, NestLoop, and MergeJoin have been generated. Each path must now be judged worth keeping or not according to the cost calculated during its generation, because the path join phase generates a large number of paths, including some obviously poor ones. Filtering provides a basic check and saves plan generation time: spending a long time generating a plan is unacceptable, even if the resulting execution plan is a "good" one.

  add\_path is the main function for filtering paths. The code is as follows:

  ```
  switch (costcmp) {
      case COSTS_EQUAL:
          outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
                                        PATH_REQ_OUTER(old_path));
          if (keyscmp == PATHKEYS_BETTER1) {
              if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET1) &&
                  new_path->rows <= old_path->rows)
                  // The costs of the new and old paths are similar, but the new path has longer
                  // pathkeys, requires fewer parameters, and returns a smaller result set.
                  // Accept the new path and discard the old path.
                  remove_old = true; /* new dominates old */
          } else if (keyscmp == PATHKEYS_BETTER2) {
              if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET2) &&
                  new_path->rows >= old_path->rows)
                  // The costs of the new and old paths are similar, but the new path has shorter
                  // pathkeys, requires more parameters, and returns a larger result set.
                  // Reject the new path and keep the old path.
                  accept_new = false; /* old dominates new */
          } else {
              if (outercmp == BMS_EQUAL) {
                  // The cost, pathkeys, and path parameters of the new and old paths are the same
                  // or similar. If the new path returns fewer rows, accept it and discard the old path.
                  if (new_path->rows < old_path->rows)
                      remove_old = true; /* new dominates old */
                  // If the new path returns more rows, reject it and keep the old path.
                  else if (new_path->rows > old_path->rows)
                      accept_new = false; /* old dominates new */
                  // The cost, pathkeys, path parameters, and result set sizes are all similar.
                  // Compare the costs within a strict fuzz factor; if the new path is better,
                  // use it and discard the old path.
                  else {
                      small_fuzzy_factor_is_used = true;
                      if (compare_path_costs_fuzzily(new_path, old_path, SMALL_FUZZY_FACTOR) ==
                          COSTS_BETTER1)
                          remove_old = true; /* new dominates old */
                      else
                          accept_new = false; /* old equals or
                                               * dominates new */
                  }
              // If the cost and pathkeys are similar, compare the number of rows and the parameters.
              // If the rows and parameters of the new path are better, discard the old path;
              // if those of the old path are better, discard the new path.
              } else if (outercmp == BMS_SUBSET1 &&
                         new_path->rows <= old_path->rows)
                  remove_old = true; /* new dominates old */
              else if (outercmp == BMS_SUBSET2 &&
                       new_path->rows >= old_path->rows)
                  accept_new = false; /* old dominates new */
              /* else different parameterizations, keep both */
          }
          break;
      case COSTS_BETTER1:
          // From all the comparison results, the new path is better than or equal to the old path.
          // Accept the new path and discard the old path.
          if (keyscmp != PATHKEYS_BETTER2) {
              outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
                                            PATH_REQ_OUTER(old_path));
              if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET1) &&
                  new_path->rows <= old_path->rows)
                  remove_old = true; /* new dominates old */
          }
          break;
      case COSTS_BETTER2:
          // From all the comparison results, the old path is better than the new path.
          // Reject the new path and keep the old path.
          if (keyscmp != PATHKEYS_BETTER1) {
              outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
                                            PATH_REQ_OUTER(old_path));
              if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET2) &&
                  new_path->rows >= old_path->rows)
                  accept_new = false; /* old dominates new */
          }
          break;
      default:

          /*
           * can't get here, but keep this case to keep compiler
           * quiet
           */
          break;
  }
  ```

## Genetic Algorithm

As a kind of evolutionary algorithm, the genetic algorithm draws on the natural selection and genetic mechanisms of Darwin's theory of biological evolution: an optimal individual is produced by simulating the evolutionary process of natural selection and survival of the fittest.

After a certain number of original individuals are generated, new chromosomes can be produced by arranging and combining genes, and the next generation is then obtained through chromosome crossover and mutation. To select excellent chromosomes, a fitness function is established to compute fitness values and eliminate chromosomes with low fitness. Through continual inheritance and mutation among individuals, ever better individuals gradually evolve. Substituting this process into problem solving, an individual is a solution of the problem, and intergenerational inheritance lets the solutions converge toward the optimal one.

Unlike dynamic programming, which decomposes a problem into several independent sub-problems, the genetic algorithm is a selection process: it enlarges the solution space by constructing new chromosomes through crossover and keeps screening the space with the fitness function, promoting good genes and eliminating bad ones. The solution obtained by the genetic algorithm is therefore not necessarily the global optimum, as it is with dynamic programming, but improved crossover and mutation can bring it as close to the global optimum as possible.

Thanks to its efficiency advantage in multi-table joins, the genetic algorithm is a useful supplement to the dynamic programming method in the openGauss database.
The genetic algorithm is used only when the **Enable\_geqo** parameter is enabled and the number of RelOptInfo structures to be joined exceeds **Geqo\_threshold** \(12 by default\).
+
+The genetic algorithm is implemented in the following five steps:
+
+\(1\) Pool initialization: Genes are encoded, and a number of chromosomes are generated by randomly arranging and combining the genes. These chromosomes form a new pool. In addition, the fitness of each chromosome is calculated during the chromosome generation process.
+
+\(2\) Chromosome selection: The chromosomes used for crossover and mutation are selected through random selection \(actually, a probability-based random number generation algorithm is used, so that excellent chromosomes are more likely to be selected\).
+
+\(3\) Crossover: Chromosomes are crossed over to generate new chromosomes, which are added to the pool.
+
+\(4\) Mutation: A mutation operation is performed on chromosomes to generate new chromosomes, which are added to the pool.
+
+\(5\) Fitness calculation: Fitness is calculated and bad chromosomes are eliminated.
+
+For example, if the genetic algorithm is used to solve a travelling salesman problem \(TSP\), the cities can be used as genes, a path travelling through each city is a chromosome, the total length of the path is its fitness, and the fitness function is responsible for screening out relatively long paths and retaining relatively short ones. The algorithm proceeds as follows:
+
+\(1\) Pool initialization: The cities are numbered, and then arranged and combined by number to generate multiple new paths \(chromosomes\). The overall path length \(fitness\) is calculated from the distances between the cities, and the new paths form a pool.
+
+\(2\) Chromosome selection: Two paths are selected for crossover \(note that a city must not appear repeatedly in the new chromosome generated through crossover\), and the path length is calculated for the new path generated by the crossover operation.
+
+\(3\) Mutation: A chromosome is randomly selected for mutation \(a common method is to exchange the positions of cities in a path\), and the path length is calculated for the new path obtained after the mutation.
+
+\(4\) Fitness calculation: All paths in the pool are sorted in ascending order of path length, and the paths ranked at the bottom are eliminated.
+
+The genetic algorithm of the openGauss database imitates this method of solving the TSP: RelOptInfo is used as the gene, the finally generated join tree as the chromosome, and the total cost of the join tree as the fitness, and the fitness function filters paths based on their cost. However, the join path search in the openGauss database differs slightly from the path search of the TSP. The TSP has no connectivity problem: any two cities are connected, and the distance between any two cities can always be calculated. In a database, by contrast, the join constraints may prevent two tables from being joined, or prevent a join tree from being generated at all. It should also be noted that the implementation in the openGauss database differs slightly from a common genetic algorithm: it has no mutation process and generates new chromosomes only by means of crossover.
+
+The general entry of the genetic algorithm in the openGauss database is the geqo function.
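+
+Before turning to the kernel code, the following self-contained toy program condenses the five steps above into a single loop. It is purely illustrative: all names and the trivial cost function are invented for this sketch, while the real logic lives in geqo\_main.cpp and the other files listed in Table 6-17. As noted above, openGauss skips the mutation step, and so does the sketch.
+
+```
+#include <stdio.h>
+#include <stdlib.h>
+
+#define NGENES 4  /* number of base tables (genes per chromosome) */
+#define NPOOL  8  /* pool size */
+#define NGENS 32  /* number of crossover generations */
+
+typedef int Gene;
+typedef struct { Gene string[NGENES]; double worth; } Chromosome;
+
+/* Stand-in fitness: a made-up cost for the join order. In the kernel this is
+ * geqo_eval(), which builds the left-deep join tree and computes its real cost. */
+static double eval(const Gene *s)
+{
+    double cost = 0.0;
+    for (int i = 0; i < NGENES; i++)
+        cost += (double)s[i] * (i + 1);
+    return cost;
+}
+
+/* Step 1: one random permutation of {1..NGENES} (Fisher-Yates shuffle). */
+static void init_chromosome(Chromosome *c)
+{
+    Gene tmp[NGENES];
+    int remainder = NGENES - 1;
+    for (int i = 0; i < NGENES; i++)
+        tmp[i] = (Gene)(i + 1);
+    for (int i = 0; i < NGENES; i++) {
+        int next = rand() % (remainder + 1);
+        c->string[i] = tmp[next];
+        tmp[next] = tmp[remainder--];
+    }
+    c->worth = eval(c->string);
+}
+
+static int by_worth(const void *a, const void *b)
+{
+    double d = ((const Chromosome *)a)->worth - ((const Chromosome *)b)->worth;
+    return (d > 0) - (d < 0);
+}
+
+int main(void)
+{
+    Chromosome pool[NPOOL];
+    srand(42);
+    for (int i = 0; i < NPOOL; i++)
+        init_chromosome(&pool[i]);
+    qsort(pool, NPOOL, sizeof(Chromosome), by_worth); /* pool kept ordered by fitness */
+
+    for (int gen = 0; gen < NGENS; gen++) {
+        /* Step 2: biased random selection; squaring a uniform number in [0,1)
+         * favours the low-cost front of the pool (the kernel uses linear_rand). */
+        double r1 = rand() / (RAND_MAX + 1.0), r2 = rand() / (RAND_MAX + 1.0);
+        const Chromosome *dad = &pool[(int)(r1 * r1 * NPOOL)];
+        const Chromosome *mom = &pool[(int)(r2 * r2 * NPOOL)];
+
+        /* Step 3: crossover; take a prefix of dad, then fill in mom's
+         * remaining genes in mom's order (no gene may repeat). */
+        Chromosome kid;
+        int used[NGENES + 1] = {0};
+        int k = 0;
+        for (; k < NGENES / 2; k++) {
+            kid.string[k] = dad->string[k];
+            used[dad->string[k]] = 1;
+        }
+        for (int j = 0; j < NGENES; j++)
+            if (!used[mom->string[j]])
+                kid.string[k++] = mom->string[j];
+
+        /* Step 5: compute fitness and spread the kid into the ordered pool,
+         * dropping the worst chromosome (step 4, mutation, is skipped). */
+        kid.worth = eval(kid.string);
+        if (kid.worth < pool[NPOOL - 1].worth) {
+            pool[NPOOL - 1] = kid;
+            qsort(pool, NPOOL, sizeof(Chromosome), by_worth);
+        }
+    }
+
+    printf("best join order:");
+    for (int i = 0; i < NGENES; i++)
+        printf(" %d", pool[0].string[i]);
+    printf("  (cost %.0f)\n", pool[0].worth);
+    return 0;
+}
+```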
The input parameters of geqo are **root** \(the context information of query optimization\), **number\_of\_rels** \(the number of RelOptInfo structures to be joined\), and **initial\_rels** \(all base tables\).
+
+### File Directory Structure
+
+As a relatively independent optimizer module, the genetic algorithm has its own file directory structure, as shown in Table 6-17.
+
+Table 6-17 Optimizer file directory structure

+| File | Function |
+| --- | --- |
+| geqo_copy.cpp | Gene copy function, that is, the geqo_copy function. |
+| geqo_cx.cpp | Cycle crossover (CX) algorithm function. |
+| geqo_erx.cpp | Implemented based on edge recombination crossover (ERX); provides the gimme_edge_table function. |
+| geqo_eval.cpp | Calculates the fitness and calls the make_one_rel function to generate the join relationship. |
+| geqo_main.cpp | Genetic algorithm entry, that is, the main function geqo. |
+| geqo_misc.cpp | Genetic algorithm information printing function, which is an auxiliary function. |
+| geqo_mutation.cpp | Gene mutation function, that is, the geqo_mutation function, which is called when the cx function fails. |
+| geqo_ox1.cpp | Order crossover algorithm mode 1 (OX1) function. |
+| geqo_ox2.cpp | Order crossover algorithm mode 2 (OX2) function. |
+| geqo_pmx.cpp | Partially matched crossover (PMX) function. |
+| geqo_pool.cpp | Handles the gene pool of the genetic algorithm. A gene pool is the collection of all individuals (chromosomes, including the new chromosomes obtained after multiple tables are joined). |
+| geqo_px.cpp | Position crossover (PX) algorithm function. |
+| geqo_random.cpp | Random number functions of the genetic algorithm, used to randomly generate mutation content. |
+| geqo_recombination.cpp | Recombination algorithm of the genetic algorithm, that is, the init_tour function. |
+| geqo_selection.cpp | Random individual selection function of the genetic algorithm, that is, the geqo_selection function. |
+These files are stored in **src/gausskernel/optimizer/geqo** as the modules of the optimizer's genetic algorithm. The code in these files is interpreted in later sections.
+
+### Pool Initialization
+
+Before using the genetic algorithm, you can adjust the triggering condition through the **Geqo\_threshold** parameter. To facilitate code interpretation, the threshold is reduced to 4 here \(that is, the genetic algorithm is used when the number of RelOptInfo structures or base tables is 4\). In the following code interpretation, the four tables t1, t2, t3, and t4 are used as examples.
+
+As the gene of the genetic algorithm, RelOptInfo needs to be encoded first. The openGauss database uses real-number encoding, that is, \{1,2,3,4\} represents the four tables t1, t2, t3, and t4, respectively.
+
+Then, the size of the pool is obtained by using the gimme\_pool\_size function. The size of the pool is affected by two parameters: **Geqo\_pool\_size** and **Geqo\_effort**. The pool is represented by the Pool structure, and a chromosome is represented by the Chromosome structure. The code is as follows:
+
+```
+/* Chromosome structure */
+typedef struct Chromosome {
+/* string is an integer array that represents one ordering of the genes and corresponds to one join tree. */
+/* For example, {1,2,3,4} corresponds to t1 JOIN t2 JOIN t3 JOIN t4, */
+/* and {2,3,1,4} corresponds to t2 JOIN t3 JOIN t1 JOIN t4. */
+Gene* string;
+Cost worth; /* Fitness of the chromosome, which is actually a path cost. */
+} Chromosome;
+
+/* Pool structure */
+typedef struct Pool {
+Chromosome *data; /* Chromosome array. Each element of the array is a join tree. */
+int size; /* Number of chromosomes, that is, the number of join trees in data; generated by gimme_pool_size. */
+int string_length; /* Number of genes in each chromosome, which is the same as the number of base tables. */
+} Pool;
+```
+
+In addition, the number of chromosome crossovers is obtained by using the gimme\_number\_generations function. A larger number of crossovers means that more new chromosomes are generated and a better solution is more likely to be found, but it also affects performance. You can adjust the number of crossovers by setting the **Geqo\_generations** parameter.
+
+The variables in the structure are as follows:
+
+\(1\) The number of chromosomes \(Pool.size\), determined by gimme\_pool\_size.
+
+\(2\) The number of genes in each chromosome \(Pool.string\_length\), which is the same as the number of base tables.
+
+Then, chromosomes can be generated. Each chromosome is generated by using the Fisher-Yates shuffle algorithm; finally, Pool.size chromosomes are generated.
The algorithm is implemented as follows:
+
+```
+/* Initialize the gene sequence to {1,2,3,4}. */
+for (i = 0; i < num_gene; i++)
+tmp[i] = (Gene)(i + 1);
+
+remainder = num_gene - 1; /* Number of remaining genes. */
+
+/* Shuffle: randomly pick genes one by one to build the gene encoding. */
+for (i = 0; i < num_gene; i++) {
+/* choose value between 0 and remainder inclusive */
+next = geqo_randint(root, remainder, 0);
+/* output that element of the tmp array */
+tour[i] = tmp[next]; /* Gene encoding */
+/* and delete it */
+tmp[next] = tmp[remainder]; /* Update the remaining gene sequence. */
+remainder--;
+}
+```
+
+Table 6-18 describes the process of generating a chromosome, assuming that the four random results are \{1, 1, 1, 0\}.
+
+Table 6-18 Process of generating a chromosome

+| Gene Candidate Set (tmp) | Result Set (tour) | Random Number Range | Random Number | Description |
+| --- | --- | --- | --- | --- |
+| 1 2 3 4 | 2 | 0–3 | 1 | Assume that the random number is 1; the first gene of the result set is tmp[1], whose value is 2. The candidate set tmp is updated: the unselected tail value is moved to the selected position. |
+| 1 4 3 | 2 4 | 0–2 | 1 | Assume that the random number is 1; the second gene of the result set is 4. The candidate set tmp is updated again. |
+| 1 3 | 2 4 3 | 0–1 | 1 | Assume that the random number is 1; the third gene of the result set is 3. Because the tail value is selected, the candidate set does not need to be updated. |
+| 1 | 2 4 3 1 | 0–0 | 0 | The last gene is 1. |
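+
+As a sanity check, the following standalone snippet \(illustrative only; the kernel's geqo\_randint is replaced by the assumed random sequence \{1, 1, 1, 0\}\) replays the walkthrough in Table 6-18:
+
+```
+#include <stdio.h>
+
+int main(void)
+{
+    int tmp[4]   = {1, 2, 3, 4};  /* gene candidate set */
+    int tour[4];                  /* result set (the chromosome) */
+    int picks[4] = {1, 1, 1, 0};  /* the assumed random numbers from Table 6-18 */
+    int remainder = 3;
+
+    for (int i = 0; i < 4; i++) {
+        int next = picks[i];          /* stands in for geqo_randint(root, remainder, 0) */
+        tour[i] = tmp[next];          /* take the selected gene */
+        tmp[next] = tmp[remainder--]; /* move the unselected tail value into the hole */
+    }
+
+    for (int i = 0; i < 4; i++)
+        printf("%d ", tour[i]);       /* prints: 2 4 3 1 */
+    printf("\n");
+    return 0;
+}
+```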
+After chromosomes have been randomly generated a number of times, a pool is obtained. Assuming that there are four chromosomes in total in the pool, the structure of the pool is shown in Figure 6-13.
+
+![](../figures/45.png)
+
+Figure 6-13 Chromosome structure
+
+Then, the fitness \(worth\) is calculated for each chromosome. Calculating fitness is actually a process of generating a join tree according to the gene encoding of the chromosome and calculating the cost of that join tree.
+
+In the openGauss database, each chromosome uses a left-deep tree by default. Therefore, once the gene encoding of a chromosome is determined, its join tree is determined accordingly. For example, for the chromosome \{2, 4, 3, 1\}, the corresponding join tree is \(\(\(t2, t4\), t3\), t1\), as shown in Figure 6-14.
+
+![](../figures/zh-cn_image_0000001212089804.png)
+
+Figure 6-14 Chromosome join tree
+
+The openGauss database generates the fitness by using the geqo\_eval function, which first generates a join tree based on the gene encoding of a chromosome and then calculates the cost of the join tree.
+
+The genetic algorithm uses the gimme\_tree function to generate the join tree. The merge\_clump function is called recursively inside it. merge\_clump joins as many tables as possible, generates join subtrees, and records the number of nodes in each join subtree. The join subtrees are then recorded in the clumps linked list in descending order of the number of nodes. The code is as follows:
+
+```
+/* Traverse all tables and join as many of them as possible. */
+for (rel_count = 0; rel_count < num_gene; rel_count++) {
+int cur_rel_index;
+RelOptInfo* cur_rel = NULL;
+Clump* cur_clump = NULL;
+
+/* tour represents a chromosome. Here one gene of the chromosome, that is, one base table, is obtained. */
+cur_rel_index = (int) tour[rel_count];
+cur_rel = (RelOptInfo *) list_nth(private->initial_rels, cur_rel_index - 1);
+
+/* Generate a clump for the base table. size = 1 indicates that there is only one base table in the current clump. */
+cur_clump = (Clump*)palloc(sizeof(Clump));
+cur_clump->joinrel = cur_rel;
+cur_clump->size = 1;
+
+/* Attempt to join, recursing as needed, and record the result in the clumps linked list. */
+clumps = merge_clump(root, clumps, cur_clump, false);
+}
+```
+
+Take the previously generated chromosome \{2, 4, 3, 1\} as an example and assume that:
+
+\(1\) 2 and 4 cannot be joined.
+
+\(2\) 4 and 3 can be joined.
+
+\(3\) 2 and 1 can be joined.
+
+Table 6-19 describes the process of generating a join tree under these conditions.
+
+Table 6-19 Join tree generation process

+| Round (rel_count) | Join Result Set (clumps) | Description |
+| --- | --- | --- |
+| Initial | NULL | Create a cur_clump node whose gene is 2 and whose cur_clump.size is 1. |
+| 0 | {2} | Because clumps == NULL and cur_clump cannot be joined with any table, cur_clump is directly added to clumps. |
+| 1 | {2}, {4} | Create a cur_clump node whose gene is 4 and whose cur_clump.size is 1, and attempt to join it with the nodes in the clumps linked list. Because 2 and 4 cannot be joined, node 4 is also added to the clumps linked list. |
+| 2 | {2} | Create a cur_clump node whose gene is 3 and whose cur_clump.size is 1, traverse the clumps linked list, and attempt to join with 2 and 4 respectively. The join with 4 can be established, so a new old_clumps node whose old_clumps.size is 2 is created from the join of 3 and 4, and node 4 is deleted from the clumps linked list. |
+| 2 | {3, 4}, {2} | Use the old_clumps node generated by joining 3 and 4 as the parameter to recursively invoke merge_clump and attempt to join old_clumps with the nodes in the clumps linked list. They cannot be joined, that is, {3, 4} and {2} cannot be joined, so old_clumps is added to clumps. Because old_clumps.size is currently the largest, old_clumps is inserted at the beginning of clumps. |
+| 3 | {3, 4} | Create a cur_clump node whose gene is 1 and whose cur_clump.size is 1. Traverse the clumps linked list and try to join with {3, 4} and {2} respectively. The join with 2 can be established, so a new old_clumps node whose old_clumps.size is 2 is created from the join of 1 and 2, and node 2 is deleted from the clumps linked list. |
+| 3 | {3, 4}, {1, 2} | Use the new old_clumps node generated by joining 1 and 2 as the parameter to recursively invoke merge_clump and attempt to join old_clumps with the nodes in the clumps linked list. The join fails, so old_clumps is added to clumps. Because old_clumps.size is 2, old_clumps is inserted at the end of clumps. |
+According to the steps in the example, the merge\_clump function keeps attempting to generate larger and larger clumps.
+
+```
+/* If a join can be generated, recursively try to generate a join with more nodes. */
+if (joinrel != NULL) {
+...
+/* Generate a new join node and increase the number of joined nodes. */
+old_clump->size += new_clump->size;
+pfree_ext(new_clump);
+
+/* Delete the joined node from the clumps linked list. */
+clumps = list_delete_cell(clumps, lc, prev);
+/* Use clumps and the newly generated join node (old_clump) as parameters to continue generating joins. */
+return merge_clump(root, clumps, old_clump, force);
+}
+```
+
+In the example in the preceding table, the **clumps** linked list finally contains two nodes, which are two join subtrees. The function then sets **force** to **true** and attempts to join the two nodes again.
+
+```
+/* If there are multiple nodes in clumps, the join tree has not been generated successfully. */
+if (list_length(clumps) > 1) {
+...
+foreach(lc, clumps) {
+Clump* clump = (Clump*)lfirst(lc);
+/* Set the force parameter to true and try to join unconditionally. */
+fclumps = merge_clump(root, fclumps, clump, true);
+}
+clumps = fclumps;
+}
+```
+
+### 3. Selection Operator
+
+After a pool is generated, intergenerational genetic optimization can be performed. Two chromosomes are randomly selected from the pool for the crossover operation, which produces a new chromosome.
+
+Because the chromosomes in the pool are already sorted by fitness, a chromosome with lower fitness \(lower cost\) is better, and better chromosomes should be more likely to be inherited. Therefore, chromosomes with lower fitness are preferred when the father and mother chromosomes are selected. The selection process involves the concept of bias, which is a fixed value in the operator. The value of the bias can be adjusted through the **Geqo\_selection\_bias** parameter \(**2.0** by default\).
+
+```
+/* A father chromosome and a mother chromosome are selected by using the linear_rand function. */
+first = linear_rand(root, pool->size, bias);
+second = linear_rand(root, pool->size, bias);
+```
+
+To generate a random number \(x\) based on a certain probability distribution, you need to know the probability distribution function or probability density function \(PDF\) first. The PDF ![](../figures/zh-cn_image_0000001212063076.gif) used by the openGauss database is as follows:
+
+![](../figures/zh-cn_image_0000001257142945.gif)
+
+The following cumulative distribution function \(CDF\) is obtained from the PDF:
+
+![](../figures/zh-cn_image_0000001211903084.gif)
+
+Then, a random number that follows this probability distribution can be obtained by using the CDF and the inverse function method.
+
+Function:
+
+![](../figures/zh-cn_image_0000001256862999.gif)
+
+Inverse function:
+
+![](../figures/zh-cn_image_0000001212223058.gif)
+
+This is consistent with the implementation of the linear\_rand function in the source code.
+
+![](../figures/zh-cn_image_0000001256982939.png)
+
+To analyze the characteristics of the generated random numbers, the probability-based random number generation code was extracted for calculation and verification. It is assumed that the bias is 2.0, and the PDF is then used to calculate the theoretical probability value of each interval.
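+
+For reference, the formulas shown in the preceding figures can be written out explicitly. The following is a reconstruction based on the linear\_rand code shown above, with \(b\) denoting the bias \(b > 1\):
+
+```latex
+% PDF (density decreasing in x, so low indexes are favoured)
+f(x) = b - 2(b-1)x, \qquad 0 \le x \le 1
+
+% CDF obtained by integration
+F(x) = \int_0^x f(t)\,dt = bx - (b-1)x^2
+
+% Inverse-function method: solve y = F(x) for x, taking the root in [0, 1]
+x = F^{-1}(y) = \frac{b - \sqrt{b^2 - 4(b-1)y}}{2(b-1)}
+```
+
+Feeding a uniform random number \(y\) in \[0, 1\) through the inverse function therefore yields \(x\) distributed with density \(f\); multiplying \(x\) by the pool size gives a chromosome index that is biased toward the low-cost front of the pool, which is exactly the expression computed by linear\_rand.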
For example, for the range from 0.6 to 0.7, the theoretical probability is calculated as follows:
+
+![](../figures/zh-cn_image_0000001257063005.gif)
+
+Figure 6-15 shows the theoretical probability values of all the ranges.
+
+![](../figures/46.png)
+
+Figure 6-15 Theoretical probability value of random number generation
+
+As Figure 6-15 shows, the theoretical probability values decrease from one range to the next. In other words, when a parent chromosome is selected, a chromosome with lower fitness \(lower cost\) is more likely to be chosen.
+
+### 4. Crossover Operator
+
+After the parent chromosomes are selected by using the selection operator, a crossover operation can be performed on them to generate a new child chromosome.
+
+openGauss provides several crossover methods: edge recombination crossover, partially matched crossover, cycle crossover, position crossover, and order crossover. The source code analysis below takes the position crossover method as an example.
+
+It is assumed that the gene encoding of the selected father chromosome is \{1, 3, 2, 4\} with a fitness of 100, and that of the selected mother chromosome is \{2, 3, 1, 4\} with a fitness of 200. When the child chromosome has not yet been generated and is in an uninitialized state, the statuses of these chromosomes are as shown in Figure 6-16.
+
+![](../figures/47.png)
+
+Figure 6-16 Chromosome status
+
+A random number **num\_positions** is generated for the crossover operation. It lies between 1/3 and 2/3 of the total number of genes and represents the number of genes that the father chromosome passes to the child chromosome by position. The code is as follows:
+
+```
+/* num_positions determines the number of genes inherited from the father chromosome by the child chromosome. */
+num_positions = geqo_randint(root, 2 * num_gene / 3, num_gene / 3);
+
+/* Select random positions. */
+for (i = 0; i < num_positions; i++)
+{
+/* A position is generated randomly, and the father's gene at that position is passed to the child chromosome. */
+pos = geqo_randint(root, num_gene - 1, 0);
+
+offspring[pos] = tour1[pos];
+/* Mark the gene at this position as used: the mother chromosome must not pass the same gene to the child. */
+city_table[(int) tour1[pos]].used = 1;
+}
+```
+
+Assume that the father chromosome passes two genes to the child chromosome, namely the genes at positions 1 and 2 \(values 3 and 2\). The status of the child chromosome is then as shown in Figure 6-17.
+
+![](../figures/48.png)
+
+Figure 6-17 Current chromosome status
+
+The child chromosome now has two genes: 3 and 2. After these two genes are excluded from the mother chromosome, two genes remain: 1 and 4. They are written into the child chromosome in their order within the mother chromosome, and a new child chromosome is thereby generated, as shown in Figure 6-18.
+
+![](../figures/49.png)
+
+Figure 6-18 New chromosome status
+
+### 5. Fitness Calculation
+
+After the new child chromosome is obtained, its fitness is calculated by using the geqo\_eval function, and the chromosome is then added to the pool by using the spread\_chromo function.
+
+```
+/* Fitness analysis */
+kid->worth = geqo_eval(root, kid->string, pool->string_length);
+
+/* Spread the chromosome into the pool based on its fitness. */
+spread_chromo(root, kid, pool);
+```
+
+Because the chromosomes in the pool must always remain ordered, the spread\_chromo function traverses the pool by binary search, comparing the fitness of the chromosomes in the pool with that of the new chromosome to find the position at which to insert the new chromosome. The chromosomes behind that position each move back by one, and the last chromosome is eliminated. If the new chromosome itself has the highest fitness value \(that is, the highest cost\), it is discarded directly. The code is as follows:
+
+```
+/* Binary search over the chromosomes in the pool. */
+top = 0;
+mid = pool->size / 2;
+bot = pool->size - 1;
+index = -1;
+
+/* Chromosome screening */
+while (index == -1) {
+/* In the following four cases, the insertion position has been found. */
+if (chromo->worth <= pool->data[top].worth) {
+index = top;
+} else if (chromo->worth - pool->data[mid].worth == 0) {
+index = mid;
+} else if (chromo->worth - pool->data[bot].worth == 0) {
+index = bot;
+} else if (bot - top <= 1) {
+index = bot;
+} else if (chromo->worth < pool->data[mid].worth) {
+/*
+* In the remaining two cases, the position has not been found yet,
+* so the search range is narrowed.
+*/
+bot = mid;
+mid = top + ((bot - top) / 2);
+} else { /* (chromo->worth > pool->data[mid].worth) */
+top = mid;
+mid = top + ((bot - top) / 2);
+}
+}
+```
+
+By repeatedly selecting excellent chromosomes and performing intergenerational crossover, the genetic algorithm continuously adds new chromosomes to the pool, pushing its solution from a local optimum toward the global optimum.
+
+
+
+## Summary
+
+This chapter described the implementation of the SQL engine, including SQL parsing, query rewriting, and query optimization. The SQL engine involves a large amount of highly coupled code with complex implementation logic. For a better understanding, you are advised to first master the overall code flow and the key structures, and then consolidate that understanding in practice.
+
+
diff --git a/content/en/post/2022/Segment-Page-Feature-of-openGauss-for-Solving-File-Storage-Problems.md b/content/en/post/2022/Segment-Page-Feature-of-openGauss-for-Solving-File-Storage-Problems.md
new file mode 100644
index 0000000000000000000000000000000000000000..959e8c8f8280e4ac093067dceb52f1845126ee12
--- /dev/null
+++ b/content/en/post/2022/Segment-Page-Feature-of-openGauss-for-Solving-File-Storage-Problems.md
@@ -0,0 +1,227 @@
++++
+
+title = "Segment-Page Feature of openGauss for Solving File Storage Problems"
+
+date = "2021-10-20"
+
+tags = [ "Segment-Page Feature of openGauss for Solving File Storage Problems"]
+
+archives = "2021-10"
+
+author = "Peng Bao"
+
+summary = "Segment-Page Feature of openGauss for Solving File Storage Problems"
+
+img = "/en/post/2022/title/img16.png"
+
+times = "12:30"
+
++++
+
+# Segment-Page Feature of openGauss for Solving File Storage Problems
+
+In modern society, data is growing explosively, and service requirements in the industry are increasingly complex. The amount of data to be stored and the number of tables to be created keep growing. Each common data table of openGauss corresponds to one large logical file \(maximum size: 32 TB\).
The logical file is divided into multiple physical files of a fixed size, which are stored in the corresponding database directory. Therefore, as the data volume of a data table increases, the number of files required for underlying storage grows gradually. In addition, openGauss provides features such as hash bucket tables and large partitioned tables, where each data table is split into several sub-tables, so the number of files required at the bottom layer increases multiplicatively. This storage management mode therefore has the following problems:
+
+1. It depends heavily on the file system, and fine-grained control cannot be performed, which limits maintainability.
+2. In the case of a large amount of data, there are too many file handles. Currently, this can only be mitigated by using virtual handles, which affects system performance.
+3. Too many small files may cause random I/O problems in scenarios such as full build and full backup, affecting performance.
+
+To resolve these problems, openGauss introduces a segment-page storage management mechanism, which is similar to the segment-page memory management of an operating system but differs greatly in its implementation.
+
+## Implementation Principle of the Segment-Page Mechanism
+
+With segment-page storage management, tablespaces and data files are logically organized into segments, extents, and pages/blocks for storage allocation and management, as shown in the following figure. Specifically, a database \(in a tablespace\) has only one segment space. The actual physical storage may be one file or may be split into multiple files, and space for all tables in the database is allocated from this segment space. Therefore, the number of tables is irrelevant to the number of physical files. Each table has a logical segment, and all data of the table is stored in that segment. Multiple extents are mounted to each segment, and each extent is a range of contiguous physical pages. Extent sizes can be flexibly adjusted based on service requirements to avoid wasting storage space.
+
+![](../figures/zh-cn_image_0000001207699778.jpg)
+
+Figure 1 Segment-page storage design
+
+Segment-page files can be automatically expanded until the disk space is used up or the threshold of the tablespace is reached. Segment-page storage does not automatically reclaim disk space: after some data tables are deleted, the space they occupied in the segment-page file is reserved and the disk space is not released. The reserved space is reused by tables that are expanded or created later. If the space does not need to be reused, you can manually call system functions to recycle it and then release the disk space.
+
+In the internal implementation, each segment corresponds to a physical file that was originally stored in page mode. For example, each partitioned table, and each bucket in a hash bucket table, has an independent segment. Multiple extents are mounted to each segment. Each extent is contiguous within a file, but different extents are not necessarily contiguous with each other. A segment can be dynamically expanded by adding new extents, but an extent cannot be reclaimed individually; you can reclaim storage space by segment by truncating or clustering the entire table.
+
+Currently, four extent sizes are supported: 64 KB, 1 MB, 8 MB, and 64 MB. For a segment, the size of the extent added by each expansion follows a fixed schedule: the first 16 extents are 64 KB each, the 17th to 143rd extents are 1 MB each, and the same pattern continues for the larger sizes.
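+
+The schedule can be summarized by a small helper function. The following is a hypothetical sketch that mirrors Table 1 below, not an actual kernel function:
+
+```
+/* Hypothetical helper (not a kernel function) mirroring Table 1:
+ * the size, in KB, of the n-th extent (1-based) allocated to a segment. */
+static int extent_size_kb(int n)
+{
+    if (n <= 16)
+        return 64;             /* extents 1-16:    64 KB each */
+    if (n <= 143)
+        return 1024;           /* extents 17-143:  1 MB each  */
+    if (n <= 255)
+        return 8 * 1024;       /* extents 144-255: 8 MB each  */
+    return 64 * 1024;          /* extents 256+:    64 MB each */
+}
+```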
The following table lists the detailed specifications.
+
+Table 1 Classification of extents stored in a segment

+| Group | Extent Size | Extent Page Count | Extent Count Range | Total Page Count (cumulative) | Total Size (cumulative) |
+| --- | --- | --- | --- | --- | --- |
+| 1 | 64 KB | 8 | [1, 16] | 128 | 1 MB |
+| 2 | 1 MB | 128 | [17, 143] | 16K | 128 MB |
+| 3 | 8 MB | 1024 | [144, 255] | 128K | 1 GB |
+| 4 | 64 MB | 8192 | [256, …] | ... | ... |
+
+## Guide to Using Segment-Page Tables
+
+When using the CREATE TABLE statement to create a table, you can specify **segment=on** so that the row-store table stores data in segment-page mode. If **hashbucket=on** is specified, **segment=on** is forcibly used by default. Currently, segment-page storage does not support column-store tables. Segment-page tablespaces are created automatically and do not require additional commands.
+
+- Set **segment** to **on** to create a segment-page common table.
+
+    create table t1\(a int, b int, PRIMARY KEY\(a,b\)\) with\(segment=on\);
+
+    ![](../figures/zh-cn_image_0000001207539820.jpg)
+
+- Set **hashbucket** to **on** to create a segment-page hash bucket table.
+
+    create table t1\(a int, b int, PRIMARY KEY\(a,b\)\) with\(hashbucket=on\);
+
+    ![](../figures/zh-cn_image_0000001252579733.jpg)
+
+To help users better use the segment-page storage function, openGauss provides two built-in system functions to display extent usage. Users can use these two functions to determine whether to reclaim data and which part of the data to reclaim.
+
+- **pg\_stat\_segment\_space\_info\(Oid tablespace, Oid database\);** takes the OIDs of a tablespace and a database and displays the usage information of all extent groups in the tablespace.
+
+    Table 2 pg\_stat\_segment\_space\_info view column information

+| Name | Description |
+| --- | --- |
+| extent_size | Extent specification of the extent group, in blocks. |
+| total_blocks | Total number of blocks in the physical file. |
+| meta_data_blocks | Number of blocks occupied by the metadata managed in the tablespace, including the space header and map pages but excluding segment heads. |
+| used_data_blocks | Number of blocks used for storing data, including segment heads. |
+| utilization | Percentage of used blocks to total blocks, that is, (used_data_blocks + meta_data_blocks)/total_blocks. |
+| high_water_mark | High-water mark of allocated extents, that is, the maximum physical page number. Blocks beyond the high-water mark are unused and can be reclaimed directly. |
+
+    ![](../figures/zh-cn_image_0000001207699780.jpg)
+
+- **pg\_stat\_segment\_extent\_usage\(Oid tablespace, Oid database, uint32 extent\_type\);** returns, for each call, the usage information of each allocated extent in an extent group. **extent\_type** indicates the type of the extent group and is an integer ranging from 1 to 5. If the value is not within this range, an error is reported.
+
+    Table 3 pg\_stat\_segment\_extent\_usage view column information

+| Name | Description |
+| --- | --- |
+| start_block | Start physical page number of the extent. |
+| extent_size | Extent size. |
+| usage_type | Usage type of the extent, for example, segment head or data extent. |
+| ower_location | Location of the object that owns the extent. For example, the owner of a data extent is the head of the segment to which the extent belongs. |
+| special_data | Position of the extent within its owner. The value of this column is related to the usage type. For example, the special data of a data extent is its extent ID in the segment to which it belongs. |
+
+- **gs\_spc\_shrink\(Oid tablespace, Oid database, uint32 extent\_type\);** clears one extent group at a time. The target size for shrinking is calculated automatically as follows: active data volume + 128 MB, rounded up and aligned to 128 MB.
+
+## Summary
+
+openGauss provides the segment-page solution to the problem of too many underlying file handles when there are a large number of hash bucket tables and large partitioned tables. In the segment-page solution, a table corresponds to one logical segment, and different segments are stored at the bottom layer in a single physical file, greatly reducing the number of underlying physical file handles. Even with a large amount of data, scenarios with too many file handles, as with common tables, are avoided, and system maintainability is improved. In addition, in scenarios such as full build and full backup, the random I/O caused by too many small files is reduced, improving system I/O performance. The parameters of segment-page tables are currently fixed. In the future, openGauss can use AI technologies to automatically tune the parameters of the segment-page storage mechanism, providing users with more intelligent and better-performing segment-page storage policies.
+
diff --git a/content/en/post/2022/Setting-up-One-Primary-and-Two-Standby-openGauss-Databases-on-Kubernetes.md b/content/en/post/2022/Setting-up-One-Primary-and-Two-Standby-openGauss-Databases-on-Kubernetes.md
new file mode 100644
index 0000000000000000000000000000000000000000..592999cf6577d10a1ed54e3b9bf42571a90d01c1
--- /dev/null
+++ b/content/en/post/2022/Setting-up-One-Primary-and-Two-Standby-openGauss-Databases-on-Kubernetes.md
@@ -0,0 +1,466 @@
++++
+
+title = "Setting up One Primary and Two Standby openGauss Databases on Kubernetes"
+
+date = "2021-10-09"
+
+tags = [ "Setting up One Primary and Two Standby openGauss Databases on Kubernetes"]
+
+archives = "2021-10"
+
+author = "Bin Zhou"
+
+summary = "Setting up One Primary and Two Standby openGauss Databases on Kubernetes"
+
+img = "/en/post/2022/title/img2.png"
+
+times = "12:30"
+
++++
+
+# Setting up One Primary and Two Standby openGauss Databases on Kubernetes
+
+Initialize the environment on each host according to its role \(**master** or **node**\).

+| IP | Hostname | Role |
+| --- | --- | --- |
+| 192.168.0.1 | k8smaster | master |
+| 192.168.0.2 | k8snode01 | node |
+ +Disable **firewalld**. + +- systemctl stop firewalld +- systemctl disable firewalld + +## 1. Update Docker. + +``` +rpm -qa|grep docker +yum remove docker +curl -fsSL https://get.docker.com/ | sh +systemctl start docker +systemctl enable docker +``` + +## 2. Prepare the Kubernetes source. + +``` +vim /etc/yum.repos.d/kubernetes.repo + +[kubernetes] +name=Kubernetes +baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64 +enabled=1 +gpgcheck=0 +repo_gpgcheck=0 +gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg + +yum install -y kubeadm kubectl etcd +``` + +## 3. Check image names required by kubeadm. + +``` +[root@ecs-66cc dockerimages]# kubeadm config images list + +k8s.gcr.io/kube-apiserver:v1.21.1 +k8s.gcr.io/kube-controller-manager:v1.21.1 +k8s.gcr.io/kube-scheduler:v1.21.1 +k8s.gcr.io/kube-proxy:v1.21.1 +k8s.gcr.io/pause:3.4.1 +k8s.gcr.io/etcd:3.4.13-0 +k8s.gcr.io/coredns/coredns:v1.8.0 +``` + +## 4. Install images required for Kubernetes. + +``` +docker pull registry.aliyuncs.com/google_containers/kube-apiserver:v1.21.1 +docker pull registry.aliyuncs.com/google_containers/kube-controller-manager:v1.21.1 +docker pull registry.aliyuncs.com/google_containers/kube-scheduler:v1.21.1 +docker pull registry.aliyuncs.com/google_containers/kube-proxy:v1.21.1 +docker pull registry.aliyuncs.com/google_containers/pause:3.4.1 +docker pull registry.aliyuncs.com/google_containers/etcd:3.4.13-0 +docker pull coredns/coredns:1.8.0 +``` + +## 5. Modify the Docker tags to match those required by kubeadm. + +- Download images from Chinese sources. + + ``` + docker tag registry.aliyuncs.com/google_containers/kube-apiserver:v1.21.1 k8s.gcr.io/kube-apiserver:v1.21.1 + docker tag registry.aliyuncs.com/google_containers/kube-controller-manager:v1.21.1 k8s.gcr.io/kube-controller-manager:v1.21.1 + docker tag registry.aliyuncs.com/google_containers/kube-scheduler:v1.21.1 k8s.gcr.io/kube-scheduler:v1.21.1 + docker tag registry.aliyuncs.com/google_containers/kube-proxy:v1.21.1 k8s.gcr.io/kube-proxy:v1.21.1 + docker tag registry.aliyuncs.com/google_containers/pause:3.4.1 k8s.gcr.io/pause:3.4.1 + docker tag registry.aliyuncs.com/google_containers/etcd:3.4.13-0 k8s.gcr.io/etcd:3.4.13-0 + docker tag docker.io/coredns/coredns:1.8.0 k8s.gcr.io/coredns/coredns:v1.8.0 + ``` + +- Delete invalid images. + + ``` + docker rmi registry.aliyuncs.com/google_containers/kube-apiserver:v1.21.1 + docker rmi registry.aliyuncs.com/google_containers/kube-controller-manager:v1.21.1 + docker rmi registry.aliyuncs.com/google_containers/kube-scheduler:v1.21.1 + docker rmi registry.aliyuncs.com/google_containers/kube-proxy:v1.21.1 + docker rmi registry.aliyuncs.com/google_containers/pause:3.4.1 + docker rmi registry.aliyuncs.com/google_containers/etcd:3.4.13-0 + docker rmi coredns/coredns:1.8.0 + ``` + + +## 6. Write Kubernetes initialization configurations and initialize Kubernetes as **master**. + +kubeadm.yaml + +``` +apiVersion: kubeadm.k8s.io/v1beta2 +clusterName: kubernetes +kind: ClusterConfiguration +kubernetesVersion: v1.21.1 +controllerManager: + extraArgs: + horizontal-pod-autoscaler-use-rest-clients: "true" + horizontal-pod-autoscaler-sync-period: "10s" + node-monitor-grace-period: "10s" +apiServer: + extraArgs: + runtime-config: "api/all=true" +``` + +Copy the configuration file to Kubernetes and specify it during initialization. 
+
+```
+cp kubeadm.yaml /etc/kubernetes/manifests/
+kubeadm init --config kubeadm.yaml
+```
+
+After the operation is successful, retain the following information for later use:
+
+```
+kubeadm join 192.168.0.35:6443 --token ru2883.u4rhwkx5oqrol9at \
+    --discovery-token-ca-cert-hash sha256:f2dbe7ce49b322e8145b6e9b4303e56468ad1352daabecb797f7bd161a64e018
+```
+
+Perform initialization.
+
+```
+mkdir -p $HOME/.kube
+sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
+sudo chown $(id -u):$(id -g) $HOME/.kube/config
+```
+
+Install the network plugin.
+
+```
+kubectl apply -f "https://cloud.weave.works/k8s/net?k8s-version=$(kubectl version | base64 | tr -d '\n')"
+```
+
+## 7. Join nodes.
+
+A node does not need to be initialized after installation. Run the **kubeadm join** command to add it to the primary node.
+
+```
+kubeadm join 192.168.0.35:6443 --token ru2883.u4rhwkx5oqrol9at \
+    --discovery-token-ca-cert-hash sha256:f2dbe7ce49b322e8145b6e9b4303e56468ad1352daabecb797f7bd161a64e018
+```
+
+## 8. Import images as **master** and **node**.
+
+```
+docker load < opengauss.tar.gz
+```
+
+## 9. Create a service \(SVC\) as **master**.
+
+Create an SVC for the pods: kubectl create -f opengauss-svc.yaml
+
+The content of the **opengauss-svc.yaml** file is as follows:
+
+```
+apiVersion: v1
+kind: Service
+metadata:
+  name: opengauss-service-1
+spec:
+  ports:
+  - port: 5432
+    protocol: TCP
+    targetPort: 5432
+    name: gsql
+  - port: 5434
+    protocol: TCP
+    targetPort: 5434
+    name: localport
+  - port: 2380
+    protocol: TCP
+    targetPort: 2380
+    name: etcd1-service
+  - port: 2379
+    protocol: TCP
+    targetPort: 2379
+    name: etcd1-local
+  selector:
+    app: opengauss-1
+  clusterIP: None
+
+---
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: opengauss-service-2
+spec:
+  ports:
+  - port: 5432
+    protocol: TCP
+    targetPort: 5432
+    name: gsql
+  - port: 5434
+    protocol: TCP
+    targetPort: 5434
+    name: localport
+  - port: 2380
+    protocol: TCP
+    targetPort: 2380
+    name: etcd1-service
+  - port: 2379
+    protocol: TCP
+    targetPort: 2379
+    name: etcd1-local
+  selector:
+    app: opengauss-2
+  clusterIP: None
+
+---
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: opengauss-service-3
+spec:
+  ports:
+  - port: 5432
+    protocol: TCP
+    targetPort: 5432
+    name: gsql
+  - port: 5434
+    protocol: TCP
+    targetPort: 5434
+    name: localport
+  - port: 2380
+    protocol: TCP
+    targetPort: 2380
+    name: etcd1-service
+  - port: 2379
+    protocol: TCP
+    targetPort: 2379
+    name: etcd1-local
+  selector:
+    app: opengauss-3
+  clusterIP: None
+```
+
+## 10. Create a pod as **master**.
+
+Create the primary and standby pods of openGauss.
+
+kubectl create -f opengauss-pod.yaml
+
+The content of the **opengauss-pod.yaml** file is as follows:
+
+```
+apiVersion: v1
+kind: Pod
+metadata:
+  name: opengauss-1
+  labels:
+    app: opengauss-1
+spec:
+  restartPolicy: Never
+  containers:
+  - name: opengauss-1
+    image: opengauss:1.0.5
+    imagePullPolicy: Never
+    securityContext:
+      runAsUser: 0
+    volumeMounts:
+    - mountPath: /var/lib/opengauss/data/
+      name: openguass-volume
+    ports:
+    - containerPort: 5432
+      name: opengauss
+    env:
+    - name: HOST_NAME
+      value: opengauss-1
+    - name: HOST_IP
+      valueFrom:
+        fieldRef:
+          fieldPath: status.podIP
+    - name: PEER_IPS
+      value: opengauss-service-2,opengauss-service-3
+    - name: PEER_HOST_NAMES
+      value: opengauss-2,opengauss-3
+    - name: PORT
+      value: "5432"
+    - name: GS_PASSWORD
+      value: "Test@56789"
+    - name: SERVER_MODE
+      value: primary
+    - name: db_config
+      value:
+  volumes:
+  - name: openguass-volume
+    hostPath:
+      path: /data/opengauss-1/
+      type: DirectoryOrCreate
+
+---
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: opengauss-2
+  labels:
+    app: opengauss-2
+spec:
+  restartPolicy: Never
+  containers:
+  - name: opengauss-2
+    image: opengauss:1.0.5
+    imagePullPolicy: Never
+    securityContext:
+      runAsUser: 0
+    volumeMounts:
+    - mountPath: /var/lib/opengauss/data/
+      name: openguass-volume
+    ports:
+    - containerPort: 5432
+      name: opengauss
+    env:
+    - name: HOST_NAME
+      value: opengauss-2
+    - name: HOST_IP
+      valueFrom:
+        fieldRef:
+          fieldPath: status.podIP
+    - name: PEER_IPS
+      value: opengauss-service-1,opengauss-service-3
+    - name: PEER_HOST_NAMES
+      value: opengauss-1,opengauss-3
+    - name: PORT
+      value: "5432"
+    - name: GS_PASSWORD
+      value: "Test@56789"
+    - name: SERVER_MODE
+      value: standby
+    - name: db_config
+      value:
+  volumes:
+  - name: openguass-volume
+    hostPath:
+      path: /data/opengauss-2/
+      type: DirectoryOrCreate
+
+---
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: opengauss-3
+  labels:
+    app: opengauss-3
+spec:
+  restartPolicy: Never
+  containers:
+  - name: opengauss-3
+    image: opengauss:1.0.5
+    imagePullPolicy: Never
+    securityContext:
+      runAsUser: 0
+    volumeMounts:
+    - mountPath: /var/lib/opengauss/data/
+      name: openguass-volume
+    ports:
+    - containerPort: 5432
+      name: opengauss
+    env:
+    - name: HOST_NAME
+      value: opengauss-3
+    - name: HOST_IP
+      valueFrom:
+        fieldRef:
+          fieldPath: status.podIP
+    - name: PEER_IPS
+      value: opengauss-service-1,opengauss-service-2
+    - name: PEER_HOST_NAMES
+      value: opengauss-1,opengauss-2
+    - name: PORT
+      value: "5432"
+    - name: GS_PASSWORD
+      value: "Test@56789"
+    - name: SERVER_MODE
+      value: standby
+    - name: db_config
+      value:
+  volumes:
+  - name: openguass-volume
+    hostPath:
+      path: /data/opengauss-3/
+      type: DirectoryOrCreate
+```
+
+## 11. Test the database as **master**.
+
+```
+# 1. Access the primary pod of the database.
+kubectl exec -it opengauss-1 -- /bin/bash
+# 2. Switch to the omm user.
+su omm
+# 3. Access the database.
+gsql
+```
+
+## 12. Common Commands
+
+All commands are executed as **master**.
+
+```
+# View cluster nodes.
+kubectl get node
+# View cluster pods.
+kubectl get pod --all-namespaces
+# Check the cluster services.
+kubectl get svc --all-namespaces
+# Access a container (use the pod name for a single-container pod).
+kubectl exec -it <container name> -n opengauss -- /bin/bash
+# View pod or SVC details.
+kubectl describe pod/svc <pod/SVC name> -n <namespace>
+# View the log information.
+kubectl logs <pod name> -n <namespace>
+```
+
diff --git a/content/en/post/2022/The-Combination-of-openGauss-Database-and-AI.md b/content/en/post/2022/The-Combination-of-openGauss-Database-and-AI.md
new file mode 100644
index 0000000000000000000000000000000000000000..875e5161d59823d3d1c65262cd78453dc6d342da
--- /dev/null
+++ b/content/en/post/2022/The-Combination-of-openGauss-Database-and-AI.md
@@ -0,0 +1,125 @@
++++
+
+title = "The Combination of openGauss Database and AI"
+
+date = "2020-10-10"
+
+tags = [ "The Combination of openGauss Database and AI"]
+
+archives = "2020-10"
+
+author = "Tianqing Wang"
+
+summary = "The Combination of openGauss Database and AI"
+
+img = "/en/post/2022/title/img8.png"
+
+times = "12:30"
+
++++
+
+# The Combination of openGauss Database and AI
+
+openGauss has powerful computing performance and efficient data processing capabilities. It is also a native AI database that supports parameter self-tuning, SQL diagnosis, fault self-diagnosis, and full lifecycle management. The following describes the AI features of openGauss.
+
+## X-Tuner: database parameter tuning framework.
+
+A database is a very complex system. It contains a large number of configuration parameters that control memory allocation, I/O optimization, query plan costs, parallelism, logging, data recovery, and other behaviors. Under a specific load, database performance varies with these configuration parameters, and many database administrators and experts try to adjust them to achieve good performance.
+
+However, database parameter tuning is an NP-hard problem. People generally adjust parameters based on their own experience and their understanding of the current environment, so tuning involves a large number of uncertainties. To address this, database engineers have tried to build intelligent systems that perform automatic tuning, such as Postgresqltuner.pl for PostgreSQL and mysqltuner.pl for MySQL. Likewise, with the emergence of AI technologies, AI-based tuning solutions, such as OtterTune, have appeared. Nevertheless, all these tuning solutions have the following restrictions:
+
+- The database has hundreds of parameters, and DBAs cannot adjust such a number of parameters at a time.
+- Traditional machine learning requires a large amount of training data. Regardless of whether the method is otherwise feasible, it is difficult to collect this data, especially data that shows good performance.
+
+- Some methods use reinforcement learning, but the relationship between the database state and the database parameters is not considered during tuning.
+
+Considering the preceding restrictions, openGauss developed its own database parameter tuning framework, X-Tuner. Compared with traditional methods, X-Tuner has the following features:
+
+- **1.** Robust and fault-tolerant:
+
+    The X-Tuner framework is designed with a large number of fault-tolerance and emergency-handling mechanisms. When the system or database is faulty, the algorithm can exit normally without affecting the system.
+
+- **2.** Flexible deployment and easy to use:
+
+    X-Tuner is developed on Python 3.0+ and supports the Linux and Windows OSs, so users can deploy it easily. In terms of usage, X-Tuner supports local and remote connection modes, which suit various user situations.
+
+- **3.** Easy to understand, facilitating secondary development:
+
+    X-Tuner is organized strictly into the benchmark test module, tuning algorithm module, connection module, and log module.
X-Tuner is highly hierarchical and easy to understand. In addition, users can tune it or develop their own functional modules based on it.
+
+    Tests show that the X-Tuner parameter tuning framework, based on reinforcement learning and heuristic algorithms, can greatly improve system performance with minimal memory usage. Figure 1 shows the parameter tuning process.
+
+    ![](../figures/zh-cn_image_0000001251969031.jpg)Figure 1 X-Tuner parameter tuning process
+
+
+With the X-Tuner technology, HUAWEI CLOUD DAS can intelligently recommend parameters based on the historical load of user databases. Tests show that overall performance is improved by about 20%, greatly saving cloud computing resources and reducing production costs.
+
+## SQLDiag: intelligently identifies SQL statements.
+
+SQLDiag is a framework for estimating SQL statement execution duration in openGauss. Existing prediction techniques are mainly model predictions based on execution plans. However, such prediction solutions are mainly applicable to OLAP scenarios and require the complete execution plan of each SQL statement, which greatly limits their use for short queries such as those in OLTP or HTAP workloads.
+
+Different from the preceding solutions, SQLDiag focuses on the historical SQL statements of a database. Because the execution duration of a database's SQL statements does not differ greatly over a short time, SQLDiag can detect similar SQL statements in the historical data and, based on SQL vectorization and a time-series prediction algorithm, predict the execution duration of each SQL statement. In this way, potential slow SQL statements can be identified. This framework has the following advantages:
+
+- No execution plan is required for the SQL statements, so there is no impact on database performance.
+- SQLDiag is widely applicable; it can even be used for NoSQL after reconstruction, while many other algorithms in the industry target only one scenario, such as OLTP or OLAP.
+- The framework is robust and easy to understand. Users can design their own prediction models by simply modifying the framework.
+
+Figure 2 shows the SQLDiag prediction result.
+
+![](../figures/zh-cn_image_0000001251839693.png)
+
+Figure 2 SQLDiag prediction result
+
+## Intelligent optimizer: database execution cost prediction.
+
+In a large number of service scenarios that depend on databases, operations such as account login and order query, as well as report queries and data mining involving hundreds of millions of lines, are performed. These operations are abstracted and converted at the service application layer and executed in the form of SQL statements, which the SQL engine in the database kernel further optimizes.
+
+Challenges from service logic: As service scenarios become increasingly complex and business intelligence tools are used, SQL statements of varying quality are generated. Some of these statements may consume a large number of read/write and computing resources, causing the server to block other service statements. Therefore, an SQL diagnosis capability is required in the upper-layer service components of the database.
+
+Challenges from SQL statements: During the optimization of query execution plans, increasingly complex query statements bring new challenges to query optimization. In the early stage of database system development, query optimization mainly depended on rule-based expert systems, that is, a series of optimization rules applied in a strict order.
This type of expert system can be regarded as a summary of database optimization experts' experience in common scenarios. Consequently, the rule system always generates the same query plan for the same query statement, regardless of the actual data volume and distribution in the database. If a scenario does not match the experience rules, database efficiency cannot be ensured.
+
+The core problem of SQL diagnosis and plan optimization is the evaluation of resource overhead. In mainstream database products, evaluating the resource overhead of a query mainly depends on sampling and analyzing the full data in the database, building statistical models and cost models, and abstracting the plan execution process. This currently faces the following challenges:
+
+- The data sampling and analysis process occupies a large share of the server's disk read/write and CPU computing resources; however, if the sampling rate is reduced to avoid this overhead, the accuracy of the query resource overhead evaluation decreases.
+- As service statements keep being executed, the data scale and distribution change gradually, which invalidates the original data models. These two types of problems may cause the server to fail to respond for a long time.
+
+Is there a way to maintain the accuracy of resource overhead prediction while encroaching on database resources as little as possible? The answer is yes: openGauss provides a query performance prediction function based on online deep learning for database users.
+
+The intelligent optimizer has the following features:
+
+- **1.** One-click modeling and machine learning.
+
+    For SQL performance evaluation, openGauss integrates data collection, cleaning, preprocessing, encoding, and training monitoring. Users only need to configure a few recommended model parameters and call the model prediction API to obtain the performance prediction result after model training is complete.
+
+- **2.** Fine-grained, easily locating performance bottlenecks.
+
+    It supports fine-grained, fragment-level query performance prediction for query plans, helping users locate performance bottlenecks and better rewrite statements. In the future, intelligent SQL optimization based on the predicted cost performance of plan segments will be supported.
+
+- **3.** Flexible deployment, minimizing the impact on database performance.
+
+    The model computing module can be deployed on the cloud or in another environment isolated from the database, based on user requirements. Historical performance data is used for modeling, and no extra resource overhead for data sampling is incurred.
+
+
+- **4.** Open APIs, benefiting data scientists.
+
+    The HTTPS protocol is used to connect the database kernel to the deep learning model. Open APIs allow users to build custom machine learning jobs into database functions for one-click calling.
+
+
+According to actual tests, the prediction accuracy of query performance is improved by 40% compared with the native PostgreSQL model.
+
+![](../figures/zh-cn_image_0000001207089084.gif)
+
+Figure 3 Predicted query performance of openGauss compared with the native PostgreSQL database
+
+According to the distribution of prediction accuracy, the accuracy at the 95th percentile is improved by 3e5 times, and the accuracy at the 75th percentile is improved by 124 times. The overall prediction reliability is greatly improved.
+
+![](../figures/zh-cn_image_0000001207249058.gif)
+
+Figure 4 Prediction accuracy distribution of openGauss and PostgreSQL
+
+## Summary and Prospect
+
+These features are an early attempt at combining AI with databases in openGauss, and there may still be many shortcomings. In addition to the preceding typical open-source AI features, many more AI features are under exploration. In the preceding application scenarios, we can see that AI-based methods can greatly reduce manual effort and improve production efficiency.
+
+Although the road to combining AI with databases is difficult and rough, and even faces doubts from the industry, numerous R&D engineers have never given up and keep moving forward with the ideal of "all things intelligent". We hope that the open-source openGauss can attract more developers in the industry to integrate AI with databases, further promote the upgrade of database technologies, and stimulate the emergence of more valuable and meaningful AI databases, so that the great vision of intelligent databases can be realized in the future.
+
diff --git a/content/en/post/2022/Transaction-Mechanism-Source-Code-Analysis.md b/content/en/post/2022/Transaction-Mechanism-Source-Code-Analysis.md
new file mode 100644
index 0000000000000000000000000000000000000000..d18eb05bbd9c7627177d58a6ac19d1c5315c39f2
--- /dev/null
+++ b/content/en/post/2022/Transaction-Mechanism-Source-Code-Analysis.md
@@ -0,0 +1,2249 @@
++++
+
+title = "Transaction Mechanism Source Code Analysis"
+
+date = "2021-07-05"
+
+tags = [ "Transaction Mechanism Source Code Analysis"]
+
+archives = "2021-07"
+
+author = "Jiangjun Jiang"
+
+summary = "Transaction Mechanism Source Code Analysis"
+
+img = "/en/post/2022/title/img17.png"
+
+times = "12:30"
+
++++
+
+# Transaction Mechanism Source Code Analysis
+
+
+
+A transaction is the execution unit of database operations and must have the basic atomicity, consistency, isolation, and durability \(ACID\) properties.
+
+1. Atomicity: After a transaction is committed, either all of its operations are executed or none of them is.
+2. Consistency: Transaction execution must not damage the integrity and consistency of database data.
+3. Isolation: The execution of a transaction must not be interfered with by other transactions in a concurrent environment.
+4. Durability: Once a transaction is committed, its changes to the state of the database are saved permanently in the database.
+
+This chapter describes how the openGauss transaction module implements these basic properties of database transactions to ensure that user data is not lost, is modified correctly, and is queried correctly.
+
+## 5.1 Overall Transaction Architecture and Code Overview
+
+Figure 5-1 shows the overall structure of the transaction module.
+
+![](../figures/171.png)
+
+Figure 5-1 Overall structure
+
+In openGauss, the implementation of transactions is closely related to that of the storage engine. The code is mainly located in the **src/gausskernel/storage/access/transam** and **src/gausskernel/storage/lmgr** directories. Figure 5-1 shows the key components.
+
+1. Transaction manager: It is the core of the transaction system and is implemented as a finite state machine. It receives commands from external systems and determines the next execution step of a transaction based on the current transaction state.
+2. 
2. Log manager: It records the transaction execution state and the data change process, including transaction commit logs (Clogs), transaction commit sequence number logs (CSNlogs), and transaction logs (Xlogs). Clogs record only transaction execution results. CSNlogs record the commit sequence numbers of transactions for visibility determination. Xlogs are redo logs for data restoration and persistence.
3. Thread management mechanism: Transaction information of all threads is recorded in a shared memory area. Any thread can access this area to obtain the state information of other transactions.
4. Multi-Version Concurrency Control (MVCC) mechanism: In openGauss, the transaction read process uses the MVCC mechanism based on the commit sequence number (CSN) of each transaction, so read and write operations on tuples do not block each other. For details about visibility determination, see section 5.2 "Transaction Concurrency Control."
5. Lock manager: It controls the write concurrency of the system and uses the lock mechanism to ensure isolation between transaction write processes.

## 5.2 Transaction Concurrency Control

The transaction concurrency control mechanism ensures the ACID properties of openGauss when transactions are executed concurrently. The following describes the components of the transaction concurrency control mechanism in more detail.

- **Transaction State Machine**

  openGauss divides the transaction system into two layers: TBlockState (upper layer) and TransState (lower layer).

  With a layered design, details can be shielded when upper-layer services are processed, so that the various transaction execution statements on the client (BEGIN, START TRANSACTION, COMMIT, ROLLBACK, and END) can be supported flexibly.

  1. TBlockState: the state of the query statements sent from the client, which is used to improve the flexibility of data operations. Multiple query statements can be executed in one transaction in the form of a transaction block.
  2. TransState: the state of the entire transaction from the perspective of the kernel.
  - **Upper-Layer Transaction State Machines**

    The code of the TBlockState structure is as follows:

    ```
    typedef enum TBlockState
    {
        /* States outside a transaction block: the transaction contains a single SQL statement. */
        TBLOCK_DEFAULT,          /* Default state of the transaction block. */
        TBLOCK_STARTED,          /* Execute a single query statement. */

        /* States inside a transaction block: the transaction contains multiple statements. */
        TBLOCK_BEGIN,            /* Execute the BEGIN or START TRANSACTION statement. */
        TBLOCK_INPROGRESS,       /* The transaction block is being processed. */
        TBLOCK_END,              /* Execute the END or COMMIT statement. */
        TBLOCK_ABORT,            /* An error occurred inside the transaction block; wait for the ROLLBACK statement from the client. */
        TBLOCK_ABORT_END,        /* An error occurred inside the transaction block; the ROLLBACK statement has been received from the client and is executed. */
        TBLOCK_ABORT_PENDING,    /* Execution inside the transaction block succeeded; the ROLLBACK statement has been received from the client and is executed. */
        TBLOCK_PREPARE,          /* Execute the PREPARE TRANSACTION statement for a two-phase commit. */

        /* The states of a sub-transaction block are similar to those of the preceding transaction block. */
        TBLOCK_SUBBEGIN,         /* Execute the SAVEPOINT statement. */
        TBLOCK_SUBINPROGRESS,    /* The sub-transaction block is being processed. */
        TBLOCK_SUBRELEASE,       /* Execute the RELEASE SAVEPOINT statement. */
        TBLOCK_SUBCOMMIT,        /* Execute the END or COMMIT statement to recursively commit from the lowest-layer sub-transaction up to the top-layer transaction. */
        TBLOCK_SUBABORT,         /* An error occurred inside the sub-transaction block; wait for the ROLLBACK TO or ROLLBACK statement from the client. */
        TBLOCK_SUBABORT_END,     /* An error occurred inside the sub-transaction block; the ROLLBACK TO statement (rolling back to an upper-layer sub-transaction) or the ROLLBACK statement has been received and is executed. */
        TBLOCK_SUBABORT_PENDING, /* Execution inside the sub-transaction block succeeded; the ROLLBACK TO statement (rolling back to an upper-layer sub-transaction) or the ROLLBACK statement has been received and is executed. */
        TBLOCK_SUBRESTART,       /* Execution inside the sub-transaction block succeeded; the ROLLBACK TO statement (rolling back to the current sub-transaction) has been received and is executed. */
        TBLOCK_SUBABORT_RESTART  /* An error occurred inside the sub-transaction block; the ROLLBACK TO statement (rolling back to the current sub-transaction) has been received and is executed. */
    } TBlockState;
    ```

    For ease of understanding, the states of sub-transaction blocks are omitted from the following discussion; their state machine behaves similarly to that of the transaction block. The relationship between a transaction block and its sub-transaction blocks is similar to a stack: a sub-transaction block starts after its transaction block begins and ends before it ends.

    Figure 5-2 shows the state machine of an explicit transaction block and the corresponding transition functions.

    Figure 5-2 State machine of a transaction block

    ![](../figures/172.png)

    Table 5-1 lists the values of the transaction state machine structure corresponding to the transaction block states in Figure 5-2.

    **Table 5-1** Transaction block states

    | Transaction State | Transaction State Machine Structure |
    | --- | --- |
    | Default | TBLOCK_DEFAULT |
    | Started | TBLOCK_STARTED |
    | Transaction block started | TBLOCK_BEGIN |
    | Transaction block in progress | TBLOCK_INPROGRESS |
    | Transaction block ended | TBLOCK_END |
    | Rolling back | TBLOCK_ABORT |
    | Rollback ended | TBLOCK_ABORT_END |
    | Waiting for rollback | TBLOCK_ABORT_PENDING |
    When no exception occurs, the state machine of a transaction block cycles through the following states: TBLOCK_DEFAULT -> TBLOCK_STARTED -> TBLOCK_BEGIN -> TBLOCK_INPROGRESS -> TBLOCK_END -> TBLOCK_DEFAULT, as shown in Figure 5-2. The remaining states are exception-handling branches of the state points in the preceding normal scenario.

    1. Errors that occur before the TBLOCK_INPROGRESS state is entered: If a transaction has not started, an error is reported and the transaction is rolled back. The system clears resources and returns to the TBLOCK_DEFAULT state.
    2. Errors that occur in the TBLOCK_INPROGRESS state, in two scenarios. Transaction execution failure: TBLOCK_INPROGRESS -> TBLOCK_ABORT -> TBLOCK_ABORT_END -> TBLOCK_DEFAULT. Manual rollback of a transaction that executed successfully: TBLOCK_INPROGRESS -> TBLOCK_ABORT_PENDING -> TBLOCK_DEFAULT.
    3. Errors that occur when a user executes the COMMIT statement: TBLOCK_END -> TBLOCK_DEFAULT. As shown in Figure 5-2, the transaction leaves the TBLOCK_DEFAULT state after it starts and returns to this state after it ends.
    4. openGauss also supports implicit transaction blocks. When a client executes a single SQL statement, the statement is committed automatically, and the corresponding state machine is simpler: TBLOCK_DEFAULT -> TBLOCK_STARTED -> TBLOCK_DEFAULT.

  - **Lower-Layer Transaction States**

    The TransState structure specifies transaction states from the perspective of the kernel. Its code is as follows:

    ```
    typedef enum TransState
    {
        TRANS_DEFAULT,    /* The current state is the default idle state: no transaction has started. */
        TRANS_START,      /* The transaction is being started. */
        TRANS_INPROGRESS, /* The transaction has started and is in progress. */
        TRANS_COMMIT,     /* The transaction is being committed. */
        TRANS_ABORT,      /* The transaction is being rolled back. */
        TRANS_PREPARE     /* A two-phase commit transaction has entered the PREPARE TRANSACTION state. */
    } TransState;
    ```

    ![](../figures/173.png)

    Figure 5-3 Lower-layer transaction states

    Figure 5-3 shows the lower-layer states in the kernel. For details about the lower-layer state machine, see the description of TransState.

    1. Before a transaction starts, the transaction state is TRANS_DEFAULT.
    2. When a transaction starts, the transaction state is TRANS_START.
    3. After a transaction starts successfully, the transaction state remains TRANS_INPROGRESS.
    4. When a transaction is committed or rolled back, the transaction state is TRANS_COMMIT or TRANS_ABORT.
    5. After a transaction ends, the transaction state returns to TRANS_DEFAULT.

  - **Transaction State Machine Running Instance**

    This section provides a running instance of the state machine for an SQL statement sequence to help you better understand how internal transactions work. Execute the following SQL statements on the client:

    ```
    BEGIN;
    SELECT * FROM TABLE1;
    END;
    ```

    1) Overall execution process

    Figure 5-4 shows the overall execution process. The execution of any statement first enters the transaction-block layer of the transaction processing interface, then calls the underlying transaction functions to process the specific statement, and finally returns to the transaction-block layer. A simplified sketch of this per-statement flow follows the figure.

    **Figure 5-4** Overall execution process

    ![](../figures/174.png)
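    The flow in Figure 5-4 can be summarized in a short sketch. This is an illustration only: the stand-in bodies below are placeholders, and the real openGauss entry points (exec_simple_query, StartTransactionCommand, PortalRun, CommitTransactionCommand) take more parameters and handle many more cases.

    ```
    #include <stdio.h>

    /* Placeholder stand-ins for the real openGauss functions named in the text. */
    static void StartTransactionCommand(void)  { printf("start query\n"); }
    static void PortalRun(const char *query)   { printf("run: %s\n", query); }
    static void CommitTransactionCommand(void) { printf("finish query\n"); }

    /* Per-statement flow from Figure 5-4: enter the transaction-block layer,
     * execute the statement, and return to the transaction-block layer. */
    static void exec_simple_query_sketch(const char *query)
    {
        StartTransactionCommand();  /* dispatch on the upper-layer state */
        PortalRun(query);           /* process the statement itself */
        CommitTransactionCommand(); /* dispatch on the upper-layer state again */
    }

    int main(void)
    {
        exec_simple_query_sketch("BEGIN;");
        exec_simple_query_sketch("SELECT * FROM TABLE1;");
        exec_simple_query_sketch("END;");
        return 0;
    }
    ```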
    2) Execution process of the BEGIN statement (Figure 5-5)

    (1) The entry function **exec_simple_query** processes the BEGIN statement.

    (2) The **start_xact_command** function starts a QUERY statement and calls the **StartTransactionCommand** function. At this time, the upper-layer state of the transaction block is TBLOCK_DEFAULT, so the **StartTransaction** function is called to set the lower-layer transaction state to TRANS_START. After the memory, buffer, and lock resources are initialized, the lower-layer transaction state is set to TRANS_INPROGRESS, and the upper-layer state of the transaction block is set to TBLOCK_STARTED in the **StartTransactionCommand** function.

    (3) The **PortalRun** function processes the BEGIN statement, calls functions downwards, and calls the **BeginTransactionBlock** function to set the upper-layer state of the transaction block to TBLOCK_BEGIN.

    (4) The **finish_xact_command** function ends a QUERY statement, calls the **CommitTransactionCommand** function to change the upper-layer state of the transaction block from TBLOCK_BEGIN to TBLOCK_INPROGRESS, and waits to read the next statement.

    ![](../figures/175.png)

    Figure 5-5 Execution process of the BEGIN statement

    3) Execution process of the SELECT statement (Figure 5-6)

    (1) The entry function exec_simple_query processes the SELECT * FROM table1; statement.

    (2) The start_xact_command function starts a QUERY statement and calls the StartTransactionCommand function. The upper-layer state of the transaction block is TBLOCK_INPROGRESS, which indicates that execution is inside a transaction block. Therefore, the function returns directly without changing the upper-layer or lower-layer transaction state.

    (3) The PortalRun function executes the SELECT statement and calls the ExecutorRun function downwards, which executes the query according to the optimal execution plan.

    (4) The finish_xact_command function ends the QUERY statement and calls the CommitTransactionCommand function. The upper-layer state of the transaction block is still TBLOCK_INPROGRESS, so neither the upper-layer nor the lower-layer transaction state is changed.

    ![](../figures/176.png)

    Figure 5-6 Execution process of the SELECT statement

    4) Execution process of the END statement (Figure 5-7)

    (1) The entry function exec_simple_query processes the END statement.

    (2) The start_xact_command function starts a QUERY statement and calls the StartTransactionCommand function. The upper-layer state of the transaction block is TBLOCK_INPROGRESS, indicating that the transaction is still in progress, so neither the upper-layer nor the lower-layer transaction state is changed.

    (3) The PortalRun function processes the END statement, calls the processUtility function in sequence, and finally calls the EndTransactionBlock function to set the upper-layer state of the transaction block to TBLOCK_END.

    (4) The finish_xact_command function ends the QUERY statement and calls the CommitTransactionCommand function. The current state of the transaction block is TBLOCK_END, so this function calls the CommitTransaction function to commit the transaction: it sets the lower-layer transaction state to TRANS_COMMIT, commits the transaction, and clears transaction resources.
    After the cleanup, the lower-layer transaction state is set to TRANS_DEFAULT, and the CommitTransactionCommand function returns. The upper-layer state of the transaction block is set to TBLOCK_DEFAULT, and the entire transaction block ends.

    ![](../figures/177.png)

    Figure 5-7 Execution process of the END statement

  - **Functions Related to Transaction State Transition**

    1) Transaction processing subfunctions that apply for, recycle, and clear transaction resources based on the current upper-layer state machine of the transaction

    For details, see Table 5-2.

    **Table 5-2** Transaction processing subfunctions

    | Subfunction | Description |
    | --- | --- |
    | StartTransaction | Starts a transaction, initializes the memory and variables, and sets the lower-layer transaction state to TRANS_INPROGRESS. |
    | CommitTransaction | Transitions the lower-layer state from TRANS_INPROGRESS to TRANS_COMMIT, makes Clogs and Xlogs persistent locally, clears the corresponding transaction slot information, and then sets the lower-layer state to TRANS_DEFAULT. |
    | PrepareTransaction | Similar to CommitTransaction: transitions the lower-layer state from TRANS_INPROGRESS to TRANS_PREPARE, constructs the two-phase GXACT structure, creates the two-phase file, adds dummy slot information, transfers the thread lock information to the dummy slot, releases resources, and finally sets the lower-layer state to TRANS_DEFAULT. |
    | AbortTransaction | Releases LWLocks, UnlockBuffers, and LockErrorCleanup, transitions the lower-layer state from TRANS_INPROGRESS to TRANS_ABORT, records the corresponding Clogs, clears the transaction slot information, and releases various resources. |
    | CleanupTransaction | Generally called after AbortTransaction, when the lower-layer state is expected to be TRANS_ABORT; clears remaining resources. |
    | FinishPreparedTransaction | Ends a two-phase commit transaction. |
    | StartSubTransaction | Starts a sub-transaction. |
    | CommitSubTransaction | Commits a sub-transaction. |
    | AbortSubTransaction | Rolls back a sub-transaction. |
    | CleanupSubTransaction | Clears resource information of a sub-transaction, similar to CleanupTransaction. |
    | PushTransaction/PopTransaction | Sub-transactions are organized like a stack; these two functions start and end a sub-transaction, respectively. |
    2) Processing functions that call the preceding subfunctions based on the corresponding state machine

    For details, see Table 5-3.

    **Table 5-3** Transaction execution functions

    | Function | Description |
    | --- | --- |
    | StartTransactionCommand | Calls the corresponding transaction execution function based on the upper-layer state when a transaction statement starts. |
    | CommitTransactionCommand | Calls the corresponding transaction execution function based on the upper-layer state when a transaction statement ends. |
    | AbortCurrentTransaction | Called via longjmp when an internal error occurs in a transaction; clears the corresponding resources in advance and sets the upper-layer transaction state to TBLOCK_ABORT. |
    3) Functions for controlling the upper-layer transaction state machine

    For details, see Table 5-4. A simplified sketch of how the statement-level dispatch reacts to the states set by these functions follows the table.

    **Table 5-4** Functions for controlling the upper-layer transaction state machine

    | Function | Description |
    | --- | --- |
    | BeginTransactionBlock | Sets the upper-layer transaction state to TBLOCK_BEGIN when a transaction starts explicitly. |
    | EndTransactionBlock | Sets the upper-layer transaction state to TBLOCK_END when a transaction is committed explicitly. |
    | UserAbortTransactionBlock | Sets the upper-layer transaction state to TBLOCK_ABORT_PENDING or TBLOCK_ABORT_END when a transaction is rolled back explicitly. |
    | PrepareTransactionBlock | Sets the upper-layer transaction state to TBLOCK_PREPARE when the PREPARE statement is executed explicitly. |
    | DefineSavepoint | Calls the PushTransaction function to set the upper-layer state of the sub-transaction to TBLOCK_SUBBEGIN when the SAVEPOINT statement is executed. |
    | ReleaseSavepoint | Sets the upper-layer state of the sub-transaction to TBLOCK_SUBRELEASE when the RELEASE SAVEPOINT statement is executed. |
    | RollbackToSavepoint | Sets the upper-layer state of all sub-transactions to TBLOCK_SUBABORT_PENDING or TBLOCK_SUBABORT_END, and that of the top-layer transaction to TBLOCK_SUBABORT_RESTART, when the ROLLBACK TO statement is executed. |
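    The following sketch illustrates, under simplifying assumptions, how CommitTransactionCommand from Table 5-3 dispatches on the upper-layer state set by the functions in Table 5-4. It is not the actual openGauss implementation: only a few states are modeled, and CommitTransactionSketch is a placeholder.

    ```
    #include <stdio.h>

    /* Subset of the upper-layer states, for illustration only. */
    typedef enum { TBLOCK_DEFAULT, TBLOCK_STARTED, TBLOCK_BEGIN,
                   TBLOCK_INPROGRESS, TBLOCK_END } TBlockStateSketch;

    static TBlockStateSketch blockState = TBLOCK_DEFAULT;

    static void CommitTransactionSketch(void) { printf("commit and clean up\n"); }

    /* Dispatch performed at the end of each statement (see Table 5-3). */
    static void CommitTransactionCommandSketch(void)
    {
        switch (blockState) {
        case TBLOCK_STARTED:    /* single-statement (implicit) transaction */
            CommitTransactionSketch();
            blockState = TBLOCK_DEFAULT;
            break;
        case TBLOCK_BEGIN:      /* BEGIN has just run: enter the block */
            blockState = TBLOCK_INPROGRESS;
            break;
        case TBLOCK_INPROGRESS: /* statement inside a block: nothing to do */
            break;
        case TBLOCK_END:        /* END/COMMIT has run: really commit */
            CommitTransactionSketch();
            blockState = TBLOCK_DEFAULT;
            break;
        default:
            break;
        }
    }

    int main(void)
    {
        blockState = TBLOCK_BEGIN;        /* as set by BeginTransactionBlock */
        CommitTransactionCommandSketch(); /* -> TBLOCK_INPROGRESS */
        blockState = TBLOCK_END;          /* as set by EndTransactionBlock */
        CommitTransactionCommandSketch(); /* commits, -> TBLOCK_DEFAULT */
        return 0;
    }
    ```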
- **XID Allocation, Clogs, and CSNlogs**

  To distinguish different transactions in the database, openGauss allocates unique identifiers to transactions, namely transaction IDs (XIDs). An XID is a monotonically increasing number of the uint64 type. After a transaction ends, Clogs record whether the transaction was committed, and CSNlogs record the commit sequence number of the transaction for visibility determination.

  - **64-Bit XID Allocation**

    openGauss assigns a unique XID to each write transaction. When a tuple is inserted, the XID of the inserting transaction is written to the **xmin** field in the tuple header. When a tuple is updated or deleted, the XID of the current transaction is written to the **xmax** field in the tuple header. XIDs are allocated as monotonically increasing uint64 numbers. To save space and stay compatible with earlier versions, the **xmin** and **xmax** values are stored in two parts: the **xmin** and **xmax** fields in the tuple header are uint32 numbers, and the page header stores a 64-bit **xid_base** field for the current page.

    Figure 5-8 shows the tuple structure, and Figure 5-9 shows the page header structure. The 64-bit **xmin** or **xmax** value of each tuple is calculated as the uint32 value in the tuple header plus the value of **xid_base** in the page header.

    ![](../figures/zh-cn_image_0000001252563289.png)

    Figure 5-8 Tuple structure

    ![](../figures/zh-cn_image_0000001207963344.png)

    Figure 5-9 Page header structure

    When ever larger XIDs are continuously inserted into a page, an XID may exceed **xid_base** + 2^32. In this case, **xid_base** must be adjusted so that the **xmin** and **xmax** values of all tuples on the page can still be derived from **xid_base** and the uint32 values in the tuple headers. For details about the logic, see the key functions under "Clogs and CSNlogs" below; a short sketch of the computation follows at the end of this subsection.

    To prevent XIDs from being consumed too quickly, openGauss allocates XIDs only to write transactions and does not allocate extra XIDs to read-only transactions; that is, XIDs are allocated only when they are actually required. If a transaction has not been allocated an XID by the time its sub-transaction needs one, the system first allocates an XID to the parent transaction to ensure that the XID of the sub-transaction is greater than that of the parent. Theoretically, 64-bit XIDs are sufficient: even at 10 million transactions per second (TPS), 64-bit XIDs last for roughly 58,000 years.
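    The following is a minimal sketch of the xid_base arithmetic described above. The struct and function names are hypothetical; real openGauss pages and tuples carry many more fields, and the actual adjustment is performed by heap_page_prepare_for_xid.

    ```
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical, simplified page and tuple headers. */
    typedef struct { uint64_t xid_base; } PageHeaderSketch;
    typedef struct { uint32_t xmin_short; uint32_t xmax_short; } TupleHeaderSketch;

    /* Formula from the text: 64-bit xmin = 32-bit tuple value + page xid_base. */
    static uint64_t tuple_xmin64(const PageHeaderSketch *page,
                                 const TupleHeaderSketch *tup)
    {
        return page->xid_base + tup->xmin_short;
    }

    /* A new XID fits the page only while it lies in
     * [xid_base + FirstNormalXid, xid_base + 0xFFFFFFFF]; otherwise xid_base
     * must be shifted, or old tuples on the page must be frozen. */
    static int xid_fits_page(const PageHeaderSketch *page, uint64_t xid)
    {
        const uint64_t first_normal_xid = 3; /* smallest normal XID */
        return xid >= page->xid_base + first_normal_xid &&
               xid <= page->xid_base + UINT32_MAX;
    }

    int main(void)
    {
        PageHeaderSketch page = { 1ULL << 32 }; /* xid_base = 2^32 */
        TupleHeaderSketch tup = { 100, 0 };
        printf("xmin = %llu\n", (unsigned long long)tuple_xmin64(&page, &tup));
        printf("fits = %d\n", xid_fits_page(&page, (1ULL << 32) + 200));
        return 0;
    }
    ```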
  - **Clogs and CSNlogs**

    Clogs and CSNlogs maintain the mapping from XIDs to commit statuses and the mapping from XIDs to CSNs, respectively. Because memory resources are limited and long transactions may exist in the system, not all mappings can be kept in memory; they must be written to disk as physical files. This is why Clog files (XID -> CommitLog map) and CSNlog files (XID -> CommitSeqNoLog map) exist. Both CSNlogs and Clogs use the simple least recently used (SLRU) mechanism to read files and flush data to disks.

    1) Clogs record the commit status of XIDs. In openGauss, four bits are used to identify the status of each XID. The Clog status values are as follows:

    ```
    #define CLOG_XID_STATUS_IN_PROGRESS 0x00   /* The transaction has not started or is in progress (a crash may have occurred). */
    #define CLOG_XID_STATUS_COMMITTED 0x01     /* The transaction has been committed. */
    #define CLOG_XID_STATUS_ABORTED 0x02       /* The transaction has been rolled back. */
    #define CLOG_XID_STATUS_SUB_COMMITTED 0x03 /* The sub-transaction has been committed, but the status of its parent transaction is unknown. */
    ```

    Figure 5-10 shows the physical structure of a Clog page.

    ![](../figures/178.png)

    Figure 5-10 Physical structure of a Clog page

    In Figure 5-10, transactions 1, 4, and 5 are still in progress, transaction 2 has been committed, and transaction 3 has been rolled back.

    2) CSNlogs record the commit sequence numbers of transactions. openGauss allocates an 8-byte uint64 CSN to each XID, so an 8-KB page can store the CSNs of 1024 transactions. When the CSNlogs reach a certain size, they are divided into file blocks of 256 KB each. As with XIDs, several special numbers are reserved for CSNs. The special CSN values are as follows:

    ```
    #define COMMITSEQNO_INPROGRESS UINT64CONST(0x0)   /* The transaction has not been committed or rolled back. */
    #define COMMITSEQNO_ABORTED UINT64CONST(0x1)      /* The transaction has been rolled back. */
    #define COMMITSEQNO_FROZEN UINT64CONST(0x2)       /* The transaction has been committed and is visible to any snapshot. */
    #define COMMITSEQNO_FIRST_NORMAL UINT64CONST(0x3) /* Start value of normal transaction CSNs. */
    #define COMMITSEQNO_COMMIT_INPROGRESS (UINT64CONST(1) << 62) /* The transaction is being committed. */
    ```

    Similar to Clogs, the physical structure of CSNlogs is shown in Figure 5-11.

    ![](../figures/179.png)

    Figure 5-11 Physical structure of CSNlogs

    The CSNs corresponding to XIDs 2048, 2049, 2050, 2051, 2052, and 2053 are 5, 4, 7, 10, 6, and 8, respectively. That is, the transaction commit sequence is 2049 -> 2048 -> 2052 -> 2050 -> 2053 -> 2051.
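    With the fixed sizes just described (8-KB pages, four Clog status bits per XID, 8 bytes per CSN), locating the status of an XID reduces to simple modular arithmetic. The following sketch illustrates this; the macro names are modeled on, but not identical to, the real SLRU code, and the nibble ordering within a byte is an assumption.

    ```
    #include <stdint.h>
    #include <stdio.h>

    #define LOG_PAGE_SIZE 8192
    #define CLOG_BITS_PER_XACT 4
    #define CLOG_XACTS_PER_PAGE (LOG_PAGE_SIZE * 8 / CLOG_BITS_PER_XACT) /* 16384 */
    #define CSNS_PER_PAGE (LOG_PAGE_SIZE / 8)                            /* 1024 */

    static void locate_clog(uint64_t xid)
    {
        uint64_t page = xid / CLOG_XACTS_PER_PAGE;
        uint64_t slot = xid % CLOG_XACTS_PER_PAGE;
        printf("clog: page %llu, byte %llu, first nibble: %d\n",
               (unsigned long long)page, (unsigned long long)(slot / 2),
               (int)(slot % 2 == 0)); /* two 4-bit statuses per byte */
    }

    static void locate_csnlog(uint64_t xid)
    {
        printf("csnlog: page %llu, slot %llu\n",
               (unsigned long long)(xid / CSNS_PER_PAGE),
               (unsigned long long)(xid % CSNS_PER_PAGE));
    }

    int main(void)
    {
        locate_clog(2048);
        locate_csnlog(2048); /* first XID shown in Figure 5-11 */
        return 0;
    }
    ```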
    3) Key functions

    The functions that calculate the value of **xid_base** on a page with 64-bit XIDs are as follows:

    (1) **Heap_page_prepare_for_xid**: called when a write operation is performed on a page, to adjust the value of **xid_base**.

    ① If the new XID is between **xid_base** + **FirstNormalxid** and **xid_base** + **MaxShortxid** (0xFFFFFFFF), **xid_base** does not need to be adjusted.

    ② If the new XID is less than **xid_base** + **FirstNormalxid**, **xid_base** must be decreased.

    ③ If the new XID is greater than **xid_base** + **MaxShortxid**, **xid_base** must be increased.

    ④ In special cases, if the XID span of the page exceeds the range that 32 bits can represent, the smaller XIDs on the page must be frozen: the XID of a committed transaction is set to **FrozenTransactionId** (2), which is visible to all transactions, and the XID of a rolled-back transaction is set to **InvalidTransactionId** (0), which is invisible to all transactions.

    (2) **Freeze_single_heap_page**: freezes the small XIDs on a page.

    ① Calculate the value of **oldestxid**. Transactions with an XID smaller than **oldestxid** will never be accessed again, so the XIDs of committed transactions can be marked as **FrozenTransactionId** (visible to all transactions), and the XIDs of rolled-back transactions can be marked as **InvalidTransactionId** (invisible to all transactions).

    ② Clear the hot-update chain, redirect the item IDs, and defragment the page space.

    ③ Process each tuple based on **oldestxid**.

    (3) **Heap_page_shift_base**: updates the value of **xid_base** and adjusts the **xmin** and **xmax** values in each tuple header on the page.

    (4) **GetNewTransactionId**: obtains the latest XID.

- **MVCC Mechanism for Visibility Determination**

  openGauss uses the MVCC mechanism to ensure data consistency. When data is scanned, each transaction sees only the data as of the time its snapshot was taken, rather than the latest state of the data. This prevents the data inconsistency that updates by other concurrent transactions would otherwise cause. A main advantage of the MVCC mechanism is that a lock request for reading data does not conflict with a lock request for writing data, so read and write operations do not block each other. The following describes the transaction isolation levels and the CSN mechanism that openGauss uses to determine visibility.

  - **Transaction Isolation Levels**

    The SQL standard defines the following isolation levels in terms of the phenomena that must be avoided between concurrent transactions, as shown in Table 5-5.

    **Table 5-5** Transaction isolation levels

    | Isolation Level | P0 (Dirty Write) | P1 (Dirty Read) | P2 (Fuzzy Read) | P3 (Phantom Read) |
    | --- | --- | --- | --- | --- |
    | Read uncommitted | Impossible | Possible | Possible | Possible |
    | Read committed | Impossible | Impossible | Possible | Possible |
    | Repeatable read | Impossible | Impossible | Impossible | Possible |
    | Serializable | Impossible | Impossible | Impossible | Impossible |
    (1) Dirty write: Two transactions write the same data item and then commit or roll back separately, so the final result cannot be determined; that is, one transaction can roll back the other transaction's committed write.

    (2) Dirty read: A transaction reads data that another transaction has modified but not yet committed.

    (3) Fuzzy read (non-repeatable read): A transaction rereads data it has already read and finds that the result has been modified by another transaction.

    (4) Phantom read: A transaction repeatedly performs a range query that returns a group of matching rows, and the number of rows in the result set changes between queries because of modifications by other transactions.

    During the implementation of various database systems, additional phenomena arise between concurrent transactions, and the original isolation levels have been extended accordingly. For details, see Table 5-6.

    **Table 5-6** Transaction isolation level extensions

    | Isolation Level | P0 (Dirty Write) | P1 (Dirty Read) | P4 (Lost Update) | P2 (Fuzzy Read) | P3 (Phantom Read) | A5A (Read Skew) | A5B (Write Skew) |
    | --- | --- | --- | --- | --- | --- | --- | --- |
    | Read uncommitted | Impossible | Possible | Possible | Possible | Possible | Possible | Possible |
    | Read committed | Impossible | Impossible | Possible | Possible | Possible | Possible | Possible |
    | Repeatable read | Impossible | Impossible | Impossible | Impossible | Possible | Impossible | Impossible |
    | Snapshot consistent read | Impossible | Impossible | Impossible | Impossible | Occasional | Impossible | Possible |
    | Serializable | Impossible | Impossible | Impossible | Impossible | Impossible | Impossible | Impossible |
    (5) Lost update: After a transaction reads a tuple, another transaction modifies the tuple value before the first transaction updates it, so one of the modifications is lost.

    (6) Read skew: Assume that data items x and y satisfy the implicit constraint x + y = 100. Transaction 1 reads x = 50; transaction 2 writes x = 25 and updates y = 75 to keep the constraint satisfied, then commits. Transaction 1 then reads y = 75, so transaction 1 observes x + y = 125, which violates the constraint.

    (7) Write skew: Assume that data items x and y satisfy the implicit constraint x + y <= 100. Transaction 1 reads x = 50 and writes y = 50. Transaction 2 reads y = 30, writes x = 70, and commits. Transaction 1 then commits. The result, x = 70 and y = 50, violates the constraint x + y <= 100.

    openGauss provides the read committed and repeatable read isolation levels. Its repeatable read level does not exhibit phantom reads but does exhibit A5B (write skew).

  - **CSN Mechanism**

    1) Working principles of CSNs (Figure 5-12)

    ![](../figures/zh-cn_image_0000001208473690.png)

    Figure 5-12 Working principles of CSNs

    Each non-read-only transaction is assigned an XID while it runs. When the transaction commits, the global CSN is advanced, and the mapping between the new CSN and the XID is saved in the CSNlogs. In Figure 5-12, the solid vertical line marks the moment the snapshot is taken; the snapshot obtains the value 4, which is the CSN of the latest committed transaction (3) plus 1. Transactions TX1, TX3, and TX5 have been committed with CSNs 1, 2, and 3, respectively. Transactions TX2, TX4, and TX6 are in progress, and transactions TX7 and TX8 have not started. For this snapshot, the results of transactions whose CSN is smaller than 4 are visible; the results of the other transactions are invisible because they were not committed when the snapshot was taken.

    2) Process for determining visibility by using MVCC snapshots

    When a snapshot is taken, the minimum active XID is recorded as **snapshot.xmin**; the XID of the latest committed transaction (**latestCompletedXid**) plus 1 is recorded as **snapshot.xmax**; and the CSN of the latest committed transaction plus 1 (**NextCommitSeqNo**) is recorded as **snapshot.csn**. Figure 5-13 shows the visibility determination process; a short sketch of its rules follows the figure.

    ![](../figures/1710.png)

    Figure 5-13 Process for determining visibility by using MVCC snapshots

    (1) If the XID is greater than or equal to **snapshot.xmax**, the XID is invisible.

    (2) If the XID is smaller than **snapshot.xmin**, the transaction ended before the snapshot was taken. In this case, its commit status is read from the Clog, and the corresponding hint flag is set in the tuple header.

    (3) If the XID is between **snapshot.xmin** and **snapshot.xmax**, the CSN recorded at the transaction's end is read from the CSN-XID mapping. If the CSN is valid and smaller than **snapshot.csn**, the transaction is visible; otherwise, it is invisible.
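    The three rules can be condensed into the following sketch, which replays the example of Figure 5-12 (TX1, TX3, and TX5 committed with CSNs 1, 2, and 3; snapshot CSN 4). The toy CSNlog array and the simplified rule (2), which here only distinguishes committed from uncommitted, are illustrative assumptions; the real XidVisibleInSnapshot also handles hint bits, rollbacks, and the commit-in-progress flag.

    ```
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define CSN_INPROGRESS 0 /* stands in for COMMITSEQNO_INPROGRESS */

    typedef struct {
        uint64_t xmin; /* minimum active XID at snapshot time */
        uint64_t xmax; /* latestCompletedXid + 1 */
        uint64_t csn;  /* NextCommitSeqNo at snapshot time */
    } SnapshotSketch;

    /* Toy CSNlog, indexed by XID: TX1, TX3, TX5 committed with CSNs 1, 2, 3. */
    static const uint64_t toy_csnlog[9] = { 0, 1, 0, 2, 0, 3, 0, 0, 0 };

    static bool xid_visible(uint64_t xid, const SnapshotSketch *snap)
    {
        if (xid >= snap->xmax)
            return false;                             /* rule (1) */
        if (xid < snap->xmin)                         /* rule (2): commit     */
            return toy_csnlog[xid] != CSN_INPROGRESS; /* status (as per Clog) */
        uint64_t csn = toy_csnlog[xid];               /* rule (3): CSNlog     */
        return csn != CSN_INPROGRESS && csn < snap->csn;
    }

    int main(void)
    {
        SnapshotSketch snap = { 2, 6, 4 }; /* TX2 oldest active; xmax = TX5 + 1 */
        for (uint64_t xid = 1; xid <= 8; xid++)
            printf("TX%llu visible: %d\n", (unsigned long long)xid,
                   (int)xid_visible(xid, &snap));
        return 0;
    }
    ```

    Running the loop reports TX1, TX3, and TX5 as visible and all other transactions as invisible, matching Figure 5-12.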
    3) Commit process

    Figure 5-14 shows the transaction commit process.

    ![](../figures/1711.png)

    Figure 5-14 Commit process

    (1) The **commit-in-progress** flag is set in the CSN-XID mapping.

    (2) The value of **NextCommitSeqNo** is updated atomically.

    (3) Redo logs (Xlogs) are generated, and Clogs and CSNlogs are written.

    (4) The PGPROC structure is updated to remove the corresponding transaction information: both **xid** and **xmin** are set to **InvalidTransactionId**.

    4) Hot backup support

    An Xlog record carrying the **commit-in-progress** flag is written between steps (1) and (2) of the commit process. When reading a snapshot, the standby node takes the lightweight lock ProcArrayLock and calculates the current snapshot. If, when the snapshot's CSN is used, the CSN corresponding to an XID carries the **COMMITSEQNO_COMMIT_INPROGRESS** flag, the reader must wait until the corresponding commit Xlog has been replayed and then read the resulting CSN for visibility determination. To implement this wait, the standby node calls the **XactLockTableInsert** function to acquire the transaction exclusive lock of the corresponding XID while redoing the Xlog with the **commit-in-progress** flag; other read transactions that access this XID wait on the transaction lock until the corresponding commit Xlog has been replayed.

  - **Key Data Structures and Functions**

    1) Snapshots

    Code related to snapshots is as follows:

    ```
    typedef struct SnapshotData {
        SnapshotSatisfiesFunc satisfies; /* Function for determining visibility; generally HeapTupleSatisfiesMVCC. */
        TransactionId xmin; /* Minimum active XID for the current snapshot; transactions with a smaller XID have ended. */
        TransactionId xmax; /* XID of the latest committed transaction (latestCompletedXid) + 1; transactions with an XID greater than or equal to this value have not started, so the XID is invisible. */
        TransactionId* xip; /* Linked list of active transactions; invalid in the CSN version. */
        TransactionId* subxip; /* Linked list of cached active sub-transactions; invalid in the CSN version. */
        uint32 xcnt; /* Number of recorded active transactions; invalid in the CSN version. */
        GTM_Timeline timeline; /* Invalid in standalone openGauss. */
        uint32 max_xcnt; /* Maximum number of active-transaction XIDs; invalid in the CSN version. */
        int32 subxcnt; /* Number of entries in the cached active sub-transaction list; invalid in the CSN version. */
        int32 maxsubxcnt; /* Maximum number of entries in the cached active sub-transaction list; invalid in the CSN version. */
        bool suboverflowed; /* Whether the active sub-transaction list exceeds the pre-allocated upper limit in shared memory; invalid in the CSN version. */

        CommitSeqNo snapshotcsn; /* CSN of the snapshot, generally the CSN of the latest committed transaction + 1 (NextCommitSeqNo); transactions with a smaller CSN are visible. */

        int prepared_array_capacity; /* Invalid in standalone openGauss. */
        int prepared_count; /* Invalid in standalone openGauss. */
        TransactionId* prepared_array; /* Invalid in standalone openGauss. */

        bool takenDuringRecovery; /* Whether the snapshot was generated during recovery. */
        bool copied; /* Whether the snapshot is static at the session level or copied from newly allocated memory. */

        CommandId curcid; /* Command sequence number within the transaction block; data inserted by earlier commands of the same transaction is visible to later statements. */
        uint32 active_count; /* Reference count on the ActiveSnapshot stack. */
        uint32 regd_count; /* Reference count on the RegisteredSnapshotList. */
        void* user_data; /* Used by local multi-version snapshots; indicates that the snapshot is in use by threads and cannot be released directly. */
        SnapshotType snapshot_type; /* Invalid in standalone openGauss. */
    } SnapshotData;
    ```

    2) HeapTupleSatisfiesMVCC

    This function scans snapshots for common read transactions based on the CSN logic. The code is as follows:

    ```
    bool HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
    {
        …… /* Initialize variables. */

        if (!HeapTupleHeaderXminCommitted(tuple)) { /* Check the hint bit recorded in the tuple header. For visibility determination, openGauss needs the Clog commit status of the tuple's xmin and xmax. To avoid repeated Clog accesses, openGauss caches the transaction status in the tuple header with hint bits that indicate commit or rollback. The hint bits are not updated when a transaction commits or rolls back; instead, if they are not yet set during a visibility check, openGauss reads the status from the Clog and sets them; otherwise it reads the hint bits directly, so a tuple does not repeatedly look up the final commit status. If xmin or xmax is found committed during a scan, the corresponding flag is set to accelerate later scans; otherwise visibility determination continues. */
            if (HeapTupleHeaderXminInvalid(tuple)) /* Also based on the hint bits: if xmin is marked invalid, the inserting transaction was rolled back, and the tuple is invisible. */
                return false;

            if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(page, tuple))) { /* The tuple is accessed within the inserting transaction itself; the command ID (CID) must be compared so that, within one transaction, earlier inserts are visible to later queries. */
                ……
            } else { /* The tuple was inserted by another transaction; determine visibility based on the snapshot. */
                visible = XidVisibleInSnapshot(HeapTupleHeaderGetXmin(page, tuple), snapshot, &hintstatus); /* Determine visibility based on the CSNlogs and return the transaction's final commit status. */
                if (hintstatus == XID_COMMITTED) /* If the transaction has been committed, set the committed hint bit to accelerate later checks. */
                    SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, HeapTupleHeaderGetXmin(page, tuple));

                if (hintstatus == XID_ABORTED) {
                    …… /* The transaction was rolled back; set the rollback flag. */
                    SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, InvalidTransactionId);
                }
                if (!visible) { /* If xmin is invisible to the snapshot, the tuple is invisible; otherwise, the inserting transaction is committed for this snapshot, and the deleting transaction (xmax) is checked next. */
                    return false;
                }
            }
        } else { /* xmin already carries the committed hint bit; use CommittedXidVisibleInSnapshot to determine whether it is visible to this snapshot. */
            /* xmin is committed, but maybe not according to our snapshot. */
            if (!HeapTupleHeaderXminFrozen(tuple) &&
                !CommittedXidVisibleInSnapshot(HeapTupleHeaderGetXmin(page, tuple), snapshot)) {
                return false;
            }
        }
        …… /* The subsequent visibility determination for xmax mirrors that for xmin: if xmax is visible to the snapshot, the deleting transaction has committed, so the tuple is invisible; otherwise the tuple is still visible. */
        if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) {
            if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(page, tuple))) {
                if (HeapTupleHeaderGetCmax(tuple, page) >= snapshot->curcid)
                    return true; /* The tuple was deleted after the scan started; it is still visible. */
                else
                    return false; /* The tuple was deleted before the scan started; it is invisible. */
            }

            visible = XidVisibleInSnapshot(HeapTupleHeaderGetXmax(page, tuple), snapshot, &hintstatus);
            if (hintstatus == XID_COMMITTED) {
                /* Set the hint bit of xmax. */
                SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, HeapTupleHeaderGetXmax(page, tuple));
            }
            if (hintstatus == XID_ABORTED) {
                /* The deleting transaction was rolled back or crashed. */
                SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
            }
            if (!visible) {
                return true; /* If the transaction corresponding to xmax is invisible to the snapshot, the tuple is considered still alive. */
            }
        } else {
            /* xmax has been committed, but if it is invisible to this snapshot, the deletion is considered not yet effective and the tuple is still visible. */
            if (!CommittedXidVisibleInSnapshot(HeapTupleHeaderGetXmax(page, tuple), snapshot)) {
                return true; /* The tuple is considered visible. */
            }
        }
        return false;
    }
    ```

    3) HeapTupleSatisfiesNow

    The logic of this function is similar to that of HeapTupleSatisfiesMVCC. The only difference is that it determines only the statuses of **xmin** and **xmax** and does not call the **XidVisibleInSnapshot** and **CommittedXidVisibleInSnapshot** functions to check visibility against a snapshot.
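    The hint-bit protocol that the functions above rely on can be summarized in a small sketch. The flag values and the toy Clog lookup below are illustrative assumptions; the real code uses the HEAP_XMIN_COMMITTED/HEAP_XMIN_INVALID bits of t_infomask and the SetHintBits function.

    ```
    #include <stdbool.h>
    #include <stdint.h>

    #define HINT_XMIN_COMMITTED 0x0100 /* illustrative flag values */
    #define HINT_XMIN_INVALID   0x0200

    typedef struct { uint64_t xmin; uint16_t infomask; } TupleSketch;

    /* Toy stand-in for the Clog lookup (TransactionIdGetStatus). */
    static bool clog_committed(uint64_t xid) { return xid % 2 == 1; }

    /* Lazy hint-bit protocol: consult the Clog only when no hint bit is set,
     * then cache the answer in the tuple header so later scans skip the Clog. */
    static bool xmin_committed(TupleSketch *tup)
    {
        if (tup->infomask & HINT_XMIN_COMMITTED)
            return true;  /* cached: committed */
        if (tup->infomask & HINT_XMIN_INVALID)
            return false; /* cached: rolled back */
        bool committed = clog_committed(tup->xmin); /* slow path */
        tup->infomask |= committed ? HINT_XMIN_COMMITTED : HINT_XMIN_INVALID;
        return committed;
    }
    ```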
    4) HeapTupleSatisfiesVacuum

    This function returns a tuple state according to the value of **oldestXmin**. A dead tuple (an invisible old-version tuple in the openGauss MVCC mechanism) that is no longer accessed by any unfinished transaction (xmax < **oldestXmin**) can be cleared by executing the VACUUM statement. The function code is as follows:

    ```
    HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
    {
        …… /* Initialize variables. */
        if (!HeapTupleHeaderXminCommitted(tuple)) { /* Use the hint bits for acceleration; same logic as in HeapTupleSatisfiesMVCC. */
            if (HeapTupleHeaderXminInvalid(tuple)) /* xmin is invalid (the inserting transaction was rolled back): the tuple is dead and can be cleared. */
                return HEAPTUPLE_DEAD;
            xidstatus = TransactionIdGetStatus(HeapTupleGetRawXmin(htup), false); /* Obtain the transaction status from the CSNlog. */
            if (xidstatus == XID_INPROGRESS) {
                if (tuple->t_infomask & HEAP_XMAX_INVALID) /* If xmax is not set, the tuple has not been deleted, so it is being inserted; otherwise it is being deleted. */
                    return HEAPTUPLE_INSERT_IN_PROGRESS;
                return HEAPTUPLE_DELETE_IN_PROGRESS; /* Deletion is in progress. */
            } else if (xidstatus == XID_COMMITTED) { /* xmin has been committed: set the hint bit, then check whether xmax is committed. */
                SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, HeapTupleGetRawXmin(htup));
            } else {
                …… /* The transaction ended without committing: it was aborted or crashed. Generally a dead tuple is returned and can be deleted. In a standalone system, t_thrd.xact_cxt.useLocalSnapshot does not take effect and is always false. */
                SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, InvalidTransactionId);
                return ((!t_thrd.xact_cxt.useLocalSnapshot || IsInitdb) ? HEAPTUPLE_DEAD : HEAPTUPLE_LIVE);
            }
        }
        /* Check xmax. If xmax is not set, the tuple has not been deleted; it is alive and cannot be removed. */
        if (tuple->t_infomask & HEAP_XMAX_INVALID)
            return HEAPTUPLE_LIVE;
        ……
        if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { /* If xmax is not hinted committed, obtain its status; once xmax is known to be committed, check whether it is smaller than oldestXmin, in which case no unfinished transaction accesses the tuple and it can be deleted. */
            xidstatus = TransactionIdGetStatus(HeapTupleGetRawXmax(htup), false);
            if (xidstatus == XID_INPROGRESS)
                return HEAPTUPLE_DELETE_IN_PROGRESS;
            else if (xidstatus == XID_COMMITTED)
                SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, HeapTupleGetRawXmax(htup));
            else {
                …… /* The transaction corresponding to xmax was aborted or crashed. */
                SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
                return HEAPTUPLE_LIVE;
            }
        }

        /* Check whether the tuple can be deleted: only if xmax is smaller than oldestXmin. */
        if (!TransactionIdPrecedes(HeapTupleGetRawXmax(htup), OldestXmin))
            return ((!t_thrd.xact_cxt.useLocalSnapshot || IsInitdb) ? HEAPTUPLE_RECENTLY_DEAD : HEAPTUPLE_LIVE);

        /* The tuple is dead, is not accessed by any active transaction, and can be deleted. */
        return ((!t_thrd.xact_cxt.useLocalSnapshot || IsInitdb) ? HEAPTUPLE_DEAD : HEAPTUPLE_LIVE);
    }
    ```

    5) SetXact2CommitInProgress

    This function sets the **COMMITSEQNO_COMMIT_INPROGRESS** flag (see "XID Allocation, Clogs, and CSNlogs" above) in the CSNlog entry of an XID, indicating that the transaction corresponding to the XID is being committed. This ensures atomicity of visibility determination, that is, it prevents concurrent read transactions from reading inconsistent data while the CSN is being set.

    6) CSNLogSetCommitSeqNo

    This function sets the CSNlog entry for the corresponding XID.

    7) RecordTransactionCommit

    This function records a transaction commit, which includes writing the commit Xlog as well as the Clogs and CSNlogs.
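    Functions 5) to 7) correspond to the ordered steps of Figure 5-14. The following sketch strings them together; the *_Sketch functions are placeholders with simplified signatures, and a plain increment stands in for the atomic update of NextCommitSeqNo.

    ```
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t next_commit_seqno = 4; /* stand-in for NextCommitSeqNo */

    /* Placeholder stand-ins for the functions named above. */
    static void SetXact2CommitInProgressSketch(uint64_t xid)
    { printf("xid %llu: commit-in-progress flag set\n", (unsigned long long)xid); }

    static void RecordTransactionCommitSketch(uint64_t xid, uint64_t csn)
    { printf("xid %llu: Xlog/Clog/CSNlog written, csn %llu\n",
             (unsigned long long)xid, (unsigned long long)csn); }

    static void ClearProcTransactionInfoSketch(void)
    { printf("PGPROC xid/xmin cleared\n"); }

    /* The four commit steps of Figure 5-14, in order. */
    static void commit_transaction_sketch(uint64_t xid)
    {
        SetXact2CommitInProgressSketch(xid);     /* (1) flag the CSN-XID mapping */
        uint64_t csn = next_commit_seqno++;      /* (2) advance NextCommitSeqNo  */
        RecordTransactionCommitSketch(xid, csn); /* (3) write Xlog, Clog, CSNlog */
        ClearProcTransactionInfoSketch();        /* (4) clear PGPROC information */
    }

    int main(void) { commit_transaction_sketch(2054); return 0; }
    ```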
- **Intra-Process Multi-Thread Management Mechanism**

  This section briefly describes the data structures of the intra-process multi-thread management mechanism and the multi-version snapshot computation mechanism.

  - **Transaction Information Management**

    When the database starts, a shared memory segment is maintained. When each thread is initialized, it obtains a slot from the shared memory and records its thread information in the slot. When a snapshot is obtained, the slot information in the shared memory array is consulted; when a transaction ends, its information is cleared from the slot. During snapshot calculation, the global array is traversed to obtain the transaction information of all concurrent threads and to compute the snapshot fields (such as **xmin**, **xmax**, and **snapshotcsn**). The key data structures for transaction information management are as follows:

    ```
    typedef struct PGXACT {
        GTM_TransactionHandle handle; /* Invalid in standalone mode. */
        TransactionId xid;            /* XID of the thread; 0 if no XID has been allocated. */
        TransactionId prepare_xid;    /* XID in the preparation phase. */

        TransactionId xmin; /* Minimum active XID when the current transaction starts; VACUUM does not delete tuples whose XID is greater than or equal to this value. */
        CommitSeqNo csn_min;    /* Minimum active CSN when the current transaction starts. */
        TransactionId next_xid; /* Invalid in standalone mode. */
        int nxids;              /* Number of sub-transactions. */
        uint8 vacuumFlags;      /* Flags related to VACUUM operations. */

        bool needToSyncXid; /* Invalid in standalone mode. */
        bool delayChkpt;    /* True if the thread requires the checkpoint thread to delay and wait. */
    #ifdef __aarch64__
        char padding[PG_CACHE_LINE_SIZE - PGXACT_PAD_OFFSET]; /* Structure alignment for performance. */
    #endif
    } PGXACT;

    struct PGPROC {
        SHM_QUEUE links; /* Pointer in the linked list. */

        PGSemaphoreData sem; /* Semaphore on which to sleep while waiting. */
        int waitStatus;      /* Waiting status. */

        Latch procLatch; /* Common latch of the thread. */

        LocalTransactionId lxid; /* Local top-layer XID of the current thread. */
        ThreadId pid;            /* Thread ID. */

        ThreadId sessMemorySessionid;
        uint64 sessionid; /* Current session ID in thread pool mode. */
        int logictid;     /* Logical thread ID. */
        TransactionId gtt_session_frozenxid; /* Frozen XID of a session-level global temporary table. */

        int pgprocno;
        int nodeno;

        /* The following fields are zero when the thread starts. */
        BackendId backendId; /* Backend ID of the thread. */
        Oid databaseId;      /* Object identifier (OID) of the database being accessed. */
        Oid roleId;          /* OID of the current user. */

        /* Version number, used to distinguish old and new versions during an upgrade. */
        uint32 workingVersionNum;

        /* Marks whether the current transaction has received a conflict signal in hot backup mode; the ProcArray lock is required to set this field. */
        bool recoveryConflictPending;

        /* Information about the LWLock the thread is waiting for. */
        bool lwWaiting;        /* True when waiting for an LWLock. */
        uint8 lwWaitMode;      /* Mode of the LWLock to be acquired. */
        bool lwIsVictim;       /* Forced to give up the LWLock. */
        dlist_node lwWaitLink; /* Next waiter of the same LWLock object. */

        /* Information about the regular lock the thread is waiting for. */
        LOCK* waitLock;         /* Regular lock object being waited for. */
        PROCLOCK* waitProcLock; /* Holder of the regular lock object being waited for. */
        LOCKMODE waitLockMode;  /* Mode of the regular lock to be acquired. */
        LOCKMASK heldLocks;     /* Bit mask of the lock modes held by the thread. */

        /* Information for waiting for primary/standby log replay synchronization. */
        XLogRecPtr waitLSN;          /* LSN being waited for. */
        int syncRepState;            /* Primary/standby synchronization state being waited for. */
        bool syncRepInCompleteQueue; /* Whether waiting in the completion queue. */
        SHM_QUEUE syncRepLinks;      /* Pointer to the synchronization queue. */

        DataQueuePtr waitDataSyncPoint; /* Data synchronization point for data page replication. */
        int dataSyncRepState;           /* Synchronization state of data page replication. */
        SHM_QUEUE dataSyncRepLinks;     /* Pointer to the data page synchronization queue. */

        MemoryContext topmcxt; /* Top-layer memory context of the thread. */
        char myProgName[64];
        pg_time_t myStartTime;
        syscalllock deleMemContextMutex;

        SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS];

        /* The following fields are used to commit XIDs in batches. */
        bool procArrayGroupMember;             /* Whether this thread is a member of an XID batch commit. */
        pg_atomic_uint32 procArrayGroupNext;   /* Next member in the XID batch commit. */
        TransactionId procArrayGroupMemberXid; /* The larger of the XIDs of the transaction and its sub-transactions. */

        /* CSN of the committing transaction. */
        CommitSeqNo commitCSN;

        /* The following fields are used to commit Clogs in batches. */
        bool clogGroupMember;                   /* Whether this thread is a member of a Clog batch commit. */
        pg_atomic_uint32 clogGroupNext;         /* Next member in the Clog batch commit. */
        TransactionId clogGroupMemberXid;       /* XID committed in the Clog batch commit. */
        CLogXidStatus clogGroupMemberXidStatus; /* Transaction status in the Clog batch commit. */
        int64 clogGroupMemberPage;              /* Clog page corresponding to the Clog batch commit. */
        XLogRecPtr clogGroupMemberLsn;          /* LSN of the member in the Clog batch commit. */
    #ifdef __aarch64__
        /* The following fields are used to insert playback logs in batches on the ARM architecture. */
        bool xlogGroupMember;
        pg_atomic_uint32 xlogGroupNext;
        XLogRecData* xlogGrouprdata;
        XLogRecPtr xlogGroupfpw_lsn;
        XLogRecPtr* xlogGroupProcLastRecPtr;
        XLogRecPtr* xlogGroupXactLastRecEnd;
        void* xlogGroupCurrentTransactionState;
        XLogRecPtr* xlogGroupRedoRecPtr;
        void* xlogGroupLogwrtResult;
        XLogRecPtr xlogGroupReturntRecPtr;
        TimeLineID xlogGroupTimeLineID;
        bool* xlogGroupDoPageWrites;
        bool xlogGroupIsFPW;
        uint64 snap_refcnt_bitmap;
    #endif

        LWLock* subxidsLock;
        struct XidCache subxids; /* XIDs of the sub-transactions. */

        LWLock* backendLock; /* Per-thread lightweight lock that protects concurrent access to the following fields. */

        /* Lock manager data: fast-path locks taken by this backend. */
        uint64 fpLockBits; /* Holding modes of the fast-path locks. */
        FastPathTag fpRelId[FP_LOCK_SLOTS_PER_BACKEND]; /* Slot IDs of the table objects. */
        bool fpVXIDLock; /* Whether the fast-path lock of the local XID is held. */
        LocalTransactionId fpLocalTransactionId; /* Local XID. */
    };
    ```

    ![](../figures/1712.png)

    Figure 5-15 Transaction information

    As shown in Figure 5-15, **proc_base_all_procs** and **proc_base_all_xacts** are globally shared areas. When a thread starts, it registers a slot in the shared area, and the thread-level pointer variables *t_thrd.proc* and *t_thrd.pgxact* point to that slot. When a transaction starts in the thread, information such as the transaction's **xmin** and **xid** is filled into the **pgxact** structure. The key functions and interfaces are as follows:

    (1) GetOldestXmin: returns the value of **oldestXmin** cached by the current multi-version snapshot. (For details about the multi-version snapshot mechanism, see the following sections.)

    (2) ProcArrayAdd: registers a slot in the shared area when a thread starts.

    (3) ProcArrayRemove: removes the current thread from the ProcArray array.

    (4) TransactionIdIsInProgress: checks whether an XID is still in progress.

  - **Multi-Version Snapshot Mechanism**

    openGauss uses a shared memory segment to obtain snapshots and manage the transaction information of each thread. If a shared lock were held for every snapshot calculation and an exclusive lock for every transaction end, lock contention would be severe. To resolve this, openGauss introduces the multi-version snapshot mechanism. Each time a transaction ends, the exclusive lock is held, a new version of the snapshot is computed, and that version is recorded in an in-memory ring buffer queue. When another thread obtains a snapshot, it does not hold the shared lock and recompute the snapshot; instead, it fetches the latest snapshot from the head of the ring queue through atomic operations and increments the reference count by 1. After the snapshot content has been copied, the reference count is decremented by 1. When the reference count of a slot is 0, the slot can be reused for a new snapshot.

    1) Data structure of a multi-version snapshot

    The code of the multi-version snapshot data structure is as follows:

    ```
    typedef struct _snapxid {
        TransactionId xmin;
        TransactionId xmax;
        CommitSeqNo snapshotcsn;
        TransactionId localxmin;
        bool takenDuringRecovery;
        ref_cnt_t ref_cnt[NREFCNT]; /* Reference count of the snapshot; 0 means the slot can be reused. */
    } snapxid_t; /* Content of a multi-version snapshot: with CSNs, openGauss only needs to record key information such as xmin, xmax, and snapshotcsn. */

    static snapxid_t* g_snap_buffer = NULL;      /* Pointer to the ring buffer queue memory area. */
    static snapxid_t* g_snap_buffer_copy = NULL; /* Shallow copy of the ring buffer queue memory. */
    static size_t g_bufsz = 0;
    static bool g_snap_assigned = false; /* Whether the multi-version snapshot ring buffer has been initialized. */

    #define SNAP_SZ sizeof(snapxid_t) /* Size of each multi-version snapshot. */
    #define MaxNumSnapVersion 64      /* Size of the multi-version snapshot queue: 64 versions. */

    static volatile snapxid_t* g_snap_current = NULL; /* Pointer to the current snapshot. */
    static volatile snapxid_t* g_snap_next = NULL;    /* Pointer to the next available snapshot slot. */
    ```
    2) Process of creating the ring buffer queue

    When shared memory is created, the size of the shared memory area is calculated as **MaxNumSnapVersion** x **SNAP_SZ**. **g_snap_current** is set to offset 0, and **g_snap_next** is set to offset 1 x **SNAP_SZ**.

    3) Calculating a multi-version snapshot

    (1) Obtain the current **g_snap_next**.

    (2) With the exclusive lock on the PGPROC array held, compute the key fields such as **xmin**, **xmax**, and the CSN, and save the result to **g_snap_next**.

    (3) Find the next reusable slot whose **refcount** is 0, set **g_snap_current** to **g_snap_next**, and set **g_snap_next** to the offset of the reusable slot.

    4) Obtaining a multi-version snapshot

    (1) Obtain the **g_snap_current** pointer and increment the reference count of the current snapshot slot by 1 to prevent the slot from being reused by a concurrent snapshot update.

    (2) Copy the content of the current snapshot into the static snapshot memory of the current connection.

    (3) Release the current multi-version snapshot and decrement the reference count of the snapshot slot by 1. (A sketch of this reader-side protocol is given after the key functions below.)

    5) Key functions

    (1) CreateSharedRingBuffer: creates the shared memory information for multi-version snapshots.

    (2) GetNextSnapXid: obtains the position of the next multi-version snapshot. The function code is as follows:

    ```
    static inline snapxid_t* GetNextSnapXid()
    {
        return g_snap_buffer ? (snapxid_t*)g_snap_next : NULL;
    }
    ```

    (3) SetNextSnapXid: obtains the next available slot and publishes the current multi-version snapshot as the latest version. The function code is as follows:

    ```
    static void SetNextSnapXid()
    {
        if (g_snap_buffer != NULL) {
            g_snap_current = g_snap_next; /* Publish the latest multi-version snapshot. */
            pg_write_barrier(); /* Prevent ARM reordering during ring buffer initialization. */
            g_snap_assigned = true;
            snapxid_t* ret = (snapxid_t*)g_snap_current;
            size_t idx = SNAPXID_INDEX(ret);
        loop: /* Main loop: keep traversing the multi-version slots to find a reusable slot whose refcount is 0. */
            do {
                ++idx;
                /* Wrap around and search from the beginning if the end is reached. */
                if (idx == g_bufsz)
                    idx = 0;
                ret = SNAPXID_AT(idx);
                if (IsZeroRefCount(ret)) {
                    g_snap_next = ret;
                    return;
                }
            } while (ret != g_snap_next);
            ereport(WARNING, (errmsg("snapshot ring buffer overflow.")));
            /* There are currently 64 versions, so in theory all slots may be occupied; if no slot is free, traverse again. */
            goto loop;
        }
    }
    ```
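    The reader side of this design can be sketched as follows. This is a simplified illustration using C11 atomics: a single reference counter per slot stands in for the per-slot ref_cnt array, and the retry logic that a real implementation needs between loading g_snap_current and pinning the slot is omitted.

    ```
    #include <stdatomic.h>
    #include <stdint.h>

    typedef struct {
        uint64_t xmin, xmax, snapshotcsn;
        atomic_uint ref_cnt; /* slot reusable when 0 */
    } SnapSlotSketch;

    static SnapSlotSketch *_Atomic g_current; /* stand-in for g_snap_current */

    /* Lock-free read: pin the slot, copy the fields, unpin the slot. */
    static void get_snapshot_sketch(uint64_t *xmin, uint64_t *xmax, uint64_t *csn)
    {
        SnapSlotSketch *s = atomic_load(&g_current); /* latest published slot */
        atomic_fetch_add(&s->ref_cnt, 1);            /* pin: cannot be reused */
        *xmin = s->xmin;                             /* copy the snapshot     */
        *xmax = s->xmax;
        *csn  = s->snapshotcsn;
        atomic_fetch_sub(&s->ref_cnt, 1);            /* unpin the slot        */
    }

    int main(void)
    {
        static SnapSlotSketch slot = { 100, 200, 4, 0 };
        atomic_store(&g_current, &slot);
        uint64_t xmin, xmax, csn;
        get_snapshot_sketch(&xmin, &xmax, &csn);
        return (int)(xmax - xmin - 100); /* sanity check: returns 0 */
    }
    ```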
The function code is as follows: + + ``` + void CalculateLocalLatestSnapshot(bool forceCalc) + { + …/*Initialize variables.*/ + + snapxid_t* snapxid = GetNextSnapXid(); /* Set the slot information of the next idle multi-version snapshot.*/ + + /* Initialize xmax to the value of latestCompletedXid + 1.*/ + xmax = t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid; + TransactionIdAdvance(xmax); + + /* The values of xmin and oldestxmin are not recalculated when each transaction is committed. They are calculated only when 1000 transactions are committed or at an interval of 1s. In this case, the values of xmin and oldestxmin are small, but visibility determination is not affected. */ + currentTimeStamp = GetCurrentTimestamp(); + if (forceCalc || ((++snapshotPendingCnt == MAX_PENDING_SNAPSHOT_CNT) || + (TimestampDifferenceExceeds(snapshotTimeStamp, currentTimeStamp, CALC_SNAPSHOT_TIMEOUT)))) { + snapshotPendingCnt = 0; + snapshotTimeStamp = currentTimeStamp; + + /* Initialize xmin.*/ + globalxmin = xmin = xmax; + + int* pgprocnos = arrayP->pgprocnos; + int numProcs; + + /* + Traverse the PGPROC structure cyclically and calculate the snapshot value. + */ + numProcs = arrayP->numProcs; + /* The main process is to traverse proc_base_all_xacts, record the minimum value of pgxact->xid as xmin, and record the minimum value of pgxact->xmin as oldestxmin. */ + for (index = 0; index < numProcs; index++) { + int pgprocno = pgprocnos[index]; + volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[pgprocno]; + TransactionId xid; + + if (pgxact->vacuumFlags & PROC_IN_LOGICAL_DECODING) + continue; + + /* Skip xmin of autovacuum to prevent long operations by executing the VACUUM statement from blocking dirty tuple recycling.*/ + if (pgxact->vacuumFlags & PROC_IN_VACUUM) + continue; + + /* Use the minimum value of xmin to update globalxmin.*/ + xid = pgxact->xmin; + + if (TransactionIdIsNormal(xid) && TransactionIdPrecedes(xid, globalxmin)) + globalxmin = xid; + + xid = pgxact->xid; + + if (!TransactionIdIsNormal(xid)) + xid = pgxact->next_xid; + + if (!TransactionIdIsNormal(xid) || !TransactionIdPrecedes(xid, xmax)) + continue; + + if (TransactionIdPrecedes(xid, xmin)) + xmin = xid; + } + + if (TransactionIdPrecedes(xmin, globalxmin)) + globalxmin = xmin; + + t_thrd.xact_cxt.ShmemVariableCache->xmin = xmin; + t_thrd.xact_cxt.ShmemVariableCache->recentLocalXmin = globalxmin; + } + /* Assign values to the multi-version snapshot information. The values of xmin and oldestxmin may be small because they are not calculated in time. The value of xmax and CSN value are accurate. Note that the exclusive lock must be held when the snapshot is calculated. */ + snapxid->xmin = t_thrd.xact_cxt.ShmemVariableCache->xmin; + snapxid->xmax = xmax; + snapxid->localxmin = t_thrd.xact_cxt.ShmemVariableCache->recentLocalXmin; + snapxid->snapshotcsn = t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo; + snapxid->takenDuringRecovery = RecoveryInProgress(); + SetNextSnapXid(); /* Set the current multi-version snapshot.*/ + } + ``` + + \(5\) GetLocalSnapshotData: Obtains the latest multi-version snapshot for transactions. The function code is as follows: + + ``` + Snapshot GetLocalSnapshotData(Snapshot snapshot) + { + /* Check whether a multi-version snapshot exists. Before the recovery process starts, the multi-version snapshot is not calculated. In this case, NULL is returned. 
*/
    if (!g_snap_assigned || (g_snap_buffer == NULL)) {
        ereport(DEBUG1, (errmsg("Falling back to origin GetSnapshotData: not assigned yet or during shutdown\n")));
        return NULL;
    }
    pg_read_barrier(); /* Prevent ARM reordering during ring buffer initialization. */
    snapxid_t* snapxid = GetCurrentSnapXid(); /* Add 1 to the refcount of the current multi-version snapshot to prevent it from being reused by transactions that concurrently calculate new snapshots. */

    snapshot->user_data = snapxid;

    … /* Assign the information in snapxid of the multi-version snapshot to the snapshot. Because the multi-version snapshot contains only a few key scalar variables, direct assignment amounts to a deep copy, after which the refcount of the multi-version snapshot can be released. */
    u_sess->utils_cxt.RecentXmin = snapxid->xmin;
    snapshot->xmin = snapxid->xmin;
    snapshot->xmax = snapxid->xmax;
    snapshot->snapshotcsn = snapxid->snapshotcsn;
    …
    ReleaseSnapshotData(snapshot); /* Release the refcount of the multi-version snapshot so that the slot can be reused. */
    return snapshot;
}
```



## 5.3 Lock Mechanism

In a database, concurrency control on shared resources is implemented by using locks. According to their purposes, locks can generally be classified into three types: spinlocks, LWLocks \(lightweight locks\), and regular locks. Further encapsulation can be performed based on these three types. The general procedure of using a lock is to acquire the lock, perform the operations in the critical section, and release the lock. Provided that correctness is guaranteed, lock usage and lock contention are important factors that restrict performance. The following briefly describes the three types of locks in openGauss and then focuses on the lock-related performance optimization of openGauss on the Kunpeng architecture.

- **Spinlocks**

    A spinlock is generally implemented by using a test-and-set \(TAS\) atomic instruction of the CPU. It has only two states, locked and unlocked, and can be held by only one process. The difference between a spinlock and a semaphore is that, when a process cannot obtain the resource, a semaphore puts the process to sleep and blocks it, whereas a spinlock makes the process busy-wait. A spinlock is mainly used in scenarios where the locking duration is very short, for example, modifying or reading a flag field within dozens of instructions. When writing code, ensure that a spinlock is locked and unlocked in the same function. Spinlock deadlocks cannot be detected automatically; avoiding them is the responsibility of the code itself, and there is no waiting queue. A spinlock consumes CPU resources while spinning, and if it is used improperly or held for too long, a core dump is triggered. In openGauss, many 32-bit, 64-bit, and 128-bit variables are updated by compare-and-swap \(CAS\) atomic operations to avoid or reduce the use of spinlocks.

    The operations related to spinlocks are as follows:

    \(1\) SpinLockInit: Initializes a spinlock.

    \(2\) SpinLockAcquire: Locks a spinlock.

    \(3\) SpinLockRelease: Releases a spinlock.

    \(4\) SpinLockFree: Destroys a spinlock and clears related resources.
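    As a concrete illustration of these four interfaces, the following minimal sketch protects a flag update with a spinlock; the variable names are illustrative and not taken from the openGauss code. The critical section is kept to a few instructions, and the lock is acquired and released in the same function, as required above:

    ```
    static slock_t g_flag_lock; /* initialized once elsewhere with SpinLockInit(&g_flag_lock) */
    static uint32 g_flag = 0;

    void SetFlag(uint32 value)
    {
        SpinLockAcquire(&g_flag_lock); /* busy-waits while another thread holds the lock */
        g_flag = value;                /* keep the critical section very short */
        SpinLockRelease(&g_flag_lock);
    }
    ```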
- **LWLocks**

    LWLocks are implemented by using atomic operations, waiting queues, and semaphores. There are two lock modes: shared and exclusive. Multiple threads can hold a shared lock at the same time, but an exclusive lock can be held by only one thread. When a thread cannot obtain the resource, the LWLock puts the thread to sleep and blocks it. LWLocks are mainly used in scenarios where the operations in the critical section take a long time. Locking and unlocking operations can cross functions, but the lock must still be released promptly after use. In principle, deadlock avoidance is the responsibility of the code itself; however, because code paths are complex and exceptions are hard to enumerate, openGauss provides a deadlock detection mechanism to resolve LWLock deadlocks in various exception scenarios.

    The functions related to LWLocks are as follows:

    \(1\) LWLockAssign: Applies for an LWLock.

    \(2\) LWLockAcquire: Locks an LWLock.

    \(3\) LWLockConditionalAcquire: Conditionally locks an LWLock. If the lock cannot be obtained, **false** is returned and the thread does not keep waiting.

    \(4\) LWLockRelease: Releases an LWLock.

    \(5\) LWLockReleaseAll: Releases all LWLocks held by the current thread. If an error occurs during a transaction, all LWLocks are released during rollback to prevent subsequent operations from being blocked.

    The related structure code is as follows:

    ```
    #define LW_FLAG_HAS_WAITERS ((uint32)1 << 30)
    #define LW_FLAG_RELEASE_OK ((uint32)1 << 29)
    #define LW_FLAG_LOCKED ((uint32)1 << 28)

    #define LW_VAL_EXCLUSIVE ((uint32)1 << 24)
    #define LW_VAL_SHARED 1 /* Marks the state of an LWLock when the lock is obtained or released. */

    typedef struct LWLock {
        uint16 tranche;            /* ID of the LWLock */
        pg_atomic_uint32 state;    /* Lock state */
        dlist_head waiters;        /* Linked list of threads that are waiting for the lock */
    #ifdef LOCK_DEBUG
        pg_atomic_uint32 nwaiters; /* Number of threads waiting for the lock */
        struct PGPROC *owner;      /* Last holder of the exclusive lock */
    #endif
    #ifdef ENABLE_THREAD_CHECK
        pg_atomic_uint32 rwlock;
        pg_atomic_uint32 listlock;
    #endif
    } LWLock;
    ```
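    A typical usage pattern is shown in the following minimal sketch; the lock pointer is illustrative. Readers take the lock in shared mode, writers in exclusive mode, and both release it promptly:

    ```
    /* Reader side: multiple threads may hold the lock in LW_SHARED mode concurrently. */
    LWLockAcquire(lock, LW_SHARED);
    /* ... read the protected shared structure ... */
    LWLockRelease(lock);

    /* Writer side: LW_EXCLUSIVE mode can be held by only one thread. */
    LWLockAcquire(lock, LW_EXCLUSIVE);
    /* ... modify the protected shared structure ... */
    LWLockRelease(lock);
    ```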
- **Regular Locks**

    Regular locks are implemented by using a hash table. Regular locks support multiple lock modes; the semantics of these modes and the conflicts between them are defined by a conflict table. Regular locks are used to lock database objects accessed by services. Locking of a regular lock complies with the two-phase locking protocol of the database: the lock is acquired on access and released when the transaction is committed.

    A regular lock has a waiting queue and provides a deadlock detection mechanism. When a deadlock is detected, one of the transactions is rolled back.

    openGauss provides eight lock levels for controlling the concurrency of different statements: level-1 locks are used for SELECT statements; level-3 locks are used for INSERT, UPDATE, and DELETE statements; level-4 locks are used for VACUUM and ANALYZE statements; and level-8 locks are used for various DDL statements. The specific macro definitions and names are as follows:

    ```
    #define AccessShareLock 1  /* SELECT statement */
    #define RowShareLock 2     /* SELECT FOR UPDATE and SELECT FOR SHARE statements */
    #define RowExclusiveLock 3 /* INSERT, UPDATE, and DELETE statements */
    #define ShareUpdateExclusiveLock \
        4 /* VACUUM (non-FULL), ANALYZE, and CREATE INDEX CONCURRENTLY statements */
    #define ShareLock 5 /* CREATE INDEX (without CONCURRENTLY) statement */
    #define ShareRowExclusiveLock \
        6 /* Similar to the exclusive mode, but concurrent access in ROW SHARE mode is allowed. */
    #define ExclusiveLock \
        7 /* Blocks ROW SHARE; SELECT...FOR UPDATE statements cannot run concurrently. */
    #define AccessExclusiveLock \
        8 /* ALTER TABLE, DROP TABLE, VACUUM FULL, and LOCK TABLE statements */
    ```

    Table 5-7 describes the conflict and concurrency control of the eight lock levels, where √ indicates that two lock operations can be performed concurrently.

    **Table 5-7** Lock conflict and concurrency control
    | Lock Level | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
    | --- | --- | --- | --- | --- | --- | --- | --- | --- |
    | 1. ACCESS SHARE | √ | √ | √ | √ | √ | √ | √ | - |
    | 2. ROW SHARE | √ | √ | √ | √ | √ | √ | - | - |
    | 3. ROW EXCLUSIVE | √ | √ | √ | √ | - | - | - | - |
    | 4. SHARE UPDATE EXCLUSIVE | √ | √ | √ | - | - | - | - | - |
    | 5. SHARE | √ | √ | - | - | √ | - | - | - |
    | 6. SHARE ROW EXCLUSIVE | √ | √ | - | - | - | - | - | - |
    | 7. EXCLUSIVE | √ | - | - | - | - | - | - | - |
    | 8. ACCESS EXCLUSIVE | - | - | - | - | - | - | - | - |
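    Internally, such a conflict matrix is typically encoded as one bitmask per lock mode so that a conflict check is a single bitwise AND. The following sketch is hand-derived from Table 5-7 for illustration only; it is not the actual openGauss definition, which lives in the lock-method initialization code:

    ```
    #define LOCKBIT_ON(mode) (1 << (mode))

    /* For each requested mode (1..8), the set of held modes it conflicts with. */
    static const int LockConflictTable[] = {
        0, /* unused: lock modes start at 1 */
        /* 1. ACCESS SHARE */           LOCKBIT_ON(8),
        /* 2. ROW SHARE */              LOCKBIT_ON(7) | LOCKBIT_ON(8),
        /* 3. ROW EXCLUSIVE */          LOCKBIT_ON(5) | LOCKBIT_ON(6) | LOCKBIT_ON(7) | LOCKBIT_ON(8),
        /* 4. SHARE UPDATE EXCLUSIVE */ LOCKBIT_ON(4) | LOCKBIT_ON(5) | LOCKBIT_ON(6) | LOCKBIT_ON(7) | LOCKBIT_ON(8),
        /* 5. SHARE */                  LOCKBIT_ON(3) | LOCKBIT_ON(4) | LOCKBIT_ON(6) | LOCKBIT_ON(7) | LOCKBIT_ON(8),
        /* 6. SHARE ROW EXCLUSIVE */    LOCKBIT_ON(3) | LOCKBIT_ON(4) | LOCKBIT_ON(5) | LOCKBIT_ON(6) |
                                        LOCKBIT_ON(7) | LOCKBIT_ON(8),
        /* 7. EXCLUSIVE */              LOCKBIT_ON(2) | LOCKBIT_ON(3) | LOCKBIT_ON(4) | LOCKBIT_ON(5) |
                                        LOCKBIT_ON(6) | LOCKBIT_ON(7) | LOCKBIT_ON(8),
        /* 8. ACCESS EXCLUSIVE */       LOCKBIT_ON(1) | LOCKBIT_ON(2) | LOCKBIT_ON(3) | LOCKBIT_ON(4) |
                                        LOCKBIT_ON(5) | LOCKBIT_ON(6) | LOCKBIT_ON(7) | LOCKBIT_ON(8),
    };

    /* A request conflicts if any currently held mode belongs to its conflict set. */
    static inline bool LockModeConflicts(int requestedMode, int heldModesMask)
    {
        return (LockConflictTable[requestedMode] & heldModesMask) != 0;
    }
    ```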
    The lock object data structure is as follows. Different lock objects are identified by assigning values to field 1 to field 5, and **locktag\_type** identifies the lock object type, such as a table-level relation object, a row-level tuple object, or a transaction object. The corresponding code is as follows:

    ```
    typedef struct LOCKTAG {
        uint32 locktag_field1;      /* 32 bits */
        uint32 locktag_field2;      /* 32 bits */
        uint32 locktag_field3;      /* 32 bits */
        uint32 locktag_field4;      /* 32 bits */
        uint16 locktag_field5;      /* 16 bits */
        uint8 locktag_type;         /* For details, see LockTagType. */
        uint8 locktag_lockmethodid; /* Lock method type */
    } LOCKTAG;

    typedef enum LockTagType {
        LOCKTAG_RELATION, /* Table relation */
        /* The ID of LOCKTAG_RELATION consists of the OID of the database and the OID of the table. If the OID of the database is 0, the table is a shared table. The OID is the common object identifier of the openGauss kernel. */
        LOCKTAG_RELATION_EXTEND, /* Right to extend the table */
        /* The ID of LOCKTAG_RELATION_EXTEND has the same composition as that of LOCKTAG_RELATION. */
        LOCKTAG_PARTITION,          /* Partition */
        LOCKTAG_PARTITION_SEQUENCE, /* Partition sequence */
        LOCKTAG_PAGE, /* Page in a table */
        /* The ID of LOCKTAG_PAGE is the ID of RELATION + BlockNumber (page number). */
        LOCKTAG_TUPLE, /* Physical tuple */
        /* The ID of LOCKTAG_TUPLE is the ID of PAGE + OffsetNumber (offset on the page). */
        LOCKTAG_TRANSACTION, /* XID (to wait for the corresponding transaction to end) */
        /* The ID of LOCKTAG_TRANSACTION is the XID. */
        LOCKTAG_VIRTUALTRANSACTION, /* Virtual XID */
        /* The ID of LOCKTAG_VIRTUALTRANSACTION is its virtual XID. */
        LOCKTAG_OBJECT, /* Non-table database object */
        /* The ID of LOCKTAG_OBJECT is the database OID + class OID + object OID + sub-ID. */
        LOCKTAG_CSTORE_FREESPACE, /* Free space of column store */
        LOCKTAG_USERLOCK, /* Lock object reserved for user locks */
        LOCKTAG_ADVISORY, /* Advisory lock */
        LOCK_EVENT_NUM
    } LockTagType;
    ```

    In the structure of a regular lock, **tag** is the unique identifier of the regular lock object, and the PROCLOCK structure pointers connect all threads that hold the lock and all threads that are waiting for it. The corresponding code is as follows:

    ```
    typedef struct LOCK {
        /* Hash key */
        LOCKTAG tag; /* Unique identifier of the lock object */

        /* Data */
        LOCKMASK grantMask;           /* Bit mask of the granted lock modes */
        LOCKMASK waitMask;            /* Bit mask of the lock modes being waited for */
        SHM_QUEUE procLocks;          /* Linked list of the PROCLOCK structures associated with the lock */
        PROC_QUEUE waitProcs;         /* Queue of the PGPROC structures waiting for the lock */
        int requested[MAX_LOCKMODES]; /* Count of lock requests per mode */
        int nRequested;               /* Total of the requested array */
        int granted[MAX_LOCKMODES];   /* Count of granted locks per mode */
        int nGranted;                 /* Total of the granted array */
    } LOCK;
    ```
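    For example, per the comment on LOCKTAG\_RELATION above, a table-level lock object is identified by the database OID and the table OID. The following minimal sketch fills such a tag; the two OID variables are illustrative placeholders:

    ```
    LOCKTAG tag;
    tag.locktag_field1 = databaseOid; /* OID of the database; 0 would denote a shared table */
    tag.locktag_field2 = relationOid; /* OID of the table */
    tag.locktag_field3 = 0;
    tag.locktag_field4 = 0;
    tag.locktag_field5 = 0;
    tag.locktag_type = LOCKTAG_RELATION;
    tag.locktag_lockmethodid = DEFAULT_LOCKMETHOD; /* the regular lock method */
    ```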
    The PROCLOCK structure connects the information about the threads that hold a lock with the information about the threads waiting for it. The corresponding code is as follows:

    ```
    typedef struct PROCLOCK {
        /* Identifier */
        PROCLOCKTAG tag; /* Unique identifier of the PROCLOCK object */

        /* Data */
        LOCKMASK holdMask;    /* Bit mask of the lock modes that have been obtained */
        LOCKMASK releaseMask; /* Bit mask of the lock modes that have been pre-released */
        SHM_QUEUE lockLink;   /* Pointer to the linked list of the lock object */
        SHM_QUEUE procLink;   /* Pointer to the linked list of the PGPROC structure */
    } PROCLOCK;
    ```

    The **waitLock** field in the t\_thrd.proc structure records the lock that the thread is waiting for, and the **procLocks** field in the lock structure links all holders of and waiters for the lock. Figure 5-16 shows the queue relationship.

    ![](../figures/zh-cn_image_0000001252803745.png)

    Figure 5-16 Queue relationship of the t\_thrd.proc structure

    The main functions of a regular lock are as follows:

    \(1\) LockAcquire: Locks a lock object.

    \(2\) LockRelease: Releases a lock object.

    \(3\) LockReleaseAll: Releases all lock resources.

- **Deadlock Detection Mechanism**

    A deadlock occurs when process B needs to access resources held by process A while process A, for whatever reason, does not release its lock; the database is then permanently blocked. As shown in Figure 5-17, T1 holds resource R1 and requests resource R2, while T2 holds resource R2 and requests resource R1.

    ![](../figures/1713.png)

    Figure 5-17 Deadlock status

    A necessary condition for a deadlock is hold-and-wait: each process holds one resource while requesting another one at the same time. A common way to break a deadlock is to interrupt the execution of one of the transactions, thus breaking the waiting loop. openGauss provides a deadlock detection mechanism for both LWLocks and regular locks. The following describes the related principles and code.

    - **Deadlock Detection and Self-Healing of LWLocks**

        openGauss uses an independent monitoring thread to detect, diagnose, and resolve LWLock deadlocks. A worker thread writes a timestamp value before requesting an LWLock; after successfully obtaining the lock, it sets the timestamp to 0. The monitoring thread can therefore quickly compare the timestamp values to locate a thread that has failed to obtain its lock resource for a long time. This comparison is fast and lightweight; the expensive deadlock diagnosis is triggered only when a long lock wait is detected, which prevents frequent diagnosis from affecting service execution. Once a deadlock loop is confirmed, the monitoring thread records the deadlock information in the log and then takes recovery measures: it selects one thread in the deadlock loop, which reports an error and exits. Figure 5-18 shows the mechanism.

        ![](../figures/1714.png)

        Figure 5-18 Deadlock detection and self-healing of LWLocks

        Deadlock detection and verification are CPU-intensive operations. To avoid affecting database performance and running stability, LWLock detection uses a lightweight method, a timestamp-based watchdog, to quickly determine whether a deadlock is possible. When requesting a lock, the worker thread writes the wait-start timestamp in the global memory; after the lock request succeeds, the timestamp is set to 0.
        For a deadlocked thread, the lock request stays in the wait state and its timestamp is never reset to 0, so the difference between that timestamp and the current time keeps growing. The GUC parameter **fault\_mon\_timeout** specifies the check interval; the default value is 5 seconds. Deadlock detection for LWLocks is performed at the interval specified by **fault\_mon\_timeout**. If the same thread and lock ID are observed again and the timestamp exceeds the detection interval, heavyweight deadlock diagnosis is triggered. The functions for time statistics and lightweight detection are as follows:

        \(1\) pgstat\_read\_light\_detect: Reads the timestamps related to the thread and lock IDs from the statistics structure and records them in a pointer queue.

        \(2\) lwm\_compare\_light\_detect: Compares the state with that of several seconds before. If threads and lock IDs that may be deadlocked are found, **true** is returned; otherwise, **false** is returned.

        LWLock deadlock detection then checks whether the wait-for graph is still a directed acyclic graph \(DAG\); its implementation is similar to that of regular locks, which is described in detail in the following section. Deadlock detection requires two types of information: lock information \(request and allocation\) and thread information \(waiting and holding\). The information is recorded in the corresponding global variables, where the deadlock monitoring thread can access and evaluate it. The related functions are as follows:

        \(1\) lwm\_heavy\_diagnosis: Detects whether a deadlock occurs.

        \(2\) lwm\_deadlock\_report: Reports detailed deadlock information for fault locating and diagnosis.

        \(3\) lw\_deadlock\_auto\_healing: Heals a deadlock by selecting a thread in the loop to exit.

        The data structures related to the locks and threads used for deadlock detection are as follows:

        \(1\) **lock\_entry\_id** records the thread information. **thread\_id** and **st\_sessionid** adapt to the thread pool framework so that the correct information can be found in the statistics. The corresponding code is as follows:

        ```
        typedef struct {
            ThreadId thread_id;
            uint64 st_sessionid;
        } lock_entry_id;
        ```

        \(2\) **lwm\_light\_detect** records the threads that may be deadlocked and uses a linked list to connect all the current information. The corresponding code is as follows:

        ```
        typedef struct {
            /* Thread ID */
            lock_entry_id entry_id;

            /* Reference count of LWLock detection */
            int lw_count;
        } lwm_light_detect;
        ```

        \(3\) **lwm\_lwlocks** records the thread-related lock information, including the number of held locks and the lock wait information. The corresponding code is as follows:

        ```
        typedef struct {
            lock_entry_id be_tid;         /* Thread ID */
            int be_idx;                   /* Location of the background thread */
            LWLockAddr want_lwlock;       /* Information about the lock that the thread is waiting to acquire */
            int lwlocks_num;              /* Number of LWLocks held by the thread */
            lwlock_id_mode* held_lwlocks; /* Array of LWLocks held by the thread */
        } lwm_lwlocks;
        ```

    - **Deadlock Detection for Regular Locks**

        If no conflict occurs when openGauss acquires a lock, the lock is granted directly. If a conflict occurs, openGauss sets a timer and waits; after the specified period of time elapses, the timer invokes the deadlock detection.
        If process T2 is behind process T1 in the waiting queue of a lock, and the lock that T2 wants to obtain conflicts with the lock that T1 wants to obtain, there is a soft edge from T2 to T1. If the lock request of T2 conflicts with a lock already held by T1, there is a hard edge. The overall idea is to start from the thread that is currently waiting for a lock and, by calling functions recursively, move forward along the waiting edges to check whether a loop exists. If a soft edge exists in the loop, the two processes involved are both still waiting in the queue, so the lock waiting queue can be re-sorted to try to resolve the deadlock conflict. If there is no soft edge, the only way to resolve the deadlock loop is to terminate the transaction that is waiting for the current lock. As shown in Figure 5-19, dashed lines indicate soft edges and solid lines indicate hard edges: thread A waits for thread B, thread B waits for thread C, and thread C waits for thread A. Because thread A waits for thread B on a soft edge, the wait relationship is adjusted as shown in the figure; thread A then waits for thread C and thread C waits for thread A with no soft edge left, so a deadlock is detected.

        ![](../figures/1715.png)

        Figure 5-19 Deadlock detection for regular locks

        The main functions are as follows:

        \(1\) DeadLockCheck: Detects deadlocks.

        \(2\) DeadLockCheckRecurse: Returns **true** if a deadlock occurs, or returns **false** and resolves the conflict if a soft edge exists.

        \(3\) check\_stack\_depth: Checks the recursion depth of the deadlock detection stack. \(If the recursion stack is too deep, all LWLock partitions are held for a long time during deadlock detection, blocking services.\)

        \(4\) CheckDeadLockRunningTooLong: Checks the elapsed deadlock detection time to prevent it from running too long; if deadlock detection lasts too long, all subsequent services are blocked. The corresponding code is as follows:

        ```
        static void CheckDeadLockRunningTooLong(int depth)
        {
            /* Check every four layers. */
            if (depth > 0 && ((depth % 4) == 0)) {
                TimestampTz now = GetCurrentTimestamp();
                long secs = 0;
                int usecs = 0;

                if (now > t_thrd.storage_cxt.deadlock_checker_start_time) {
                    TimestampDifference(t_thrd.storage_cxt.deadlock_checker_start_time, now, &secs, &usecs);
                    if (secs > 600) { /* An error is reported if deadlock detection lasts for more than 10 minutes. */
        #ifdef USE_ASSERT_CHECKING
                        DumpAllLocks(); /* In the debug version, all lock information is dumped for fault locating. */
        #endif

                        ereport(defence_errlevel(), (errcode(ERRCODE_INTERNAL_ERROR),
                            errmsg("Deadlock checker runs too long and is greater than 10 minutes.")));
                    }
                }
            }
        }
        ```

        \(5\) FindLockCycle: Checks for deadlock loops.

        \(6\) FindLockCycleRecurse: Internal recursive function called during deadlock detection.

        The corresponding data structures are as follows:

        \(1\) The directed edge, the core and most critical data structure in deadlock detection. The corresponding code is as follows:

        ```
        typedef struct EDGE {
            PGPROC *waiter;  /* Waiting thread */
            PGPROC *blocker; /* Blocking thread */
            int pred;        /* Workspace for topological sorting */
            int link;        /* Workspace for topological sorting */
        } EDGE;
        ```

        \(2\) A waiting queue that can be rearranged.
        The corresponding code is as follows:

        ```
        typedef struct WAIT_ORDER {
            LOCK *lock;     /* the lock whose wait queue is described */
            PGPROC **procs; /* array of PGPROC *'s in the new wait order */
            int nProcs;
        } WAIT_ORDER;
        ```

        \(3\) The information printed at the end of deadlock detection. The corresponding code is as follows:

        ```
        typedef struct DEADLOCK_INFO {
            LOCKTAG locktag;   /* Unique identifier of the lock object being waited for */
            LOCKMODE lockmode; /* Mode of the lock object being waited for */
            ThreadId pid;      /* ID of the blocked thread */
        } DEADLOCK_INFO;
        ```

- **Lockless Atomic Operation**

    openGauss encapsulates atomic operations of 32, 64, and 128 bits, which are used to replace spinlocks and implement atomic updates of simple variables.

    \(1\) gs\_atomic\_add\_32: Performs a 32-bit add operation and returns the new value. The corresponding code is as follows:

    ```
    static inline int32 gs_atomic_add_32(volatile int32* ptr, int32 inc)
    {
        return __sync_fetch_and_add(ptr, inc) + inc;
    }
    ```

    \(2\) gs\_atomic\_add\_64: Performs a 64-bit add operation and returns the new value. The corresponding code is as follows:

    ```
    static inline int64 gs_atomic_add_64(int64* ptr, int64 inc)
    {
        return __sync_fetch_and_add(ptr, inc) + inc;
    }
    ```

    \(3\) gs\_compare\_and\_swap\_32: 32-bit CAS operation. If **dest** still equals **oldval**, that is, it has not been updated by another thread, **newval** is written to **dest** and **true** is returned; otherwise, **false** is returned. The corresponding code is as follows:

    ```
    static inline bool gs_compare_and_swap_32(int32* dest, int32 oldval, int32 newval)
    {
        if (oldval == newval)
            return true;

        volatile bool res = __sync_bool_compare_and_swap(dest, oldval, newval);

        return res;
    }
    ```

    \(4\) gs\_compare\_and\_swap\_64: 64-bit CAS operation with the same semantics as the 32-bit version. The corresponding code is as follows:

    ```
    static inline bool gs_compare_and_swap_64(int64* dest, int64 oldval, int64 newval)
    {
        if (oldval == newval)
            return true;

        return __sync_bool_compare_and_swap(dest, oldval, newval);
    }
    ```

    \(5\) arm\_compare\_and\_swap\_u128: openGauss provides cross-platform 128-bit CAS operations. On the ARM platform, a separate instruction set is used to assemble the 128-bit atomic operation, which improves the lock concurrency performance of the kernel; for details, see the next section. The corresponding code is as follows:

    ```
    static inline uint128_u arm_compare_and_swap_u128(volatile uint128_u* ptr, uint128_u oldval, uint128_u newval)
    {
    #ifdef __ARM_LSE
        return __lse_compare_and_swap_u128(ptr, oldval, newval);
    #else
        return __excl_compare_and_swap_u128(ptr, oldval, newval);
    #endif
    }
    ```
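    These primitives are typically combined into retry loops. The following minimal sketch, a lock-free "fetch maximum" built on gs\_compare\_and\_swap\_64, is illustrative only and not an openGauss API:

    ```
    /* Atomically raise *ptr to at least candidate; return the resulting maximum. */
    static inline int64 atomic_fetch_max_64(int64* ptr, int64 candidate)
    {
        int64 old = *ptr;
        while (old < candidate) {
            if (gs_compare_and_swap_64(ptr, old, candidate))
                return candidate; /* we installed the new maximum */
            old = *ptr;           /* lost the race: reload and retry */
        }
        return old; /* an equal or larger value is already in place */
    }
    ```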
    \(6\) atomic\_compare\_and\_swap\_u128: 128-bit CAS operation. If **dest** has not been updated by another thread, **newval** is written to **dest**. The function returns the value of **dest** observed before the operation: if it equals **oldval**, the swap succeeded; otherwise, it is the value concurrently written by another thread. Note that the upper-layer caller must ensure that the input parameters are 128-bit aligned. The corresponding code is as follows:

    ```
    static inline uint128_u atomic_compare_and_swap_u128(
        volatile uint128_u* ptr,
        uint128_u oldval = uint128_u{0},
        uint128_u newval = uint128_u{0})
    {
    #ifdef __aarch64__
        return arm_compare_and_swap_u128(ptr, oldval, newval);
    #else
        uint128_u ret;
        ret.u128 = __sync_val_compare_and_swap(&ptr->u128, oldval.u128, newval.u128);
        return ret;
    #endif
    }
    ```

- **Performance Optimization Based on Kunpeng Servers**

    This section describes how the lock-related functions and structures of openGauss are optimized for the hardware structure.

    - **WAL Group Insert Optimization**

        The redo log cache system of the database is the write cache for redo log persistence: redo logs are written to the log cache before being flushed to disks. The write efficiency of the log cache is a main factor that determines the overall throughput of the database. Because logs must be written in sequence, lock contention occurs when threads write logs, and this contention becomes the main performance bottleneck. Based on the CPU characteristics of ARM-based Kunpeng servers, openGauss inserts logs in groups to reduce lock contention and improve the efficiency of inserting WALs, thereby improving the throughput of the entire database. Figure 5-20 shows the process of inserting logs in groups.

        ![](../figures/1716.png)

        Figure 5-20 Inserting logs in groups

        \(1\) Threads do not contend for the lock individually.

        \(2\) Within the same time window, all arriving threads join a group before contending for the lock. The first thread that joins the group is the leader thread; the queue is managed with CAS atomic operations.

        \(3\) The leader thread contends for the lock on behalf of the entire group. The other \(follower\) threads in the group go to sleep and wait for the leader thread to wake them up.

        \(4\) After obtaining the lock, the leader thread traverses the logs to be inserted by all threads in the group to obtain the total space required, and reserves the space only once.

        \(5\) The leader thread writes the logs of all threads in the group to the log buffer.

        \(6\) The leader thread releases the lock and wakes up all follower threads.

        \(7\) The follower threads do not need to contend for the lock, because their logs have already been written by the leader thread; they directly proceed to the subsequent steps.

        The key function code is as follows:

        ```
        static XLogRecPtr XLogInsertRecordGroup(XLogRecData* rdata, XLogRecPtr fpw_lsn)
        {
            … /* Initialize variables and perform simple verification. */
            START_CRIT_SECTION(); /* Start the critical section. */

            proc->xlogGroupMember = true;
            …
            proc->xlogGroupDoPageWrites = &t_thrd.xlog_cxt.doPageWrites;

            nextidx = pg_atomic_read_u32(&t_thrd.shemem_ptr_cxt.LocalGroupWALInsertLocks[groupnum].l.xlogGroupFirst);

            while (true) {
                pg_atomic_write_u32(&proc->xlogGroupNext, nextidx); /* Record the previous member in the PGPROC structure. */
                /* Prevent ARM reordering to ensure that all previous writes are visible. */
                pg_write_barrier();

                /* Obtain the pgprocno of the previous member. If it is invalid, this member is the leader. */
                if (pg_atomic_compare_exchange_u32(&t_thrd.shemem_ptr_cxt.LocalGroupWALInsertLocks[groupnum].l.xlogGroupFirst,
                    &nextidx,
                    (uint32)proc->pgprocno)) {
                    break;
                }
            }

            /* Non-leader members do not acquire the WAL insert lock. They only wait until they are awakened by the leader. */
            if (nextidx != INVALID_PGPROCNO) {
                int extraWaits = 0;

                for (;;) {
                    PGSemaphoreLock(&proc->sem, false); /* Functions as a read barrier. */
                    pg_memory_barrier();                /* Functions as a read barrier. */
                    if (!proc->xlogGroupMember) {
                        break;
                    }
                    extraWaits++;
                }

                while (extraWaits-- > 0) {
                    PGSemaphoreUnlock(&proc->sem);
                }
                END_CRIT_SECTION();
                return proc->xlogGroupReturntRecPtr;
            }

            /* The leader member holds the lock. */
            WALInsertLockAcquire();
            /* Calculate the size of the xlog records of each member thread. */
            …
            /* The leader thread inserts the xlog records of all member threads into the buffer. */
            while (nextidx != INVALID_PGPROCNO) {
                localProc = g_instance.proc_base_all_procs[nextidx];

                if (unlikely(localProc->xlogGroupIsFPW)) {
                    nextidx = pg_atomic_read_u32(&localProc->xlogGroupNext);
                    localProc->xlogGroupIsFPW = false;
                    continue;
                }
                XLogInsertRecordNolock(localProc->xlogGrouprdata,
                    localProc,
                    XLogBytePosToRecPtr(StartBytePos),
                    XLogBytePosToEndRecPtr(
                        StartBytePos + MAXALIGN(((XLogRecord*)(localProc->xlogGrouprdata->data))->xl_tot_len)),
                    XLogBytePosToRecPtr(PrevBytePos));
                PrevBytePos = StartBytePos;
                StartBytePos += MAXALIGN(((XLogRecord*)(localProc->xlogGrouprdata->data))->xl_tot_len);
                nextidx = pg_atomic_read_u32(&localProc->xlogGroupNext);
            }

            WALInsertLockRelease(); /* The work is done; release the lock and wake up all member threads. */
            while (wakeidx != INVALID_PGPROCNO) {
                PGPROC* proc = g_instance.proc_base_all_procs[wakeidx];

                wakeidx = pg_atomic_read_u32(&proc->xlogGroupNext);
                pg_atomic_write_u32(&proc->xlogGroupNext, INVALID_PGPROCNO);
                proc->xlogGroupMember = false;
                pg_memory_barrier();

                if (proc != t_thrd.proc) {
                    PGSemaphoreUnlock(&proc->sem);
                }
            }

            END_CRIT_SECTION();
            return proc->xlogGroupReturntRecPtr;
        }
        ```

    - **False Sharing Elimination by Using Cache Alignment**

        When accessing main memory, the CPU fetches an entire cache line at a time. The typical value on x86 is 64 bytes; on the Hi1620 ARM chip, the L1 and L2 cache lines are 64 bytes and the L3 cache line is 128 bytes. Fetching by cache line greatly improves data access efficiency. However, if data at different locations in the same cache line is frequently read and written by different threads, each write invalidates that cache line on the other CPUs; the effort of fetching data by cache line is then not only wasted but becomes a performance burden. False sharing is exactly this low-performance behavior in which different CPUs simultaneously access different locations in the same cache line.

        Take LWLocks as an example. The code is as follows:

        ```
        #ifdef __aarch64__
        #define LWLOCK_PADDED_SIZE PG_CACHE_LINE_SIZE /* 128 bytes */
        #else
        #define LWLOCK_PADDED_SIZE (sizeof(LWLock) <= 32 ? 32 : 64)
        #endif
        typedef union LWLockPadded
        {
            LWLock lock;
            char pad[LWLOCK_PADDED_SIZE];
        } LWLockPadded;
        ```

        In the current lock logic, access to LWLocks remains one of the hottest paths. If **LWLOCK\_PADDED\_SIZE** is 32 bytes and the LWLocks are stored in a contiguous array, a 64-byte cache line can hold two LWLockPadded structures and a 128-byte cache line can hold four at the same time.
        When the system contends fiercely for LWLocks, the corresponding cache line is repeatedly fetched and invalidated, wasting a large number of CPU resources. Therefore, in the ARM optimization, **padding\_size** is set to **128** to eliminate false sharing and improve the overall LWLock performance.

    - **Lock-free Critical Section Protection by Using 128-Bit CAS Operations of WAL Insert Locks**

        The WAL of a database or file system inserts the log information generated in memory into the log buffer. To implement a high-speed log cache, the log management system reserves global positions for concurrent log insertion: two 64-bit global data position indexes indicate the start and end positions of the store insertion, supporting offsets of up to 16 EB. To protect the global position indexes, WAL introduces a high-performance atomic lock for each log buffer. In the NUMA architecture, and especially on the ARM architecture, concurrent WAL cache protection becomes a bottleneck because of atomic lock backoff, high cross-CPU access latency, and cache consistency performance differences.

        The main idea of the optimization is to replace the atomic lock with 128-bit atomic operations on the two combined 64-bit global data positions, eliminating the costs of cross-CPU access, backoff, and cache consistency caused by the atomic lock. For details, see Figure 5-21.

        ![](../figures/zh-cn_image_0000001208315958.gif)

        Figure 5-21 Lock-free critical section protection by using 128-bit CAS operations

        The global position information consists of a 64-bit start address and a 64-bit end address, which are combined into one 128-bit value; the position is reserved lock-free through CAS atomic operations. The ARM platform has no library support for 128-bit atomic operations, so openGauss assembles them from the 64-bit exclusive load/store instructions LDXP/STXP, which transfer two 64-bit values at a time.

        The key data structure and the code of the **ReserveXLogInsertLocation** function are as follows:

        ```
        typedef union {
            uint128 u128;
            uint64 u64[2];
            uint32 u32[4];
        } uint128_u; /* To ensure readability and operability of the code, the 128-bit unsigned integer is designed as a union structure, and the 64-bit values are assigned to its memory locations. */
        static void ReserveXLogInsertLocation(uint32 size, XLogRecPtr* StartPos, XLogRecPtr* EndPos, XLogRecPtr* PrevPtr)
        {
            volatile XLogCtlInsert* Insert = &t_thrd.shemem_ptr_cxt.XLogCtl->Insert;
            uint64 startbytepos;
            uint64 endbytepos;
            uint64 prevbytepos;

            size = MAXALIGN(size);

        #if defined(__x86_64__) || defined(__aarch64__)
            uint128_u compare;
            uint128_u exchange;
            uint128_u current;

            compare = atomic_compare_and_swap_u128((uint128_u*)&Insert->CurrBytePos);

        loop1:
            startbytepos = compare.u64[0];
            endbytepos = startbytepos + size;

            exchange.u64[0] = endbytepos; /* The start and end positions are written to exchange. */
            exchange.u64[1] = startbytepos;

            current = atomic_compare_and_swap_u128((uint128_u*)&Insert->CurrBytePos, compare, exchange);
            if (!UINT128_IS_EQUAL(compare, current)) { /* If another thread performed a concurrent update, retry in a loop. */
                UINT128_COPY(compare, current);
                goto loop1;
            }
            prevbytepos = compare.u64[1];

        #else
            SpinLockAcquire(&Insert->insertpos_lck); /* Other platforms use an atomic spinlock to protect the variable updates. */
            startbytepos = Insert->CurrBytePos;
            prevbytepos = Insert->PrevBytePos;
            endbytepos = startbytepos + size;
            Insert->CurrBytePos = endbytepos;
            Insert->PrevBytePos = startbytepos;

            SpinLockRelease(&Insert->insertpos_lck);
        #endif /* __x86_64__ || __aarch64__ */
            *StartPos = XLogBytePosToRecPtr(startbytepos);
            *EndPos = XLogBytePosToEndRecPtr(endbytepos);
            *PrevPtr = XLogBytePosToRecPtr(prevbytepos);
        }
        ```

    - **Clog Partition Optimization**

        For details about Clogs, see section 5.2.2 XID Allocation, Clogs, and CSNlogs. Each transaction has four states: **IN\_PROGRESS**, **COMMITTED**, **ABORTED**, and **SUB\_COMMITTED**; each log occupies 2 bits. Clogs need to be stored on disks: one page \(8 KB\) can record 2^15 transaction states, and each log file \(segment = 256 x 8 KB\) can record 2^26 states. Currently, access to Clogs is implemented through a buffer pool; a unified SLRU buffer pool is used in the code.

        ![](../figures/1717.png)

        Figure 5-22 Clog buffer pool before optimization

        ![](../figures/1718.png)

        Figure 5-23 Clog buffer pool after optimization

        As shown in Figure 5-22, the Clog buffer pool is globally unique in the shared memory under the name **CLOG Ctl** and is shared by all worker threads; in high-concurrency scenarios, contention on this resource becomes a performance bottleneck. Figure 5-23 shows the Clog buffer pool after partition optimization. A modulo operation \(obtaining the remainder after division\) on the page number distributes the logs evenly across multiple buffer pools in the shared memory; the buffer pools are recorded in the thread-local object array ClogCtlData and named **CLOG Ctl** _i_. The buffer pool objects and the corresponding global locks are added to the shared memory accordingly, and spreading the access in this way improves the overall throughput.

        To implement Clog partition optimization, the operations on the original buffer pool in the source code are changed to operations on the buffer pool of the corresponding partition. The partition can be easily located based on the XID and the page number, and the single control lock is replaced by one lock per partition. The related structure code is as follows; Table 5-8 lists the related functions.

        ```
        /* Clog partitions */
        #define NUM_CLOG_PARTITIONS 256 /* Number of partitions */
        /* Clog lightweight partition locks */
        #define CBufHashPartition(hashcode) \
            ((hashcode) % NUM_CLOG_PARTITIONS)
        #define CBufMappingPartitionLock(hashcode) \
            (&t_thrd.shemem_ptr_cxt.mainLWLockArray[FirstCBufMappingLock + CBufHashPartition(hashcode)].lock)
        #define CBufMappingPartitionLockByIndex(i) \
            (&t_thrd.shemem_ptr_cxt.mainLWLockArray[FirstCBufMappingLock + i].lock)
        ```

        **Table 5-8** Functions for Clog partition optimization
        | Function | Description |
        | --- | --- |
        | CLOGShmemInit | Calls SimpleLruInit to initialize the Clog buffer in the shared memory. |
        | ZeroCLOGPage | Initializes the values on a Clog page to 0. |
        | BootStrapCLOG | Creates an initial available Clog page in the buffer, calls ZeroCLOGPage to initialize it to 0, writes the page to the disk, and returns the page; used when a database is created. |
        | CLogSetTreeStatus | Sets the final state of transaction commit. |
        | CLogGetStatus | Queries the transaction state. |
        | ShutdownCLOG | Closes the buffer and flushes the data to the disk. |
        | ExtendCLOG | Creates a Clog page for a newly allocated transaction. |
        | TruncateCLOG | Deletes logs that have expired because of the creation of log checkpoints, to save space. |
        | WriteZeroPageXlogRec | Writes a CLOG_ZEROPAGE XLOG record when a new Clog page is created, for future replay. |
        | clog_redo | Performs the redo operations related to Clogs, including CLOG_ZEROPAGE and CLOG_TRUNCATE. |
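        Per the macros above, locating the partition lock for a given transaction requires only the Clog page number. The following minimal sketch illustrates the mapping; the constant follows from the "2 bits per transaction, 8 KB page" arithmetic above, and the helper name is illustrative:

        ```
        #define CLOG_XACTS_PER_PAGE 32768 /* 8 KB page / 2 bits per transaction = 2^15 */

        static LWLock* ClogPartitionLockForXid(TransactionId xid)
        {
            uint32 pageno = (uint32)(xid / CLOG_XACTS_PER_PAGE); /* page that holds this xid's 2 bits */
            uint32 hashcode = pageno;                            /* used as the partition hash input */
            return CBufMappingPartitionLock(hashcode);           /* applies CBufHashPartition internally */
        }
        ```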
    - **NUMA-aware Data and Thread Access Distribution**

        In the NUMA architecture, memory access involves two physical locations: the thread performing the access and the memory being accessed. Access is local only when both are on the same NUMA node; otherwise, remote cross-node access is involved, and its performance overhead is high.

        The numactl open-source software provides the libnuma library, which allows applications to easily bind threads to a specific NUMA node or CPU list and to allocate memory on a specified NUMA node. The following describes the APIs that are involved in the openGauss code.

        \(1\) int numa\_run\_on\_node\(int node\): Runs the current task and its subtasks on the specified node. The description of this API is as follows:

        ```
        numa_run_on_node: Runs the current task and its subtasks on a specific node. These tasks are not migrated to the CPUs of other nodes until the node association is reset by using the numa_run_on_node_mask function. -1 is passed to let the kernel schedule the tasks on all nodes again. The value 0 is returned when the operation is successful, and -1 is returned when the operation fails. The error code is recorded in errno.
        ```

        \(2\) void numa\_set\_localalloc\(void\): Sets the memory allocation policy of the calling thread to local allocation, that is, memory is preferentially allocated on the current node. The description of this API is as follows:

        ```
        numa_set_localalloc: Sets the memory allocation policy of the calling task to local allocation. In this mode, the preferred node for memory allocation is the node on which the task is running at the time of allocation.
        ```

        \(3\) void\* numa\_alloc\_onnode\(size\_t size, int node\): Allocates memory on a specified NUMA node. The description of this API is as follows:

        ```
        numa_alloc_onnode: Allocates memory on a specific node. The allocated size is rounded up to a multiple of the system page size. If the specified node rejects the process, the call fails. Compared with the malloc(3) function family, this function works slowly, and the numa_free function must be used to release the memory. NULL is returned when an error occurs.
        ```

        The internal data structures of openGauss are optimized based on the NUMA architecture as follows.

        1\) Global PGPROC array optimization

        ![](../figures/1719.png)

        Figure 5-24 Global PGPROC array optimization

        As shown in Figure 5-24, the system allocates a dedicated PGPROC structure for each client connection to maintain related information. ProcGlobal-\>allProcs was originally a global array of PGPROC structures, but the NUMA node holding its physical memory is indeterminate. As a result, when a transaction thread accesses its PGPROC structure, the thread may be scheduled among multiple NUMA nodes by the operating system; the physical memory location of the corresponding PGPROC structure is also indeterminate, so there is a high probability that memory is accessed remotely.

        Because the PGPROC structure is frequently accessed, the global array is divided into multiple subarrays based on the number of NUMA nodes, and each subarray is allocated on its NUMA node by using **numa\_alloc\_onnode**. To minimize structural changes to the current code, the type of ProcGlobal-\>allProcs is changed from PGPROC\* to PGPROC\*\*, and all accesses to ProcGlobal-\>allProcs are adjusted accordingly \(an additional layer of indirect pointer reference is added\).
The related code is as follows: + + ``` + #ifdef __USE_NUMA + if (nNumaNodes > 1) { + ereport(INFO, (errmsg("InitProcGlobal nNumaNodes: %d, inheritThreadPool: %d, groupNum: %d", + nNumaNodes, g_instance.numa_cxt.inheritThreadPool, + (g_threadPoolControler ? g_threadPoolControler->GetGroupNum() : 0)))); + + int groupProcCount = (TotalProcs + nNumaNodes - 1) / nNumaNodes; + size_t allocSize = groupProcCount * sizeof(PGPROC); + for (int nodeNo = 0; nodeNo < nNumaNodes; nodeNo++) { + initProcs[nodeNo] = (PGPROC *)numa_alloc_onnode(allocSize, nodeNo); + if (!initProcs[nodeNo]) { + ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("InitProcGlobal NUMA memory allocation in node %d failed.", nodeNo))); + } + add_numa_alloc_info(initProcs[nodeNo], allocSize); + int ret = memset_s(initProcs[nodeNo], groupProcCount * sizeof(PGPROC), 0, groupProcCount * sizeof(PGPROC)); + securec_check_c(ret, "\0", "\0"); + } + } else { + #endif + ``` + + 2\) Global WALInsertLock array optimization + + WALInsertLocks are used to perform concurrency protection on WAL Insert operations. You can configure multiple WALInsertLocks, for example, 16. Before optimization, all WALInsertLocks are in the same global array and are allocated by using the shared memory. When a transaction thread is running, one of the WALInsertLocks in the entire global array is allocated for use. Therefore, there is a high probability that remote memory access is involved. That is, there is cross-node and cross-package contention among multiple threads. WALInsertLocks can also allocate memory separately by NUMA node, and each transaction thread uses only the WALInsertLock in the local node group. In this way, data contention can be limited to the same NUMA node. Figure 5-25 shows the basic principles. + + ![](../figures/1720.png) + + Figure 5-25 Global WALInsertLock array optimization principles + + For example, if 16 WALInsertLocks and four NUMA nodes are configured, the original array with 16 elements will be split into four arrays, and each array has four elements. The global structure is WALInsertLockPadded \*\*GlobalWALInsertLocks. The local WALInsertLocks of the thread point to WALInsertLock\[4\] on the current node. Different NUMA nodes have WALInsertLock subarrays with different addresses. GlobalWALInsertLocks are used to trace WALInsertLock arrays under multiple nodes to facilitate traversal. Figure 5-26 shows the WALInsertLock grouping diagram. 
    ![](../figures/zh-cn_image_0000001208124506.png)

    Figure 5-26 WALInsertLock grouping diagram

    The code for initializing the WALInsertLock structure is as follows:

    ```
    WALInsertLockPadded** insertLockGroupPtr =
        (WALInsertLockPadded**)CACHELINEALIGN(palloc0(nNumaNodes * sizeof(WALInsertLockPadded*) + PG_CACHE_LINE_SIZE));
    #ifdef __USE_NUMA
    if (nNumaNodes > 1) {
        size_t allocSize = sizeof(WALInsertLockPadded) * g_instance.xlog_cxt.num_locks_in_group + PG_CACHE_LINE_SIZE;
        for (int i = 0; i < nNumaNodes; i++) {
            char* pInsertLock = (char*)numa_alloc_onnode(allocSize, i);
            if (pInsertLock == NULL) {
                ereport(PANIC, (errmsg("XLOGShmemInit could not alloc memory on node %d", i)));
            }
            add_numa_alloc_info(pInsertLock, allocSize);
            insertLockGroupPtr[i] = (WALInsertLockPadded*)(CACHELINEALIGN(pInsertLock));
        }
    } else {
    #endif
        char* pInsertLock = (char*)CACHELINEALIGN(palloc(
            sizeof(WALInsertLockPadded) * g_instance.attr.attr_storage.num_xloginsert_locks + PG_CACHE_LINE_SIZE));
        insertLockGroupPtr[0] = (WALInsertLockPadded*)(CACHELINEALIGN(pInsertLock));
    #ifdef __USE_NUMA
    }
    #endif
    ```

    On the ARM platform, the two-dimensional array GlobalWALInsertLocks is traversed to access the WALInsertLocks: the NUMA nodes at the first level and the WALInsertLock array on each node at the second level.

    The LWLock memory structure referenced by the WALInsertLocks is also adapted on the ARM platform. The code is as follows:

    ```
    typedef struct
    {
        LWLock lock;
    #ifdef __aarch64__
        pg_atomic_uint32 xlogGroupFirst;
    #endif
        XLogRecPtr insertingAt;
    } WALInsertLock;
    ```

    The lock member variable originally referenced an element of the global LWLock array in the shared memory. After the WALInsertLock optimization, although the WALInsertLocks are distributed by NUMA node, the LWLocks they reference cannot control their physical memory locations, so fierce cross-node contention is still involved when the WALInsertLocks are accessed. Therefore, the LWLocks are directly embedded into the WALInsertLocks. In this way, the LWLocks in use are also distributed across the NUMA nodes, and cache line transfers are reduced.



## 5.4 Summary

This chapter describes the transaction system and concurrency control mechanism of openGauss.

As a core component of the database, the transaction system connects the SQL, execution, and storage modules. After receiving an external command, it determines the execution direction based on the current internal system state, which ensures the continuity and accuracy of transaction processing.

In addition to the basic and core transaction system of openGauss, this chapter also describes how openGauss optimizes transaction performance on Kunpeng servers.

In short, the transaction system and concurrency control module of openGauss deliver both high performance and stability.
diff --git a/content/en/post/2022/Using-DataChecker-to-Ensure-Data-Accuracy-After-Migration.md b/content/en/post/2022/Using-DataChecker-to-Ensure-Data-Accuracy-After-Migration.md
new file mode 100644
index 0000000000000000000000000000000000000000..eab3ce1879c996196dbeb23b680b8fb00db863d0
--- /dev/null
+++ b/content/en/post/2022/Using-DataChecker-to-Ensure-Data-Accuracy-After-Migration.md
@@ -0,0 +1,391 @@
+++

title = "Using DataChecker to Ensure Data Accuracy After Migration"

date = "2021-06-25"

tags = [ "Using DataChecker to Ensure Data Accuracy After Migration"]

archives = "2021-06"

author = "Wenhao Zhao"

summary = "Using DataChecker to Ensure Data Accuracy After Migration"

img = "/en/post/2022/title/img16.png"

times = "12:30"

+++

# Using DataChecker to Ensure Data Accuracy After Migration

We have introduced several tools for migrating data from Oracle or MySQL to openGauss. Now, we can use the DataChecker tool to ensure data accuracy after migration.

## 1 Introduction to DataChecker

DataChecker is a tool written in Java for checking data consistency between two databases. Part of its architecture and implementation is based on Alibaba's open-source data migration tool yugong.

Code repository: https://gitee.com/opengauss/openGauss-tools-datachecker

- 1.1 Application Scenario

    Generally, DataChecker is used to verify data accuracy after a migration. After migrating a large amount of data from one database to another, you need to check whether the migrated data is accurate and complete. In this case, you can use DataChecker to check whether the data in the two databases is consistent.

- 1.2 Implementation Principles

    The architecture of DataChecker consists of two parts: Extractor and Applier.

    ![](../figures/zh-cn_image_0000001251852313.png)

    Extractor extracts data from the source database. Data is extracted in batches, following the order of the data in the source table.

    Applier locates the data extracted by Extractor in the target database, compares the columns one by one, and returns the result.

## 2 Usage Guide

- 2.1 Environment Requirements

    **Operating System**

    DataChecker is developed in Java with bat and shell scripts. It supports both Windows and Linux.

    JDK 1.6.25 or later is recommended.

    **Database**

    The source database supports MySQL and will support Oracle in the future.

    The target database supports only openGauss.

- 2.2 Downloading DataChecker

    You can download the source code and compiled package at https://gitee.com/opengauss/openGauss-tools-datachecker.

    Self-compilation:

    ```
    git clone git@gitee.com:opengauss/openGauss-tools-datachecker.git
    cd openGauss-tools-datachecker
    mvn clean install -Dmaven.test.skip -Denv=release
    ```

    If you do not want to compile the binary package by yourself, you can obtain the compiled binary package **DataChecker-1.0.0-SNAPSHOT.tar.gz** in the **target** folder in the cloned home directory.

- 2.3 Directory Structure

    The structure of the **target** directory is as follows:
    ```
    /target
        bin/
            startup.bat
            startup.sh
            stop.sh
        conf/
            gauss.properties
            logback.xml
        lib/
        logs/
    ```
    The **bin** directory contains three files, namely **startup.bat**, **startup.sh**, and **stop.sh**, for starting and stopping the program in Windows and Linux.

    The **conf** directory contains two configuration files. Generally, only **gauss.properties** is configured.

    The **lib** directory stores the dependency files required for running.

    The **logs** directory stores the result logs after running.

- 2.4 Configuration Modification

    Modify the configuration in the **/conf/gauss.properties** file. Generally, you only need to modify basic information, such as the addresses of the source and target databases and the tables to be verified. For other items, you can use the default values or modify them as required.

    | Parameter | Description | Default Value |
    | --- | --- | --- |
    | gauss.database.source.username | Specifies the username for accessing the source database. | N/A |
    | gauss.database.source.password | Specifies the password for accessing the source database. | N/A |
    | gauss.database.source.type | Specifies the type of the source database. | Mysql |
    | gauss.database.source.url | Specifies the URL for connecting to the source database. The URL must comply with certain format requirements. | N/A |
    | gauss.database.source.encode | Specifies the encoding format of the source database. | UTF-8 |
    | gauss.database.target.username | Specifies the username for accessing the target database. | N/A |
    | gauss.database.target.password | Specifies the password for accessing the target database. | N/A |
    | gauss.database.target.type | Specifies the type of the target database. | OPGS (openGauss) |
    | gauss.database.target.url | Specifies the URL for connecting to the target database. The URL must comply with certain format requirements. | N/A |
    | gauss.database.target.encode | Specifies the encoding format of the target database. | UTF-8 |
    | gauss.table.onceCrawNum | Specifies the maximum number of records processed by Extractor or Applier in each batch. | 1000 |
    | gauss.table.tpsLimit | Specifies the limit on transactions per second (TPS). The value 0 indicates that TPS is not limited. | 0 |
    | gauss.table.skipApplierException | If the value is true, a single abnormal data record can be ignored when a database exception (for example, a constraint key conflict) occurs in Applier. | false |
    | gauss.table.white | Specifies the whitelist that lists the tables to be verified. The format of the value is schema.tablename. Multiple table names can be separated by commas (,). To verify all tables in a schema, you only need to enter the schema name. | N/A |
    | gauss.table.black | Specifies the blacklist that lists the tables to be ignored. The format of the value is the same as that specified by gauss.table.white. | N/A |
    | gauss.table.inc.tablepks | Specifies the primary keys of the tables to be verified, which are used to speed up verification. The format of the value is tablename1&pk1&pk2\|tablename2&pk1. | N/A |
    | gauss.table.concurrent.enable | Specifies whether to enable parallel processing for multiple tables. If the value is false, tables are processed serially. | true |
    | gauss.table.concurrent.size | Specifies the number of tables that can be processed concurrently. | 5 |
    | gauss.table.retry.times | Specifies the number of retries after an error occurs in table verification. | 3 |
    | gauss.extractor.dump | Specifies whether to record all data extracted by Extractor. | false |
    | gauss.extractor.concurrent.global | Extractor adopts the global thread pool mode. If the value is true, all Extractor tasks use one group of thread pools. The thread pool size is specified by concurrent.size. | false |
    | gauss.extractor.concurrent.size | Specifies the number of threads that can run concurrently. This parameter takes effect only after concurrent.enable is enabled. | 30 |
    | gauss.applier.dump | Specifies whether to record all data extracted by Applier. | false |
    | gauss.applier.concurrent.enable | Specifies whether parallel processing is enabled for Applier. | true |
    | gauss.applier.concurrent.global | Applier adopts the global thread pool mode. If the value is true, all Applier tasks use one group of thread pools. The thread pool size is specified by concurrent.size. | false |
    | gauss.applier.concurrent.size | Specifies the number of threads that can run concurrently. This parameter takes effect only after concurrent.enable is enabled. | 30 |
    | gauss.stat.print.interval | Specifies the frequency of printing statistical information. If the value is 5, statistical information is printed once after five rounds of Extractor and Applier operations are complete. | 5 |
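
    For orientation, the following is a minimal **gauss.properties** sketch that combines the parameters above. All addresses, credentials, and names are placeholders, and the URL values are only an assumption in JDBC style; check the comments in the default configuration file shipped with the tool for the exact URL format it expects:

    ```
    # Source (MySQL) connection -- placeholder values
    gauss.database.source.username=mysql_user
    gauss.database.source.password=******
    gauss.database.source.type=Mysql
    gauss.database.source.url=jdbc:mysql://127.0.0.1:3306/sourcedb
    gauss.database.source.encode=UTF-8
    # Target (openGauss) connection -- placeholder values
    gauss.database.target.username=og_user
    gauss.database.target.password=******
    gauss.database.target.type=OPGS
    gauss.database.target.url=jdbc:postgresql://127.0.0.1:5432/targetdb
    gauss.database.target.encode=UTF-8
    # Verify every table in the sourcedb schema
    gauss.table.white=sourcedb
    ```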
+ +- 2.5 Starting and Stopping the Tool + + **Starting the Tool in Linux** + + sh startup.sh + + **Stopping the Tool in Linux** + + sh stop.sh + + **Starting the Tool in Windows** + + startup.bat + + **Stopping the Tool in Windows** + + You can directly close the terminal. + +- 2.6 Log Description + + The log structure is as follows:

    ```
    /logs
        summary/
            summary.log
        gauss/
            table.log
        ${table}/
            table.log
            extractor.log
            applier.log
            check.log
    ```
+ + The **table.log** file in the **gauss** directory records all logs in the entire verification process. + + The **summary.log** file in the **summary** directory records the names of all tables whose verification results are incorrect. That is, the data in the two tables is inconsistent. + + _$\{table\}_ indicates the name of each table. In the **$\{table\}** directory, the **table.log** file records all logs generated during verification of a table, the **extractor.log** file records all logs generated during data extraction, and the **applier.log** file records all logs generated during verification implementation \(data comparison\). The **check.log** file records the data that fails to be verified in a specific line. If the **check.log** file does not exist, the verification result is correct. + +- 2.7 Example + + **Preparing the Database** + + Create a table in the **mysql** schema in MySQL, as shown in the following figure. + + ![](../figures/zh-cn_image_0000001252252279.png) + + Assume that after data is migrated to openGauss, only four of the five data records are successfully migrated, as shown in the following figure. + + ![](../figures/zh-cn_image_0000001206972348.png) + + **Configuring gauss.properties** + + ![](../figures/zh-cn_image_0000001206812360.png) + + **Running startup.bat or startup.sh** + + ![](../figures/zh-cn_image_0000001207132328.png) + + **Viewing Logs** + + Check the **/logs/summary/summary.log** file and locate the **mysql.test** table where the error occurs. + + Access **/logs/mysql.test/** to view details. + + ![](../figures/zh-cn_image_0000001206972352.png) + + There are four log files. You can mainly view **check.log**. + + ![](../figures/zh-cn_image_0000001252252281.png) + + The record whose ID is 5 and whose name is 5 fails to be migrated. + + diff --git a/content/en/post/2022/Using-Ora2Pg-to-Migrate-Data-from-Oracle-to-openGauss.md b/content/en/post/2022/Using-Ora2Pg-to-Migrate-Data-from-Oracle-to-openGauss.md new file mode 100644 index 0000000000000000000000000000000000000000..ea0f91887c51e3f01f8c0c5c36456795ea389cee --- /dev/null +++ b/content/en/post/2022/Using-Ora2Pg-to-Migrate-Data-from-Oracle-to-openGauss.md @@ -0,0 +1,273 @@ ++++ + +title = "Using Ora2Pg to Migrate Data from Oracle to openGauss" + +date = "2021-06-09" + +tags = [ "Using Ora2Pg to Migrate Data from Oracle to openGauss"] + +archives = "2021-06" + +author = "Xiaobin Chen" + +summary = "Using Ora2Pg to Migrate Data from Oracle to openGauss" + +img = "/en/post/2022/title/img14.png" + +times = "12:30" + ++++ + +# Using Ora2Pg to Migrate Data from Oracle to openGauss + +## **Introduction to Ora2Pg** + +Ora2Pg is an open-source tool for migrating data from Oracle to PostgreSQL. By connecting to Oracle, Ora2Pg automatically scans and extracts object structures and data, generates SQL scripts, and applies the scripts to PostgreSQL manually or automatically. + +Official website: https://ora2pg.darold.net/ + +## **Advantages of Ora2Pg** + +- Supports exporting most types of database objects, including tables, views, sequences, indexes, foreign keys, constraints, functions, stored procedures, and others. +- Automatically converts the PL/SQL syntax to the PL/pgSQL syntax, avoiding manual modification to some extent. +- Generates migration reports, containing migration difficulty evaluation and person-day estimation. +- Compresses exported data to reduce disk overhead as required. +- Provides various configuration items, allowing you to customize migration operations. 
+ +## **Application of Ora2Pg in openGauss** + +The main language of Ora2Pg is Perl. It uses the Perl DBI module and connects to the target PostgreSQL database by using DBD::Pg. openGauss is compatible with PostgreSQL communication protocols and most syntaxes. As such, you only need to modify some names. Ora2Pg can also be used in openGauss. + +## **Usage Example of Ora2Pg** + +Ora2Pg exports DDL statements of object structures to SQL files. Table data can be exported to files by running the INSERT or COPY statement or be directly imported to the target database without generating intermediate SQL files. + +Ora2Pg provides the **ora2pg** command to complete these processes. Generally, multiple commands need to be executed for one migration. Ora2Pg provides a more convenient method to obtain the export and import scripts and the migration directory template by creating a migration project. The scripts integrate several **ora2pg** commands and the **psql** command \(**gsql** for openGauss\) required for importing SQL files. You only need to run the two scripts to complete the migration. The demonstration in this document uses the scripts. + +- 1. Installing Dependencies + + The language of Ora2Pg is Perl. Therefore, you need to install the required Perl modules. + + ``` + # Perform the following operations as the root user: + yum install -y perl-ExtUtils-CBuilder perl-ExtUtils-MakeMaker + yum install perl-CPAN + ``` + + Install DBI, DBD::Pg, and DBD::Oracle, on which Ora2Pg depends to connect to the databases. + + ``` + perl -MCPAN -e 'install DBI' + perl -MCPAN -e 'install DBD::Pg' + ``` + + Install Oracle Instant Client or Oracle on the local host before installing DBD::Oracle. + + ``` + # Download Oracle Instant Client from the Oracle official website and install it. + rpm -ivh oracle-instantclient12.2-basic-12.2.0.1.0-1.x86_64.rpm + rpm -ivh oracle-instantclient12.2-devel-12.2.0.1.0-1.x86_64.rpm + rpm -ivh oracle-instantclient12.2-jdbc-12.2.0.1.0-1.x86_64.rpm + rpm -ivh oracle-instantclient12.2-sqlplus-12.2.0.1.0-1.x86_64.rpm + # Set the environment variable ORACLE_HOME. + export ORACLE_HOME=/usr/lib/oracle/12.2/client64 + # Set ORACLE_HOME as follows if Oracle has been installed on the local host: + export ORACLE_HOME=/opt/oracle/product/19c/dbhome_1 + export LD_LIBRARY_PATH=$ORACLE_HOME/lib + # Install DBD::Oracle. + perl -MCPAN -e 'install DBD::Oracle' + ``` + + +- 2. Installing Ora2Pg and Creating a Migration Project + + Source code path: https://github.com/darold/ora2pg + + ``` + # Go to the code directory. <install_path> is the target installation path. + perl Makefile.PL PREFIX=<install_path> + make && make install + + # Set environment variables and check whether the installation is successful. <code_path> is the path for downloading the code. + export PERL5LIB=<code_path>/lib + export PATH=$PATH:/usr/local/bin + ora2pg --help + + # Create a migration project. + ora2pg --init_project oramig + ``` + + After a migration project is created, the **oramig** directory template is generated in the current directory, as shown in the following figure. The directory template contains the **export\_schema.sh** and **import\_all.sh** scripts, which are used for subsequent data export and import. The **schema** and **sources** directories store DDL statements of each object. The **schema** directory stores statements after the PL/SQL syntax is converted into the PL/pgSQL syntax, while the **sources** directory stores PL/SQL statements before conversion.
The **data** directory stores table data files, the **config** directory contains the **ora2pg.conf** configuration file, and the **reports** directory stores migration reports. + + ``` + ./oramig/ + schema/ + dblinks/ + directories/ + functions/ + grants/ + mviews/ + packages/ + partitions/ + procedures/ + sequences/ + synonyms/ + tables/ + tablespaces/ + triggers/ + types/ + views/ + sources/ + functions/ + mviews/ + packages/ + partitions/ + procedures/ + triggers/ + types/ + views/ + data/ + config/ + reports/ + ``` + + Now, you can run the **ora2pg** command. The following lists some command-line parameters that can be specified when you run the **ora2pg** command. These parameters can be set in the **ora2pg.conf** file. When you specify a configuration file, the values of the command-line parameters overwrite the corresponding values in the configuration file. + + ``` + Usage: ora2pg [-dhpqv --estimate_cost --dump_as_html] [--option value] + + -a | --allow str: Specifies the list of objects that can be exported. The objects are separated by commas (,). + -b | --basedir dir: Specifies the default export directory for storing exported SQL files. + -c | --conf file: Specifies the path of the configuration file. + -e | --exclude str: Specifies the list of objects that are excluded from the export. The objects are separated by commas (,). + -i | --input file: Specifies the SQL file to be imported. You do not need to connect to Oracle when importing the file. + -o | --out file: Specifies the path for storing the exported SQL file. The default value is the output.sql file in the current directory. + -p | --plsql: Enables the conversion from PL/SQL code to PL/pgSQL code. + -s | --source DSN: Specifies the data source of Oracle DBI. + -t | --type export: Specifies the export type. This parameter will overwrite the export type (specified by TYPE) in the configuration file. + -u | --user name: Specifies the username for connecting to Oracle. You can also use the ORA2PG_USER environment variable. + -w | --password pwd: Specifies the user password for connecting to Oracle. You can also use the ORA2PG_PASSWD environment variable. + --init_project NAME: Initializes a typical Ora2Pg project and generates a directory template. + --view_as_table str: Exports views as tables. Use commas (,) to separate multiple views. + ``` + +- 3. Configuring Ora2Pg + + The **ora2pg.conf** file contains all configuration items, which can be used to customize migration operations. The following describes some common configuration items. + + **ORACLE\_HOME**: Specifies the environment variable _ORACLE\_HOME_. The DBD:Oracle module uses this variable to search for the required Oracle database. The setting method is involved in dependency installation. + + **ORACLE\_DSN**: Specifies the data source name in the standard DBI DSN form. For example: + + ``` + ORACLE_DSN dbi:Oracle:host=oradb_host.myhost.com;sid=DB_SID;port=1521 + ``` + + or + + ``` + ORACLE_DSN dbi:Oracle:DB_SID + ``` + + For the second method, you need to declare the system identifier \(SID\) in the **$ORACLE\_HOME/network/admin/tnsnames.ora** file or the **tnsnames.ora** file in the directory specified by the environment variable _TNS\_ADMIN_. + + **ORACLE\_USER** and **ORACLE\_PWD**: Define the username and password for connecting to Oracle, respectively. Note that if possible, log in as the Oracle super administrator to avoid permission issues and ensure that nothing is missing. + + **PG\_DSN**: Specifies the name of the target database. 
The following uses openGauss as an example. The database to be connected is **mydb** with the IP address **localhost** and the port number **5432**. + + ``` + PG_DSN dbi:Pg:dbname=mydb;host=localhost;port=5432 + ``` + + **PG\_USER** and **PG\_PWD**: Specify the username and password for connecting to the target database, respectively. Note that the user must have the permission to remotely connect to openGauss as well as read and write permissions on the corresponding database. Specifically, the host where Ora2Pg runs and the user must be in the remote access whitelist of openGauss. + + **SCHEMA**: Specifies the schema to be exported. As shown in the following, the objects associated with the **APPS** schema are extracted. + + ``` + SCHEMA APPS + ``` + + **ORA\_INITIAL\_COMMAND**: Sends an initial command to Oracle after the connection. For example, disable an access control policy before reading an object or set some session parameters. + + **TYPE**: Specifies the type of the objects to be exported, including tables, views, sequences, indexes, foreign keys, constraints, functions, stored procedures, and others. The default value is **TABLE**. As shown in the following, ordinary tables and views are exported. + + ``` + TYPE TABLE VIEW + ``` + + For more details about the configuration items, visit the official website at the following link: + + https://ora2pg.darold.net/documentation.html + + Test the connection. After configuring the DSN of Oracle, run the following command to test the database connection: + + ``` + ora2pg -t SHOW_VERSION -c config/ora2pg.conf + ``` + + The Oracle version is displayed if the connection is successful. + +- 4. Running Migration Scripts + + The configuration is as follows. Ora2Pg connects to the source Oracle database as the **system** user and to the target openGauss database as the **testuser** user. + + ``` + ORACLE_HOME /opt/oracle/product/19c/dbhome_1 + ORACLE_DSN dbi:Oracle:host=127.0.0.1;sid=ORCLCDB;port=1521 + ORACLE_USER system + ORACLE_PWD manager + SCHEMA testuser + PG_DSN dbi:Pg:dbname=mydb;host=127.0.0.1;port=5432 + PG_USER testuser + PG_PWD openGauss123 + ``` + + Modify the export type in **export\_schema.sh**. In this migration, tables and functions are exported. + + ![](../figures/zh-cn_image_0000001207289100.jpg) + + Run the export script and wait until the migration is complete. A DDL file of the corresponding type is generated in the **schema** and **sources** subdirectories, and the command for exporting table data is provided at the end. + + ``` + sh export_schema.sh + ``` + + ![](../figures/zh-cn_image_0000001252129111.jpg) + + In addition, a migration report in HTML format is generated in the **reports** directory. + + ![](../figures/zh-cn_image_0000001252009063.jpg) + + Before running the import script, perform the following operations: + + 1. Create a database in openGauss, create a user in the database, and set the owner of **mydb** to the user. \(In **import\_all.sh**, **createuser** and **createdb** of PostgreSQL are used to create users and databases.\) + + ![](../figures/zh-cn_image_0000001206809156.jpg) + + 2. Add the **bin** directory of openGauss to the PATH environment variable so that the client tool gsql can be used. + 3. Change **psql** in **import\_all.sh** to **gsql**. + 4. When data is imported as a common user, modify the script so that the user password is passed to gsql \(the **-w** option mentioned below\), which avoids entering the password repeatedly. + + Run the import script. In the script, the **testuser** user is used to log in to the **mydb** database with the IP address **127.0.0.1** and the port number **5432**. The **-f** option skips the check of whether the user and database need to be created. + + ``` + sh import_all.sh -d mydb -o testuser -h 127.0.0.1 -p 5432 -f 1 + ``` + + After the script is executed, the system prompts you whether to import the object structures and data, as shown in the following figure. \(In the preceding information, the **-w** option is added by manually modifying the **import\_all.sh** script.\) + + ![](../figures/zh-cn_image_0000001206809160.jpg) + + For table indexes and constraints, you can import them after data is imported. + + Log in to openGauss to view the migration result. + + ![](../figures/zh-cn_image_0000001252249073.jpg) + + + +## **Disadvantages of Ora2Pg** + +1. Ora2Pg uses regular expressions and adopts text replacement to convert the PL/SQL syntax to the PL/pgSQL syntax. However, for design reasons, Ora2Pg supports only conversion of some syntaxes. +2. DDL statements are generated by concatenating character strings. However, this method does not fully support some syntaxes, such as the syntax for creating partitioned tables. + diff --git a/content/en/post/2022/Using-pg_chameleon-to-Migrate-Data-from-MySQL-to-openGauss.md b/content/en/post/2022/Using-pg_chameleon-to-Migrate-Data-from-MySQL-to-openGauss.md new file mode 100644 index 0000000000000000000000000000000000000000..484a8ca496b3fde816340b526599137f090cdc49 --- /dev/null +++ b/content/en/post/2022/Using-pg_chameleon-to-Migrate-Data-from-MySQL-to-openGauss.md @@ -0,0 +1,219 @@ ++++ + +title = "Using pg chameleon to Migrate Data from MySQL to openGauss" + +date = "2021-06-17" + +tags = ["Using pg chameleon to Migrate Data from MySQL to openGauss"] + +archives = "2021-06" + +author = "Ju Peng" + +summary = "Using pg chameleon to Migrate Data from MySQL to openGauss" + +img = "/en/post/2022/title/img15.jpg" + +times = "12:30" + ++++ + +# Using pg\_chameleon to Migrate Data from MySQL to openGauss + +## Introduction to pg\_chameleon + +pg\_chameleon is a real-time replication tool written in Python 3 for migrating data from MySQL to PostgreSQL. The tool uses the mysql-replication library to extract row images from MySQL. The row images are stored in PostgreSQL in JSONB format. + +A **pl/pgsql** function in PostgreSQL is executed to decode row images in JSONB format and replay the changes to PostgreSQL. In addition, the tool uses the read-only mode to pull full data from MySQL to PostgreSQL through initial configuration. In this way, the tool provides the function of copying the initial full data and subsequent incremental data online in real time. + +pg\_chameleon has the following features: + +- Provides online real-time replication by reading the MySQL BinLog. +- Supports reading data from multiple MySQL schemas and restoring the data to the target PostgreSQL database. The source schemas and target schemas can use different names. +- Implements real-time replication through a daemon. The daemon consists of two subprocesses. One is responsible for reading logs from MySQL, and the other is responsible for replaying changes to PostgreSQL. + +openGauss is compatible with PostgreSQL communication protocols and most syntaxes. For this reason, you can use pg\_chameleon to migrate data from MySQL to openGauss. In addition, the real-time replication capabilities of pg\_chameleon greatly reduce the service interruption duration during database switchover. + +## pg\_chameleon Issues in openGauss + +1.
pg\_chameleon depends on the psycopg2 driver, and the psycopg2 driver uses the pg\_config tool to check the PostgreSQL version and restricts PostgreSQL of earlier versions from using this driver. The pg\_config tool of openGauss returns the version of openGauss \(the current version is openGauss 2.0.0\). As a result, the driver reports the version error "Psycopg requires PostgreSQL client library \(libpq\) \>= 9.1". You need to compile psycopg2 from the source code and remove the related restrictions in the source header file **psycopg/psycopg.h**. +2. pg\_chameleon sets the GUC parameter **LOCK\_TIMEOUT** to limit the timeout for waiting for locks in PostgreSQL. openGauss does not support this parameter. \(openGauss supports the GUC parameter **lockwait\_timeout**, which needs to be set by the administrator.\) You need to delete the related settings from the source code of pg\_chameleon. +3. pg\_chameleon uses the syntax of the UPSERT statement to specify the replacement operation when a constraint is violated. The function and syntax of the UPSERT statement supported by openGauss are different from those supported by PostgreSQL: openGauss uses the **ON DUPLICATE KEY UPDATE \{ column\_name = \{ expression | DEFAULT \} \} \[, ...\]** syntax, while PostgreSQL uses the **ON CONFLICT \[ conflict\_target \] DO UPDATE SET \{ column\_name = \{ expression | DEFAULT \} \}** syntax. You need to modify the related UPSERT statements in the source code of pg\_chameleon. +4. pg\_chameleon uses the **CREATE SCHEMA IF NOT EXISTS** and **CREATE INDEX IF NOT EXISTS** syntaxes. openGauss does not support the **IF NOT EXISTS** option of schemas and indexes. You need to modify the logic so that the system checks whether the schemas and indexes exist before creating them. +5. To select the array range, openGauss runs **column\_name\[start, end\]**, while PostgreSQL runs **column\_name\[start:end\]**. You need to modify the array range selection mode in the source code of pg\_chameleon. +6. pg\_chameleon uses the INHERITS function, but openGauss does not support inherited tables. You need to modify the SQL statements and tables that use inherited tables. + +Next, use pg\_chameleon to migrate data from MySQL to openGauss. + +## Configuring pg\_chameleon + +pg\_chameleon uses the **config-example.yaml** configuration file in **\~/.pg\_chameleon/configuration** to define configurations during migration. The configuration file consists of four parts: **global settings**, **type\_override**, **postgres destination connection**, and **sources**. **global settings** is used to set the log file path, log level, and others. **type\_override** allows users to customize type conversion rules and overwrite existing default conversion rules. **postgres destination connection** is used to configure the parameters for connecting to openGauss. **sources** is used to define the parameters for connecting to MySQL and other configurable items during replication. + +For more details about the configuration items, see the official website: + +https://pgchameleon.org/documents\_v2/configuration\_file.html + +The following is an example of the configuration file: + +``` +# global settings +pid_dir: '~/.pg_chameleon/pid/' +log_dir: '~/.pg_chameleon/logs/' +log_dest: file +log_level: info +log_days_keep: 10 +rollbar_key: '' +rollbar_env: '' +# type_override allows the user to override the default type conversion +# into a different one. +type_override: + "tinyint(1)": + override_to: boolean + override_tables: + - "*" +# postgres destination connection +pg_conn: + host: "1.1.1.1" + port: "5432" + user: "opengauss_test" + password: "password_123" + database: "opengauss_database" + charset: "utf8" +sources: + mysql: + db_conn: + host: "1.1.1.1" + port: "3306" + user: "mysql_test" + password: "password123" + charset: 'utf8' + connect_timeout: 10 + schema_mappings: + mysql_database:sch_mysql_database + limit_tables: + skip_tables: + grant_select_to: + - usr_migration + lock_timeout: "120s" + my_server_id: 1 + replica_batch_size: 10000 + replay_max_rows: 10000 + batch_retention: '1 day' + copy_max_memory: "300M" + copy_mode: 'file' + out_dir: /tmp + sleep_loop: 1 + on_error_replay: continue + on_error_read: continue + auto_maintenance: "disabled" + gtid_enable: false + type: mysql +keep_existing_schema: No +``` + +The preceding configuration file indicates that the username and password for connecting to MySQL are **mysql\_test** and **password123** respectively during data migration. The IP address and port number of the MySQL server are 1.1.1.1 and 3306, respectively. The source database is **mysql\_database**. + +The username and password for connecting to openGauss are **opengauss\_test** and **password\_123**, respectively. The IP address and port number of the openGauss server are 1.1.1.1 and 5432, respectively. The target database is **opengauss\_database**. The **sch\_mysql\_database** schema is created in **opengauss\_database**, and all tables to be migrated are in this schema. + +Note that the user must have the permission to remotely connect to MySQL and openGauss as well as the read and write permissions on the corresponding databases. For openGauss, the host where pg\_chameleon runs must be in the remote access whitelist of openGauss. For MySQL, the user must have the **RELOAD**, **REPLICATION CLIENT**, and **REPLICATION SLAVE** permissions. + +The following describes the migration procedure. + +## Creating Users and Databases + +The following shows how to create the users and databases in openGauss required for migration. + +![](../figures/zh-cn_image_0000001252011743.jpg) + +The following shows how to create the users in MySQL required for migration and grant related permissions to the users. + +![](../figures/zh-cn_image_0000001252131781.jpg) + +## Enabling the Replication Function of MySQL + +Modify the MySQL configuration file. Generally, the configuration file is **/etc/my.cnf** or the **cnf** configuration file in the **/etc/my.cnf.d**/ folder. Modify the following configurations in the **\[mysqld\]** configuration block \(if the **\[mysqld\]** configuration block does not exist, add it\): + +``` +[mysqld] +binlog_format = ROW +log_bin = mysql-bin +server_id = 1 +binlog_row_image = FULL +expire_logs_days = 10 +``` + +After the modification, restart MySQL for the configurations to take effect. + +## Running pg\_chameleon to Migrate Data + +1. Create and activate a virtual Python environment. + + **_python3 -m venv venv_** + + **_source venv/bin/activate_** + +2. Download and install psycopg2 and pg\_chameleon. + + Run the **pip install pip --upgrade** command to upgrade pip. + + Add the folder where the pg\_config tool of openGauss is located to the _$PATH_ environment variable.
Example: + + **_export PATH=\{openGauss-server\}/dest/bin:$PATH_** + + Download the source code of psycopg2 at https://github.com/psycopg/psycopg2, remove the restriction of checking the PostgreSQL version, and run the **python setup.py install** command to compile the source code and install the tool. + + Download the source code of pg\_chameleon at https://github.com/the4thdoctor/pg\_chameleon, solve the preceding issues in openGauss, and run the **python setup.py install** command to compile the source code and install the tool. + +3. Create the configuration file directory of pg\_chameleon. + + **_chameleon set\_configuration\_files_** + +4. Modify the configuration file of pg\_chameleon. + + **_cd \~/.pg\_chameleon/configuration_** + + **_cp config-example.yml default.yml_** + + Modify the **default.yml** file as required. Modify the connection configuration information, user information, database information, and schema mapping specified by **pg\_conn** and **mysql**. An example of the configuration file is provided for reference. + +5. Initialize the replication stream. + + **_chameleon create\_replica\_schema --config default_** + + **_chameleon add\_source --config default --source mysql_** + + In this step, an auxiliary schema and table are created for the replication process in openGauss. + +6. Copy basic data. + + **_chameleon init\_replica --config default --source mysql_** + + After this step is complete, the current full data in MySQL is copied to openGauss. + + You can view the replication result in openGauss. + + ![](../figures/zh-cn_image_0000001207291774.jpg) + +7. Enable online real-time replication. + + **_chameleon start\_replica --config default --source mysql_** + + After real-time replication is enabled, insert a data record into MySQL. + + ![](../figures/zh-cn_image_0000001207131798.jpg) + + View the data in the **test\_decimal** table in openGauss. + + ![](../figures/zh-cn_image_0000001252131783.jpg) + + The newly inserted data record is successfully copied to openGauss. + +8. Disable online replication. + + **_chameleon stop\_replica --config default --source mysql_** + + **_chameleon detach\_replica --config default --source mysql_** + + **_chameleon drop\_replica\_schema --config default_** + + diff --git a/content/en/post/2022/Using-pgloader-to-Migrate-Data-from-MySQL-to-openGauss.md b/content/en/post/2022/Using-pgloader-to-Migrate-Data-from-MySQL-to-openGauss.md new file mode 100644 index 0000000000000000000000000000000000000000..e868c717d36d06875f607ef7534604106ecea9bc --- /dev/null +++ b/content/en/post/2022/Using-pgloader-to-Migrate-Data-from-MySQL-to-openGauss.md @@ -0,0 +1,133 @@ ++++ + +title = "Using pgloader to Migrate Data from MySQL to openGauss" + +date = "2021-05-27" + +tags = [ "Using pgloader to Migrate Data from MySQL to openGauss"] + +archives = "2021-05" + +author = "Ju Peng" + +summary = "Using pgloader to Migrate Data from MySQL to openGauss" + +img = "/en/post/2022/title/img13.png" + +times = "12:30" + ++++ + +# Using pgloader to Migrate Data from MySQL to openGauss + +## Introduction to pgloader + +pgloader is a data import tool that uses the COPY command to import data to PostgreSQL. pgloader works in two modes: importing data from files and migrating databases. In both modes, pgloader uses the COPY protocol of PostgreSQL to efficiently transfer data. + +openGauss is compatible with PostgreSQL communication protocols and most syntaxes. For this reason, you can use pgloader to migrate data from MySQL to openGauss. 
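Before diving into the configuration details, note that pgloader can also migrate a whole database with a single command that names the source and target connection strings. The following sketch uses placeholder credentials and addresses, and assumes the MD5 compatibility issue described in the next section has already been addressed:

**_$ pgloader mysql://mysql\_user:password@127.0.0.1:3306/sourcedb postgresql://og\_user:password@127.0.0.1:5432/targetdb_**

For repeatable migrations, a load command file such as the **openGauss.loader** file shown later is preferred, because it keeps all options in one place.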
+ +## pgloader Issues in openGauss + +openGauss performs security hardening on the native PostgreSQL communication protocols. As a result, it is incompatible with the default PostgreSQL communication protocols, and by default, pgloader, which uses the native PostgreSQL protocol, cannot connect to openGauss. An error similar to the following is reported: + +![](../figures/zh-cn_image_0000001252128947.jpg) + +The solution is to modify the GUC parameter. The involved GUC parameter is **password\_encryption\_type**. By default, PostgreSQL uses MD5 encryption, which is insecure. To improve the security capabilities of openGauss, openGauss uses SHA256 encryption by default. Therefore, the preceding error is reported. openGauss does not delete MD5 encryption and its verification logic. As such, MD5 encryption can be enabled by setting the GUC parameter. + +Method: + +**_gs\_guc reload -D $PGDATA -c "password\_encryption\_type = 1"_** + +**You must create a user after setting the preceding parameter.** Then, you can log in to the database as the user. + +Next, use pgloader to migrate data from MySQL to openGauss. + +## Installing pgloader + +You can install pgloader directly from apt.postgresql.org or the official Debian repository at packages.debian.org/pgloader. + +**_$ apt-get install pgloader_** + +You can also directly use pgloader through Docker images. + +**_$ docker pull dimitri/pgloader_** + +**_$ docker run --rm --name pgloader dimitri/pgloader:latest pgloader --version_** + +**_$ docker run --rm --name pgloader dimitri/pgloader:latest pgloader --help_** + +## Configuring pgloader + +pgloader provides various configuration items for you to customize migration operations. For example, you can specify the **include drop** option to delete all tables whose names appear in MySQL in the target database, so that the tool can be started automatically in a clean environment by running the same command for multiple consecutive times. + +The following describes some common configuration items. + +**FROM**: URL of the source database. The format is as follows: + +``` + mysql://[user[:password]@][netloc][:port][/dbname][?option=value&...] +``` + +**INTO**: URL of the target database. The format is as follows: + +``` + postgresql://[user[:password]@][netloc][:port][/dbname][?option=value&...] +``` + +**WITH**: Specifies options used when data is loaded from MySQL, such as **include drop**, **create tables**, and **create indexes**. + +**CAST**: Specifies user-defined type conversion rules. You are allowed to overwrite existing default conversion rules or modify them in special cases. + +For partial migration, you can use the configuration items **including only table names matching** and **excluding table names matching** to migrate only specific tables or exclude specific tables during migration.
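As a sketch of how these clauses fit into a load command file \(the connection strings, table name, and regular expression below are placeholders\):

```
LOAD DATABASE
FROM mysql://mysql_user:password@127.0.0.1:3306/sourcedb
INTO postgresql://og_user:password@127.0.0.1:5432/targetdb

WITH include drop, create tables, create indexes

-- copy only the table t1 and any table whose name starts with log_
INCLUDING ONLY TABLE NAMES MATCHING 't1', ~/^log_/;
```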
+ +For details about the configuration items, see the official website: + +https://pgloader.readthedocs.io/en/latest/ref/mysql.html + +The following is an example of the configuration file for migrating data from MySQL to openGauss: + +``` +LOAD DATABASE +FROM mysql://mysql_test:password123@1.1.1.1:3306/mysql_database +INTO postgresql://opengauss_test:password_123@1.1.1.1:5432/opengauss_database + +WITH include drop, create tables, create indexes, reset no sequences, + workers = 8, concurrency = 1, + multiple readers per thread, rows per range = 50000 + + CAST + type varchar when(= 1 precision) to "boolean" drop typemod keep default keep not null; +``` + +The preceding configuration file indicates that the username and password for connecting to MySQL are **mysql\_test** and **password123** respectively during data migration. The IP address and port number of the MySQL server are **1.1.1.1** and **3306**, respectively. The source database is **mysql\_database**. + +The username and password for connecting to openGauss are **opengauss\_test** and **password\_123**, respectively. The IP address and port number of the openGauss server are **1.1.1.1** and **5432**, respectively, and the target database is **opengauss\_database**. + +Note that the user must have the permission to remotely connect to MySQL and openGauss and the read and write permissions on the corresponding databases. For openGauss, the host where pgloader runs must be in the remote access whitelist of openGauss. + +## Creating Users and Databases + +This section describes how to create users and databases in openGauss required for migration. + +![](../figures/zh-cn_image_0000001251848955.jpg) + +## Running pgloader to Migrate Data + +The following shows how to install pgloader by using Docker images. Name the prepared configuration file **openGauss.loader**. + +![](../figures/zh-cn_image_0000001251848959.jpg) + +Run the **docker run -tid --name pgloader\_test dimitri/pgloader** command to start Docker. + +Run the **docker cp ./openGauss.loader pgloader\_test:/** command to copy the configuration file to Docker. + +Run the **docker exec -it pgloader\_test /bin/bash** command to access Docker. + +![](../figures/zh-cn_image_0000001252248915.jpg) + +Run the **pgloader openGauss.loader** command to start pgloader, wait until the data migration is complete, and view the migration result report. + +![](../figures/zh-cn_image_0000001252008911.jpg) + +View the migration result in openGauss. + +![](../figures/zh-cn_image_0000001206968992.jpg) + diff --git a/content/en/post/2022/Using-the-Python-Driver-psycopg2-of-openGauss.md b/content/en/post/2022/Using-the-Python-Driver-psycopg2-of-openGauss.md new file mode 100644 index 0000000000000000000000000000000000000000..436f3f1d3c68dc2cfc621e9262af40b48dd83d40 --- /dev/null +++ b/content/en/post/2022/Using-the-Python-Driver-psycopg2-of-openGauss.md @@ -0,0 +1,69 @@ ++++ + +title = "Using the Python Driver psycopg2 of openGauss" + +date = "2021-04-02" + +tags = [ "Using the Python Driver psycopg2 of openGauss"] + +archives = "2021-04" + +author = "Tianqing Wang" + +summary = "Using the Python Driver psycopg2 of openGauss" + +img = "/en/post/2022/title/img12.png" + +times = "12:30" + ++++ + +# Using the Python Driver psycopg2 of openGauss + +## Introduction to psycopg2 + +psycopg2 is a Python driver of PostgreSQL. It is the only Python driver specified and supported by PostgreSQL and is the most widely used and stable Python driver of PostgreSQL.
+ +## psycopg2 Issues in openGauss + +openGauss is evolved from PostgreSQL XC \(PGXC\) and performs security hardening on the native PostgreSQL communication protocols. Therefore, it is incompatible with the default PostgreSQL communication protocols, and by default, psycopg2, which uses the native PostgreSQL protocol, cannot connect to openGauss. An error similar to the following is reported: + +![](../figures/zh-cn_image_0000001252248517.jpg) + +## Solution 1: Modifying the GUC Parameter + +The involved GUC parameter is **password\_encryption\_type**. By default, PostgreSQL uses MD5 encryption, which is insecure. According to Huawei's security and trustworthiness requirements, openGauss uses SHA256 encryption by default. Therefore, the preceding error is reported. openGauss does not delete MD5 encryption and its verification logic. As such, MD5 encryption can be enabled by modifying the GUC parameter. + +Method: + +``` +gs_guc reload -D $PGDATA -c "password_encryption_type = 1" +``` + +You must create a user after setting the preceding parameter. Then, you can log in to the database as the user. + +## Solution 2: Replacing libpq + +MD5 encryption is risky. To use a more secure encryption algorithm, you must replace the original libpq of PostgreSQL. To replace libpq, perform the following steps: + +1. Run the **pip install psycopg2-binary** command to install the Python driver of PostgreSQL. +2. Switch to the installation directory of psycopg2, which is generally **/$PYTHONINSTALL/lib/pythonx.x/site-packages/psycopg2**. +3. Run **ldd** on the **./\_psycopg.cpython-37m-x86\_64-linux-gnu.so** file to list its dependencies. The file name is for reference only. +4. Copy libpq and related SO files in the **lib** directory of openGauss to replace the original files of PostgreSQL with the same names. + +## Solution 3: Recompiling psycopg2 + +In addition to manually replacing libpq, you can also use the psycopg2 source code to compile a package in the environment with openGauss installed. In this way, the compiled psycopg2 package contains the libpq and its dependency files of openGauss. **Note**: + +1. If PostgreSQL is installed in the environment, ensure that the path of the openGauss library file has a higher priority. Specifically, the path is placed in the front part of _LD\_LIBRARY\_PATH_. +2. The **libpq.so** file has many dependency files which contain some algorithm libraries. These files must be released together with the **libpq.so** file. You can run the **ldd** command to view the dependency file list. + +## Compilation Method + +1. Install openGauss in the environment and configure environment variables. +2. Download the psycopg2 source code and switch to the root directory of the source code. +3. Run the **python setup.py build** command. +4. In this case, an error is reported, indicating that the version does not match. Modify the version in the corresponding position in the **setup.py** file to suppress the error. You can also run the **sed -i "s/\(pgmajor, pgminor, pgpatch\)/\(9, 2, 4\)/g" setup.py** command to replace the version \(in about line 440 in the file\). +5. Perform step 3 again. +6. After the compilation is complete, the **build** subdirectory is generated in the root directory and it contains the compiled package.
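After any of the preceding solutions is applied, a short script can verify that the driver connects to openGauss. This is a minimal sketch; the address, database name, and credentials are placeholders:

```
import psycopg2

# Connect to openGauss (placeholder connection parameters).
conn = psycopg2.connect(host="127.0.0.1", port=5432,
                        dbname="mydb", user="testuser",
                        password="password_123")

# Run a trivial query to confirm that the connection works.
cur = conn.cursor()
cur.execute("SELECT version();")
print(cur.fetchone()[0])

cur.close()
conn.close()
```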
+ + diff --git a/content/en/post/2022/Ustore-Rebuilding-the-Soul-of-openGauss-Data-Storage.md b/content/en/post/2022/Ustore-Rebuilding-the-Soul-of-openGauss-Data-Storage.md new file mode 100644 index 0000000000000000000000000000000000000000..0f7cbe687ea2c927dbaa3b268b42b8db77b7dca3 --- /dev/null +++ b/content/en/post/2022/Ustore-Rebuilding-the-Soul-of-openGauss-Data-Storage.md @@ -0,0 +1,112 @@ ++++ + +title = "Ustore, Rebuilding the 'Soul' of openGauss Data Storage" + +date = "2021-10-11" + +tags = [ "Ustore, Rebuilding the 'Soul' of openGauss Data Storage"] + +archives = "2021-10" + +author = "Qiang Li" + +summary = "Ustore, Rebuilding the 'Soul' of openGauss Data Storage" + +img = "/en/post/2022/title/img10.png" + +times = "12:30" + ++++ + +# Ustore, Rebuilding the "Soul" of openGauss Data Storage + +On August 20, 2021, HUAWEI CLOUD GaussDB \(for openGauss\) officially launched a new kernel feature, Ustore, a storage engine that provides high-performance database services for enterprise-level users and further injects energy into enterprise digital transformation. The openGauss community will also release this feature soon to explore the cutting-edge theories and best practices of databases with many database kernel developers. + +The Ustore storage engine, also called the in-place update storage engine, is a new storage mode added to the openGauss Kernel. The row storage engine used by the earlier openGauss Kernel versions is in append update mode. The append update mode has good performance in addition, deletion, and HOT \(Heap Only Tuple\) update \(that is, update on the same page\) in the service. However, in a non-HOT UPDATE scenario across data pages, garbage collection is not efficient. Ustore can solve this problem. + +## **Ustore Design Principles** + +Ustore stores valid data of the latest version and junk data of historical versions separately. The valid data of the latest version is stored on the data page, and an independent UNDO space is created for managing the junk data of historical versions in a unified manner. Therefore, the data space does not expand due to frequent updates, and the junk data is recycled more efficiently. Ustore adopts the NUMA-aware UNDO subsystem design, which enables the UNDO subsystem to be effectively expanded on a multi-core platform. In addition, Ustore adopts the multi-version index technology to clear indexes and improve the efficiency of recycling and reusing storage space. + +Ustore works with the UNDO space to implement more efficient and comprehensive flashback query and recycle bin mechanisms, quickly rolls back misoperations, and provides rich enterprise-level functions for openGauss. + +![](../figures/ustore.png) + +Ustore data storage: The latest data is stored on the original page, and the old data is managed in the UNDO space. + +## **Core Advantages of Ustore** + +**High performance**: For services with different loads, such as insertion, update, and deletion, the performance and resource usage are relatively balanced. The in-place update mode is used for update operations. In frequent update scenarios, the performance is higher and more stable. It is suitable for typical OLTP service scenarios that require short transactions, frequent updates, and high performance. + +**Efficient storage**: In-place update is supported to the maximum extent, greatly saving space. The rollback segment and data pages are stored separately to ensure more efficient and stable I/O usage.
The UNDO subsystem uses the NUMA-aware design and features better multi-core scalability. The UNDO space is allocated and recycled in a unified manner, which improves reuse efficiency and ensures more efficient and stable storage space usage. + +**Fine-grained resource control**: Ustore provides multi-dimensional transaction monitoring. It can monitor transaction running based on the transaction running duration, size of UNDO space used by a single transaction, and overall UNDO space limitation to prevent abnormal and unexpected behaviors. This helps the database administrator to regulate and restrict the use of database system resources. + +Ustore provides stable performance in scenarios where data is frequently updated, enabling service systems to run more stably and adapt to more service scenarios and workloads, especially core financial service scenarios that have higher requirements on performance and stability. + +In the future, openGauss will use the AI autonomy technology to optimize Ustore to be more intelligent, secure, and efficient, providing customers with more advanced and high-quality database services. + +## **Ustore Usage Guide** + +- **Introduction** + + Ustore coexists with the original append update \(Astore\) storage engine. Ustore shields the implementation details of the storage layer. The SQL syntax is basically the same as that of the original Astore storage engine. The only difference lies in table creation and index creation. + +- **Table Creation Methods** + + Ustore contains UNDO logs. Before creating a table for Ustore, you need to set **undo\_zone\_count** in the **postgresql.conf** file. This parameter indicates the number of UNDO logs. The recommended value is **16384**, that is, **undo\_zone\_count=16384**. After the configuration is complete, restart the database. + + \[postgresql.conf configuration\] + + ``` + undo_zone_count=16384 + ``` + + Method 1: Specify the storage engine type when creating a table. + + ``` + create table test(id int, name varchar(10)) with (storage_type=ustore); + ``` + + Method 2: Specify Ustore by configuring GUC parameters. + + - Step 1: Before starting a database, set **enable\_default\_ustore\_table** to **on** in **postgresql.conf** to specify that Ustore is used when a user creates a table by default. + + \[postgresql.conf configuration\] + + ``` + enable_default_ustore_table=on + ``` + + - Step 2: Create a table. + + ``` + create table test(id int, name varchar(10)); + ``` + + +- **Index Creation Methods** + + UBtree is developed for Ustore and is the only index type supported by Ustore. + + Assume that the **test** table structure is as follows and a UBtree index is to be added to the **age** column of the **test** table. + + ![](../figures/zh-cn_image_0000001207529644.jpg) + + Method 1: If the index type is not specified, a UBtree index is created by default. + + ``` + create index ubt_idx on test(age); + ``` + + ![](../figures/zh-cn_image_0000001252089553.jpg) + + Method 2: When creating an index, use the **using** keyword to set the index type to **ubtree**. 
+ + ``` + create index ubt_idx on test using ubtree(age); + ``` + + ![](../figures/zh-cn_image_0000001207369652.jpg) + + diff --git a/content/en/post/2022/figures/10.png b/content/en/post/2022/figures/10.png new file mode 100644 index 0000000000000000000000000000000000000000..5d61e6965ae3f814132504cfbd8a91f5c7a44268 Binary files /dev/null and b/content/en/post/2022/figures/10.png differ diff --git a/content/en/post/2022/figures/101.png b/content/en/post/2022/figures/101.png new file mode 100644 index 0000000000000000000000000000000000000000..fe28fc91940a3fc0764fde8ea77379c31c25b394 Binary files /dev/null and b/content/en/post/2022/figures/101.png differ diff --git a/content/en/post/2022/figures/102.png b/content/en/post/2022/figures/102.png new file mode 100644 index 0000000000000000000000000000000000000000..64c03cb4c53f4ef0fbd4c40b62a75f342ea22e4a Binary files /dev/null and b/content/en/post/2022/figures/102.png differ diff --git a/content/en/post/2022/figures/110.png b/content/en/post/2022/figures/110.png new file mode 100644 index 0000000000000000000000000000000000000000..4d3611819a7940cc2e8ab40a9a0a2e9d00a0433e Binary files /dev/null and b/content/en/post/2022/figures/110.png differ diff --git a/content/en/post/2022/figures/111.png b/content/en/post/2022/figures/111.png new file mode 100644 index 0000000000000000000000000000000000000000..d33562fee70e4991b075fe3dcc0053043ab5474d Binary files /dev/null and b/content/en/post/2022/figures/111.png differ diff --git a/content/en/post/2022/figures/112.png b/content/en/post/2022/figures/112.png new file mode 100644 index 0000000000000000000000000000000000000000..33418030dc5d44a843b9d6a9e5e41b61ed33a125 Binary files /dev/null and b/content/en/post/2022/figures/112.png differ diff --git a/content/en/post/2022/figures/113.png b/content/en/post/2022/figures/113.png new file mode 100644 index 0000000000000000000000000000000000000000..79e337e8fe9fe608a65e54e98f5f661c42ba00f8 Binary files /dev/null and b/content/en/post/2022/figures/113.png differ diff --git a/content/en/post/2022/figures/114.png b/content/en/post/2022/figures/114.png new file mode 100644 index 0000000000000000000000000000000000000000..ac3d4c30543eba9577568c14a654c48b630d09f5 Binary files /dev/null and b/content/en/post/2022/figures/114.png differ diff --git a/content/en/post/2022/figures/115.png b/content/en/post/2022/figures/115.png new file mode 100644 index 0000000000000000000000000000000000000000..f2d836d73e0562d7ecab5209d5402424083a48d9 Binary files /dev/null and b/content/en/post/2022/figures/115.png differ diff --git a/content/en/post/2022/figures/171.png b/content/en/post/2022/figures/171.png new file mode 100644 index 0000000000000000000000000000000000000000..31a3af3994dc99bfa556b1e1489191261b784448 Binary files /dev/null and b/content/en/post/2022/figures/171.png differ diff --git a/content/en/post/2022/figures/1710.png b/content/en/post/2022/figures/1710.png new file mode 100644 index 0000000000000000000000000000000000000000..b10f33340de65b253f1efa031dba011bb13d200a Binary files /dev/null and b/content/en/post/2022/figures/1710.png differ diff --git a/content/en/post/2022/figures/1711.png b/content/en/post/2022/figures/1711.png new file mode 100644 index 0000000000000000000000000000000000000000..47a426f405ef1fa7b01fbe764f8221c664c77bd3 Binary files /dev/null and b/content/en/post/2022/figures/1711.png differ diff --git a/content/en/post/2022/figures/1712.png b/content/en/post/2022/figures/1712.png new file mode 100644 index 
0000000000000000000000000000000000000000..96df9deebfa7abf6856fa4639e8dcc68dd2f4d1d Binary files /dev/null and b/content/en/post/2022/figures/1712.png differ diff --git a/content/en/post/2022/figures/1713.png b/content/en/post/2022/figures/1713.png new file mode 100644 index 0000000000000000000000000000000000000000..45e51fb431dc28322a4cb52721c73060ef6a39b6 Binary files /dev/null and b/content/en/post/2022/figures/1713.png differ diff --git a/content/en/post/2022/figures/1714.png b/content/en/post/2022/figures/1714.png new file mode 100644 index 0000000000000000000000000000000000000000..c0a72b15bcd892ffbbd6a21f729a1346b1d3d560 Binary files /dev/null and b/content/en/post/2022/figures/1714.png differ diff --git a/content/en/post/2022/figures/1715.png b/content/en/post/2022/figures/1715.png new file mode 100644 index 0000000000000000000000000000000000000000..5e810bf3660a524d9363ca6e6b777aa1430b99a0 Binary files /dev/null and b/content/en/post/2022/figures/1715.png differ diff --git a/content/en/post/2022/figures/1716.png b/content/en/post/2022/figures/1716.png new file mode 100644 index 0000000000000000000000000000000000000000..c44b3c438fb8ecca1813f63d028d2692fe7235ef Binary files /dev/null and b/content/en/post/2022/figures/1716.png differ diff --git a/content/en/post/2022/figures/1717.png b/content/en/post/2022/figures/1717.png new file mode 100644 index 0000000000000000000000000000000000000000..cf4c3cff2ed6599688c1873808c0a099d2392b9b Binary files /dev/null and b/content/en/post/2022/figures/1717.png differ diff --git a/content/en/post/2022/figures/1718.png b/content/en/post/2022/figures/1718.png new file mode 100644 index 0000000000000000000000000000000000000000..80651de5479d1567cb6e4755a09bcd38f526c264 Binary files /dev/null and b/content/en/post/2022/figures/1718.png differ diff --git a/content/en/post/2022/figures/1719.png b/content/en/post/2022/figures/1719.png new file mode 100644 index 0000000000000000000000000000000000000000..eb8ad041ac91893b2a698357a734a33caba2a18d Binary files /dev/null and b/content/en/post/2022/figures/1719.png differ diff --git a/content/en/post/2022/figures/172.png b/content/en/post/2022/figures/172.png new file mode 100644 index 0000000000000000000000000000000000000000..a27ed4268cb996c699b3da07b4bf20c76c5b9456 Binary files /dev/null and b/content/en/post/2022/figures/172.png differ diff --git a/content/en/post/2022/figures/1720.png b/content/en/post/2022/figures/1720.png new file mode 100644 index 0000000000000000000000000000000000000000..d487a082859b13163a8b536697364eb419d42cb1 Binary files /dev/null and b/content/en/post/2022/figures/1720.png differ diff --git a/content/en/post/2022/figures/173.png b/content/en/post/2022/figures/173.png new file mode 100644 index 0000000000000000000000000000000000000000..754f5f16506a45d2a2650a2abc240a06f7cf93ab Binary files /dev/null and b/content/en/post/2022/figures/173.png differ diff --git a/content/en/post/2022/figures/174.png b/content/en/post/2022/figures/174.png new file mode 100644 index 0000000000000000000000000000000000000000..2cc90d29f631721724024168a7c9890748810aad Binary files /dev/null and b/content/en/post/2022/figures/174.png differ diff --git a/content/en/post/2022/figures/175.png b/content/en/post/2022/figures/175.png new file mode 100644 index 0000000000000000000000000000000000000000..e54391094200dd111a9e204552ec0fbd4efc8b32 Binary files /dev/null and b/content/en/post/2022/figures/175.png differ diff --git a/content/en/post/2022/figures/176.png b/content/en/post/2022/figures/176.png new file mode 100644 
index 0000000000000000000000000000000000000000..45d17662b1a4908f243877970023d8c762ccc5ba Binary files /dev/null and b/content/en/post/2022/figures/176.png differ diff --git a/content/en/post/2022/figures/177.png b/content/en/post/2022/figures/177.png new file mode 100644 index 0000000000000000000000000000000000000000..e1cb44d7ac45a6f54b7eb9576376e938f418f354 Binary files /dev/null and b/content/en/post/2022/figures/177.png differ diff --git a/content/en/post/2022/figures/178.png b/content/en/post/2022/figures/178.png new file mode 100644 index 0000000000000000000000000000000000000000..a9311ba251d1d3362e11567372a45636c6b75ea5 Binary files /dev/null and b/content/en/post/2022/figures/178.png differ diff --git a/content/en/post/2022/figures/179.png b/content/en/post/2022/figures/179.png new file mode 100644 index 0000000000000000000000000000000000000000..4392c3d13133f7c15ef18582ebe827cf850e02b6 Binary files /dev/null and b/content/en/post/2022/figures/179.png differ diff --git a/content/en/post/2022/figures/21.png b/content/en/post/2022/figures/21.png new file mode 100644 index 0000000000000000000000000000000000000000..7fef241d0b69cc8f0b570851fab31d5a33f786b3 Binary files /dev/null and b/content/en/post/2022/figures/21.png differ diff --git a/content/en/post/2022/figures/24.png b/content/en/post/2022/figures/24.png new file mode 100644 index 0000000000000000000000000000000000000000..d3a67599f7acbf50dc5d1cc1cbfab602ab7260a6 Binary files /dev/null and b/content/en/post/2022/figures/24.png differ diff --git a/content/en/post/2022/figures/241.png b/content/en/post/2022/figures/241.png new file mode 100644 index 0000000000000000000000000000000000000000..bc1835110116a2ebb927b25cb875328c1bf54963 Binary files /dev/null and b/content/en/post/2022/figures/241.png differ diff --git a/content/en/post/2022/figures/26-openGauss-Log-Consensus-Framework.png b/content/en/post/2022/figures/26-openGauss-Log-Consensus-Framework.png new file mode 100644 index 0000000000000000000000000000000000000000..b195b70364223a7f8ebb41616be6486cf9dbcfec Binary files /dev/null and b/content/en/post/2022/figures/26-openGauss-Log-Consensus-Framework.png differ diff --git a/content/en/post/2022/figures/28.png b/content/en/post/2022/figures/28.png new file mode 100644 index 0000000000000000000000000000000000000000..0434f76df184b1b47cf2b1fca208b230b9c30a86 Binary files /dev/null and b/content/en/post/2022/figures/28.png differ diff --git a/content/en/post/2022/figures/282.png b/content/en/post/2022/figures/282.png new file mode 100644 index 0000000000000000000000000000000000000000..3fa6fb4876e998a47bc0baf98b51b725c725f7df Binary files /dev/null and b/content/en/post/2022/figures/282.png differ diff --git a/content/en/post/2022/figures/283.png b/content/en/post/2022/figures/283.png new file mode 100644 index 0000000000000000000000000000000000000000..2f63883b3c90c5de5d72929d35775a992a197282 Binary files /dev/null and b/content/en/post/2022/figures/283.png differ diff --git a/content/en/post/2022/figures/284.png b/content/en/post/2022/figures/284.png new file mode 100644 index 0000000000000000000000000000000000000000..2607a6f3c3d7f244ca0b659c5bb25378cfb39759 Binary files /dev/null and b/content/en/post/2022/figures/284.png differ diff --git a/content/en/post/2022/figures/285.png b/content/en/post/2022/figures/285.png new file mode 100644 index 0000000000000000000000000000000000000000..242fa313e1de78abf1a8bbd2c7a1cea2ee147ac1 Binary files /dev/null and b/content/en/post/2022/figures/285.png differ diff --git 
a/content/en/post/2022/figures/3.png b/content/en/post/2022/figures/3.png new file mode 100644 index 0000000000000000000000000000000000000000..5cce1c14b8225575e12cafa23fb2a6fcba03db65 Binary files /dev/null and b/content/en/post/2022/figures/3.png differ diff --git a/content/en/post/2022/figures/31.png b/content/en/post/2022/figures/31.png new file mode 100644 index 0000000000000000000000000000000000000000..cb1df175f89046f16d3d7bd2831a985aaa7392e1 Binary files /dev/null and b/content/en/post/2022/figures/31.png differ diff --git a/content/en/post/2022/figures/311.png b/content/en/post/2022/figures/311.png new file mode 100644 index 0000000000000000000000000000000000000000..b261164ac2e457dad770e52f7badd2a6b5936cf5 Binary files /dev/null and b/content/en/post/2022/figures/311.png differ diff --git a/content/en/post/2022/figures/312.png b/content/en/post/2022/figures/312.png new file mode 100644 index 0000000000000000000000000000000000000000..bce02962735d39a5f4fc148e995e0b3caa0b421e Binary files /dev/null and b/content/en/post/2022/figures/312.png differ diff --git a/content/en/post/2022/figures/313.png b/content/en/post/2022/figures/313.png new file mode 100644 index 0000000000000000000000000000000000000000..16550186cea3f9e249d0c1931e0c5561210f2264 Binary files /dev/null and b/content/en/post/2022/figures/313.png differ diff --git a/content/en/post/2022/figures/314.png b/content/en/post/2022/figures/314.png new file mode 100644 index 0000000000000000000000000000000000000000..07c1fa36a50f494779c53f164b94f9df348b9869 Binary files /dev/null and b/content/en/post/2022/figures/314.png differ diff --git a/content/en/post/2022/figures/315.png b/content/en/post/2022/figures/315.png new file mode 100644 index 0000000000000000000000000000000000000000..2dbce91a21ceda6c429dab9f1cae74c3b2480833 Binary files /dev/null and b/content/en/post/2022/figures/315.png differ diff --git a/content/en/post/2022/figures/32.png b/content/en/post/2022/figures/32.png new file mode 100644 index 0000000000000000000000000000000000000000..c326161f098cb282f02ffb48c251341a87d45616 Binary files /dev/null and b/content/en/post/2022/figures/32.png differ diff --git a/content/en/post/2022/figures/320.png b/content/en/post/2022/figures/320.png new file mode 100644 index 0000000000000000000000000000000000000000..44949c637ec7bcc1b58b49e046cde0bbfa3e0c7f Binary files /dev/null and b/content/en/post/2022/figures/320.png differ diff --git a/content/en/post/2022/figures/41.png b/content/en/post/2022/figures/41.png new file mode 100644 index 0000000000000000000000000000000000000000..d8bed44c9bff6755f589f40604de04d65d29c61c Binary files /dev/null and b/content/en/post/2022/figures/41.png differ diff --git a/content/en/post/2022/figures/42.png b/content/en/post/2022/figures/42.png new file mode 100644 index 0000000000000000000000000000000000000000..2c7025ca1edbdbbc95d13a04718b99964bba336b Binary files /dev/null and b/content/en/post/2022/figures/42.png differ diff --git a/content/en/post/2022/figures/43.png b/content/en/post/2022/figures/43.png new file mode 100644 index 0000000000000000000000000000000000000000..298eafb7ce915df6b3fd3bddf1acc8f02fd24165 Binary files /dev/null and b/content/en/post/2022/figures/43.png differ diff --git a/content/en/post/2022/figures/44.png b/content/en/post/2022/figures/44.png new file mode 100644 index 0000000000000000000000000000000000000000..f68edfb6099af7289e848ed3bcd08491a92a524f Binary files /dev/null and b/content/en/post/2022/figures/44.png differ diff --git a/content/en/post/2022/figures/45.png 
b/content/en/post/2022/figures/45.png new file mode 100644 index 0000000000000000000000000000000000000000..3de93e51ee74eedb290bcbd967da09e7cd9ef5f1 Binary files /dev/null and b/content/en/post/2022/figures/45.png differ diff --git a/content/en/post/2022/figures/46.png b/content/en/post/2022/figures/46.png new file mode 100644 index 0000000000000000000000000000000000000000..2cebd77accb5d41fcb9353fa3e2e55ed2454050b Binary files /dev/null and b/content/en/post/2022/figures/46.png differ diff --git a/content/en/post/2022/figures/47.png b/content/en/post/2022/figures/47.png new file mode 100644 index 0000000000000000000000000000000000000000..b954fcd5072264f1a32ea5a47b95a79e6f6d9a3c Binary files /dev/null and b/content/en/post/2022/figures/47.png differ diff --git a/content/en/post/2022/figures/48.png b/content/en/post/2022/figures/48.png new file mode 100644 index 0000000000000000000000000000000000000000..a2370f1c980dbea7b96e3325312786ae48e7131f Binary files /dev/null and b/content/en/post/2022/figures/48.png differ diff --git a/content/en/post/2022/figures/49.png b/content/en/post/2022/figures/49.png new file mode 100644 index 0000000000000000000000000000000000000000..8c3e7fc51156cd890df9ebdbdb1329cc9a85bb39 Binary files /dev/null and b/content/en/post/2022/figures/49.png differ diff --git a/content/en/post/2022/figures/61.png b/content/en/post/2022/figures/61.png new file mode 100644 index 0000000000000000000000000000000000000000..95faa3f21e351f477e7d95b2239e73a4f4491385 Binary files /dev/null and b/content/en/post/2022/figures/61.png differ diff --git a/content/en/post/2022/figures/62-0.png b/content/en/post/2022/figures/62-0.png new file mode 100644 index 0000000000000000000000000000000000000000..3beaa6cba81da70f283788ee53b940019f549f78 Binary files /dev/null and b/content/en/post/2022/figures/62-0.png differ diff --git a/content/en/post/2022/figures/62.png b/content/en/post/2022/figures/62.png new file mode 100644 index 0000000000000000000000000000000000000000..3beaa6cba81da70f283788ee53b940019f549f78 Binary files /dev/null and b/content/en/post/2022/figures/62.png differ diff --git a/content/en/post/2022/figures/7.png b/content/en/post/2022/figures/7.png new file mode 100644 index 0000000000000000000000000000000000000000..3476e4fa7c5da609f4f28088233391042198b259 Binary files /dev/null and b/content/en/post/2022/figures/7.png differ diff --git a/content/en/post/2022/figures/file0001.jpg b/content/en/post/2022/figures/file0001.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0589583cba52349d44bd06d4e0aed3e8cb1616c3 Binary files /dev/null and b/content/en/post/2022/figures/file0001.jpg differ diff --git a/content/en/post/2022/figures/ustore.png b/content/en/post/2022/figures/ustore.png new file mode 100644 index 0000000000000000000000000000000000000000..b74bf5df6e8da4550d034f11ee2098239bd9c2e9 Binary files /dev/null and b/content/en/post/2022/figures/ustore.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206146876.jpg b/content/en/post/2022/figures/zh-cn_image_0000001206146876.jpg new file mode 100644 index 0000000000000000000000000000000000000000..228837de1190f373617c82088cf80a63928b89fd Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206146876.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206167254.jpg b/content/en/post/2022/figures/zh-cn_image_0000001206167254.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a49980daf88fbb5de8a7e155827e723ec0104c26 Binary files 
/dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206167254.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206317236.gif b/content/en/post/2022/figures/zh-cn_image_0000001206317236.gif new file mode 100644 index 0000000000000000000000000000000000000000..75e84ef5295b3194ad40505c3f5de3d63d279cae Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206317236.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206327254.gif b/content/en/post/2022/figures/zh-cn_image_0000001206327254.gif new file mode 100644 index 0000000000000000000000000000000000000000..84a4ab2c68a128da887432bb6d5d8beaaa79997e Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206327254.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206327256.gif b/content/en/post/2022/figures/zh-cn_image_0000001206327256.gif new file mode 100644 index 0000000000000000000000000000000000000000..6068b15a50a1f3cc0574ab51d66b4a7a6b379f45 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206327256.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206327258.jpg b/content/en/post/2022/figures/zh-cn_image_0000001206327258.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9f6fcb791326728455ea10edffca6e3c63acfcc9 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206327258.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206327260.gif b/content/en/post/2022/figures/zh-cn_image_0000001206327260.gif new file mode 100644 index 0000000000000000000000000000000000000000..ae0c3f1f2615bd71a1e12fb24ee70bdd738b8bd0 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206327260.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206487222.jpg b/content/en/post/2022/figures/zh-cn_image_0000001206487222.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d3d17dee6991e253cfb0c5e2ccd30a7d8df86b89 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206487222.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206487224.gif b/content/en/post/2022/figures/zh-cn_image_0000001206487224.gif new file mode 100644 index 0000000000000000000000000000000000000000..10a6848a70d9a5fc11c20f9844a0d8e67b3bc4e6 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206487224.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206626828.jpg b/content/en/post/2022/figures/zh-cn_image_0000001206626828.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c86391c33e778d1167af74bf0d8018970e4e720b Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206626828.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206647202.gif b/content/en/post/2022/figures/zh-cn_image_0000001206647202.gif new file mode 100644 index 0000000000000000000000000000000000000000..dd5a8bceae4d81ffcafc2c0caf79c490f4ef7fad Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206647202.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206647206.gif b/content/en/post/2022/figures/zh-cn_image_0000001206647206.gif new file mode 100644 index 0000000000000000000000000000000000000000..1a6dad0491198086a1b93e478c6d235407541523 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206647206.gif differ 
diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206647208.gif b/content/en/post/2022/figures/zh-cn_image_0000001206647208.gif new file mode 100644 index 0000000000000000000000000000000000000000..aa083f31589390271ac3b2017cf7c519ddb4f8bb Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206647208.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206760224.jpg b/content/en/post/2022/figures/zh-cn_image_0000001206760224.jpg new file mode 100644 index 0000000000000000000000000000000000000000..654116daba11cecd31fb45cf56eaf00f0f61c13e Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206760224.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206760226.jpg b/content/en/post/2022/figures/zh-cn_image_0000001206760226.jpg new file mode 100644 index 0000000000000000000000000000000000000000..eca4505048123faef80bb6bc5a113e1ab50c4e41 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206760226.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206760228.jpg b/content/en/post/2022/figures/zh-cn_image_0000001206760228.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d63b73cde195dbbda83bfdc3681946a1cf33bdcb Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206760228.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206760230.gif b/content/en/post/2022/figures/zh-cn_image_0000001206760230.gif new file mode 100644 index 0000000000000000000000000000000000000000..b5540a9d276cf7fb29a04aa0510c55d984842dda Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206760230.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206760240.gif b/content/en/post/2022/figures/zh-cn_image_0000001206760240.gif new file mode 100644 index 0000000000000000000000000000000000000000..17a8b25ba0b9f4ba06d42e9da58911f7e10a0983 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206760240.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206801884.png b/content/en/post/2022/figures/zh-cn_image_0000001206801884.png new file mode 100644 index 0000000000000000000000000000000000000000..da0837a0e01c172b4c7a867c7ef078757d5f4dec Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206801884.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206801888.png b/content/en/post/2022/figures/zh-cn_image_0000001206801888.png new file mode 100644 index 0000000000000000000000000000000000000000..da0837a0e01c172b4c7a867c7ef078757d5f4dec Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206801888.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206801890.png b/content/en/post/2022/figures/zh-cn_image_0000001206801890.png new file mode 100644 index 0000000000000000000000000000000000000000..794bd1c5236a2d365b76a163db009937b64a8fb5 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206801890.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206807380.png b/content/en/post/2022/figures/zh-cn_image_0000001206807380.png new file mode 100644 index 0000000000000000000000000000000000000000..27c6e12bb0c4384b3682fc5c706b1eef20b0ca72 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206807380.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206809156.jpg 
b/content/en/post/2022/figures/zh-cn_image_0000001206809156.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a77c8c3ac6df6226a7e5f87934dbc688f74faa84 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206809156.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206809160.jpg b/content/en/post/2022/figures/zh-cn_image_0000001206809160.jpg new file mode 100644 index 0000000000000000000000000000000000000000..700877443d4509d152563c4e490675e791726268 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206809160.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206812360.png b/content/en/post/2022/figures/zh-cn_image_0000001206812360.png new file mode 100644 index 0000000000000000000000000000000000000000..0c09237f0dd8bc16bf58815eb9aff422fe31566b Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206812360.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206920214.jpg b/content/en/post/2022/figures/zh-cn_image_0000001206920214.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ca04c936111fd6eaa9802807f2a1366d4f58d518 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206920214.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206920220.gif b/content/en/post/2022/figures/zh-cn_image_0000001206920220.gif new file mode 100644 index 0000000000000000000000000000000000000000..6885eb9a1fc1d52c561790da736f4b9910bfc487 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206920220.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206920224.gif b/content/en/post/2022/figures/zh-cn_image_0000001206920224.gif new file mode 100644 index 0000000000000000000000000000000000000000..547b6a61c94d46c0e71829c8630a19c3488c909a Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206920224.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206961046.jpg b/content/en/post/2022/figures/zh-cn_image_0000001206961046.jpg new file mode 100644 index 0000000000000000000000000000000000000000..30aaded16a123b984b990d35b4a592da88e241d6 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206961046.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206961884.png b/content/en/post/2022/figures/zh-cn_image_0000001206961884.png new file mode 100644 index 0000000000000000000000000000000000000000..da0837a0e01c172b4c7a867c7ef078757d5f4dec Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206961884.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206967370.png b/content/en/post/2022/figures/zh-cn_image_0000001206967370.png new file mode 100644 index 0000000000000000000000000000000000000000..b7ecd3e4db8c7078cf49c979145db58c371d997d Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206967370.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206968992.jpg b/content/en/post/2022/figures/zh-cn_image_0000001206968992.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b5fc6157ce28ed455a19ef657f60b5355cd62de5 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206968992.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206972348.png b/content/en/post/2022/figures/zh-cn_image_0000001206972348.png new file mode 100644 
index 0000000000000000000000000000000000000000..a7cef3daed489a6cdd6c92096dbc741966668924 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206972348.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001206972352.png b/content/en/post/2022/figures/zh-cn_image_0000001206972352.png new file mode 100644 index 0000000000000000000000000000000000000000..8c9328586a0b976dee933ef8226d74382fc1c7b0 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001206972352.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207080190.gif b/content/en/post/2022/figures/zh-cn_image_0000001207080190.gif new file mode 100644 index 0000000000000000000000000000000000000000..a79f840b314aa57ee5972549172457ee65545cf2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207080190.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207089084.gif b/content/en/post/2022/figures/zh-cn_image_0000001207089084.gif new file mode 100644 index 0000000000000000000000000000000000000000..09e9fcca18283c4fcf0ea0218b9e60f10c4131c6 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207089084.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207121020.jpg b/content/en/post/2022/figures/zh-cn_image_0000001207121020.jpg new file mode 100644 index 0000000000000000000000000000000000000000..15a89fa9fcc8879918d9698cbdf93204cf34fdba Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207121020.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207121854.png b/content/en/post/2022/figures/zh-cn_image_0000001207121854.png new file mode 100644 index 0000000000000000000000000000000000000000..8c66ceccf7240eb405c23afd58919d1599d8cf68 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207121854.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207121858.png b/content/en/post/2022/figures/zh-cn_image_0000001207121858.png new file mode 100644 index 0000000000000000000000000000000000000000..8c66ceccf7240eb405c23afd58919d1599d8cf68 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207121858.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207131798.jpg b/content/en/post/2022/figures/zh-cn_image_0000001207131798.jpg new file mode 100644 index 0000000000000000000000000000000000000000..94cb8630977c991a55a2eea8f41917a8fdbb4fb1 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207131798.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207132328.png b/content/en/post/2022/figures/zh-cn_image_0000001207132328.png new file mode 100644 index 0000000000000000000000000000000000000000..67fe77a94e77f60888a616acdf35756a2154454d Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207132328.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207138590.jpg b/content/en/post/2022/figures/zh-cn_image_0000001207138590.jpg new file mode 100644 index 0000000000000000000000000000000000000000..dc924b238f05ed6c9a29dde627d99d6f2abbbd8e Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207138590.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207240170.gif b/content/en/post/2022/figures/zh-cn_image_0000001207240170.gif new file mode 100644 index 
0000000000000000000000000000000000000000..f4f5fcac4770318fdc202729c05c29b08f93c2fb Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207240170.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207249058.gif b/content/en/post/2022/figures/zh-cn_image_0000001207249058.gif new file mode 100644 index 0000000000000000000000000000000000000000..1480cb5987cda99a0f9f83a6c12e46a0f0961084 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207249058.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207280996.jpg b/content/en/post/2022/figures/zh-cn_image_0000001207280996.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3c2b120df77d6834c9c2a8496d29b5caa916eb8c Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207280996.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207280998.jpg b/content/en/post/2022/figures/zh-cn_image_0000001207280998.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d0ee4bb373bf011373a10fab492eda1ad7b463d7 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207280998.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207289100.jpg b/content/en/post/2022/figures/zh-cn_image_0000001207289100.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f407f575746cda2f62075eefb9d0316e29635d96 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207289100.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207291774.jpg b/content/en/post/2022/figures/zh-cn_image_0000001207291774.jpg new file mode 100644 index 0000000000000000000000000000000000000000..36cdd3ce6397e97c87a437e1ec45264eabdd184f Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207291774.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207302704.gif b/content/en/post/2022/figures/zh-cn_image_0000001207302704.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207302704.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207302710.gif b/content/en/post/2022/figures/zh-cn_image_0000001207302710.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207302710.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207369652.jpg b/content/en/post/2022/figures/zh-cn_image_0000001207369652.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6b4dbb2590c6667efb62a1132fd166a2b89e4476 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207369652.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207462684.gif b/content/en/post/2022/figures/zh-cn_image_0000001207462684.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207462684.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207462688.gif b/content/en/post/2022/figures/zh-cn_image_0000001207462688.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 
Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207462688.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207462698.gif b/content/en/post/2022/figures/zh-cn_image_0000001207462698.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207462698.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207462700.gif b/content/en/post/2022/figures/zh-cn_image_0000001207462700.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207462700.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207516746.png b/content/en/post/2022/figures/zh-cn_image_0000001207516746.png new file mode 100644 index 0000000000000000000000000000000000000000..9bb3c60728e778a9c2bd906d42d6751f8705eb6d Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207516746.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207529644.jpg b/content/en/post/2022/figures/zh-cn_image_0000001207529644.jpg new file mode 100644 index 0000000000000000000000000000000000000000..27c5e483e2dfd78b2870fd1de41e202eb2bbf6d5 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207529644.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207539820.jpg b/content/en/post/2022/figures/zh-cn_image_0000001207539820.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f2331927a09915200151614b250d0aa4f30b2977 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207539820.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207622650.gif b/content/en/post/2022/figures/zh-cn_image_0000001207622650.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207622650.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207622660.gif b/content/en/post/2022/figures/zh-cn_image_0000001207622660.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207622660.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207677032.jpg b/content/en/post/2022/figures/zh-cn_image_0000001207677032.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7d310d14aedf33b41d6c6191a886a6e980238cf8 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207677032.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207699778.jpg b/content/en/post/2022/figures/zh-cn_image_0000001207699778.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f3b7fd5149932cc3db01d1968d33b88364cf3b71 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207699778.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207699780.jpg b/content/en/post/2022/figures/zh-cn_image_0000001207699780.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6f82934da20727218ccca9fa29ce456a3735de49 Binary files /dev/null and 
b/content/en/post/2022/figures/zh-cn_image_0000001207699780.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207772870.png b/content/en/post/2022/figures/zh-cn_image_0000001207772870.png new file mode 100644 index 0000000000000000000000000000000000000000..d91afda7c16497164886e3a8c8255009482b5fab Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207772870.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207782632.gif b/content/en/post/2022/figures/zh-cn_image_0000001207782632.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207782632.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207782634.gif b/content/en/post/2022/figures/zh-cn_image_0000001207782634.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207782634.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207782636.gif b/content/en/post/2022/figures/zh-cn_image_0000001207782636.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207782636.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207782640.gif b/content/en/post/2022/figures/zh-cn_image_0000001207782640.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207782640.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207782650.gif b/content/en/post/2022/figures/zh-cn_image_0000001207782650.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207782650.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207863420.png b/content/en/post/2022/figures/zh-cn_image_0000001207863420.png new file mode 100644 index 0000000000000000000000000000000000000000..c6bcab86885267de1ba5c1842191dfda57d64f5c Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207863420.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001207963344.png b/content/en/post/2022/figures/zh-cn_image_0000001207963344.png new file mode 100644 index 0000000000000000000000000000000000000000..168fe7a5ed2faf1cac1e912a310eac8253c03250 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001207963344.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001208124506.png b/content/en/post/2022/figures/zh-cn_image_0000001208124506.png new file mode 100644 index 0000000000000000000000000000000000000000..8374d59a434b465d6c14a71ff88fd85dbd43c859 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001208124506.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001208315958.gif b/content/en/post/2022/figures/zh-cn_image_0000001208315958.gif new file mode 100644 index 0000000000000000000000000000000000000000..d5f34e46bc5949d7b185c1f8837f4d3251467161 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001208315958.gif differ diff --git 
a/content/en/post/2022/figures/zh-cn_image_0000001208473690.png b/content/en/post/2022/figures/zh-cn_image_0000001208473690.png new file mode 100644 index 0000000000000000000000000000000000000000..19d132b17f4f0ebbecb007ab43fe828785780881 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001208473690.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001208491336.png b/content/en/post/2022/figures/zh-cn_image_0000001208491336.png new file mode 100644 index 0000000000000000000000000000000000000000..c1a5116fddd120281ef5bd081d6cf75850e7b2c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001208491336.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001211903080.jpg b/content/en/post/2022/figures/zh-cn_image_0000001211903080.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9cdb72b536cff313b1fb1f8aaab0504378605770 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001211903080.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001211903084.gif b/content/en/post/2022/figures/zh-cn_image_0000001211903084.gif new file mode 100644 index 0000000000000000000000000000000000000000..884cb96cf8aaa5087e8aa1b5b7251933566b5832 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001211903084.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001212062138.gif b/content/en/post/2022/figures/zh-cn_image_0000001212062138.gif new file mode 100644 index 0000000000000000000000000000000000000000..1a6dad0491198086a1b93e478c6d235407541523 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001212062138.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001212063076.gif b/content/en/post/2022/figures/zh-cn_image_0000001212063076.gif new file mode 100644 index 0000000000000000000000000000000000000000..3f547552be03f11e95859e714b6c2b192c50ec63 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001212063076.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001212089804.png b/content/en/post/2022/figures/zh-cn_image_0000001212089804.png new file mode 100644 index 0000000000000000000000000000000000000000..c2d9e09460039e616a587d46edccea8974961635 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001212089804.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001212222114.gif b/content/en/post/2022/figures/zh-cn_image_0000001212222114.gif new file mode 100644 index 0000000000000000000000000000000000000000..84a4ab2c68a128da887432bb6d5d8beaaa79997e Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001212222114.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001212223058.gif b/content/en/post/2022/figures/zh-cn_image_0000001212223058.gif new file mode 100644 index 0000000000000000000000000000000000000000..c4c2808701d1b65c787f0750231c77a5f97370f9 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001212223058.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001212382082.gif b/content/en/post/2022/figures/zh-cn_image_0000001212382082.gif new file mode 100644 index 0000000000000000000000000000000000000000..3cf82a6400b989b25079d51716b8cb359cacddca Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001212382082.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001234914846.png 
b/content/en/post/2022/figures/zh-cn_image_0000001234914846.png new file mode 100644 index 0000000000000000000000000000000000000000..f4ab2dc650b33b2ea0cef158021090f049578191 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001234914846.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001235074794.png b/content/en/post/2022/figures/zh-cn_image_0000001235074794.png new file mode 100644 index 0000000000000000000000000000000000000000..e793845012589b55c232bd25e311894c27419b70 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001235074794.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001235076358.jpg b/content/en/post/2022/figures/zh-cn_image_0000001235076358.jpg new file mode 100644 index 0000000000000000000000000000000000000000..98e7ea5e683f31be75b5a69ddf5bc94761b07a4c Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001235076358.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251117213.gif b/content/en/post/2022/figures/zh-cn_image_0000001251117213.gif new file mode 100644 index 0000000000000000000000000000000000000000..7c62269304f4445a76ca89694e7dfa26674f2a4e Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251117213.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251127227.gif b/content/en/post/2022/figures/zh-cn_image_0000001251127227.gif new file mode 100644 index 0000000000000000000000000000000000000000..c04be5cb6f6d2e0a974aa0f9a9133f6435f93efb Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251127227.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251127229.gif b/content/en/post/2022/figures/zh-cn_image_0000001251127229.gif new file mode 100644 index 0000000000000000000000000000000000000000..f53c01bb97471dcd52f9d4571d9b4159c1739d55 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251127229.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251127237.gif b/content/en/post/2022/figures/zh-cn_image_0000001251127237.gif new file mode 100644 index 0000000000000000000000000000000000000000..3f547552be03f11e95859e714b6c2b192c50ec63 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251127237.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251237167.gif b/content/en/post/2022/figures/zh-cn_image_0000001251237167.gif new file mode 100644 index 0000000000000000000000000000000000000000..8c50b89bbfbe0aa69fcd90c2b2169e6c5241770f Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251237167.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251247175.jpg b/content/en/post/2022/figures/zh-cn_image_0000001251247175.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9cdb72b536cff313b1fb1f8aaab0504378605770 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251247175.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251327177.jpg b/content/en/post/2022/figures/zh-cn_image_0000001251327177.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fb6c0db5a8dbb4c25d3f520aadb509f2c242308e Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251327177.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251327179.gif b/content/en/post/2022/figures/zh-cn_image_0000001251327179.gif new file mode 100644 
index 0000000000000000000000000000000000000000..3cf82a6400b989b25079d51716b8cb359cacddca Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251327179.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251447219.gif b/content/en/post/2022/figures/zh-cn_image_0000001251447219.gif new file mode 100644 index 0000000000000000000000000000000000000000..92332dbc496b8f77bc8e4241a6075f47ef0ab113 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251447219.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251458611.jpg b/content/en/post/2022/figures/zh-cn_image_0000001251458611.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1d62974f01edb70935412c5a4a16742240fa6fd2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251458611.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251538613.gif b/content/en/post/2022/figures/zh-cn_image_0000001251538613.gif new file mode 100644 index 0000000000000000000000000000000000000000..54e8ac2186e10d828b6a2e28aff9fa7571c388a4 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251538613.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251538617.jpg b/content/en/post/2022/figures/zh-cn_image_0000001251538617.jpg new file mode 100644 index 0000000000000000000000000000000000000000..97179c0a1aa8a711a45fd653bcf9175163e5be9b Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251538617.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251640179.jpg b/content/en/post/2022/figures/zh-cn_image_0000001251640179.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2c3bde43e77f33ea9c4f6510c1cdf0200af92a64 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251640179.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251640181.jpg b/content/en/post/2022/figures/zh-cn_image_0000001251640181.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ef8f45700bbc2fe67006be4cfc2d8a9e8007639a Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251640181.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251754525.gif b/content/en/post/2022/figures/zh-cn_image_0000001251754525.gif new file mode 100644 index 0000000000000000000000000000000000000000..a279e25c47f88ae57424695ffeddffecd8458f29 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251754525.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251760151.jpg b/content/en/post/2022/figures/zh-cn_image_0000001251760151.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1ff4ebfd5c9b880aca09f24608ff0fd4ebd88729 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251760151.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251760153.jpg b/content/en/post/2022/figures/zh-cn_image_0000001251760153.jpg new file mode 100644 index 0000000000000000000000000000000000000000..56b7a448855acb1f4adb50bec4f6c118707a1a49 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251760153.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251800179.gif b/content/en/post/2022/figures/zh-cn_image_0000001251800179.gif new file mode 100644 index 
0000000000000000000000000000000000000000..a74e9002808e88e7bddd126586f61ac054c110cc Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251800179.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251800185.jpg b/content/en/post/2022/figures/zh-cn_image_0000001251800185.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5f58e43aa09bf7387a67c344dfe7496e87a5bd96 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251800185.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251800187.jpg b/content/en/post/2022/figures/zh-cn_image_0000001251800187.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3fde9e339e1aabcbe4304c411bb7edbc4bba9b72 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251800187.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251839693.png b/content/en/post/2022/figures/zh-cn_image_0000001251839693.png new file mode 100644 index 0000000000000000000000000000000000000000..5d39aa1e622cf0fbd4acee0b4e35fd01e08c03d8 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251839693.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251841009.jpg b/content/en/post/2022/figures/zh-cn_image_0000001251841009.jpg new file mode 100644 index 0000000000000000000000000000000000000000..118e670379797b46fd17863507ffcb89b1e9829c Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251841009.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251841849.png b/content/en/post/2022/figures/zh-cn_image_0000001251841849.png new file mode 100644 index 0000000000000000000000000000000000000000..da0837a0e01c172b4c7a867c7ef078757d5f4dec Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251841849.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251847329.png b/content/en/post/2022/figures/zh-cn_image_0000001251847329.png new file mode 100644 index 0000000000000000000000000000000000000000..87e57f270b9bd9442d10d4ac62b4c75554119cba Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251847329.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251848955.jpg b/content/en/post/2022/figures/zh-cn_image_0000001251848955.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e52cfb46a93a085d139243e7a4d81990d785775c Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251848955.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251848959.jpg b/content/en/post/2022/figures/zh-cn_image_0000001251848959.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8f425f2126e2e3478d12b6b68e5cfdcf9d164c5e Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251848959.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251852313.png b/content/en/post/2022/figures/zh-cn_image_0000001251852313.png new file mode 100644 index 0000000000000000000000000000000000000000..2ff382c8db1c58558d7cc60201865a05ad5f53b8 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251852313.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251862655.gif b/content/en/post/2022/figures/zh-cn_image_0000001251862655.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 
Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251862655.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251862661.gif b/content/en/post/2022/figures/zh-cn_image_0000001251862661.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251862661.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251862669.gif b/content/en/post/2022/figures/zh-cn_image_0000001251862669.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251862669.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251894929.jpg b/content/en/post/2022/figures/zh-cn_image_0000001251894929.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c9b35c13e54358bc85b10783f9163cc001179641 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251894929.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251917015.jpg b/content/en/post/2022/figures/zh-cn_image_0000001251917015.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b88aa4c452e7ee95ed84cb0228af984a0030c0a8 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251917015.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251920351.png b/content/en/post/2022/figures/zh-cn_image_0000001251920351.png new file mode 100644 index 0000000000000000000000000000000000000000..6d2460cc06ae9f72080e754800680feb5c9899ad Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251920351.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251954519.gif b/content/en/post/2022/figures/zh-cn_image_0000001251954519.gif new file mode 100644 index 0000000000000000000000000000000000000000..409fca5e88397484714f9b9892ba1776f166220f Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251954519.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251960129.jpg b/content/en/post/2022/figures/zh-cn_image_0000001251960129.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4f550712f0a788e3bc9e133cf9fe82946402aab4 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251960129.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251960131.gif b/content/en/post/2022/figures/zh-cn_image_0000001251960131.gif new file mode 100644 index 0000000000000000000000000000000000000000..eb0ab38988bbb9750e7d3f79ec73c5ce456e1bde Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251960131.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251960133.gif b/content/en/post/2022/figures/zh-cn_image_0000001251960133.gif new file mode 100644 index 0000000000000000000000000000000000000000..05d560db8c8e933b1ef0a6b97e5f494f3e5882b0 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251960133.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251960135.gif b/content/en/post/2022/figures/zh-cn_image_0000001251960135.gif new file mode 100644 index 0000000000000000000000000000000000000000..dfc418872a748fb17d0935caa5492e94d7afb341 Binary files /dev/null and 
b/content/en/post/2022/figures/zh-cn_image_0000001251960135.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001251969031.jpg b/content/en/post/2022/figures/zh-cn_image_0000001251969031.jpg new file mode 100644 index 0000000000000000000000000000000000000000..bdc1176d998aff1dad60e108312951a96e522feb Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001251969031.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252008911.jpg b/content/en/post/2022/figures/zh-cn_image_0000001252008911.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fd39b5b03365ff8aef55fb1e9b3f5cd11dcc8679 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252008911.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252009063.jpg b/content/en/post/2022/figures/zh-cn_image_0000001252009063.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c5f70d464d6f586efaa14aec019d04fc906b827e Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252009063.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252011743.jpg b/content/en/post/2022/figures/zh-cn_image_0000001252011743.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3f287b0c0453b163744545ae173e3bda1ffc2e50 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252011743.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252022603.gif b/content/en/post/2022/figures/zh-cn_image_0000001252022603.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252022603.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252022613.gif b/content/en/post/2022/figures/zh-cn_image_0000001252022613.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252022613.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252065761.gif b/content/en/post/2022/figures/zh-cn_image_0000001252065761.gif new file mode 100644 index 0000000000000000000000000000000000000000..16e890b20e4844d82ac516c3636100279a338f45 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252065761.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252089553.jpg b/content/en/post/2022/figures/zh-cn_image_0000001252089553.jpg new file mode 100644 index 0000000000000000000000000000000000000000..00bf030e77b27468c7716503a3dc6e226b46bcc0 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252089553.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252121009.png b/content/en/post/2022/figures/zh-cn_image_0000001252121009.png new file mode 100644 index 0000000000000000000000000000000000000000..fa0b36a2b947332196dccc17457f9b88d9dde7fb Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252121009.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252127325.png b/content/en/post/2022/figures/zh-cn_image_0000001252127325.png new file mode 100644 index 0000000000000000000000000000000000000000..5d6ef1abffc202469448466d450404cbab350105 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252127325.png differ diff --git 
a/content/en/post/2022/figures/zh-cn_image_0000001252128947.jpg b/content/en/post/2022/figures/zh-cn_image_0000001252128947.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0926c62ee39836d310974a2fcd320bcc98c9d242 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252128947.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252129111.jpg b/content/en/post/2022/figures/zh-cn_image_0000001252129111.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5377e9910e91ab654dad433d24a144dc8c545b7a Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252129111.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252131781.jpg b/content/en/post/2022/figures/zh-cn_image_0000001252131781.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c18dee87cc5d4bfc66442620ae5ef365c380f748 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252131781.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252131783.jpg b/content/en/post/2022/figures/zh-cn_image_0000001252131783.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e451ddcc59c67f213fbadb0873b9658ae40dec50 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252131783.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252142637.gif b/content/en/post/2022/figures/zh-cn_image_0000001252142637.gif new file mode 100644 index 0000000000000000000000000000000000000000..70646e3aebed5ba53deea2b4bc3ffdf91805ed64 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252142637.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252142639.gif b/content/en/post/2022/figures/zh-cn_image_0000001252142639.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252142639.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252142643.gif b/content/en/post/2022/figures/zh-cn_image_0000001252142643.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252142643.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252142645.gif b/content/en/post/2022/figures/zh-cn_image_0000001252142645.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252142645.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252142649.gif b/content/en/post/2022/figures/zh-cn_image_0000001252142649.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252142649.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252142659.gif b/content/en/post/2022/figures/zh-cn_image_0000001252142659.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252142659.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252142663.gif 
b/content/en/post/2022/figures/zh-cn_image_0000001252142663.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252142663.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252197021.jpg b/content/en/post/2022/figures/zh-cn_image_0000001252197021.jpg new file mode 100644 index 0000000000000000000000000000000000000000..bab2e9ebcbd4e0149c9c29dd89b5a47e3c278f08 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252197021.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252248517.jpg b/content/en/post/2022/figures/zh-cn_image_0000001252248517.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ec0921eb10ba4b47ce5bcbed2f37671684b91220 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252248517.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252248915.jpg b/content/en/post/2022/figures/zh-cn_image_0000001252248915.jpg new file mode 100644 index 0000000000000000000000000000000000000000..30d56f5fd034312c2a6de097ea0da02872a62cde Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252248915.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252249073.jpg b/content/en/post/2022/figures/zh-cn_image_0000001252249073.jpg new file mode 100644 index 0000000000000000000000000000000000000000..32267eb3adccc4e8da8ba7ef5a6fee4dc73491fe Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252249073.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252252279.png b/content/en/post/2022/figures/zh-cn_image_0000001252252279.png new file mode 100644 index 0000000000000000000000000000000000000000..ee03260c1114017c098616ad6919496bf4c36eb0 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252252279.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252252281.png b/content/en/post/2022/figures/zh-cn_image_0000001252252281.png new file mode 100644 index 0000000000000000000000000000000000000000..5551f4b9e9fd8b94d27a67f072eca449d4e84757 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252252281.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252262611.gif b/content/en/post/2022/figures/zh-cn_image_0000001252262611.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252262611.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252262625.gif b/content/en/post/2022/figures/zh-cn_image_0000001252262625.gif new file mode 100644 index 0000000000000000000000000000000000000000..f1f4ef0c05460c220b09ccd897f0336e3e3265c2 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252262625.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252341007.gif b/content/en/post/2022/figures/zh-cn_image_0000001252341007.gif new file mode 100644 index 0000000000000000000000000000000000000000..10cf74ea0fd6c4f98f395574611710262cc2f51d Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252341007.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252343171.png b/content/en/post/2022/figures/zh-cn_image_0000001252343171.png new file mode 100644 
index 0000000000000000000000000000000000000000..50f6b660a383f2190e3f0e0c1424825ab2d15cd5 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252343171.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252343211.png b/content/en/post/2022/figures/zh-cn_image_0000001252343211.png new file mode 100644 index 0000000000000000000000000000000000000000..df0575d8469a6813bfa359d5bf596a80933ccba5 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252343211.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252343303.png b/content/en/post/2022/figures/zh-cn_image_0000001252343303.png new file mode 100644 index 0000000000000000000000000000000000000000..550d3e815c93582cd76716d6e5f87ecdf5ca5cd7 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252343303.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252343507.png b/content/en/post/2022/figures/zh-cn_image_0000001252343507.png new file mode 100644 index 0000000000000000000000000000000000000000..7de7abad590c2246b95d3f559a7d0bfa1f291692 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252343507.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252412855.png b/content/en/post/2022/figures/zh-cn_image_0000001252412855.png new file mode 100644 index 0000000000000000000000000000000000000000..c42ff339d428983b28f0f19ed990bf743a457cc7 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252412855.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252463513.png b/content/en/post/2022/figures/zh-cn_image_0000001252463513.png new file mode 100644 index 0000000000000000000000000000000000000000..54dc20ba7b906ff980fd8c78029fc3475e0e33f6 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252463513.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252563289.png b/content/en/post/2022/figures/zh-cn_image_0000001252563289.png new file mode 100644 index 0000000000000000000000000000000000000000..b9b46d25da629f089c73bfbf6c85ae3cb13350de Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252563289.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252579733.jpg b/content/en/post/2022/figures/zh-cn_image_0000001252579733.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e8ece6ba0c196a037ef02c33657c65d6fb6c0f09 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252579733.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252700965.gif b/content/en/post/2022/figures/zh-cn_image_0000001252700965.gif new file mode 100644 index 0000000000000000000000000000000000000000..bf93b81bb2dcc230e81ff930c06b0d97b899c711 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252700965.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252703087.png b/content/en/post/2022/figures/zh-cn_image_0000001252703087.png new file mode 100644 index 0000000000000000000000000000000000000000..7191ba77b86241a7388983442ba811a84a12fc2b Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252703087.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252703155.png b/content/en/post/2022/figures/zh-cn_image_0000001252703155.png new file mode 100644 index 
0000000000000000000000000000000000000000..1decdffa2c685fbae2f39fbc25b2b664524f46aa Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252703155.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252703473.png b/content/en/post/2022/figures/zh-cn_image_0000001252703473.png new file mode 100644 index 0000000000000000000000000000000000000000..23105e8c997e1127c35cf871d1ac0d235a95aed1 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252703473.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001252803745.png b/content/en/post/2022/figures/zh-cn_image_0000001252803745.png new file mode 100644 index 0000000000000000000000000000000000000000..4180157997ea0d2ec282f60a4a3d5e7a9b534ac3 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001252803745.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001253422853.png b/content/en/post/2022/figures/zh-cn_image_0000001253422853.png new file mode 100644 index 0000000000000000000000000000000000000000..3b10332be8f58e0c03af456e0604109ee5db4e2b Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001253422853.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001256862067.gif b/content/en/post/2022/figures/zh-cn_image_0000001256862067.gif new file mode 100644 index 0000000000000000000000000000000000000000..c04be5cb6f6d2e0a974aa0f9a9133f6435f93efb Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001256862067.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001256862995.jpg b/content/en/post/2022/figures/zh-cn_image_0000001256862995.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a49980daf88fbb5de8a7e155827e723ec0104c26 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001256862995.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001256862999.gif b/content/en/post/2022/figures/zh-cn_image_0000001256862999.gif new file mode 100644 index 0000000000000000000000000000000000000000..5118bcf791e22a98df4d4df0d8da0879de1bb2d5 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001256862999.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001256981999.gif b/content/en/post/2022/figures/zh-cn_image_0000001256981999.gif new file mode 100644 index 0000000000000000000000000000000000000000..6068b15a50a1f3cc0574ab51d66b4a7a6b379f45 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001256981999.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001256982939.png b/content/en/post/2022/figures/zh-cn_image_0000001256982939.png new file mode 100644 index 0000000000000000000000000000000000000000..04a7bdd493c1557410c903c0d58b22ae0930edb1 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001256982939.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001257063005.gif b/content/en/post/2022/figures/zh-cn_image_0000001257063005.gif new file mode 100644 index 0000000000000000000000000000000000000000..1d26cd06f041eafd518d7bc15a7e924913ad563e Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001257063005.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001257142015.gif b/content/en/post/2022/figures/zh-cn_image_0000001257142015.gif new file mode 100644 index 0000000000000000000000000000000000000000..f53c01bb97471dcd52f9d4571d9b4159c1739d55 
Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001257142015.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001257142943.jpg b/content/en/post/2022/figures/zh-cn_image_0000001257142943.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9f6fcb791326728455ea10edffca6e3c63acfcc9 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001257142943.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001257142945.gif b/content/en/post/2022/figures/zh-cn_image_0000001257142945.gif new file mode 100644 index 0000000000000000000000000000000000000000..72bae1d09b9e4a5803363a6aebae0ed7718cfc82 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001257142945.gif differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001278996725.jpg b/content/en/post/2022/figures/zh-cn_image_0000001278996725.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b9cf3ce39a9d748ce506ff3cfda39c51cbc803c0 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001278996725.jpg differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001279274373.png b/content/en/post/2022/figures/zh-cn_image_0000001279274373.png new file mode 100644 index 0000000000000000000000000000000000000000..443c4327143c5225557e35a85c2d72252d7b0829 Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001279274373.png differ diff --git a/content/en/post/2022/figures/zh-cn_image_0000001279474617.png b/content/en/post/2022/figures/zh-cn_image_0000001279474617.png new file mode 100644 index 0000000000000000000000000000000000000000..6b1226a31b6dbc83050524e4c164c5608b415e0c Binary files /dev/null and b/content/en/post/2022/figures/zh-cn_image_0000001279474617.png differ diff --git "a/content/en/post/2022/figures/\345\233\276\347\211\2071.png" "b/content/en/post/2022/figures/\345\233\276\347\211\2071.png" new file mode 100644 index 0000000000000000000000000000000000000000..281105f5e1958b671e00ab2c8a925a98404deca2 Binary files /dev/null and "b/content/en/post/2022/figures/\345\233\276\347\211\2071.png" differ diff --git a/content/en/post/2022/openGauss-AI-Capability-Upgrade-Building-a-New-AI-Native-Database.md b/content/en/post/2022/openGauss-AI-Capability-Upgrade-Building-a-New-AI-Native-Database.md new file mode 100644 index 0000000000000000000000000000000000000000..49092fd0039e0d20c5743bb6509ee33a92fe0d58 --- /dev/null +++ b/content/en/post/2022/openGauss-AI-Capability-Upgrade-Building-a-New-AI-Native-Database.md @@ -0,0 +1,72 @@ ++++ + +title = "openGauss AI Capability Upgrade, Building a New AI-Native Database" + +date = "2022-03-15" + +tags = [" AI-Native Database"] + +archives = "2022-03" + +author = "Tianqing Wang" + +summary = "openGauss AI Capability Upgrade, Building a New AI-Native Database" + +img = "/en/post/2022/title/img16.png" + +times = "17:30" + ++++ + +# openGauss AI Capability Upgrade, Building a New AI-Native Database + + + +What will happen when databases are combined with AI? The answers may vary among different database vendors, open-source communities, and teachers and students. Although it is difficult to form a uniform accurate concept, their answers all point to the same direction. Since the first version was released in the open-source community, openGauss has continuously evolved and contributed code in this field. openGauss 3.0.0 continues in this vein. 
+
+In this release, the openGauss AI capability changes in the following ways:
+
+1. The AI4DB functions are integrated into the open-source openGauss database autonomous platform.
+2. The AI4DB capabilities are refactored to run plug-ins as services.
+3. The Prometheus ecosystem is supported.
+4. New features such as slow SQL root cause analysis and time series forecasting are added to optimize the existing AI capabilities.
+5. DB4AI supports more algorithms.
+
+## **Upgrading the Native DB4AI Engine**
+
+In this release of openGauss, the native DB4AI engine supports more machine learning algorithms, such as non-linear kernel functions for SVM and the XGBoost algorithm. In addition, openGauss provides the Explain API to view model information.
+
+## **Supporting AI4DB Plug-ins as Services**
+
+The original openGauss AI4DB capability was an offline tool: it could neither monitor the database continuously in the background nor periodically detect database problems. Therefore, this release implements a background monitoring service that periodically checks the status of the database system; in this way, the autonomous database platform DBMind is formed. Diagnosis results are computed offline and saved, and users can visualize them with software such as Grafana so that problems are detected and root causes obtained in a timely manner.
+
+Because the running status of the openGauss database system needs to be monitored periodically in the background, a monitoring platform must be connected to collect database metrics and perform offline computation. In this release, openGauss provides two types of exporters to interconnect with the Prometheus platform. The architecture is as follows:
+
+![](../figures/zh-cn_image_0000001235076358.jpg)
+
+openGauss-exporter obtains monitoring metrics from the database system, and reprocessing-exporter performs secondary processing on data already stored in Prometheus. Prometheus periodically collects the data of both exporters. The DBMind system periodically fetches time series data from Prometheus and performs parallel computing on the DBMind deployment machine. After the computation is complete, the result is stored in the meta-database. You can then obtain the diagnosis results from the meta-database and visualize them by configuring Grafana or similar tools.
+
+![](../figures/zh-cn_image_0000001278996725.jpg)
+
+The preceding figure shows an example of visualization using Grafana based on data in the meta-database.
+
+In this release, openGauss fully integrates the existing AI capabilities and redesigns them as plug-ins. For example, to call the parameter tuning function, which tunes database parameters based on reinforcement learning, run the following command:
+
+```
+gs_dbmind component xtuner tune ...
+```
+
+That is, the **gs\_dbmind** command is used to call all AI functions, and the **component** subcommand is used to call a specific AI function. You can run the following command to view the help information:
+
+```
+gs_dbmind component --help
+```
+
+Based on the preceding design, developers in the openGauss community who want to contribute a database AI function only need to ensure that its API can be invoked through gs\_dbmind.
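+To make the Prometheus interconnection described above concrete, the following is a minimal sketch of a scrape configuration for the two exporters. The host names, ports, and job names are illustrative assumptions, not fixed defaults of the exporters:
+
+```
+# A sketch: merge these jobs into the scrape_configs section of prometheus.yml.
+cat >> prometheus.yml <<'EOF'
+scrape_configs:
+  - job_name: 'opengauss-exporter'
+    static_configs:
+      - targets: ['db-host:9187']       # assumed openGauss-exporter address
+  - job_name: 'reprocessing-exporter'
+    static_configs:
+      - targets: ['dbmind-host:8181']   # assumed reprocessing-exporter address
+EOF
+```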
+Plug-ins developed in this way can also call all APIs provided by DBMind, for example, the data access interface \(DAI\) for obtaining data from Prometheus and the database access object \(DAO\) API for inserting data into the meta-database.
+
+## **Comprehensively Improving the AI4DB Capabilities**
+
+In this release, openGauss upgrades existing functions such as index recommendation and time series forecasting. In addition, it provides root cause analysis and recommendations for slow SQL statements to help DBAs quickly identify them. Based on the monitored database running metrics, it uses an AI feature-library matching algorithm to report the causes of slow SQL statements together with confidence levels, and provides optimization suggestions.
+
+## **Laying a Foundation for the Development of a Comprehensive Database AI Autonomous Platform**
+
+As mentioned above, openGauss 3.0.0 integrates the historical AI capabilities and discards the burden left over from the earlier R&D process. The new DBMind platform is service-oriented, runs offline, is plug-in-based, and can be freely assembled. It is released with the database installation package, and you can use tools such as Grafana to customize and visualize the diagnosis results. \(We will provide a Grafana example.\)
+
+This lays a foundation for further upgrades of the DBMind platform. openGauss is expected to add more AI functions to the platform this year, separate the platform from the existing code repository, and provide a native web front-end and back-end display platform. In addition, automatic recovery will be supported, giving users a one-click, out-of-the-box database autonomous driving experience.
+
diff --git a/content/en/post/2022/openGauss-AI4DB-and-DB4AI.md b/content/en/post/2022/openGauss-AI4DB-and-DB4AI.md
new file mode 100644
index 0000000000000000000000000000000000000000..d41eef5d668fbe944e192c68591f1f29b7bd089b
--- /dev/null
+++ b/content/en/post/2022/openGauss-AI4DB-and-DB4AI.md
@@ -0,0 +1,113 @@
++++
+
+title = "openGauss AI4DB and DB4AI"
+
+date = "2021-01-19"
+
+tags = [ "openGauss AI4DB and DB4AI"]
+
+archives = "2021-01"
+
+author = "Tianqing Wang"
+
+summary = "openGauss AI4DB and DB4AI"
+
+img = "/en/post/2022/title/img9.png"
+
+times = "12:30"
+
++++
+
+# openGauss AI4DB and DB4AI
+
+The AI feature is one of the key features of openGauss. Earlier versions provided the open-source AI parameter self-tuning and slow SQL discovery functions, which attracted attention from developers and users. To further improve the AI capabilities, new functions are added to the latest openGauss under the AI4DB and DB4AI features.
+
+The AI4DB feature empowers openGauss with AI technologies, simplifies database operation and management, and provides users with end-to-end self-O&M and self-optimization suites. In the new version, the **database intelligent index recommendation** and **database monitoring and exception detection** functions are added. The DB4AI feature provides AI computing capability inside the database: built-in AI algorithms give users inclusive AI capabilities. In the new version, the in-database **machine learning algorithm framework DeepSQL** is added. The following describes these new functions in detail.
+
+## 1. AI4DB
+
+- **Intelligent Database Index Recommendation**
+
+    In a large-scale relational database, index design and optimization are critical to the execution efficiency of SQL statements. For a long time, database administrators have designed and adjusted indexes manually based on prior knowledge and experience. This consumes considerable time and effort, and manual design cannot guarantee optimal indexes.
+
+    openGauss provides the intelligent index recommendation function, which automates and standardizes the index design process and recommends the optimal indexes for a single query statement or a workload, improving working efficiency and reducing the O&M burden on database administrators. The intelligent index recommendation function of openGauss covers multiple application scenarios and includes the following features \(a combined usage sketch follows this list\):
+
+- 1. Index recommendation for a single query statement
+
+    This feature generates recommended indexes for a single query statement entered by a user, based on the semantic information of the query statement and database statistics.
+
+    You can use the **gs\_index\_advise** system function of openGauss to recommend single-column and composite indexes. A specific example is as follows. The recommendation result includes the corresponding table name and column names.
+
+    ![](../figures/zh-cn_image_0000001207280996.jpg)
+
+- 2. Virtual index
+
+    This feature simulates the creation of real indexes, avoiding the time and space overhead of actually creating them. You can use the optimizer to evaluate the impact of a virtual index on specified query statements.
+
+    This feature provides a series of operations, including creating and deleting virtual indexes and evaluating performance and storage space overhead. You can use the openGauss system functions to operate virtual indexes flexibly. Examples of some operations are as follows:
+
+    - Use the **hypopg\_create\_index** system function to create a virtual index. The input parameter is the SQL statement for creating the index.
+
+        ![](../figures/zh-cn_image_0000001251841009.jpg)
+
+    - By executing EXPLAIN on a specific query statement, you can evaluate the index performance based on the execution plan provided by the optimizer.
+
+        The execution plan before the virtual index is created is as follows:
+
+        ![](../figures/zh-cn_image_0000001207121020.jpg)
+
+        After the virtual index is created, the execution plan changes as follows:
+
+        ![](../figures/zh-cn_image_0000001206961046.jpg)
+
+        By comparing the two execution plans, you can see that the index will reduce the execution time of the specified query statement. Based on this conclusion, you can create a real index.
+
+- 3. Workload-based index recommendation
+
+    Taking a workload consisting of multiple DML statements as input, the algorithm recommends a batch of indexes to be created that optimize the overall workload. The following figure shows the process of the algorithm.
+
+    ![](../figures/zh-cn_image_0000001252121009.png)
+
+    Building on the preceding two features, this feature compresses the workload into a batch of typical SQL statements, generates candidate indexes for each statement using the single-query index recommendation function, and then filters the candidates using the virtual index function, outputting the indexes that make the largest positive contribution to the workload.
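+    A minimal sketch that combines the preceding features in one gsql session is shown below. The table **t1** and its column **a** are hypothetical, and the exact function signatures and GUC names may vary slightly across openGauss versions:
+
+    ```
+    gsql -d postgres -p 5432 <<'EOF'
+    -- Single-query index recommendation (hypothetical table t1, column a).
+    SELECT * FROM gs_index_advise('SELECT * FROM t1 WHERE a = 100');
+
+    -- Create a virtual index and let the optimizer consider it.
+    SELECT * FROM hypopg_create_index('CREATE INDEX ON t1(a)');
+    SET enable_hypo_index = on;  -- assumed GUC enabling virtual indexes
+    EXPLAIN SELECT * FROM t1 WHERE a = 100;
+    EOF
+    ```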
+
+    **Database Metric Monitoring and Exception Detection**
+
+    Database metrics reflect the health status of the database. Abnormal user operations or database performance deterioration cause changes in database metrics, so it is necessary to monitor them. The benefits are as follows:
+
+    (1) It helps you understand the running status of the database from multiple perspectives and plan the database better.
+
+    (2) It helps users detect database exceptions and potential performance problems in advance and be informed in a timely manner, avoiding unnecessary losses.
+
+    Anomaly-detection is an AI tool integrated into openGauss. It can be used to collect and forecast database metrics, as well as to monitor and diagnose exceptions.
+
+    The following figure shows the anomaly-detection structure.
+
+    ![](../figures/zh-cn_image_0000001207280998.jpg)
+
+    This tool consists of an agent and a detector. The agent is a data collection module that collects database metric data and pushes it to the detector. The detector is an exception detection module with three functions: 1. collect the data pushed by the agent and store it locally; 2. perform exception detection on the collected metric data; 3. push exception information to users.
+
+    Currently, the database metrics collected by the tool by default include IO\_Read, IO\_Write, CPU\_Usage, Memory\_Usage, and the disk space occupied by the database. Based on the collected data, the tool predicts the change trend of the metrics and detects exceptions, implementing functions such as warnings for insufficient disk space, memory leaks, and excessive CPU consumption, and preventing unnecessary losses caused by database exceptions.
+
+    Anomaly-detection provides functions such as one-click deployment, one-click startup and shutdown, and metric forecasting, and is easy to use. In addition, you can quickly add new monitoring parameters or time series forecasting algorithms based on service scenario requirements.
+
+
+## 2. DB4AI
+
+By constructing an ordered file organization structure, a database management system can conveniently add, delete, modify, and query data records. In the AI field, people use the computing power of computers to analyze and mine data. Data storage and computing are the keys to data governance.
+
+In traditional scenarios, a user who wants to analyze and train on data stored in a database usually needs to extract the data from the storage system, cache it in memory, and then use third-party Python packages such as TensorFlow or scikit-learn to perform data analysis or model training. This development process is inefficient. First, it involves the Python language, third-party machine learning packages, and databases, so the technology stack is fragmented. Second, its performance is poor: training data usually has to be transmitted over the network, which incurs significant transmission overhead when the data volume is large and prevents computation from being performed close to the data. In addition, developers' skill levels vary, so the computing power of the CPU or GPU is often not fully exploited. In some data-sensitive fields, data extraction also requires operations such as permission approval and anonymization, which further increases costs.
+Therefore, integrating the AI computing capability into the database is very advantageous: on one hand, data can be processed where it resides; on the other hand, the optimization capability of the database can be used to select the optimal execution plan. In the end, a single SQL statement achieves faster model training and prediction than implementing the algorithms yourself.
+
+DeepSQL is compatible with the Apache MADlib ecosystem and supports more than 60 common algorithms, including regression algorithms \(such as linear regression, logistic regression, and random forest\), classification algorithms \(such as KNN\), and clustering algorithms \(such as k-means\). In addition to basic machine learning algorithms, graph-related algorithms are included, such as shortest-path and graph-diameter algorithms. It also supports data processing methods \(such as PCA\), a sparse vector data format, common statistical algorithms \(such as covariance and Pearson coefficient calculation\), training/test set splitting, and cross validation.
+
+In addition to the preceding algorithms obtained through compatibility with MADlib, DeepSQL supports three other common algorithms: Prophet, GBDT, and XGBoost.
+
+The time series forecasting algorithm Prophet is based on time series decomposition and Bayesian fitting. It is an open-source algorithm from Facebook and a practical forecasting algorithm in engineering scenarios. Compared with other time series forecasting algorithms, Prophet is faster, more accurate, and more robust, and its computation workload is much smaller than that of an RNN.
+
+GBDT and XGBoost belong to the family of boosting algorithms that use regression trees to fit residuals.
+
+The GBDT implementation uses the tree module in the MADlib framework. It inherits the style of the MADlib function API and uses input parameters to set the hyperparameters of the model. The algorithm supports regression and classification tasks; by default, the model is a regression model. Mean squared error \(MSE\) is used to calculate the residual of the previous base learner in each iteration, and for the regression tree computation, the branching strategy is chosen by minimizing the squared error of each node.
+
+After the XGBoost algorithm is integrated, the gs\_boost module provides SQL-like APIs that are compatible with the MADlib style and support classification and regression tasks. The gs\_xgboost module supports model hyperparameter selection and model evaluation through grid search.
+
+These are the latest open-source AI features of openGauss. Come and experience them. If you have any comments or suggestions, feel free to contact us in the open-source community. We'd love to hear your thoughts and will take them as the direction and motivation for improvement. We believe that with the joint efforts of developers and users, the convergence of openGauss and AI will keep strengthening and bring users more intelligent and better services.
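+To make the DB4AI workflow concrete before closing, here is a minimal sketch of a training-and-prediction session through the MADlib-compatible interface. The **houses** table, its columns, and the connection parameters are hypothetical; the UDF signatures follow the Apache MADlib convention:
+
+```
+gsql -d postgres -p 5432 <<'EOF'
+-- Train a linear regression model on a hypothetical table.
+SELECT madlib.linregr_train(
+    'houses',                   -- source table
+    'houses_model',             -- output table holding the coefficients
+    'price',                    -- dependent variable
+    'ARRAY[1, size, bedrooms]'  -- independent variables (1 is the intercept)
+);
+
+-- Score rows with the trained coefficients.
+SELECT h.id,
+       madlib.linregr_predict(m.coef, ARRAY[1, h.size, h.bedrooms]) AS predicted_price
+FROM houses h, houses_model m;
+EOF
+```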
diff --git a/content/en/post/2022/openGauss-Database-Performance-Optimization.md b/content/en/post/2022/openGauss-Database-Performance-Optimization.md
new file mode 100644
index 0000000000000000000000000000000000000000..7b5a4e3cba6782dfc51e26f33896d3016c712c53
--- /dev/null
+++ b/content/en/post/2022/openGauss-Database-Performance-Optimization.md
@@ -0,0 +1,557 @@
++++
+
+title = "openGauss Database Performance Optimization"
+
+date = "2020-08-13"
+
+tags = [ "openGauss Database Performance Optimization"]
+
+archives = "2020-08"
+
+author = "Yansong LI"
+
+summary = "openGauss Database Performance Optimization"
+
+img = "/en/post/2022/title/img7.png"
+
+times = "12:30"
+
++++
+
+# openGauss Database Performance Optimization
+
+## Overview
+
+This document describes the key system-level optimization configurations required by the openGauss database to achieve optimal performance on the openEuler OS running on a TaiShan server.
+
+## Hardware Specifications
+
+CPU: Kunpeng 920 \(Hi1620\) ARM AArch64 \(64 cores\) x 2
+
+Memory: ≥ 512 GB
+
+Disk: NVMe SSD \(\> 1 TB\) x 4
+
+NIC: 1822 10GE NIC. Ethernet controller: Huawei Technologies Co., Ltd. Hi1822 Family \(4\*25GE\) \(rev 45\)
+
+## Software Specifications
+
+OS: openEuler 20.03 \(LTS\)
+
+Database: openGauss 1.0.0
+
+Benchmark: benchmarksql-5.0
+
+JDK: jdk1.8.0\_212
+
+Ant: apache-ant-1.9.15
+
+The following sections optimize the database by configuring the BIOS, operating system, file system, network, and core binding, and by constructing the TPC-C test data.
+
+- Third-party tools: JDK, Ant, BenchmarkSQL
+- Linux tools: htop, iostat
+
+For details about how to install and use the benchmark, htop, and iostat tools, see _Benchmark Usage_ \(https://opengauss.org/zh/blogs/blogs.html?post/optimize/opengauss-tpcc/\).
+
+## BIOS Settings
+
+Log in to the server management system, restart the server, enter the BIOS screen, modify the BIOS settings, and restart the server. \(The server management system depends on the actual situation.\)
+
+- 1. After the machine self-check, startup options are displayed.
+
+    ![](../figures/zh-cn_image_0000001251960129.jpg)
+
+- 2. Press **Del** to enter the BIOS screen.
+
+    ![](../figures/zh-cn_image_0000001206760224.jpg)
+
+- 3. Enter the BIOS password.
+
+    ![](../figures/zh-cn_image_0000001206920214.jpg)
+
+- 4. Restore to factory settings.
+
+    Press **F9** to restore to the factory settings. It is recommended that you restore to the factory settings first because many default BIOS settings may have been changed.
+
+- **5. **Modify BIOS settings.
+
+    The modification includes:
+
+    ```
+    # Choose BIOS > Advanced > MISC Config and set Support Smmu to Disabled.
+    # Choose BIOS > Advanced > MISC Config and set CPU Prefetching Configuration to Disabled.
+    # Choose BIOS > Advanced > Memory Config and set Die Interleaving to Disable.
+    ```
+
+    ![](../figures/zh-cn_image_0000001251640179.jpg)![](../figures/zh-cn_image_0000001251640181.jpg)
+
+- **6. **Save the BIOS settings and restart the server.
+
+    Press **F10** to save the settings and exit. Restart the system.
+
+    ![](../figures/zh-cn_image_0000001206760228.jpg)
+
+
+## OS Configuration
+
+- Optimizing OS Configuration
+
+    Disable **irqbalance**: if a GaussDB process and a client contend for CPU resources, CPU usage becomes unbalanced. If htop shows that some CPUs are overloaded while others are idle, check whether **irqbalance** has been disabled.
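+    A quick way to verify its status \(a sketch; openEuler is systemd-based\):
+
+    ```
+    systemctl is-active irqbalance   # "inactive" means it has already been stopped
+    ```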
+
+    ![](../figures/zh-cn_image_0000001206760226.jpg)
+
+    ```
+    service irqbalance stop
+    echo 0 > /proc/sys/kernel/numa_balancing
+    echo 'never' > /sys/kernel/mm/transparent_hugepage/enabled
+    echo 'never' > /sys/kernel/mm/transparent_hugepage/defrag
+    echo none > /sys/block/nvme*n*/queue/scheduler ## Setting the I/O queue scheduling mechanism for NVMe drives
+    ```
+
+
+## File System Configuration
+
+- Change the block size of the XFS file system to 8 KB.
+
+    \(1\) Check the existing block sizes of the mount points corresponding to the NVMe drives. Run the following command to check the NVMe drives that are mounted:
+
+    ```
+    df -h | grep nvme
+    /dev/nvme0n1 3.7T 2.6T 1.2T 69% /data1
+    /dev/nvme1n1 3.7T 1.9T 1.8T 51% /data2
+    /dev/nvme2n1 3.7T 2.2T 1.6T 59% /data3
+    /dev/nvme3n1 3.7T 1.4T 2.3T 39% /data4
+    ```
+
+    You can run the **xfs\_info** command to view information about the NVMe drives.
+
+    ```
+    xfs_info /data1
+    ```
+
+    ![](../figures/zh-cn_image_0000001251800179.gif)
+
+    In the preceding figure, the block size is already 8 KB and does not need to be changed. If the block size is not 8 KB, back up the data and reformat the disk.
+
+    \(2\) Back up the data on the disk to be formatted.
+
+    Back up the required data to other disks or machines as required.
+
+    \(3\) Format the disk and set the block size to 8 KB.
+
+    Take the **/dev/nvme0n1** disk and the **/data1** mount point as an example. The commands are as follows:
+
+    ```
+    umount /data1
+    mkfs.xfs -b size=8192 /dev/nvme0n1 -f
+    mount /dev/nvme0n1 /data1
+    ```
+
+    \(4\) Run the **xfs\_info** command again to check whether the block size is set correctly.
+
+
+## Network Configuration
+
+- **1. **Multi-Queue Interrupt Settings
+
+    As TaiShan servers have a large number of cores, NIC multi-queues need to be configured on both servers and clients. The recommended configuration is as follows: 16 interrupt queues for NICs on servers, and 48 interrupt queues for NICs on clients.
+
+    Multi-queue Interrupt Setting Tool \(1822-FW\)
+
+    You can obtain the released Hi1822 NIC version from the following link: https://support.huawei.com/enterprise/en/intelligent-accelerator-components/in500-solution-pid-23507369/software. IN500 solution 5.1.0.SPC401 and later versions support multi-queues.
+
+    - \(1\) Decompress **Hi1822-NIC-FW.zip**, go to the directory, and install hinicadm as user **root**.
+
+        ![](../figures/zh-cn_image_0000001251960131.gif)
+
+    - \(2\) Determine the NIC to which the currently connected physical port belongs. The network port and NIC name vary according to the hardware platform. In the following example, the private network port enp3s0 is used and belongs to the hinic0 NIC.
+
+        ![](../figures/zh-cn_image_0000001251960133.gif)![](../figures/zh-cn_image_0000001206920220.gif)
+
+    - \(3\) Go to the **config** directory and use the hinicconfig tool to configure the interrupt queue firmware configuration file.
+
+        64-queue configuration file: std\_sh\_4x25ge\_dpdk\_cfg\_template0.ini;
+
+        16-queue configuration file: std\_sh\_4x25ge\_nic\_cfg\_template0.ini;
+
+        Set the number of queues for hinic0 to different values. \(The default value is **16** and it can be changed as needed.\)
+
+        ```
+        ./hinicconfig hinic0 -f std_sh_4x25ge_dpdk_cfg_template0.ini
+        ```
+
+        Restart the OS for the modification to take effect. Run the **ethtool -l enp3s0** command to view the result. In the following figure, 32 is displayed.
+
+        ![](../figures/zh-cn_image_0000001206760230.gif)
+
+        Run the **ethtool -L enp3s0 combined 48** command to change the value of **combined**. \(The optimized value varies according to the platform and application. For the 128-core platform, the optimized value on the server is **16** and that on the client is **48**.\)
+
+- **2. **Interrupt Tuning
+
+    When the openGauss database is fully loaded \(CPU usage greater than 90%\), the CPU becomes the bottleneck. In this case, offload packet segmentation and aggregation to the NIC:
+
+    ```
+    ethtool -K enp3s0 tso on
+    ethtool -K enp3s0 lro on
+    ethtool -K enp3s0 gro on
+    ethtool -K enp3s0 gso on
+    ```
+
+    Take the 1620 platform as an example. The NIC interrupts are bound to the last four cores on each NUMA node, and each core is bound to three interrupts. The core binding script is as follows. This script is called by gs\_preinstall during the openGauss installation. For details, see the product installation guide.
+
+    ![](../figures/zh-cn_image_0000001251960135.gif)
+
+    ```
+    sh bind_net_irq.sh 16
+    ```
+
+- **3. **Confirming and Updating the NIC Firmware
+
+    Check whether the firmware version of the private NIC in the current environment is 2.5.0.0.
+
+    ```
+    ethtool -i enp3s0
+    driver: hinic
+    version: 2.3.2.11
+    firmware-version: 2.5.0.0
+    expansion-rom-version:
+    bus-info: 0000:03:00.0
+    ```
+
+    If the version is 2.5.0.0, you are advised to replace it with 2.4.1.0 for better performance.
+
+    NIC Firmware Update Procedure
+
+    \(1\) Upload the NIC firmware driver to the server. The firmware file is **Hi1822\_nic\_prd\_1h\_4x25G.bin**.
+
+    \(2\) Run the following command as user **root**:
+
+    **hinicadm updatefw -i** _\<physical NIC device name\>_ **-f** _\<firmware file path\>_
+
+    _Physical NIC device name_ indicates the NIC name in the system. For example, **hinic0** indicates the first NIC, and **hinic1** indicates the second NIC. For details about how to query the NIC name, see "Multi-Queue Interrupt Settings." For example:
+
+    ```
+    # hinicadm updatefw -i <physical NIC device name> -f <firmware file path>
+    Please do not remove driver or network device
+    Loading...
+    [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] [100%] [\]
+    Loading firmware image succeed.
+    Please reboot OS to take firmware effect.
+    ```
+
+    \(3\) Restart the server and check whether the firmware version of the private NIC is updated to 2.4.1.0.
+
+    ```
+    ethtool -i enp3s0
+    driver: hinic
+    version: 2.3.2.11
+    firmware-version: 2.4.1.0
+    expansion-rom-version:
+    bus-info: 0000:03:00.0
+    ```
+
+    The firmware version of the private NIC is successfully updated.
+
+
+## Core Binding on the Database Server and Client
+
+Install the database by referring to the openGauss installation document.
+
+The general procedure is as follows:
+
+◾ Stop the database.
+
+◾ Modify **postgresql.conf** parameters.
+
+◾ Start the database in core binding mode by running the **numactl --interleave=all bin/gaussdb -D $\{DATA\_DIR\} --single\_node** command.
+
+◾ Start the benchmark in core binding mode by running the **numactl -C 0-19,32-51,64-83,96-115 ./runBenchmark.sh props.pg** command.
+
+Run the preceding commands based on the core binding configuration and the benchmark configuration file. The cores bound to the benchmark must differ from the cores bound to the database.
+
+- **1. **Core Binding Settings on the Server
+
+    \(1\) During the running of service processes, the network interrupts reported by the hardware cause frequent context switching, which severely affects efficiency. Therefore, network interrupts and services must be bound to different cores.
+    For details about core binding for network interrupts, see the previous section.
+
+    \(2\) The thread pool mechanism is introduced in openGauss. When the database is started, the thread pool creates a specified number of threads to provide services, and each thread is bound to a core when it is created. Therefore, the core binding information of the NIC needs to be passed in through a GUC parameter to facilitate core binding during system running. The following figure shows the parameters when 128 cores are used.
+
+    ![](../figures/7.png)
+
+    Total number of threads = \(Number of CPUs - Number of CPUs processing the network\) x Number of threads per core \(7.25 is recommended\) = \(128 - 16\) x 7.25 = 812. The number of NUMA nodes is 4, and the number of cores for processing interrupts is 16.
+
+    The following is an example of CPU binding for auxiliary allocation:
+
+    ```
+    numactl -C 0-27,32-59,64-91,96-123 gaussdb --single_node -D {DATA_DIR} -p {PORT} &
+    ```
+
+    Or
+
+    ```
+    numactl --interleave=all gaussdb --single_node -D {DATA_DIR} -p {PORT} &
+    ```
+
+- **2. **Server Parameter Setting
+
+    The following parameters are added to the **postgresql.conf** file:
+
+    - **advance\_xlog\_file\_num = 10**
+
+        This parameter indicates that the background thread BackgroundWALWriter periodically checks and initializes the next 10 XLogs in advance, so that XLogs are not initialized only when transactions are committed, reducing the transaction commit delay. This parameter is useful only in performance pressure tests; generally, you do not need to set it. The default value is **0**, indicating that no initialization is performed in advance.
+
+    - **numa\_distribute\_mode = 'all'**
+
+        This parameter can be set to **all** or **none**. The value **all** indicates that NUMA optimization is enabled: working threads and the corresponding PGPROC and WALInsertlock are grouped and bound to the corresponding NUMA nodes to reduce remote CPU memory access on key paths. The default value is **none**, indicating that the NUMA distribution feature is disabled. It is useful only when multiple NUMA nodes are involved and the cost of remote access is obviously higher than that of local access. You are advised to enable it during performance pressure tests.
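+    The value **812** in the **thread\_pool\_attr** setting below comes from the thread-count formula above. A small helper script \(a sketch, with the constants taken from this document\) makes the sizing explicit:
+
+    ```
+    #!/bin/bash
+    # Worker-thread sizing for thread_pool_attr, per the formula above.
+    TOTAL_CPUS=128         # all cores on the server
+    NET_CPUS=16            # cores reserved for NIC interrupt handling
+    THREADS_PER_CORE=7.25  # recommended multiplier
+    awk -v t="$TOTAL_CPUS" -v n="$NET_CPUS" -v k="$THREADS_PER_CORE" \
+        'BEGIN { printf "total worker threads: %d\n", (t - n) * k }'   # prints 812
+    ```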
+
+    **thread\_pool\_attr** configuration:
+
+    thread\_pool\_attr = '812,4,\(cpubind: 0-27,32-59,64-91,96-123\)'
+
+    The parameter settings are as follows:
+
+    ```
+    max_connections = 4096
+    allow_concurrent_tuple_update = true
+    audit_enabled = off
+    checkpoint_segments = 1024
+    checkpoint_timeout = 15min
+    cstore_buffers = 16MB
+    enable_alarm = off
+    enable_codegen = false
+    enable_data_replicate = off
+    full_page_writes = on
+    max_files_per_process = 100000
+    max_prepared_transactions = 2048
+    shared_buffers = 350GB
+    use_workload_manager = off
+    wal_buffers = 1GB
+    work_mem = 1MB
+    log_min_messages = FATAL
+    transaction_isolation = 'read committed'
+    default_transaction_isolation = 'read committed'
+    synchronous_commit = on
+    fsync = on
+    maintenance_work_mem = 2GB
+    vacuum_cost_limit = 2000
+    autovacuum = on
+    autovacuum_mode = vacuum
+    autovacuum_max_workers = 5
+    autovacuum_naptime = 20s
+    autovacuum_vacuum_cost_delay = 10
+    xloginsert_locks = 48
+    update_lockwait_timeout = 20min
+
+    enable_mergejoin = off
+    enable_nestloop = off
+    enable_hashjoin = off
+    enable_bitmapscan = on
+    enable_material = off
+
+    wal_log_hints = off
+    log_duration = off
+    checkpoint_timeout = 15min
+    autovacuum_vacuum_scale_factor = 0.1
+    autovacuum_analyze_scale_factor = 0.02
+    enable_save_datachanged_timestamp = false
+
+    log_timezone = 'PRC'
+    timezone = 'PRC'
+    lc_messages = 'C'
+    lc_monetary = 'C'
+    lc_numeric = 'C'
+    lc_time = 'C'
+
+    enable_thread_pool = on
+    thread_pool_attr = '812,4,(cpubind:0-27,32-59,64-91,96-123)'
+    enable_double_write = off
+    enable_incremental_checkpoint = on
+    enable_opfusion = on
+    advance_xlog_file_num = 10
+    numa_distribute_mode = 'all'
+
+    track_activities = off
+    enable_instr_track_wait = off
+    enable_instr_rt_percentile = off
+    track_counts = on
+    track_sql_count = off
+    enable_instr_cpu_timer = off
+
+    plog_merge_age = 0
+    session_timeout = 0
+
+    enable_instance_metric_persistent = off
+    enable_logical_io_statistics = off
+    enable_page_lsn_check = off
+    enable_user_metric_persistent = off
+    enable_xlog_prune = off
+
+    enable_resource_track = off
+    instr_unique_sql_count=0
+    enable_beta_opfusion=on
+    enable_beta_nestloop_fusion=on
+    ```
+
+
+- **3. **Configuring Core Binding for the TPC-C Client
+
+    The client uses numactl to bind the client processes to the cores other than those handling NIC interrupts. The following figure uses a 128-core environment as an example: 80 cores process the service logic, and the remaining 48 cores process network interrupts.
+
+    ![](../figures/zh-cn_image_0000001207080190.gif)
+
+    The corresponding tpmC command is as follows:
+
+    ```
+    numactl -C 0-19,32-51,64-83,96-115 ./runBenchmark.sh props.pg
+    ```
+
+    The other cores are used to process network interrupts.
+
+
+## Constructing TPC-C Initial Data
+
+- **1. **Modify benchmark configurations.
+
+    Copy **props.pg** and rename it **props.opengauss.1000w**. Edit the file and replace its configuration with the following:
+
+    ```
+    cp props.pg props.opengauss.1000w
+    vim props.opengauss.1000w
+    db=postgres
+    driver=org.postgresql.Driver
+    // Modify the connection string, including the IP address, port number, and database.
+    conn=jdbc:postgresql://ip:port/tpcc1000?prepareThreshold=1&batchMode=on&fetchsize=10
+    // Set the user name and password for logging in to the database.
+    user=user
+    password=******
+
+    warehouses=1000
+    loadWorkers=200
+
+    // Set the maximum number of concurrent tasks, which is the same as the maximum number of work tasks on the server.
+    terminals=812
+    //To run specified transactions per terminal- runMins must equal zero
+    runTxnsPerTerminal=0
+    //To run for specified minutes- runTxnsPerTerminal must equal zero
+    runMins=5
+    //Number of total transactions per minute
+    limitTxnsPerMin=0
+
+    //Set to true to run in 4.x compatible mode. Set to false to use the
+    //entire configured database evenly.
+    terminalWarehouseFixed=false
+
+    //The following five values must add up to 100
+    //The default percentages of 45, 43, 4, 4 & 4 match the TPC-C spec
+    newOrderWeight=45
+    paymentWeight=43
+    orderStatusWeight=4
+    deliveryWeight=4
+    stockLevelWeight=4
+
+    // Directory name to create for collecting detailed result data.
+    // Comment this out to suppress.
+    resultDirectory=my_result_%tY-%tm-%td_%tH%tM%tS
+    osCollectorScript=./misc/os_collector_linux.py
+    osCollectorInterval=1
+    // Collect OS load information.
+    //osCollectorSSHAddr=osuer@10.44.133.78
+    //osCollectorDevices=net_enp3s0 blk_nvme0n1 blk_nvme1n1 blk_nvme2n1 blk_nvme3n1
+    ```
+
+- **2. **Prepare for importing TPC-C data.
+
+    \(1\) Replace the **tableCreates.sql** file.
+
+    Download the **tableCreates.sql** file \(at https://blog.opengauss.org/zh/post/optimize/images/tableCreates.sql\). Use this file to replace the corresponding file in **benchmarksql-5.0/run/sql.common/** of BenchmarkSQL.
+
+    The file is modified as follows:
+
+    ◾ Two tablespaces are added.
+
+    ```
+    CREATE TABLESPACE example2 relative location 'tablespace2';
+    CREATE TABLESPACE example3 relative location 'tablespace3';
+    ```
+
+    ◾ The **bmsql\_hist\_id\_seq** sequence is deleted.
+
+    ◾ The FILLFACTOR attribute is added to each table.
+
+    ```
+    create table bmsql_stock (
+      s_w_id integer not null,
+      .....
+      s_dist_10 char(24)
+    ) WITH (FILLFACTOR=80) tablespace example3;
+    ```
+
+    \(2\) Modify the **indexCreates.sql** file.
+
+    Modify the **run/sql.common/indexCreates.sql** file.
+
+    ![](../figures/zh-cn_image_0000001207240170.gif)
+
+    Modify the content in the red box in the preceding figure as follows:
+
+    ![](../figures/zh-cn_image_0000001206920224.gif)
+
+    Add the content shown in red in the following figure so that the benchmark tool automatically generates the data in the different tablespaces. If it is not added, you will have to redistribute the data across disks after the benchmark tool generates it.
+
+    ![](../figures/zh-cn_image_0000001251800185.jpg)
+
+    \(3\) Modify the **runDatabaseBuild.sh** file as shown in the following figure to avoid unsupported foreign keys during data generation.
+
+    ![](../figures/zh-cn_image_0000001251800187.jpg)
+
+- **3. **Import data.
+
+    Execute **runDatabaseBuild.sh** to import the data.
+
+- **4. **Back up data.
+
+    To facilitate repeated tests and reduce the data import time, you can back up the imported data. A common method is to stop the database and copy the entire data directory. The reference script for restoration is as follows:
+
+    ```
+    #!/bin/bash
+    rm -rf /ssd/omm108/gaussdata
+    rm -rf /usr1/omm108dir/tablespace2
+    rm -rf /usr2/omm108dir/tablespace3
+    rm -rf /usr3/omm108dir/pg_xlog
+    cp -rf /ssd/omm108/gaussdatabf/gaussdata /ssd/omm108/ &
+    job0=$!
+    cp -rf /usr1/omm108dir/tablespace2bf/tablespace2 /usr1/omm108dir/ &
+    job1=$!
+    cp -rf /usr2/omm108dir/tablespace3bf/tablespace3 /usr2/omm108dir/ &
+    job2=$!
+    cp -rf /usr3/omm108dir/pg_xlogbf/pg_xlog /usr3/omm108dir/ &
+    job3=$!
+    wait $job1 $job2 $job3 $job0
+    ```
+
+- **5. **Partition data disks.
+
+    During the performance test, data needs to be distributed across different storage media to increase I/O throughput. The data can be distributed to the four NVMe drives on the server. Place the **pg\_xlog**, **tablespace2**, and **tablespace3** directories on the other three NVMe drives, and create soft links at the original locations that point to the actual ones. **pg\_xlog** is in the database directory, and **tablespace2** and **tablespace3** are in the **pg\_location** directory. For example, run the following commands to relocate **tablespace2**:
+
+    ```
+    mv $DATA_DIR/pg_location/tablespace2 $TABSPACE2_DIR/tablespace2
+    cd $DATA_DIR/pg_location/
+    ln -svf $TABSPACE2_DIR/tablespace2 ./
+    ```
+
+- **6. **Run the TPC-C program.
+
+    ```
+    numactl -C 0-19,32-51,64-83,96-115 ./runBenchmark.sh props.opengauss.1000w
+    ```
+
+- **7. **Monitor performance.
+
+    Use htop to monitor the CPU usage of the database server and the TPC-C client. In the extreme performance test, the CPU usage of each service is greater than 90%. If the CPU usage does not meet this requirement, the core binding may be incorrect and needs to be adjusted.
+
+    ![](../figures/zh-cn_image_0000001251760151.jpg)
+
+    In the preceding figure, the CPUs in the yellow box are used to process network interrupts.
+
+- **8. **View the monitoring status after tuning.
+
+    The htop status after tuning is as follows.
+
+    ![](../figures/zh-cn_image_0000001251760153.jpg)
+
+    Database tuning is a tedious task. You need to repeatedly modify configurations, run TPC-C, and debug to reach the optimal performance configuration.
+
+    TPC-C running result:
+
+    ![](../figures/zh-cn_image_0000001206760240.gif)
+
+
diff --git a/content/en/post/2022/openGauss-Log-Consensus-Framework.md b/content/en/post/2022/openGauss-Log-Consensus-Framework.md
new file mode 100644
index 0000000000000000000000000000000000000000..caa2831f64a67a426c702229aac1b6e4f26c07d6
--- /dev/null
+++ b/content/en/post/2022/openGauss-Log-Consensus-Framework.md
@@ -0,0 +1,230 @@
++++
+
+title = "openGauss Log Consensus Framework"
+
+date = "2021-09-29"
+
+tags = [ "openGauss Log Consensus Framework"]
+
+archives = "2021-09"
+
+author = "Xilin Hu"
+
+summary = "openGauss Log Consensus Framework"
+
+img = "/en/post/2022/title/img9.png"
+
+time = "12:30"
+
++++
+
+# openGauss Log Consensus Framework
+
+Distributed consensus is a fundamental problem of distributed systems: how can a distributed system reach strong agreement on a value \(a resolution\) and thereby achieve high availability? Paxos is the most important distributed consensus algorithm, and many people use it as a synonym for distributed consensus protocols.
+
+The Paxos theory was put forward many years ago, and products using Paxos and its variant protocols keep emerging. However, there are few industrial-grade, third-party independent libraries and open-source projects. Common open-source products that draw on the Paxos protocol include ZooKeeper and etcd, but their protocols do not support high-throughput state machine replication, and they do not provide an independent third-party library that other systems can quickly integrate.
+
+Therefore, the DCF feature is designed and implemented to support the distributed strong consistency scenarios involved in openGauss.
+
+## 1 What is DCF?
+
+Its full name is distributed consensus framework. Typical algorithms for resolving distributed consensus problems are Paxos and Raft.
+DCF implements the Paxos algorithm. DCF provides capabilities such as log replication and cluster HA. It supports multiple types of nodes based on the Paxos protocol, and node roles can be adjusted as required. Log replication supports dynamic traffic adjustment, minority forcible startup, and automatic primary selection.
+
+DCF is a high-performance, highly mature, reliable, scalable, and easy-to-use independent basic library. Other systems can easily interconnect with DCF through its APIs to obtain the strong consistency, high availability, and automatic disaster recovery capabilities provided by the Paxos algorithm.
+
+![](../figures/图片1.png)
+
+As shown in the preceding figure, DCF consists of the algorithm module, storage module, communication module, and service layer.
+
+- Algorithm module:
+
+    The algorithm module is implemented based on the Multi-Paxos protocol. Driven by service scenarios and by requirements for high performance and a healthy ecosystem, DCF extends the basic Multi-Paxos protocol with many functions and performance optimizations, and performance is significantly improved in multiple deployment scenarios. The module mainly includes a leader election module, a log replication module, a metadata module, and a cluster management module.
+
+- Storage module:
+
+    For specific service scenarios and optimal performance, DCF extracts a set of public interfaces for log storage and implements a default high-performance storage module. If you have specific scenarios or requirements for optimal performance and cost, you can connect an existing storage system to the DCF log storage interface to meet those requirements. This is one of the advantages of DCF as a third-party independent library.
+
+- Communication module:
+
+    The communication module is implemented based on the message exchange component \(MEC\). It provides the communication capability between DCF component instances and an asynchronous event processing framework. The main functions are: multiple extensible communication protocols; unicast, broadcast, and loopback sending APIs; an asynchronous message processing framework; a multi-channel mechanism; multi-priority queues; compression; and batch sending.
+
+- Service layer:
+
+    The service layer drives the running of the entire DCF and provides the basic services required for program running, such as locks, asynchronous task scheduling, the thread pool service, and timers.
+
+## 2 What Can DCF Do?
+
+- 2.1 Adding and Deleting Nodes Online and Transferring the Leader Role Online
+
+    Based on standard Multi-Paxos, DCF can add or delete nodes online and transfer the leader role to another node online. This suits a wide range of service scenarios and helps build a development ecosystem.
+
+- 2.2 Priority-based Primary Selection and Policy-based Majority
+
+    **Policy-based majority:** In classic Paxos theory, data can be committed once a majority reaches agreement. However, the majority is not a specific set of nodes, so it cannot be guaranteed that one or more particular nodes hold complete data. In practice, nodes that are geographically close to each other usually hold strongly consistent data, while nodes that are far away are always in a non-strongly consistent state and cannot be activated as primary nodes during city-level disaster recovery.
+    The policy-based majority capability allows users to dynamically designate one or more nodes that must hold strongly consistent data, so that when disaster recovery is required, such a node can be activated as the primary node immediately.
+
+    **Priority-based primary selection:** You can specify a priority for each node. DCF selects the primary node based on the specified priorities: a node with a lower priority is activated only when all nodes with higher priorities are unavailable.
+
+- 2.3 Diversified Node Roles
+
+    In addition to the typical leader, follower, and candidate roles, DCF provides custom roles, such as the passive role \(with logs and data, but without the right to be elected or to participate in majority voting\) and the logger role \(with logs and the right to participate in majority voting, but without data or the right to be elected\). With these node roles, DCF supports multiple cluster deployment modes, such as synchronous deployment and synchronous/asynchronous hybrid deployment.
+
+- 2.4 Batch & Pipeline
+
+    Batch: DCF supports multi-level batch operations, including: \(1\) combining multiple logs into a single message for sending; \(2\) combining multiple logs for writing to disk; \(3\) combining multiple logs for replication. Batching effectively reduces the per-message overhead and improves throughput.
+
+    Pipeline: A message is sent to the corresponding node before the results of previous messages are returned. By increasing the number of concurrent in-flight messages \(pipelines\), the delay of a single request can be effectively reduced and performance improved. DCF uses asynchronous modes in multiple stages, such as log persistence, network transmission, and log replication, to maximize pipeline performance.
+
+- 2.5 Efficient Flow Control Algorithm
+
+    Batching and pipelining improve the throughput and performance of the entire system. However, if the batch size is too large, the delay of a single request becomes too long, and too many concurrent requests in turn degrade both throughput and request delay. Therefore, DCF designs an efficient, adaptive flow control algorithm that automatically detects parameters such as network bandwidth, network sending delay, and the number of concurrent requests, and adjusts the batch and pipeline parameters to control the injection of service traffic.
+
+The flow control algorithm process is as follows:
+
+![](../figures/26-openGauss-Log-Consensus-Framework.png)
+
+The core algorithm process is as follows:
+
+1. The DCF primary node periodically samples and calculates consensus information, including the end-to-end consensus latency, end-to-end consensus log bandwidth, and overall log playback bandwidth.
+2. The primary node obtains the performance change trend from the sampling results and historical results, adjusts the control direction and step based on the historical control value and the change trend, and calculates a new control value for better performance.
+3. After the control period expires, the control value is updated.
+4. The control value is continuously applied to service traffic to control the frequency of service traffic injection.
DCF will continue to evolve in scenarios such as data communication, multiple log streams, and parallel large-capacity replication, providing users with efficient, reliable, and easy-to-manage log multi-copy replication and backup capabilities that meet database disaster recovery and high availability requirements.

## 3 How Do We Use DCF?

Assume that the cluster has three nodes whose IP addresses are 192.168.0.11, 192.168.0.12, and 192.168.0.13. The node IDs are 1, 2, and 3, and the node roles are LEADER, FOLLOWER, and FOLLOWER.

To use DCF, set **enable\_dcf** to **on** during installation and deployment; this parameter is disabled by default. For example:

Obtain the XML file template from **script/gspylib/etc/conf/centralized/cluster\_config\_template\_HA.xml**.

The following values are examples and can be replaced as required. Each line is described with a comment.

```

```

- 3.1 Querying the Cluster Status After the Installation Is Complete

  Use **gs\_ctl** to query the cluster status.

  ```
  # gs_ctl query -D
  # gs_ctl query -D /nvme0/gaussdb/cluster/nvme0/dn1
  ```

  ![](../figures/zh-cn_image_0000001251920351.png)

  In the preceding output, **dcf\_replication\_info** indicates the DCF information of the current node.

  **role**: role of the current node. The value can be **LEADER**, **FOLLOWER**, **LOGGER**, **PASSIVE**, **PRE\_CANDICATE**, **CANDIDATE**, or **UNKNOW**. The preceding figure shows that the current node is the leader.

  **term**: election term.

  **run\_mode**: DCF running mode. The value **0** indicates that automatic election is enabled; the value **2** indicates that it is disabled.

  **work\_mode**: DCF working mode.

  **hb\_interval**: heartbeat interval between DCF nodes, in milliseconds.

  **elc\_timeout**: DCF election timeout, in milliseconds.

  **applied\_index**: log position that has been applied to the state machine.

  **commit\_index**: log position that a majority of DCF nodes have saved. Logs before **commit\_index** have been made persistent.

  **first\_index**: position of the first log saved on the DCF node. This position moves backward when the DN calls **dcf\_truncate**; earlier logs are cleared.

  **last\_index**: position of the last log saved by the DCF node. It includes logs that are still in the node's memory and not yet persistent, so last\_index ≥ commit\_index.

  **cluster\_min\_apply\_idx**: smallest applied log position in the cluster.

  **leader\_id**: leader node ID.

  **leader\_ip**: IP address of the leader node.

  **leader\_port**: port of the leader node, for DCF internal use.

  **nodes**: information about the other nodes in the cluster.

- 3.2 Online Cluster Scale Adjustment

  To add a copy online, run the following command:

  ```
  # gs_ctl member --operation=add --nodeid= --ip= --port= -D
  ```

  To reduce the number of copies online, run the following command:

  ```
  # gs_ctl member --operation=remove --nodeid= -D
  ```

  If the cluster is normal, a single copy can be removed within 5 minutes.

- 3.3 Minority Forcible Startup

  In a majority fault scenario, no agreement can be reached under the normal Paxos protocol, so the system cannot continue to provide services.
  In this case, minority forcible startup is required to provide emergency services.

  Run the following command:

  ```
  # cm_ctl setrunmode -n -D --xmode=minority --votenum=
  ```

  In the three-copy cluster scenario, if two copies are faulty, data can still be committed with a single copy.

- 3.4 Switchover

  Switchover between the primary and standby database instances is supported in one-primary, multiple-standby deployment mode, for example, to switch between AZs. Switchover is a maintenance operation: before switching, ensure that the cluster instances are running properly, all services are stopped, and the **pgxc\_get\_senders\_catchup\_time\(\)** view shows no ongoing catchup between the primary and standby nodes.

  For example, run the following command to switch a standby node to primary:

  ```
  # cm_ctl switchover -n -D
  ```

- 3.5 Standby Node Rebuild

  Full build is supported in primary/standby mode. After receiving a full build request, the primary DN stops reclaiming DCF logs, and the standby DN copies Xlogs and data files from the primary DN. After the standby DN's kernel starts, DCF resumes log replication from the recorded log point.

  The following is an example:

  ```
  # gs_ctl build -b full -Z datanode -D
  ```

  The open-source DCF feature is another exploration by openGauss in the distributed field and another substantial contribution to open-source technology. openGauss remains committed to in-depth innovation in database technology, to increasing investment in basic database research and database theory, and to fully opening up its top technical capabilities, working with developers around the world to advance database industry, academia, and research.

diff --git a/content/en/post/2022/openGauss-Supports-SM3-and-SM4-Algorithms.md b/content/en/post/2022/openGauss-Supports-SM3-and-SM4-Algorithms.md
new file mode 100644
index 0000000000000000000000000000000000000000..05a883870bbf3c21afd685564c363c781863667d
--- /dev/null
+++ b/content/en/post/2022/openGauss-Supports-SM3-and-SM4-Algorithms.md
@@ -0,0 +1,150 @@
+++

title = "openGauss Supports SM3 and SM4 Algorithms"

date = "2021-11-15"

tags = ["openGauss Supports SM3 and SM4 Algorithms"]

archives = "2021-11"

author = "Xin Dou"

summary = "openGauss Community Developer Guide"

img = "/en/post/2022/title/img16.png"

times = "17:30"

+++

# openGauss Supports SM3 and SM4 Algorithms

## 1. Introduction to the Chinese Cryptographic Algorithms

Chinese cryptographic algorithms are algorithms issued by the Office of State Commercial Cryptography Administration \(OSCCA\). Common ones include SM1, SM2, SM3, and SM4; for SM4, the key length and block length are both 128 bits. To meet bank customers' requirements for database security capabilities, openGauss 2.0.0 and later versions support Chinese cryptographic algorithms, enhancing the enterprise-level security capabilities and security competitiveness of the database: the SM3 algorithm \(http://www.gmbz.org.cn/main/viewfile/20180108023812835219.html\) is used for user authentication, and the SM4 algorithm \(http://www.gmbz.org.cn/main/viewfile/20180108015408199368.html\) for data encryption and decryption.
## 2. SM3 Algorithm – User Authentication

- 2.1 Usage

openGauss supports four user authentication methods, determined by the **password\_encryption\_type** parameter in the **postgresql.conf** file. The following table lists the mapping between authentication methods and **password\_encryption\_type**.

| Authentication Method | Parameter                  |
| --------------------- | -------------------------- |
| md5                   | password_encryption_type=0 |
| sha256+md5            | password_encryption_type=1 |
| sha256                | password_encryption_type=2 |
| sm3                   | password_encryption_type=3 |

The SM3 algorithm supports three connection modes: gsql, JDBC, and ODBC.

To create a user that authenticates with SM3, perform the following steps:

\(1\) Set **password\_encryption\_type** to **3** in the **postgresql.conf** file and restart the database for the parameter to take effect. The SM3 algorithm will then be used to encrypt the plaintext passwords of newly created users.

![](../figures/zh-cn_image_0000001252703087.png)

\(2\) Create a user.

In the following example, a user **test** is created. You can check the encryption type used at user creation in the **rolpassword** field of the **pg\_authid** system catalog; the following figure shows that the SM3 algorithm was used.

![](../figures/zh-cn_image_0000001252343171.png)

\(3\) In the **pg\_hba.conf** file, set the authentication method to SM3.

![](../figures/zh-cn_image_0000001252703155.png)

The **test** user can now pass authentication through remote login.

![](../figures/zh-cn_image_0000001252343211.png)

A user created with the SM3 encryption algorithm can pass authentication only when both the encryption algorithm and the authentication method are SM3.

For SM3 users, when JDBC is used for remote connection, you need to manually download the **bcprov-jdk15on** JAR package and import it into the application.

[Download link](https://mvnrepository.com/artifact/org.bouncycastle/bcprov-jdk15on/1.68)

The procedure for creating a user with any other authentication method is similar to that for SM3 authentication.

- 2.2 Implementation Principle

  openGauss uses the RFC 5802 password authentication scheme.

  - User key generation

    The following figure shows the RFC 5802 key derivation process.

    ![](../figures/zh-cn_image_0000001252343303.png)

    ```
    SaltedPassword := PBKDF2(password, salt, i)
    ClientKey      := HMAC(SaltedPassword, "Client Key")
    StoredKey      := Hash(ClientKey)
    ServerKey      := HMAC(SaltedPassword, "Server Key")
    ```

    The StoredKey and the ServerKey are stored on the server.

    1\) The StoredKey is used to authenticate the client.

    The server computes the ClientSignature as HMAC\(StoredKey, AuthMessage\), XORs it with the ClientProof sent by the client to recover the ClientKey, hashes that ClientKey, and compares the result with the StoredKey. If they match, the client passes authentication.

    2\) The ServerKey is used to authenticate the server to the client.

    Similarly, the client authenticates the server by comparing the ServerSignature it computes with the value sent by the server. If they are the same, the server is authenticated.

    3\) During authentication, the server can calculate the ClientKey. After authentication completes, the ClientKey is discarded; it never needs to be stored.

    A legitimate login therefore requires the Password, the SaltedPassword, or the ClientKey. If the StoredKey and the ServerKey are leaked, illegitimate login becomes possible. \(A runnable sketch of this derivation follows.\)
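    The derivation above maps directly onto Python's standard hashlib and hmac modules. A minimal sketch, assuming SHA-256 as the hash function and illustrative salt/iteration values \(the real parameters are exchanged during authentication\):

    ```python
    import hashlib
    import hmac

    def derive_keys(password: str, salt: bytes, iterations: int):
        """RFC 5802 key derivation: all keys follow from the password.
        The salt and iteration count here are illustrative placeholders."""
        salted = hashlib.pbkdf2_hmac("sha256", password.encode(), salt, iterations)
        client_key = hmac.new(salted, b"Client Key", hashlib.sha256).digest()
        stored_key = hashlib.sha256(client_key).digest()
        server_key = hmac.new(salted, b"Server Key", hashlib.sha256).digest()
        return client_key, stored_key, server_key

    client_key, stored_key, server_key = derive_keys("Test@123", b"demo-salt", 10000)
    # The server keeps only stored_key and server_key; client_key is never stored.
    print(stored_key.hex())
    ```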
  - Authentication process

    The following figure shows the standard RFC 5802 password authentication process.

    ![](../figures/320.png)

    1. The client sends the username to the server.

    2. The server returns the AuthMessage and the calculated ServerSignature to the client.

    3. After receiving the message, the client uses the salt and iteration count in the AuthMessage to calculate the SaltedPassword from the Password, and then derives all lower-level keys. It checks whether HMAC\(ServerKey, AuthMessage\) equals the received ServerSignature; if so, the client has authenticated the server.

    4. The client sends the calculated ClientProof to the server.

    5. The server computes the HMAC from the saved StoredKey and the AuthMessage, XORs it with the ClientProof received from the client to recover the ClientKey, hashes that ClientKey, and checks whether the result matches the saved StoredKey. If it does, the client passes authentication.

    After receiving a request from a client, the server runs this exchange according to the authentication method configured in the **pg\_hba.conf** file.

## 3. SM4 Algorithm – Data Encryption and Decryption

The Chinese cryptographic algorithm SM4 can be used to encrypt or decrypt data in a table column. The newly added encryption and decryption functions gs\_encrypt and gs\_decrypt are compatible with gs\_encrypt\_aes128 and gs\_decrypt\_aes128, and support encryption and decryption with both AES128 and SM4. The SM4 implementation invokes the EVP\_sm4\_cbc\(\) interface of OpenSSL.

The gs\_encrypt\_aes128 and gs\_decrypt\_aes128 functions are described as follows:

- gs\_encrypt\_aes128\(encryptstr, keystr\)

  Description: Encrypts the **encryptstr** string using **keystr** as the key and returns the encrypted string.

- gs\_decrypt\_aes128\(decryptstr, keystr\)

  Description: Decrypts the **decryptstr** string using **keystr** as the key and returns the decrypted string.

![](../figures/zh-cn_image_0000001207863420.png)

The gs\_encrypt and gs\_decrypt functions are described as follows:

- gs\_encrypt\(encryptstr, keystr, algorithm\)

  Description: Encrypts the **encryptstr** string using **keystr** as the key and returns the encrypted string. The **algorithm** parameter can be **sm4** or **aes128**.

- gs\_decrypt\(decryptstr, keystr, algorithm\)

  Description: Decrypts the **decryptstr** string using **keystr** as the key and returns the decrypted string. The **algorithm** parameter can be **sm4** or **aes128**.

![](../figures/zh-cn_image_0000001252343507.png)

The following figures show how to encrypt and decrypt table data using the SM4 algorithm.

![](../figures/zh-cn_image_0000001252463513.png)

![](../figures/zh-cn_image_0000001252703473.png)

In summary, openGauss supports SM3 for user authentication and SM4 for data encryption and decryption.
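For readers who prefer a copy-and-paste test over the screenshots, here is a minimal sketch of calling these functions from Python via psycopg2. The connection parameters, key, and sample value are illustrative assumptions:

```python
import psycopg2

# Connection parameters are placeholders; adjust them to your environment.
conn = psycopg2.connect(host="192.168.0.11", port=26000,
                        dbname="postgres", user="test", password="Test@123")
cur = conn.cursor()

# Encrypt a value with SM4, then decrypt it again with the same key.
cur.execute("SELECT gs_encrypt(%s, %s, 'sm4')", ("hello openGauss", "Key@123"))
cipher = cur.fetchone()[0]
cur.execute("SELECT gs_decrypt(%s, %s, 'sm4')", (cipher, "Key@123"))
print(cur.fetchone()[0])  # -> hello openGauss

cur.close()
conn.close()
```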
+ diff --git a/content/en/post/2022/title/img1.png b/content/en/post/2022/title/img1.png new file mode 100644 index 0000000000000000000000000000000000000000..2af578504062e5fa7a7aaf7e1c2014531e51e9c2 Binary files /dev/null and b/content/en/post/2022/title/img1.png differ diff --git a/content/en/post/2022/title/img10.png b/content/en/post/2022/title/img10.png new file mode 100644 index 0000000000000000000000000000000000000000..ce35c3cd313c8e4ed939ae18b91b9a64767ab504 Binary files /dev/null and b/content/en/post/2022/title/img10.png differ diff --git a/content/en/post/2022/title/img11.png b/content/en/post/2022/title/img11.png new file mode 100644 index 0000000000000000000000000000000000000000..7ebe22cb03c6ee1e735b29bce766c1e10d334f0c Binary files /dev/null and b/content/en/post/2022/title/img11.png differ diff --git a/content/en/post/2022/title/img12.png b/content/en/post/2022/title/img12.png new file mode 100644 index 0000000000000000000000000000000000000000..0ec8535146c6a1d5e0b78ee6c1a6b3a8ede1cdf3 Binary files /dev/null and b/content/en/post/2022/title/img12.png differ diff --git a/content/en/post/2022/title/img13.png b/content/en/post/2022/title/img13.png new file mode 100644 index 0000000000000000000000000000000000000000..86a420b92fb8289658d807d49f137b6d13862f6d Binary files /dev/null and b/content/en/post/2022/title/img13.png differ diff --git a/content/en/post/2022/title/img14.png b/content/en/post/2022/title/img14.png new file mode 100644 index 0000000000000000000000000000000000000000..1da9e55bd25cbc7cfc6fdef1800b4c95b077829b Binary files /dev/null and b/content/en/post/2022/title/img14.png differ diff --git a/content/en/post/2022/title/img15.jpg b/content/en/post/2022/title/img15.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7ebe22cb03c6ee1e735b29bce766c1e10d334f0c Binary files /dev/null and b/content/en/post/2022/title/img15.jpg differ diff --git a/content/en/post/2022/title/img16.png b/content/en/post/2022/title/img16.png new file mode 100644 index 0000000000000000000000000000000000000000..2af578504062e5fa7a7aaf7e1c2014531e51e9c2 Binary files /dev/null and b/content/en/post/2022/title/img16.png differ diff --git a/content/en/post/2022/title/img17.png b/content/en/post/2022/title/img17.png new file mode 100644 index 0000000000000000000000000000000000000000..b903c7f8d5a3ba8b66b2d6be883a4bac7230915e Binary files /dev/null and b/content/en/post/2022/title/img17.png differ diff --git a/content/en/post/2022/title/img18.png b/content/en/post/2022/title/img18.png new file mode 100644 index 0000000000000000000000000000000000000000..1697caef6995dd16977bb9aa96af762e19fb7102 Binary files /dev/null and b/content/en/post/2022/title/img18.png differ diff --git a/content/en/post/2022/title/img19.png b/content/en/post/2022/title/img19.png new file mode 100644 index 0000000000000000000000000000000000000000..5537c95b900978a3020269be7ec52ce914224844 Binary files /dev/null and b/content/en/post/2022/title/img19.png differ diff --git a/content/en/post/2022/title/img2.png b/content/en/post/2022/title/img2.png new file mode 100644 index 0000000000000000000000000000000000000000..5537c95b900978a3020269be7ec52ce914224844 Binary files /dev/null and b/content/en/post/2022/title/img2.png differ diff --git a/content/en/post/2022/title/img20.png b/content/en/post/2022/title/img20.png new file mode 100644 index 0000000000000000000000000000000000000000..ce35c3cd313c8e4ed939ae18b91b9a64767ab504 Binary files /dev/null and b/content/en/post/2022/title/img20.png differ diff --git 
a/content/en/post/2022/title/img21.png b/content/en/post/2022/title/img21.png new file mode 100644 index 0000000000000000000000000000000000000000..b71bb7d740d0f375bbea6116ffde9175c0dbcacf Binary files /dev/null and b/content/en/post/2022/title/img21.png differ diff --git a/content/en/post/2022/title/img22.png b/content/en/post/2022/title/img22.png new file mode 100644 index 0000000000000000000000000000000000000000..31e776c19ddc9b62b4b88171d015b1b94ff2b022 Binary files /dev/null and b/content/en/post/2022/title/img22.png differ diff --git a/content/en/post/2022/title/img3.png b/content/en/post/2022/title/img3.png new file mode 100644 index 0000000000000000000000000000000000000000..b903c7f8d5a3ba8b66b2d6be883a4bac7230915e Binary files /dev/null and b/content/en/post/2022/title/img3.png differ diff --git a/content/en/post/2022/title/img4.png b/content/en/post/2022/title/img4.png new file mode 100644 index 0000000000000000000000000000000000000000..6b7b474933a31c6a20d0d1708e8909163293b4ad Binary files /dev/null and b/content/en/post/2022/title/img4.png differ diff --git a/content/en/post/2022/title/img5.png b/content/en/post/2022/title/img5.png new file mode 100644 index 0000000000000000000000000000000000000000..830c8bc490a1b830e759df1f04b453909a097406 Binary files /dev/null and b/content/en/post/2022/title/img5.png differ diff --git a/content/en/post/2022/title/img6.png b/content/en/post/2022/title/img6.png new file mode 100644 index 0000000000000000000000000000000000000000..b71bb7d740d0f375bbea6116ffde9175c0dbcacf Binary files /dev/null and b/content/en/post/2022/title/img6.png differ diff --git a/content/en/post/2022/title/img7.png b/content/en/post/2022/title/img7.png new file mode 100644 index 0000000000000000000000000000000000000000..830c8bc490a1b830e759df1f04b453909a097406 Binary files /dev/null and b/content/en/post/2022/title/img7.png differ diff --git a/content/en/post/2022/title/img8.png b/content/en/post/2022/title/img8.png new file mode 100644 index 0000000000000000000000000000000000000000..31e776c19ddc9b62b4b88171d015b1b94ff2b022 Binary files /dev/null and b/content/en/post/2022/title/img8.png differ diff --git a/content/en/post/2022/title/img9.png b/content/en/post/2022/title/img9.png new file mode 100644 index 0000000000000000000000000000000000000000..1da9e55bd25cbc7cfc6fdef1800b4c95b077829b Binary files /dev/null and b/content/en/post/2022/title/img9.png differ diff --git a/content/en/post/public_sys-resources/icon-caution.gif b/content/en/post/public_sys-resources/icon-caution.gif new file mode 100644 index 0000000000000000000000000000000000000000..6e90d7cfc2193e39e10bb58c38d01a23f045d571 Binary files /dev/null and b/content/en/post/public_sys-resources/icon-caution.gif differ diff --git a/content/en/post/public_sys-resources/icon-danger.gif b/content/en/post/public_sys-resources/icon-danger.gif new file mode 100644 index 0000000000000000000000000000000000000000..6e90d7cfc2193e39e10bb58c38d01a23f045d571 Binary files /dev/null and b/content/en/post/public_sys-resources/icon-danger.gif differ diff --git a/content/en/post/public_sys-resources/icon-note.gif b/content/en/post/public_sys-resources/icon-note.gif new file mode 100644 index 0000000000000000000000000000000000000000..6314297e45c1de184204098efd4814d6dc8b1cda Binary files /dev/null and b/content/en/post/public_sys-resources/icon-note.gif differ diff --git a/content/en/post/public_sys-resources/icon-notice.gif b/content/en/post/public_sys-resources/icon-notice.gif new file mode 100644 index 
0000000000000000000000000000000000000000..86024f61b691400bea99e5b1f506d9d9aef36e27 Binary files /dev/null and b/content/en/post/public_sys-resources/icon-notice.gif differ diff --git a/content/en/post/public_sys-resources/icon-tip.gif b/content/en/post/public_sys-resources/icon-tip.gif new file mode 100644 index 0000000000000000000000000000000000000000..93aa72053b510e456b149f36a0972703ea9999b7 Binary files /dev/null and b/content/en/post/public_sys-resources/icon-tip.gif differ diff --git a/content/en/post/public_sys-resources/icon-warning.gif b/content/en/post/public_sys-resources/icon-warning.gif new file mode 100644 index 0000000000000000000000000000000000000000..6e90d7cfc2193e39e10bb58c38d01a23f045d571 Binary files /dev/null and b/content/en/post/public_sys-resources/icon-warning.gif differ diff --git a/content/en/prepare/prepare.md b/content/en/prepare/prepare.md index a21fc8c6e250e790a8998556997ebeae1e2830cb..c13054258c2ea38f19ff0800f05cc0ff4613bfbe 100644 --- a/content/en/prepare/prepare.md +++ b/content/en/prepare/prepare.md @@ -1,5 +1,5 @@ -1. Please refer to the http://git.mydoc.io/?t=179267 to register for Gitee account. +1. Please refer to the https://gitee.com/help/articles/4113 to register for Gitee account. 2. Setting your main E-mail in gitee(http://gitee.com/profile/emails). 3. Signing CLA in the https://opengauss.org/en/cla.html. -4. Refer to the http://git.mydoc.io/?t=180692 to prepare the git environment. +4. Refer to the https://gitee.com/help/articles/4107 to prepare the git environment. 5. Understanding the blog format. \ No newline at end of file diff --git a/content/zh/guidance/index_.md b/content/zh/guidance/index_.md index be91b91e4f601001c62bff3198738b81cb849acd..72d998c7d3b09ac03c4d685e18eeec2374dc9a8c 100644 --- a/content/zh/guidance/index_.md +++ b/content/zh/guidance/index_.md @@ -6,13 +6,13 @@ title = "Guidance to Post a Blog" ## Preparation -1. Refer to http: //git.mydoc.io/?t=179267 to register Gitee account. +1. Refer to https://gitee.com/help/articles/4113 to register Gitee account. -2. Set your primary mail box in gitee settings https: //gitee.com/profile/emails. +2. Set your primary mail box in gitee settings https://gitee.com/profile/emails. -3. Sign your CLA in . +3. Sign your CLA in . -4. Prepare your git environment refering to http: //git.mydoc.io/?t=180692. +4. Prepare your git environment refering to https://gitee.com/help/articles/4107. ## Understand blog format @@ -39,7 +39,7 @@ Tips: you can copy content/_example/2020-03-03-sample-post.md to your folder and The blog posting follows the pull request of Gitee. -1. Fork the blog project to your own gitee. Refer to for detailed guidance. +1. Fork the blog project to your own gitee. Refer to for detailed guidance. 2. Clone the code to your local environment. @@ -82,6 +82,6 @@ git commit -m "" git push origin : ``` -7. Refer to http: //git.mydoc.io/?t=153749to submit your Pull Request +7. Refer to https://gitee.com/help/articles/4122to submit your Pull Request 8. Wait for reviewing and merging. 
diff --git "a/content/zh/post/11/openGauss\350\216\267\345\245\226\351\241\271\347\233\256\350\256\262\350\247\243.md" "b/content/zh/post/11/openGauss\350\216\267\345\245\226\351\241\271\347\233\256\350\256\262\350\247\243.md" new file mode 100644 index 0000000000000000000000000000000000000000..b588aa244d0dc946d38c45aa15949abd25b3192a --- /dev/null +++ "b/content/zh/post/11/openGauss\350\216\267\345\245\226\351\241\271\347\233\256\350\256\262\350\247\243.md" @@ -0,0 +1,223 @@ ++++ + +title = "openGauss获奖项目讲解" + +date = "2022-9-14" + +tags = ["第四届openGauss有奖征文", "openGauss比赛作品"] + +archives = "2022-09" + +author = "m丶shine" + +summary = "2022年8月30日华为鲲鹏应用大赛openGauss赛道上海赛区第三名获奖作品开源分享,我们团队参加本次比赛的时候想了很多方案,但是最终还是决定用自己最擅长的项目." + ++++ + + + +# 前言 + + 2022年8月30日华为鲲鹏应用大赛openGauss赛道上海赛区第三名获奖作品开源分享,我们团队参加本次比赛的时候想了很多方案,但是最终还是决定用自己最擅长的项目 + +# 参赛方案介绍 + + 系统实现学生在线考试管理的基本功能,包括学生登录、查看自己的个人信息及考试信息;提供了在线考试的界面;后台管理员有管理员添加学生、管理学生、管理成绩、添加课程、添加题库题目和组建试卷等功能。本次的学生在线考试管理系统采用Python Django做后端、前端框架采用Bootstrap4实现,实现学生考试的动态管理,使得对信息的管理更加及时、高效,提高了效率。同时还对系统的开发原理、功能特点和设计方案进行了介绍。关键词:考试管理 openGuass数据库 Python Django Web + +# 系统需求分析 + + 系统需求分析 + + (1)学生用户是主要的需求者,主要需求功能是查看当前自己的考试信息、查看考试成绩并可导出以及进行在线考试等。 + + (2)教师用户主要需求功能是为自己所教授的课程组建题库和相对应的试卷,并可以查看学生的考试信息等。 + + (3)管理员用户的功能需求较为复杂,包括对学生信息、教师信息、考试信息进行管理。 + +# 主要功能模块 + + (1)用户登录:实现简单的登录及验证 + + (2)个人信息展示:展示考生个人信息 + + (3)考试管理:展示可以考试的试卷,考试及自动计算考试成绩。 + + (4)考试成绩管理:展示考试结果、导出考试信息 + + (5)后台基础数据管理:试卷,试题,考生等信息更新维护。 + +![输入图片说明](../../../../%E5%9B%BE%E7%89%87.png) + +# 设计思路 + + 设计思路 + + 系统设计包括三部分:数据库设计,功能函数视图设计,前端页面设计 + + 数据库设计 + + 根据对系统需求分析,得出该考试管理系统大致需要六个实体,他们的实体属性如下图所示: + +![输入图片说明](../../../../%E5%9B%BE%E7%89%87.png) + + 根据上面的实体联系图可以得出以下几个表: + + 学院表:Academy + + 专业表:Major + + 课程表:Course + + 学生表:Student + + 题库表:QuestionBank + + 试卷表:TestPaper + + 学生成绩表:Record + + 1.学院---序号、名称 + + 2.专业---序号、名称 + + 3.课程---课程号、课程名 + + 4.学生---学号、密码、姓名、性别、班级、邮箱 + + 5.试卷---标题、科目、使用专业、考试时常、开始时间 + + 6.题库---序号、专业、科目、备选项、题目、答案、难度、分值、题目类型 + +![输入图片说明](../../../../%E5%9B%BE%E7%89%87.png) + +# 字段基本数据分析 + +![输入图片说明](../../../../%E5%9B%BE%E7%89%87.png) + +![输入图片说明](../../../../%E5%9B%BE%E7%89%87.png) + +# 页面及功能设计 + + 1、登录页面: + + 其中需要登录,校验,登录后同时需要存储用户信息在Session中,以备登录后的页面使用。 + + 2、首页(考试信息页): + + 页面需要显示当前用户可以参考的考试试卷信息,在此页面点击开始考试可以跳转到考试页面。 + + 3、考试页面: + + 展示对应试卷的题目和选项信息,同时可以进行答题,交卷后可以自动计算考试分数并存入数据库。 + + 4、成绩显示页面: + + 展示对应考试的考试结果 + + 5、后台管理: + + 用于管理我们的专业,考生,试卷,题库等基础信息,为了快速实现系统将直接启用Django自带的Admin管理功能。 + + 6、个人详情: + + 用于展示个人详情信息。 + +![输入图片说明](../../../../%E5%9B%BE%E7%89%87.png) + +功能函数视图设计 + + # 功能函数视图设计 + + 学生在线考试系统是实现学生在线考试、查看相关信息的一个平台,整个学生在线考试系统共分为3个大模块:管理员模块,学生模块和公有模块,其中复杂的方法和模块的详细设计流程图如下。 + +![输入图片说明](../../../../%E5%9B%BE%E7%89%87.png) + +![输入图片说明](../../../../.gitee/%E5%9B%BE%E7%89%87.png) + +![输入图片说明](../../../../.gitee/%E5%9B%BE%E7%89%87.png) + +![输入图片说明](../../../../.gitee/%E5%9B%BE%E7%89%87.png) + +![输入图片说明](../../../../.gitee/%E5%9B%BE%E7%89%87.png) + +# 技术亮点及优势 + + openGuass是基于PostgreSQL9.2版本开发的,基本包括了PostgreSQL9.4的功能。所以可以采用连接postgresql的方式连接openGauss数据库。 + +django项目下的setting中的DATABASES下进行以下配置: + +DATABASES = { + +'default': { + +# 'ENGINE': 'django.db.backends.sqlite3', + +'ENGINE': 'django.db.backends.postgresql_psycopg2', + +'NAME': 'postgres', #数据库名 + +'USER': 'andy', #用户名 + +'PASSWORD': 'xxxxxxx', #密码 + +'HOST': 'xxx.xxx.xxx.xxx',#虚拟机ip + +'PORT': xxxx #openGauss数据口的端口 + +} + +} + + Django项目框架搭建起来后,我们所有对系统的前后台所有的程序开发都可以在这个项目中进行了,一个典型的Django项目模块功能的开发包括如下几个步骤: + + (1)创建app + + (2)注册app + + (3)定义模型 + + (4)定义视图函数 + + (5)配置访问路由URL + + (6)静态资源准备及配置 + + (7)前端模板开发 + + (8)测试及运行 + + + +# 商业模式及市场前景 + + 
# Business Model and Market Outlook

Foreign databases still hold most of the domestic market. Anyone who follows the international situation understands how important it is to keep core technology in our own hands, so promoting the use of our own core technology really matters to us!

![输入图片说明](../../../../.gitee/%E5%9B%BE%E7%89%87.png)

Function test
![输入图片说明](../../../../.gitee/%E5%9B%BE%E7%89%87.png)

Login test
![输入图片说明](../../../../.gitee/%E5%9B%BE%E7%89%87.png)

Query test
![输入图片说明](../../../../.gitee/%E5%9B%BE%E7%89%87.png)

Exam test
![输入图片说明](../../../../.gitee/%E5%9B%BE%E7%89%87.png)

# Project Summary

Through this openGauss development effort, our team further mastered database methods and techniques, improved our practical software development skills, and strengthened our ability to design, analyze, and solve problems.

We studied and practiced the knowledge needed to analyze and design software systems, including object-oriented analysis and design, coding, and testing.

We became familiar with analyzing entity relationships from real requirements and drawing ER diagrams to design database tables that meet those requirements.

We studied and practiced using database create/read/update/delete operations in concrete feature development.

We became familiar with operating the openGauss database.

diff --git "a/content/zh/post/2022/CentOS-7-9-\345\256\211\350\243\205-openGauss-2-1-0-\344\271\213\345\211\245\350\214\247\346\212\275\344\270\235.md" "b/content/zh/post/2022/CentOS-7-9-\345\256\211\350\243\205-openGauss-2-1-0-\344\271\213\345\211\245\350\214\247\346\212\275\344\270\235.md"
new file mode 100644
index 0000000000000000000000000000000000000000..5667380efc2d7621b334658f02c0569fb7906647
--- /dev/null
+++ "b/content/zh/post/2022/CentOS-7-9-\345\256\211\350\243\205-openGauss-2-1-0-\344\271\213\345\211\245\350\214\247\346\212\275\344\270\235.md"
@@ -0,0 +1,221 @@
+++

title = "Installing openGauss 2.1.0 on CentOS 7.9, Unraveled Step by Step"

date = "2021-12-23"

tags = [ "Installing openGauss 2.1.0 on CentOS 7.9, Unraveled Step by Step"]

archives = "2021-12"

author = "问天的天问"

summary = "Installing openGauss 2.1.0 on CentOS 7.9, unraveled step by step"

img = "/zh/post/2022/title/img2.png"

times = "12:30"

+++

# Installing openGauss 2.1.0 on CentOS 7.9, Unraveled Step by Step

问天的天问 2021/12/23

This article distills the key installation steps after consulting the official installation documentation.

## \[1\] Basic Environment

```
# timedatectl set-timezone Asia/Shanghai
# hostnamectl set-hostname gauss01

# nmcli con mod enp0s3 ipv4.method manual ipv4.address 192.168.2.131/24
# nmcli con mod enp0s3 ipv4.gateway 192.168.2.1
# nmcli con mod enp0s3 ipv4.dns 192.168.1.1
# nmcli con mod enp0s3 connection.autoconnect yes
```

## \[2\] Disable SELinux

```
# sed -i 's@\(^SELINUX=\).*@\1disabled@g' /etc/selinux/config
```

## \[3\] Disable Firewall

```
# systemctl disable firewalld
```

## \[4\] Disable Transparent HugePages

```
# sed -i '/linux16.*$/s//& transparent_hugepage=never/g' /boot/grub2/grub.cfg

Reboot and confirm:
# cat /sys/kernel/mm/transparent_hugepage/enabled
always madvise [never]
```

## \[5\] File Handle Limits

```
# cat >> /etc/security/limits.conf << EOF

*`echo -e "\t"`soft`echo -e "\t"`nofile`echo -e "\t"`1000000
*`echo -e "\t"`hard`echo -e "\t"`nofile`echo -e "\t"`1000000
EOF
```

## \[6\] NIC Settings

Add MTU=8192 as the last line of the NIC configuration file.

```
# cat /etc/sysconfig/network-scripts/ifcfg-enp0s3
……
MTU=8192
```

## \[7\] Change the Reported OS Version

```
# cat /etc/redhat-release
CentOS Linux release 7.9.2009 (Core)
Change it to:
CentOS Linux release 7.6.1810 (Core)
```

## \[8\] Kernel Parameters

On the kernel side, the officially recommended values largely match the CentOS 7.9 defaults; only four parameters differ:

- net.ipv4.tcp\_retries1
- net.ipv4.tcp\_syn\_retries
- net.ipv4.ip\_local\_port\_range
- vm.overcommit\_ratio

Decide whether to change them based on your actual situation.

## \[9\] Install Python

```
# yum install -y python36
```

## \[10\] Install Packages

Officially recommended packages:

```
# yum install -y libaio-devel flex bison ncurses-devel glibc-devel patch redhat-lsb readline-devel
```

Personally recommended packages:

```
# yum install -y bzip2 net-tools lrzsz
```

## \[11\] Disable RemoveIPC

Disabled by default on CentOS; no configuration needed.

## \[12\] Create the Group and User

Neither the group nor the user needs to be created in advance; both are created automatically during installation.

## \[13\] Unpack the Installation Package

```
# mkdir -p /opt/software/openGauss
# chmod 755 -R /opt/software
# cd /opt/software/openGauss

Upload openGauss-2.1.0-CentOS-64bit-all.tar.gz and unpack it:
# tar -zxvf openGauss-2.1.0-CentOS-64bit-all.tar.gz
# tar -zxvf openGauss-2.1.0-CentOS-64bit-om.tar.gz
```
## \[14\] Edit the Configuration Script

```
# cp script/gspylib/etc/conf/cluster_config_template.xml cluster_config.xml
# vi /opt/software/openGauss/cluster_config.xml
```

Adjust it to your actual environment.

## \[15\] Install and Initialize

Install as the root user; the installation script creates the dbgrp group and the omm user by itself.

```
# cd script
# python3 gs_preinstall -U omm -G dbgrp -X /opt/software/openGauss/cluster_config.xml
```

Initialize the database as the omm user:

```
# chown -R omm:dbgrp /opt/software/openGauss
# su - omm
$ gs_install -X /opt/software/openGauss/cluster_config.xml
```

After successful initialization, connect to the database:

```
$ gsql -d postgres -p 15400
gsql ((openGauss 2.1.0 build 590b0f8e) compiled at 2021-09-30 14:29:04 commit 0 last mr )
Non-SSL connection (SSL connection is recommended when requiring high-security)
Type "help" for help.

openGauss=#
```

Here, postgres is the name of the database to connect to, and 15400 is the port number of the database node, that is, the value of dataPortBase in the cluster\_config.xml configuration file.

## \[16\] Uninstall openGauss

```
# su - omm
$ gs_uninstall --delete-data
The command does not uninstall everything; some manual deletion is still needed. You can also skip the command and delete manually:
# userdel -r omm
# groupdel dbgrp
# rm -rf /opt/software /opt/huawei
# rm -rf /var/log/omm
```

diff --git "a/content/zh/post/2022/CentOs\350\231\232\346\213\237\346\234\272\344\270\213opengauss\347\232\204\351\205\215\347\275\256\344\275\277\347\224\250.md" "b/content/zh/post/2022/CentOs\350\231\232\346\213\237\346\234\272\344\270\213opengauss\347\232\204\351\205\215\347\275\256\344\275\277\347\224\250.md"
new file mode 100644
index 0000000000000000000000000000000000000000..89cbbb7a3adb6617504810e9bedd3d6c4ed9b405
--- /dev/null
+++ "b/content/zh/post/2022/CentOs\350\231\232\346\213\237\346\234\272\344\270\213opengauss\347\232\204\351\205\215\347\275\256\344\275\277\347\224\250.md"
@@ -0,0 +1,488 @@
+++

title = "Configuring and Using openGauss in a CentOS VM"

date = "2021-12-11"

tags = [ "Configuring and Using openGauss in a CentOS VM"]

archives = "2021-12"

author = "parker"

summary = "Configuring and using openGauss in a CentOS VM"

img = "/zh/post/2022/title/img3.png"

times = "12:30"

+++

# Configuring and Using openGauss in a CentOS VM

## Environment

VM platform: VMware

Server: CentOS 7.9

Host system: Windows 10

Deployed version: openGauss 1.1.0

## Detailed Installation Steps

- VMware

  Already configured on this machine; skipped here.

- Installing CentOS 7.9

  Download the image CentOS-7-x86\_64-DVD-2009.iso

  ![](figures/2c62c125feb04ff89234abf76991601e.png)

- Select the image in the VM and install

  ![](figures/7294465883ce45ac80a371f63dfe9659.png)

  ![](figures/356c385d615b442e951be7d27f00702e.png)

- Settings

  Memory: 2 GB

  Processors: 2

  Network: defaults

  The sound card and printer are unused and were removed.

After booting, proceed with the OS installation; points to note:

- Partitioning

  Choose System > Installation Destination > manual partitioning, as follows:

  ![](figures/5d3d9f82ce164b08a6866a606fd7e03d.png)

  ![](figures/f569229a746940cba90ed0cda6fd1d2f.png)

- Network and host name

  Choose System > Network & Host Name and configure as follows:

  ![](figures/0bacb67d8b9d4ff6b786b2b734458b10.png)

  ![](figures/5e12f329abe74ed38ae99d8828adaa5d.png)

  Record the IP address and host name; they are needed later:

  ```
  ip 192.168.201.131
  hostname db1
  ```

- Software selection

  Choose Software > Software Selection and configure as follows:

  ![](figures/721e491c70e948abadf18b2eda7ce76f.png)

- User setup

  After the settings above, click Install and complete the user setup as prompted.

  ![](figures/22b37a0e95ea4472b4d331064192382c.png)

  When the installation finishes, reboot and log in to complete the setup.

  ![](figures/1e1aea950edc44d99adc91c658a9e14a.png)

- Network test

  ![](figures/0feab0d29d324acc9c4e87ffc7a3e826.png)

- Modify the OS version \(can be skipped on CentOS 7.6\)

  Open /etc/redhat-release with vi and edit it as follows \(switch to root with su root first\):

  ![](figures/c726f71fc88c4015b1d89f4586dfe290.png)

- Disable the firewall

  Run the following commands:

  ```
  systemctl stop firewalld.service

  systemctl disable firewalld.service
  ```
  ![](figures/614036c6b5d84a0c86de61b3cbf88b78.png)

- Set the character set and environment variables

  ![](figures/ba1ea7c4485b4830b21538d56ecac309.png)

- Disable swap

  ![](figures/2775a3f24eb44c02931d63e302a4bf9c.png)

- Configure the yum environment

  Back up the yum configuration file

  ![](figures/27b944a22e1d45b39a0167b83e4d55a0.png)

- Download a usable repo file

  ![](figures/3507d173b3e24d9f94dd543947ae33ef.png)

- Check that the repo file is correct

  ![](figures/1e185faf72d14f6bb07e527d753614ed.png)

- Install the required packages with yum

  ```
  yum install -y libaio-devel flex bison ncurses-devel glibc.devel patch lsb_release wget python3
  ```

  ![](figures/dc1c632c7c0f49f2ab7ebd57f78915d6.png)

  Set the python version to 3.x

  ![](figures/641abf7f6c9642b188ade66b1c8d25ee.png)

- After the change, confirm that yum still works. If it does not \(as in this example\), edit /usr/bin/yum and change \#!/usr/bin/python to \#!/usr/bin/python2.7

  ![](figures/61364d2741cc46f7802cb48cc75571fe.png)

## Database Installation

- Create the database installation directory

  ![](figures/cd094375c2b44a8383694267e492fc63.png)

- Download the database installation package

  ![](figures/a6d0fc02a8c948f2b43e4ef47cecd731.png)

- Create the XML configuration file used for installation

  In the openGauss folder, edit clusterconfig.xml with vi:

  ```

  ```

  Set the IP to 192.168.201.131 recorded earlier and the host name to db1, as follows:

  ![](figures/d21813079e7b40a1b9edde6b9298d2f3.png)

- Unpack the installation package

  ![](figures/7a7b1fc98317411a9a18982e944ba5c2.png)

- After unpacking, check and adjust the file permissions

  ![](figures/128f20b65c554c85bbcda62acad5616e.png)

- Run the preinstall script

  ```
  cd /opt/software/openGauss/script

  python gs_preinstall -U omm -G dbgrp -X /opt/software/openGauss/clusterconfig.xml
  ```

  When "Preinstallation succeeded" is returned, the preinstall step is complete.

  ![](figures/ee22045a1dca446b925881137106db5c.png)

- Initialize the database

  After rebooting the VM, initialize the database as the omm user:

  ```
  gs_install -X /opt/software/openGauss/clusterconfig.xml --gsinit-parameter="--encoding=UTF8" --dn-guc="max_process_memory=2GB" --dn-guc="shared_buffers=128MB" --dn-guc="bulk_write_ring_size=128MB" --dn-guc="cstore_buffers=16MB"
  ```

  Set the memory-related parameters according to the VM's capacity.

  ![](figures/816de1e0a8c04796a4f3478eff37baed.png)

- Clean up the installation packages after installation

  ![](figures/387c8fc827e34000936c977270c10f22.png)

## Connecting to the Database

![](figures/faa8002b28d94f5b9408f0e251daebc7.png)

- JDBC configuration

  Download the matching JAR package from the official website, unpack it, and configure and load the driver class in Eclipse.

  After the first connection, the omm user's password must be changed before operating the database.

  ![](figures/0497eb639cb14b5182dc5b2aff97a757.png)

  The demo program from the official documentation, adapted, connects successfully:

  ![](figures/cb8039252a6b45e99d8ff682fb9df992.png)

- Demo program:
  ```java
  package gaussjdbc;

  import java.sql.Connection;
  import java.sql.DriverManager;
  import java.sql.PreparedStatement;
  import java.sql.SQLException;
  import java.sql.Statement;
  import java.sql.Types;
  import java.sql.CallableStatement;

  public class Gaussjdbc {

      // Create the database connection.
      public static Connection GetConnection(String username, String passwd) {
          String driver = "org.postgresql.Driver";
          String sourceURL = "jdbc:postgresql://192.168.201.131:26000/postgres";
          Connection conn = null;
          try {
              // Load the database driver.
              Class.forName(driver).newInstance();
          } catch (Exception e) {
              e.printStackTrace();
              return null;
          }

          try {
              // Create the database connection.
              conn = DriverManager.getConnection(sourceURL, username, passwd);
              System.out.println("Connection succeed!");
          } catch (Exception e) {
              e.printStackTrace();
              return null;
          }

          return conn;
      };

      // Run a plain SQL statement to create the customer_t1 table.
      public static void CreateTable(Connection conn) {
          Statement stmt = null;
          try {
              stmt = conn.createStatement();

              // Execute the plain SQL statement.
              int rc = stmt
                      .executeUpdate("CREATE TABLE customer_t1(c_customer_sk INTEGER, c_customer_name VARCHAR(32));");

              stmt.close();
          } catch (SQLException e) {
              if (stmt != null) {
                  try {
                      stmt.close();
                  } catch (SQLException e1) {
                      e1.printStackTrace();
                  }
              }
              e.printStackTrace();
          }
      }

      // Run a prepared statement to batch-insert data.
      public static void BatchInsertData(Connection conn) {
          PreparedStatement pst = null;

          try {
              // Generate the prepared statement.
              pst = conn.prepareStatement("INSERT INTO customer_t1 VALUES (?,?)");
              for (int i = 0; i < 3; i++) {
                  // Add parameters.
                  pst.setInt(1, i);
                  pst.setString(2, "data " + i);
                  pst.addBatch();
              }
              // Execute the batch.
              pst.executeBatch();
              pst.close();
          } catch (SQLException e) {
              if (pst != null) {
                  try {
                      pst.close();
                  } catch (SQLException e1) {
                      e1.printStackTrace();
                  }
              }
              e.printStackTrace();
          }
      }

      // Run a precompiled statement to update data.
      public static void ExecPreparedSQL(Connection conn) {
          PreparedStatement pstmt = null;
          try {
              pstmt = conn
                      .prepareStatement("UPDATE customer_t1 SET c_customer_name = ? WHERE c_customer_sk = 1");
              pstmt.setString(1, "new Data");
              int rowcount = pstmt.executeUpdate();
              pstmt.close();
          } catch (SQLException e) {
              if (pstmt != null) {
                  try {
                      pstmt.close();
                  } catch (SQLException e1) {
                      e1.printStackTrace();
                  }
              }
              e.printStackTrace();
          }
      }

      // Execute a stored procedure.
      public static void ExecCallableSQL(Connection conn) {
          CallableStatement cstmt = null;
          try {

              cstmt = conn.prepareCall("{? = CALL TESTPROC(?,?,?)}");
              cstmt.setInt(2, 50);
              cstmt.setInt(1, 20);
              cstmt.setInt(3, 90);
              cstmt.registerOutParameter(4, Types.INTEGER); // Register an out parameter of integer type.
              cstmt.execute();
              int out = cstmt.getInt(4); // Get the out parameter.
              System.out.println("The CallableStatment TESTPROC returns:" + out);
              cstmt.close();
          } catch (SQLException e) {
              if (cstmt != null) {
                  try {
                      cstmt.close();
                  } catch (SQLException e1) {
                      e1.printStackTrace();
                  }
              }
              e.printStackTrace();
          }
      }

      /**
       * Main program: call the static methods one by one.
       * @param args
       */
      public static void main(String[] args) {
          // Create the database connection.
          Connection conn = GetConnection("parker", "parker@123");

          // Create the table.
          CreateTable(conn);

          // Batch-insert data.
          BatchInsertData(conn);

          // Run the precompiled statement to update data.
          ExecPreparedSQL(conn);

          // Execute the stored procedure.
          //ExecCallableSQL(conn); // This part had problems at run time and is commented out.

          // Close the database connection.
          try {
              conn.close();
          } catch (SQLException e) {
              e.printStackTrace();
          }

      }

  }
  ```

## Problems Encountered and How They Were Solved

- Preinstall script failure

  ![](figures/a662d9a9a96b40d089a6d9c68788bf3d.png)

  ![](figures/dbc89373c5734638a51add74523f640c.png)

- Configuring Java on CentOS

  Locating the bundled java:

  ![](figures/480ae4bbdd664652af43663f061aae84.png)

  Configuring the CentOS environment variables:

  ![](figures/17fb09d479354307b7e2a8b27cbd2f7e.png)

  Later, while verifying javac, we found that the java bundled with CentOS is a runtime only, so we switched to Windows as the client.

- Alternatively, download and configure a Java environment yourself:

  ![](figures/05476910e9e44c9fb0723d26b0f467f4.png)

- Database connection problems

  Error caused by the changed client IP not being whitelisted:

  ![](figures/591c2725601c492cbccf312e9b2a7a11.png)

  The command to whitelist an IP \(see "client access authentication" in the official documentation\):

  ```
  gs_guc set -N all -I all -h "host all parker 192.168.201.1/32 sha256"
  ```

  If you are unsure of the client IP, check the error message or run ipconfig locally.

diff --git "a/content/zh/post/2022/Go\350\257\255\350\250\200\350\277\236\346\216\245openGauss\347\216\257\345\242\203\346\220\255\345\273\272\350\277\207\347\250\213\357\274\210\345\220\253OG\345\256\211\350\243\205\357\274\211.md" "b/content/zh/post/2022/Go\350\257\255\350\250\200\350\277\236\346\216\245openGauss\347\216\257\345\242\203\346\220\255\345\273\272\350\277\207\347\250\213\357\274\210\345\220\253OG\345\256\211\350\243\205\357\274\211.md"
new file mode 100644
index 0000000000000000000000000000000000000000..2b097fbb8dd9d12ecff4c5ec1afd1dc674f68283
--- /dev/null
+++ "b/content/zh/post/2022/Go\350\257\255\350\250\200\350\277\236\346\216\245openGauss\347\216\257\345\242\203\346\220\255\345\273\272\350\277\207\347\250\213\357\274\210\345\220\253OG\345\256\211\350\243\205\357\274\211.md"
@@ -0,0 +1,590 @@
"b/content/zh/post/2022/Go\350\257\255\350\250\200\350\277\236\346\216\245openGauss\347\216\257\345\242\203\346\220\255\345\273\272\350\277\207\347\250\213\357\274\210\345\220\253OG\345\256\211\350\243\205\357\274\211.md" @@ -0,0 +1,590 @@ ++++ + +title = "Go语言连接openGauss环境搭建过程(含OG安装)" + +date = "2021-12-24" + +tags = [ "Go语言连接openGauss环境搭建过程(含OG安装)"] + +archives = "2021-12" + +author = "葛二萌" + +summary = "Go语言连接openGauss环境搭建过程(含OG安装)" + +img = "/zh/post/2022/title/img4.png" + +times = "12:30" + ++++ + +# Go语言连接openGauss环境搭建过程(含OG安装) + +## 1.前言 + +本文共分为openGauss单机版安装部分和连接环境搭建部分,提供了通过go语言来连接openGauss的一种方案。openGauss现在也有了基于go的驱动,但是我觉得ODBC的方式更为通用一些,也不应被丢弃,因此本文使用go通过ODBC来连接openGauss。 + +- 硬件及软件环境: + + 硬件环境:虚拟机的内存8GB,4核心CPU,900G磁盘(非必须) + + 软件环境:CentOS7.6 + + 数据库版本:opengauss2.0企业版:openGauss-2.0.0-CentOS-64bit-all.tar.gz + + +## 2.openGauss单机版安装: + +- 2.1. 关闭防火墙 + + ``` + #停止firewall + systemctl stop firewalld.service + #禁止firewall开机启动 + systemctl disable firewalld.service + #检查防火墙是否关闭。 + systemctl status firewalld + ``` + + - 说明: + + 若防火墙状态显示为active \(running\),则表示防火墙未关闭。 + + 若防火墙状态显示为inactive \(dead\),则无需再关闭防火墙。 + + +- 2.2. 设置时区和时间 + + 将各数据库节点的时区设置为相同时区,可以将/usr/share/zoneinfo/目录下的时区文件拷贝为/etc/localtime文件。 + + ``` + cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime + ``` + +- 2.3. 关闭SELinux + + ``` + [root@node1 ~]# + + getenforce + sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config + setenforce 0 + getenforce + #检查 + cat /etc/selinux/config|grep SELINUX + 输出: + + # SELINUX= can take one of these three values: + + SELINUX=disabled + + # SELINUXTYPE= can take one of three values: + + SELINUXTYPE=targeted + + [root@node1 ~]# + ``` + +- 2.4. 修改/etc/hosts + + ``` + #添加一行 + + cat >>/etc/hosts <> /etc/profile<>/etc/ssh/sshd_config<>/etc/security/limits.conf + echo "* hard nofile 1000000" >>/etc/security/limits.conf + echo "* soft nproc unlimited" >>/etc/security/limits.conf + echo "* hard nproc unlimited" >>/etc/security/limits.conf + ``` + +- 2.9. 安装python3.6.x + + ``` + yum install openssl* -y + yum install python3* -y + ``` + + 检查 + + ``` + [omm@node1 dn]$ python3 -V + Python 3.6.8 + ``` + + 其他软件包,如需要可以安装(也可以直接使用安装): + + ``` + yum install -y libaio-devel flex bison ncurses-devel glibc-devel patch redhat-lsb-core readline-devel + yum install openssl* -y + yum install -y java-1.8.0-openjdk* psmisc bzip2 python3 python3-devel lksctp* + reboot #重新启动服务器 + ``` + +- 2.10. 创建安装包的存放目录 + + ``` + mkdir -p /opt/software/openGauss + chmod 755 -R /opt/software + ``` + +- 2.11. 下载openGauss数据库软件 + + 下载地址为:[https://opengauss.org/zh/download.html](https://opengauss.org/zh/download.html) + + 下载完成后上传到centos中 + +- 2.12. 解压缩openGauss DBMS介质 + + ``` + cd /opt/software/openGauss + tar -zxvf openGauss-2.0.0-CentOS-64bit-all.tar.gz + tar -zxvf openGauss-2.0.0-CentOS-64bit-om.tar.gz + ``` + +- 2.13. 创建XML文件 + + 下面是xml文件官方模板,一般只需要改一下自己centos机器的IP + + ``` + cat > clusterconfig.xml< + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + EOF + ``` + +- 2.14. 检查环境变量 + + ``` + echo $LD_LIBRARY_PATH + ``` + +- 2.15. 安装前进行交互式检查 + + ``` + [root@node1 script]# ./gs_preinstall -U omm -G dbgrp -X /opt/software/openGauss/clusterconfig.xml + ``` + + 出现以下内容代表检查没问题。 + + Parsing the configuration file. + + ``` + Successfully parsed the configuration file. + Installing the tools on the local node. + Successfully installed the tools on the local node. + Setting pssh path + Successfully set core path. + Are you sure you want to create the user[omm] and create trust for it (yes/no)? 
  Please enter password for cluster user.
  Password: omm123
  Please enter password for cluster user again.
  Password: omm123
  Successfully created [omm] user on all nodes.
  Preparing SSH service.
  Successfully prepared SSH service.
  Checking OS software.
  Successfully check os software.
  Checking OS version.
  Successfully checked OS version.
  Creating cluster's path.
  Successfully created cluster's path.
  Setting SCTP service.
  Successfully set SCTP service.
  Set and check OS parameter.
  Setting OS parameters.
  Successfully set OS parameters.
  Warning: Installation environment contains some warning messages.
  Please get more details by "/opt/software/openGauss/script/gs_checkos -i A -h node1 --detail".
  Set and check OS parameter completed.
  Preparing CRON service.
  Successfully prepared CRON service.
  Setting user environmental variables.
  Successfully set user environmental variables.
  Setting the dynamic link library.
  Successfully set the dynamic link library.
  Setting Core file
  Successfully set core path.
  Setting pssh path
  Successfully set pssh path.
  Set ARM Optimization.
  No need to set ARM Optimization.
  Fixing server package owner.
  Setting finish flag.
  Successfully set finish flag.
  Preinstallation succeeded
  ```

- 2.16. Install the openGauss DBMS and create the database

  Run the following commands as root:

  ```
  cd /opt/software/openGauss/script
  chmod -R 755 /opt/software/openGauss/script
  chown -R omm:dbgrp /opt/software/openGauss/script
  ```

  Install the openGauss DBMS and create the openGauss database as the omm user:

  ```
  su - omm
  cd /opt/software/openGauss/script
  cp ../clusterconfig.xml .
  gs_install -X /opt/software/openGauss/script/clusterconfig.xml
  ```

  Important: you will be prompted to enter the database password. It must meet certain complexity requirements; remember it so that you can use the database normally. The suggestion here is huawei@1234.

  ```
  [omm@node1 ~]$ cd /opt/software/openGauss/script
  [omm@node1 script]$ cp ../clusterconfig.xml .
  [omm@node1 script]$ gs_install -X /opt/software/openGauss/script/clusterconfig.xml
  ```

  ```
  Parsing the configuration file.
  Check preinstall on every node.
  Successfully checked preinstall on every node.
  Creating the backup directory.
  Successfully created the backup directory.
  begin deploy..
  Installing the cluster.
  begin prepare Install Cluster..
  Checking the installation environment on all nodes.
  begin install Cluster..
  Installing applications on all nodes.
  Successfully installed APP.
  begin init Instance..
  encrypt cipher and rand files for database.
  Please enter password for database:huawei@1234
  Please repeat for database:huawei@1234
  begin to create CA cert files
  The sslcert will be generated in /opt/huawei/install/app/share/sslcert/om
  Cluster installation is completed.
  Configuring.
  Deleting instances from all nodes.
  Successfully deleted instances from all nodes.
  Checking node configuration on all nodes.
  Initializing instances on all nodes.
  Updating instance configuration on all nodes.
  Check consistence of memCheck and coresCheck on database nodes.
  Configuring pg_hba on all nodes.
  Configuration is completed.
  Successfully started cluster.
  Successfully installed application.
  end deploy..
  ```
  Check the database status:

  ```
  [omm@node1 script]$ gs_om -t status
  ```

  The output looks like this:

  ```
  cluster_name : dbCluster
  cluster_state : Normal
  redistributing : No
  ---------------------------------------------------------------------
  [omm@node1 script]$
  ```

  Start the database \(it is already started right after installation\):

  ```
  [omm@node1 db1]$ gs_om -t start
  Starting cluster.

  =========================================

  [SUCCESS] node1:

  [2021-04-01 16:50:13.969][29784][][gs_ctl]: gs_ctl started,datadir is /opt/huawei/install/data/dn

  [2021-04-01 16:50:13.974][29784][][gs_ctl]: another server might be running; Please use the restart command

  =========================================

  Successfully started.
  ```

  - Perform the following as the omm user.

    Log in to the database:

    ```
    gsql -d postgres -p 26000 -r
    ```

    \#Running the command above produces the following prompt.

    ```
    gsql ((openGauss 2.0.0 build 78689da9) compiled at 2021-03-31 21:04:03 commit 0 last mr )
    Non-SSL connection (SSL connection is recommended when requiring high-security)
    Type "help" for help.
    ```

    Other common basic commands:

    ```
    \q #exit the session
    gs_om -t stop #shut down the database
    gs_om -t restart #restart the database
    ```

    For more usage instructions, see the official documentation: [https://opengauss.org/zh/docs/2.1.0/docs/Quickstart/Quickstart.html](https://opengauss.org/zh/docs/2.1.0/docs/Quickstart/Quickstart.html)

## 3. Connection Environment Setup

- 3.1 Install the Go language environment

  Download the Go package: [https://dl.google.com/go/go1.17.5.linux-amd64.tar.gz](https://dl.google.com/go/go1.17.5.linux-amd64.tar.gz)

  Upload the archive and unpack it: tar -C /usr/local/ -xvf go1.17.5.linux-amd64.tar.gz

  Configure the environment variables:

  ```
  vi /etc/profile
  ```

  Append the following:

  ```
  export PATH=$PATH:/usr/local/go/bin
  ```

  Make the change take effect:

  ```
  source /etc/profile
  ```

  Verify that Go is installed:

  ```
  go version
  ```

  Output like the following means the installation succeeded \(mine shows 1.16; go by the version of the package you downloaded\):

  ![](figures/我装的是1-16版本.png)

- 3.2 Set up the unixODBC environment

  Download and install unixODBC:

  ```
  yum install unixODBC
  yum install unixODBC-devel #optional
  ```

  Download the openGauss ODBC driver ODBC\_2.1.0: [https://opengauss.org/zh/download.html](https://opengauss.org/zh/download.html)

  After downloading, upload it to the /soft path on the CentOS machine \(this exact path is not required\) and copy the driver to the target directories:

  ```
  cd /soft
  tar -xzvf openGauss-2.1.0-ODBC.tar.gz
  cp lib/* /usr/local/lib
  cp odbc/lib/* /usr/local/lib
  ```

  After unixODBC is installed, the /etc directory contains the file odbcinst.ini; configure it next:

  ```
  vim odbcinst.ini
  ```

  Add the following:

  ```
  [openGauss]
  Driver64=/usr/local/lib/psqlodbcw.so
  setup=/usr/local/lib/psqlodbcw.so
  ```

  Also edit /etc/odbc.ini \(create it if it does not exist\):

  ```
  vim /etc/odbc.ini
  ```

  Copy in the following:

  ```
  [openGaussODBC]
  Driver=openGauss
  Servername=192.168.183.28
  Database=postgres
  Username=gem
  Password=huawei@1234
  Port=26000
  DatabaseName=postgres
  ```

  Note: fill in your own IP address and a database user name and password; the user cannot be the initial omm user.

  The configuration is now basically done. Verify that ODBC can reach openGauss \(the database must be started\):

  ```
  isql -v openGaussODBC
  ```

  Output like the following means the configuration is OK and the connection works:

  ![](figures/连接没有问题.png)
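  As an optional aside before moving on to Go, the same DSN can be smoke-tested from Python with the pyodbc package; the DSN and credentials below are simply the illustrative values from the odbc.ini above:

  ```python
  import pyodbc

  # Reuses the [openGaussODBC] DSN defined in /etc/odbc.ini above.
  conn = pyodbc.connect("DSN=openGaussODBC;UID=gem;PWD=huawei@1234")
  cur = conn.cursor()
  cur.execute("SELECT 666")
  print(cur.fetchone()[0])  # -> 666
  conn.close()
  ```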
- 3.3 Write and run the Go test script that connects to openGauss through ODBC

  ```
  vim test_gauss.go:

  package main

  import (
      "database/sql"
      "fmt"
      _ "odbc/driver"
  )

  func main() {
      fmt.Printf("%s\n", "Creating the database connection")
      conn, err := sql.Open("odbc", "DSN=openGaussODBC;UID=gem;PWD=huawei@1234")
      if err != nil {
          fmt.Println("Connection error")
          return
      }
      defer conn.Close()
      fmt.Printf("%s\n", "Building the query")
      stmt, err := conn.Prepare("select 666;")
      if err != nil {
          fmt.Println("Query preparation error:", err)
          return
      }
      defer stmt.Close()
      row, err := stmt.Query()
      if err != nil {
          fmt.Println("Query error:", err)
      }
      defer row.Close()
      fmt.Printf("%s\n", "Displaying the result set")
      for row.Next() {
          var id int
          if err := row.Scan(&id); err == nil {
              fmt.Println(id)
          }
      }
  }
  ```

  Run the test from the script's directory:

  ```
  go run test_gauss.go
  ```

  Output like the following indicates a successful connection:

  ![](figures/若出现以下结果表明连接成功.png)

## 4. Summary

In my view, the easy-to-miss points are: the VM needs at least 8 GB of memory; the two unixODBC configuration files, odbc.ini and odbcinst.ini, are slightly fiddly to get right; and finally, the database must be started before you can connect — I once got caught by this myself, obvious as it sounds.

diff --git "a/content/zh/post/2022/MOGDB-openGauss\346\225\260\346\215\256\345\272\223gs_dump\345\244\207\344\273\275\350\204\232\346\234\254\345\217\212\345\244\207\344\273\275\346\270\205\347\220\206.md" "b/content/zh/post/2022/MOGDB-openGauss\346\225\260\346\215\256\345\272\223gs_dump\345\244\207\344\273\275\350\204\232\346\234\254\345\217\212\345\244\207\344\273\275\346\270\205\347\220\206.md"
new file mode 100644
index 0000000000000000000000000000000000000000..9e6f28013c2980a52b453db77b1e24651613e0a1
--- /dev/null
+++ "b/content/zh/post/2022/MOGDB-openGauss\346\225\260\346\215\256\345\272\223gs_dump\345\244\207\344\273\275\350\204\232\346\234\254\345\217\212\345\244\207\344\273\275\346\270\205\347\220\206.md"
@@ -0,0 +1,130 @@
+++

title = "MOGDB/openGauss Database gs dump Backup Script and Backup Cleanup"

date = "2022-01-07"

tags = [ "MOGDB/openGauss database gs dump backup script and backup cleanup"]

archives = "2022-01"

author = "阎书利"

summary = "MOGDB/openGauss database gs dump backup script and backup cleanup"

img = "/zh/post/2022/title/img8.png"

times = "12:30"

+++

# MOGDB/openGauss Database gs\_dump Backup Script and Backup Cleanup

We need a daily logical backup of MOGDB/openGauss; the following script is shared for that purpose.

## 1. Backup Script

- 1. The script, c.sh \(rename it as you like\):
  ```
  #!/bin/bash
  # database dump shell
  # you should change the GAUSSHOME GAUSSPORT GAUSSDATA DUMP_USER DUMP_PASSWORD
  source /etc/profile
  source /home/omm/.bash_profile
  export GAUSSHOME=/opt/gaussdb/app
  export GAUSSPORT=26000
  export GAUSSDATA=/gaussdb/data/dn1
  export PATH=$GAUSSHOME/bin:$PATH
  DUMP_USER=ysla
  DUMP_PASSWORD='1qaz@WSX'
  CUR_DATE=`date "+%Y-%m-%d-%H%M"`

  #Loading DBLIST
  gsql -p ${GAUSSPORT} postgres -c "select datname from pg_database where datname not in ('template1','template0','postgres')" -t | grep -v '^$' >oo.txt
  dbnamelist=`cat oo.txt`

  #save directory
  SAVE_BASE_DIR="/gaussdb/dump_dir"
  DAT_FILE_DIR="${SAVE_BASE_DIR}/${CUR_DATE}"
  if [ -d ${DAT_FILE_DIR} ]
  then :
  else
      mkdir -p ${DAT_FILE_DIR}
  fi
  # The real backup step!
  echo "`date "+%Y-%m-%d-%H%M"` begin backup db "
  for dbname in ${dbnamelist}
  do
      gs_dump -E UTF8 ${dbname} -U ${DUMP_USER} -W ${DUMP_PASSWORD} -p ${GAUSSPORT} -F p -f ${DAT_FILE_DIR}/${dbname}_${CUR_DATE}.sql
      gs_dumpall -l ${dbname} -U ${DUMP_USER} -W ${DUMP_PASSWORD} -p ${GAUSSPORT} -g -f ${DAT_FILE_DIR}/global_data_${dbname}_${CUR_DATE}.sql
  done
  tar -cjvf ${DAT_FILE_DIR}.tar.gz /${DAT_FILE_DIR} --remove-files
  echo "`date "+%Y-%m-%d-%H%M"` end backup db "
  ```

  In this script, change GAUSSHOME, GAUSSPORT, GAUSSDATA, DUMP\_USER, and DUMP\_PASSWORD \(database home directory, port, data directory, dump user, and password\), then run it directly. The script queries the database for all databases except 'template1', 'template0', and 'postgres', backs up each database's data and global objects separately, and finally compresses the backup .sql files.

- 2. Running the script

  The results are as follows:

  ![](figures/20220107-39368262-8b82-4c5d-973d-c268dab99042.png)

  Go to the directory where the backups are stored:

  ![](figures/20220107-154aa7ec-6a24-41aa-8fe4-0eee137d0982.png)

  Unpack one file:

  ![](figures/20220107-2073c9b3-0749-4d3d-a577-cf9467225d37.png)

  Database data is saved in files named \(dbname\_datetime.sql\), and global objects in files named \(global\_data\_dbname\_datetime.sql\).

  ![](figures/20220107-6c96183e-8ed2-4eac-840d-6de2b6c9e746.png)

  ![](figures/20220107-62164f26-2335-4465-ad23-47148ecae8a1.png)

  View the data backup of a database:

  ```
  [omm@node1 2022-01-07-1634]$ vim ysla_2022-01-07-1634.sql
  ```

  ![](figures/20220107-ee45e332-8e56-4b07-a765-b1e5ce6df6b9.png)

  View the global-object backup of the database:

  ```
  [omm@node1 2022-01-07-1634]$ vim global_data_ysla_2022-01-07-1634.sql
  ```

  ![](figures/20220107-7b9036fd-66fb-44ff-9ad6-61a878d5940b.png)

## 2. Adding the Backup Script to crontab

Add the following line to crontab so the script runs every day at 00:30. flock is used to prevent one run of the script from overlapping the next scheduled run: concurrent runs could cause problems, in the worst case dirty data and a vicious circle of performance bottlenecks, so flock takes an exclusive lock.

```
-x exclusive lock; -n fail immediately if already locked, instead of waiting
```

```
30 00 * * * /usr/bin/flock -xn /tmp/test.lock -c 'sh /home/omm/c.sh >> c.log'
```

## 3. Backup Retention and Cleanup

With daily backups, the directory can fill up if old backups are never cleaned, so a cleanup policy is needed. Here cron is used again, cleaning expired data every day at 03:30.

Add the following line to crontab \(again using flock\). The command finds files under the backup directory, excludes hidden files, and deletes .tar.gz files older than 30 days:

```
30 03 * * * /usr/bin/flock -xn /tmp/test1.lock -c "find /gaussdb/dump_dir -not -path '*/\.*' -mtime +30 -type f -name '*.tar.gz' -exec rm -rf {} \;"
```

diff --git "a/content/zh/post/2022/MOGDB-openGauss\347\232\204txid_snapshot-\346\225\260\346\215\256\347\261\273\345\236\213\345\222\214\347\233\270\345\205\263\345\207\275\346\225\260.md" "b/content/zh/post/2022/MOGDB-openGauss\347\232\204txid_snapshot-\346\225\260\346\215\256\347\261\273\345\236\213\345\222\214\347\233\270\345\205\263\345\207\275\346\225\260.md"
new file mode 100644
index 0000000000000000000000000000000000000000..361dc89bc030dfccaf0d39a100ddc60010de3543
--- /dev/null
+++ "b/content/zh/post/2022/MOGDB-openGauss\347\232\204txid_snapshot-\346\225\260\346\215\256\347\261\273\345\236\213\345\222\214\347\233\270\345\205\263\345\207\275\346\225\260.md"
@@ -0,0 +1,257 @@
+++

title = "The txid snapshot Data Type and Related Functions in MOGDB/openGauss"

date = "2021-12-20"

tags = [ "The txid snapshot data type and related functions in MOGDB/openGauss"]

archives = "2021-12"

author = "阎书利"

summary = "The txid snapshot data type and related functions in MOGDB/openGauss"

img = "/zh/post/2022/title/img5.png"

times = "12:30"

+++

# The txid\_snapshot Data Type and Related Functions in MOGDB/openGauss

The text representation of txid\_snapshot is xmin:xmax:xip\_list.

| Name | Description |
| --- | --- |
| xmin | The earliest transaction ID \(txid\) that is still active. All earlier transactions are either committed and visible, or rolled back. |
| xmax | The first txid not yet assigned. All txids greater than or equal to it had not started as of the snapshot time and are therefore invisible. |
| xip\_list | The txids active at the time of the snapshot. The list includes only txids between xmin and xmax; an active txid may be greater than xmax. A txid that is greater than or equal to xmin, less than xmax, and not in this list had already completed at snapshot time, and is therefore either visible or rolled back depending on its commit status. The list does not include the txids of subtransactions. |

Example: 10:20:10,13,15 means xmin=10, xmax=20, xip\_list=10,13,15.

The tests below illustrate this; the visibility rule is also sketched in Python right after.
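As a reading aid, this is a minimal Python sketch of the visibility rule the table describes, mirroring the semantics of txid\_visible\_in\_snapshot; completed txids are assumed committed, as in the gsql examples further down:

```python
def parse_snapshot(text):
    """Parse the 'xmin:xmax:xip_list' text form of txid_snapshot."""
    xmin_s, xmax_s, xip_s = text.split(":")
    return int(xmin_s), int(xmax_s), {int(x) for x in xip_s.split(",") if x}

def visible_in_snapshot(txid, snapshot_text):
    """A txid is invisible if it had not started yet (txid >= xmax) or was
    still active (txid in xip_list); txids below xmin, and completed txids
    between xmin and xmax, count as visible (assuming they committed)."""
    xmin, xmax, xips = parse_snapshot(snapshot_text)
    return txid < xmax and txid not in xips

# Matches the gsql output for snapshot '12:20:13,15,18' shown below.
for txid in (11, 12, 13, 14, 15, 20, 21):
    print(txid, visible_in_snapshot(txid, "12:20:13,15,18"))
```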
diff --git "a/content/zh/post/2022/MOGDB-openGauss\347\232\204txid_snapshot-\346\225\260\346\215\256\347\261\273\345\236\213\345\222\214\347\233\270\345\205\263\345\207\275\346\225\260.md" "b/content/zh/post/2022/MOGDB-openGauss\347\232\204txid_snapshot-\346\225\260\346\215\256\347\261\273\345\236\213\345\222\214\347\233\270\345\205\263\345\207\275\346\225\260.md"
new file mode 100644
index 0000000000000000000000000000000000000000..361dc89bc030dfccaf0d39a100ddc60010de3543
--- /dev/null
+++ "b/content/zh/post/2022/MOGDB-openGauss\347\232\204txid_snapshot-\346\225\260\346\215\256\347\261\273\345\236\213\345\222\214\347\233\270\345\205\263\345\207\275\346\225\260.md"
@@ -0,0 +1,257 @@

+++

title = "MOGDB/openGauss: the txid_snapshot data type and related functions"

date = "2021-12-20"

tags = [ "MOGDB/openGauss txid_snapshot data type and related functions"]

archives = "2021-12"

author = "阎书利"

summary = "MOGDB/openGauss: the txid_snapshot data type and related functions"

img = "/zh/post/2022/title/img5.png"

times = "12:30"

+++

# MOGDB/openGauss: the txid\_snapshot data type and related functions

The textual form of a txid\_snapshot is xmin:xmax:xip\_list.

| Field | Description |
| --- | --- |
| xmin | The earliest transaction ID (txid) that is still active. All earlier transactions are either committed and visible, or rolled back. |
| xmax | The first txid not yet assigned. All txids greater than or equal to this had not started as of the snapshot time and are therefore invisible. |
| xip\_list | The txids active in this snapshot. The list only contains active txids between xmin and xmax. A txid that is greater than or equal to xmin, less than xmax, and not in this list had already completed at snapshot time, and is therefore visible or rolled back according to its commit status. The list does not include the txids of subtransactions. |

Example: 10:20:10,13,15 means xmin=10, xmax=20, xip\_list=10, 13, 15.

The tests follow.

## 1. Force COMMIT instead of 2PC for temporary objects

```
SET enforce_two_phase_commit TO off;
```

## 2. Normal cases

```
postgres=# select '12:13:'::txid_snapshot;
 txid_snapshot
---------------
 12:13:
(1 row)

postgres=# select '12:18:14,16'::txid_snapshot;
 txid_snapshot
---------------
 12:18:14,16
(1 row)
```

## 3. Error cases

```
postgres=# select '31:12:'::txid_snapshot;
ERROR:  invalid input for txid_snapshot: "31:12:"
LINE 1: select '31:12:'::txid_snapshot;
               ^
CONTEXT:  referenced column: txid_snapshot
-------------------------------------------------------------------------------
postgres=# select '0:1:'::txid_snapshot;
ERROR:  invalid input for txid_snapshot: "0:1:"
LINE 1: select '0:1:'::txid_snapshot;
               ^
CONTEXT:  referenced column: txid_snapshot
-------------------------------------------------------------------------------
postgres=# select '12:13:0'::txid_snapshot;
ERROR:  invalid input for txid_snapshot: "12:13:0"
LINE 1: select '12:13:0'::txid_snapshot;
               ^
CONTEXT:  referenced column: txid_snapshot
-------------------------------------------------------------------------------
postgres=# select '12:16:14,13'::txid_snapshot;
ERROR:  invalid input for txid_snapshot: "12:16:14,13"
LINE 1: select '12:16:14,13'::txid_snapshot;
               ^
CONTEXT:  referenced column: txid_snapshot
-------------------------------------------------------------------------------
postgres=# select '12:16:14,14'::txid_snapshot;
ERROR:  invalid input for txid_snapshot: "12:16:14,14"
LINE 1: select '12:16:14,14'::txid_snapshot;
               ^
CONTEXT:  referenced column: txid_snapshot
```

The tests show that xmax must be greater than xmin and neither may be 0, and that the txids must be listed in increasing order, must not be 0, and must not contain duplicates; these invalid forms should be avoided when constructing values.

## 4. Create a test table and load data

```
postgres=# create temp table snapshot_test(nr integer,snap txid_snapshot);
CREATE TABLE
postgres=# insert into snapshot_test values (1, '12:13:');
INSERT 0 1
postgres=# insert into snapshot_test values (2, '12:20:13,15,18');
INSERT 0 1
postgres=# insert into snapshot_test values (3, '100001:100009:100005,100007,100008');
INSERT 0 1
postgres=# insert into snapshot_test values (4, '100:150:101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131');
INSERT 0 1
```

Query the data:

```
postgres=# select snap from snapshot_test order by nr;
                                 snap
---------------------------------------------------------------------
 12:13:
 12:20:13,15,18
 100001:100009:100005,100007,100008
 100:150:101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131
(4 rows)
```

## 5. Function tests

txid\_snapshot\_xmin\(\) returns the snapshot's xmin,

txid\_snapshot\_xmax\(\) returns the snapshot's xmax,

txid\_snapshot\_xip\(\) returns the in-progress txids of the snapshot.

```
postgres=# select txid_snapshot_xmin(snap),
postgres-#        txid_snapshot_xmax(snap),
postgres-#        txid_snapshot_xip(snap)
postgres-# from snapshot_test order by nr, 1, 2, 3;
 txid_snapshot_xmin | txid_snapshot_xmax | txid_snapshot_xip
--------------------+--------------------+-------------------
 12                 | 20                 | 13
 12                 | 20                 | 15
 12                 | 20                 | 18
 100001             | 100009             | 100005
 100001             | 100009             | 100007
 100001             | 100009             | 100008
 100                | 150                | 101
 100                | 150                | 102
 100                | 150                | 103
 100                | 150                | 104
 100                | 150                | 105
 ...
```
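The same accessors can be pointed at the live snapshot of the current session; a small sketch (the numbers below are illustrative only and will differ on every run):

```
postgres=# select txid_current() as current_txid,
postgres-#        txid_snapshot_xmin(txid_current_snapshot()) as xmin,
postgres-#        txid_snapshot_xmax(txid_current_snapshot()) as xmax;
 current_txid | xmin  | xmax
--------------+-------+-------
        10023 | 10023 | 10024
(1 row)
```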
txid\_visible\_in\_snapshot\(\) checks whether a transaction ID is visible in a snapshot \(subtransaction IDs are not handled\):

```
postgres=# select id, txid_visible_in_snapshot(id, snap)
postgres-# from snapshot_test, generate_series(11, 21) id
postgres-# where nr = 2;
 id | txid_visible_in_snapshot
----+--------------------------
 11 | t
 12 | t
 13 | f
 14 | t
 15 | f
 16 | t
 17 | t
 18 | f
 19 | t
 20 | f
 21 | f
(11 rows)
```

## 6. Other tests

- Exercising the binary search

    ```
    postgres=# select id, txid_visible_in_snapshot(id, snap)
    postgres-# from snapshot_test, generate_series(90, 160) id
    postgres-# where nr = 4;
      id | txid_visible_in_snapshot
    -----+--------------------------
      90 | t
      91 | t
      92 | t
      93 | t
      94 | t
      95 | t
      96 | t
      97 | t
      98 | t
      99 | t
     100 | t
     101 | f
    ```

- Testing the current values

    ```
    postgres=# select txid_current() >= txid_snapshot_xmin(txid_current_snapshot());
     ?column?
    ----------
     t
    (1 row)
    ```

    We cannot assume that the current txid is always less than xmax:

    ```
    postgres=# select txid_visible_in_snapshot(txid_current(), txid_current_snapshot());
     txid_visible_in_snapshot
    --------------------------
     f
    (1 row)
    ```

    Testing 64-bitness. (MOGDB/openGauss widened transactionid from int32 to int64; a 64-bit xid can never be exhausted in practice. Expired xids still need freeze cleanup, but there is no longer any risk of an outage caused by xid wraparound.)

    ```
    postgres=# select txid_snapshot '1000100010001000:1000100010001100:1000100010001012,1000100010001013';
                              txid_snapshot
    ----------------------------------------------------------------------
     1000100010001000:1000100010001100:1000100010001012,1000100010001013
    (1 row)

    postgres=# select txid_visible_in_snapshot('1000100010001012', '1000100010001000:1000100010001100:1000100010001012,1000100010001013');
     txid_visible_in_snapshot
    --------------------------
     f
    (1 row)

    postgres=# select txid_visible_in_snapshot('1000100010001015', '1000100010001000:1000100010001100:1000100010001012,1000100010001013');
     txid_visible_in_snapshot
    --------------------------
     t
    (1 row)
    ```

    Testing overflow past 64 bits: 9223372036854775807 is 2^63 − 1, i.e. the largest 63-bit binary number and the maximum signed 64-bit value, so xmax may not exceed it.

    ```
    postgres=# SELECT txid_snapshot '1:9223372036854775807:3';
         txid_snapshot
    -------------------------
     1:9223372036854775807:3
    (1 row)

    postgres=# SELECT txid_snapshot '1:9223372036854775808:3';
    ERROR:  invalid input for txid_snapshot: "1:9223372036854775808:3"
    LINE 1: SELECT txid_snapshot '1:9223372036854775808:3';
                                 ^
    CONTEXT:  referenced column: txid_snapshot
    ```
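    That boundary is easy to re-derive in SQL; a one-line sketch (in my testing, numeric `^` with an integer exponent is exact, and the output is shown for illustration):

    ```
    postgres=# select 2::numeric ^ 63 - 1 as max_txid;
           max_txid
    ---------------------
     9223372036854775807
    (1 row)
    ```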
diff --git "a/content/zh/post/2022/MogDB-openGauss-\350\207\252\345\256\232\344\271\211snmptrapd\345\221\212\350\255\246\344\277\241\346\201\257.md" "b/content/zh/post/2022/MogDB-openGauss-\350\207\252\345\256\232\344\271\211snmptrapd\345\221\212\350\255\246\344\277\241\346\201\257.md"
new file mode 100644
index 0000000000000000000000000000000000000000..b54366f0a0bfd7677cd2b01d4a5df08cbdd994c1
--- /dev/null
+++ "b/content/zh/post/2022/MogDB-openGauss-\350\207\252\345\256\232\344\271\211snmptrapd\345\221\212\350\255\246\344\277\241\346\201\257.md"
@@ -0,0 +1,78 @@

+++

title = "MogDB/openGauss: customizing snmptrapd alert messages"

date = "2022-01-06"

tags = [ "MogDB/openGauss customizing snmptrapd alert messages"]

archives = "2022-01"

author = "高云龙"

summary = "MogDB/openGauss: customizing snmptrapd alert messages"

img = "/zh/post/2022/title/img9.png"

times = "12:30"

+++

# MogDB/openGauss: customizing snmptrapd alert messages

In practice the default alert rule content does not serve the SNMP server side very well, so the alert messages need customizing. Take adding the IP address as an example: showing the IP on its own line in the alert looks trivial, but quite a few configuration files have to change to get there.

![](figures/20220106-03097507-8fce-424c-8c74-969e1fb06f16.png)

## Modify prometheus.yml

First modify prometheus.yml and add an ip label under the corresponding instance.

![](figures/20220106-36068d2a-eccf-45ff-89df-c994c6331802.png)

## Modify the alert rule file

The alert rule file needs the same change. I am using server disk usage for the alert test, so I edit node\_rules.yml directly and add the IP information.

![](figures/20220106-a00fdef7-cefb-4775-bd75-f6bfb0952b8d.png)

## Check the alertmanager web UI

The newly added ip label now shows up in the alertmanager UI, and the alert message carries the IP as well.

![](figures/20220106-fec3c37b-f253-4aa9-a986-96012785126a.png)

## Modify the snmp\_notifier template

After alertmanager pushes the alert to snmp\_notifier, the snmp\_notifier description template description-template.tpl must be modified as well.

![](figures/20220106-e39ed7ff-add2-4ef5-9b4a-45edddfe74ff.png)

## The snmptrapd server receives the alert

![](figures/20220106-02524930-39ff-4c6d-898e-4070ab278009.png)

## Add an extra template

By default snmptrapd only renders three lines of template information. Extra fields require the --snmp.extra-field-template flag; for example, to add a fourth field, start snmp\_notifier with --snmp.extra-field-template=4=/opt/snmp\_notifier/extra-field-template.tpl. The template extra-field-template.tpl can follow the format of description-template.tpl:

```
{{- if .Alerts -}}
{{- range $severity, $alerts := (groupAlertsByLabel .Alerts "severity") -}}
{{- range $index, $alert := $alerts }}
{{ $alert.Annotations.ip }}
{{ end }}
{{ end }}
{{- end -}}
```

## Start snmp\_notifier

```
nohup /opt/snmp_notifier/snmp_notifier --snmp.trap-description-template=/opt/snmp_notifier/description-template.tpl --snmp.extra-field-template=4=/opt/snmp_notifier/extra-field-template.tpl > /opt/snmp_notifier/snmp_notifier.log 2>&1 &
```

## Check the alert on the snmptrapd server again

![](figures/20220106-a510566b-e8dc-4b21-b5df-974e4bac5cd4.png)
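A quick way to confirm that the restarted notifier actually picked up both template flags is to look at the process list and the tail of its log; an illustrative session, with paths matching the article:

```
[root@node1 ~]# ps -ef | grep snmp_notifier | grep -v grep
[root@node1 ~]# tail -n 20 /opt/snmp_notifier/snmp_notifier.log
```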
diff --git "a/content/zh/post/2022/MogDB-openGauss\345\205\263\344\272\216PL-SQL\345\214\277\345\220\215\345\235\227\350\260\203\347\224\250\346\265\213\350\257\225.md" "b/content/zh/post/2022/MogDB-openGauss\345\205\263\344\272\216PL-SQL\345\214\277\345\220\215\345\235\227\350\260\203\347\224\250\346\265\213\350\257\225.md"
new file mode 100644
index 0000000000000000000000000000000000000000..33db45f9cbad61a9e69001a04da87484e72cd448
--- /dev/null
+++ "b/content/zh/post/2022/MogDB-openGauss\345\205\263\344\272\216PL-SQL\345\214\277\345\220\215\345\235\227\350\260\203\347\224\250\346\265\213\350\257\225.md"
@@ -0,0 +1,226 @@

+++

title = "MogDB/openGauss: testing PL/SQL anonymous block calls"

date = "2021-12-24"

tags = [ "MogDB/openGauss testing PL/SQL anonymous block calls"]

archives = "2021-12"

author = "lmj"

summary = "MogDB/openGauss: testing PL/SQL anonymous block calls"

img = "/zh/post/2022/title/img7.png"

times = "12:30"

+++

# MogDB/openGauss: testing PL/SQL anonymous block calls

## 1. How it works

PL/SQL \(Procedure Language/Structure Query Language\) is a programming language that adds procedural features to standard SQL.

A single SQL statement can only manipulate data; it has no flow control and cannot implement complex applications. PL/SQL combines structured queries with the database's own procedural control into one powerful language.

- 1. How PL/SQL executes

    PL/SQL is a block-structured language: a group of statements is placed in one block and sent to the server in a single round trip.

    The PL/SQL engine parses the received block, executes the procedural-control statements itself, and hands the SQL statements in the block to the server's SQL executor.

    After a block reaches the server it is compiled and then executed. Named blocks (such as subprograms) can be compiled separately, stored permanently in the database, and are ready to execute at any time.

    A PL/SQL program consists of one or more logical blocks. A block may declare variables, and a variable must be declared before it is used.

- 2. PL/SQL characteristics

    - Tight integration with SQL

    - Support for object-oriented programming

    - Better performance

    - Portability

    - Security

- 3. Syntax structure

    Besides the normal execution part, PL/SQL provides a dedicated exception section for error handling:

    ```
    [DECLARE
    --declaration statements] ①
    BEGIN
    --executable statements ②
    [EXCEPTION
    --exception statements] ③
    END;
    ```

    **Walkthrough**

    ① Declaration section: definitions of the variables and constants, plus the types, cursors, and local procedures and functions the block uses. It starts with the keyword DECLARE and may be omitted entirely if nothing needs declaring.

    ② Execution section: the instruction part of the block, starting with the keyword BEGIN and ending with END. All executable PL/SQL statements go here; this part runs commands and manipulates variables, and other PL/SQL blocks can be nested inside it as sub-blocks. The execution section is mandatory. Note the semicolon after the END keyword.

    ③ Exception section: optional. The EXCEPTION keyword splits the executable part in two: the statements before it run normally, and once an exception is raised, control jumps to the exception section.

- 4. Kinds of PL/SQL blocks

    1. Anonymous blocks

    2. Named blocks

    - ① procedure (a minimal named-block sketch appears after the tests below)

    - ② function

    - ③ package

    - ④ trigger

When PL/SQL comes up, most people probably think of ORACLE first. ORACLE's PL/SQL is powerful, and its anonymous and named block calls solve many problems. MOGDB/openGauss in fact offers the same capability; below are my tests of anonymous blocks in MOGDB/openGauss.

## 2. Anonymous block tests

- 1. A plain anonymous block call

    ```
    openGauss=# create table t1(a int ,b text);
    CREATE TABLE

    openGauss=# DECLARE
    openGauss-# PRAGMA AUTONOMOUS_TRANSACTION;
    openGauss-# BEGIN
    openGauss$# raise notice 'Normal anonymous block printing.';
    openGauss$# insert into t1 values(1,'I am lmj!');
    openGauss$# END;
    openGauss$# /
    NOTICE:  Normal anonymous block printing.

    ANONYMOUS BLOCK EXECUTE
    openGauss=# select * from t1;
     a |     b
    ---+-----------
     1 | I am lmj!
    (1 row)
    ```

- 2. Anonymous blocks and transactions

    Start a transaction, then run an autonomous-transaction anonymous block: if the transaction rolls back, the anonymous block's work does not.

    ```
    openGauss=# truncate table t1;
    TRUNCATE TABLE

    openGauss=# START TRANSACTION;
    START TRANSACTION
    openGauss=# DECLARE
    openGauss-# PRAGMA AUTONOMOUS_TRANSACTION;
    openGauss-# BEGIN
    openGauss$# raise notice 'an autonomous transaction anonymous block.';
    openGauss$# insert into t1 values(1,'it will commit!');
    openGauss$# END;
    openGauss$# /
    NOTICE:  an autonomous transaction anonymous block.

    ANONYMOUS BLOCK EXECUTE
    openGauss=# insert into t1 values(1,'you will rollback!');
    INSERT 0 1
    openGauss=# rollback;
    ROLLBACK
    openGauss=# select * from t1;
     a |        b
    ---+-----------------
     1 | it will commit!
    (1 row)
    ```

- 3. Outer and inner anonymous blocks

    Here the outer anonymous block is an ordinary one while the inner block is an autonomous transaction; compare the rollback behaviour of the transaction and of the blocks with example 2:

    ```
    openGauss=# truncate table t1;
    TRUNCATE TABLE

    openGauss=# DECLARE
    openGauss-# BEGIN
    openGauss$# DECLARE
    openGauss$# PRAGMA AUTONOMOUS_TRANSACTION;
    openGauss$# BEGIN
    openGauss$# raise notice 'just use call.';
    openGauss$# insert into t1 values(1,'can you rollback!');
    openGauss$# END;
    openGauss$# insert into t1 values(2,'I will rollback!');
    openGauss$# rollback;
    openGauss$# END;
    openGauss$# /
    NOTICE:  just use call.
    ANONYMOUS BLOCK EXECUTE
    openGauss=# select * from t1;
     a | b
    ---+---
    (0 rows)
    ```

- 4. An anonymous block that runs an autonomous-transaction block directly and hits an exception

    ```
    openGauss=# DECLARE
    openGauss-# PRAGMA AUTONOMOUS_TRANSACTION;
    openGauss-# res int := 0;
    openGauss-# res2 int := 1;
    openGauss-# BEGIN
    openGauss$# raise notice 'just use call.';
    openGauss$# res2 = res2/res;
    openGauss$# END;
    openGauss$# /
    NOTICE:  just use call.

    ERROR:  ERROR: division by zero
    CONTEXT:  PL/pgSQL function inline_code_block line 7 at assignment
    ```

    The anonymous block fails and the exception is reported.

- 5. Exception capture

    An exception raised during execution is caught inside the anonymous block; as shown below, after the failing statement the handler prints the autonomous throw exception notice.

    ```
    openGauss=# DECLARE
    openGauss-# PRAGMA AUTONOMOUS_TRANSACTION;
    openGauss-# res int := 0;
    openGauss-# res2 int := 1;
    openGauss-# BEGIN
    openGauss$# raise notice 'error catch.';
    openGauss$# res2 = res2/res;
    openGauss$# EXCEPTION
    openGauss$# WHEN division_by_zero THEN
    openGauss$# raise notice 'autonomous throw exception.';
    openGauss$# END;
    openGauss$# /
    NOTICE:  error catch.

    NOTICE:  autonomous throw exception.
    ANONYMOUS BLOCK EXECUTE
    ```
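For the named-block counterpart referenced in section 1, a minimal sketch of a stored procedure follows; the procedure name say_hello is made up for illustration, and the session output is abbreviated:

```
openGauss=# CREATE OR REPLACE PROCEDURE say_hello() AS
openGauss$# BEGIN
openGauss$#     raise notice 'Hello from a named block.';
openGauss$# END;
openGauss$# /
CREATE PROCEDURE
openGauss=# CALL say_hello();
NOTICE:  Hello from a named block.
```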
diff --git "a/content/zh/post/2022/MogDB-opengauss\346\233\264\346\224\271\346\225\260\346\215\256\345\272\223\347\233\256\345\275\225\344\275\215\347\275\256(\345\273\272\350\256\256\346\265\213\350\257\225\347\216\257\345\242\203).md" "b/content/zh/post/2022/MogDB-opengauss\346\233\264\346\224\271\346\225\260\346\215\256\345\272\223\347\233\256\345\275\225\344\275\215\347\275\256(\345\273\272\350\256\256\346\265\213\350\257\225\347\216\257\345\242\203).md"
new file mode 100644
index 0000000000000000000000000000000000000000..cbaf0164b0f3451423aabeaf93f0ecd1f49eeaf7
--- /dev/null
+++ "b/content/zh/post/2022/MogDB-opengauss\346\233\264\346\224\271\346\225\260\346\215\256\345\272\223\347\233\256\345\275\225\344\275\215\347\275\256(\345\273\272\350\256\256\346\265\213\350\257\225\347\216\257\345\242\203).md"
@@ -0,0 +1,102 @@

+++

title = "MogDB/opengauss: changing the database directory location (test environments only)"

date = "2021-12-15"

tags = [ "MogDB/opengauss changing the database directory location (test environments only)"]

archives = "2021-12"

author = "张凡"

summary = "MogDB/opengauss: changing the database directory location (test environments only)"

img = "/zh/post/2022/title/img6.png"

times = "12:30"

+++

# MogDB/opengauss: changing the database directory location \(test environments only\)

Sometimes, after the database has been deployed, the disk holding the data directory turns out to be too small as data keeps growing, and more space is needed. Redeploying the database from scratch is painful, because the existing data would have to be imported all over again. This article shows how to move the database directory elsewhere instead. It is recommended for test environments only, not for production.

## 1. Environment

```
[root@node1 ~]# cat /etc/redhat-release
CentOS Linux release 7.6.1810 (Core)
```

## 2. Check the current data directory location

```
[omm@node1 ~]$ gsql -d postgres -p26000 -r
gsql ((MogDB 2.0.1 build f892ccb7) compiled at 2021-07-09 16:12:59 commit 0 last mr )
Non-SSL connection (SSL connection is recommended when requiring high-security)
Type "help" for help.

postgres=# show data_directory ;
 data_directory
-----------------
 /opt/mogdb/data
(1 row)
```

## 3. Change the database location

```
postgres=# alter system set data_directory='/opt/data'; ==> change the data directory location
NOTICE: please restart the database for the POSTMASTER level parameter to take effect.
ALTER SYSTEM SET
postgres=# show data_directory ; ==> this parameter only takes effect after a restart
 data_directory
-----------------
 /opt/mogdb/data
(1 row)

postgres=#
```

## 4. Stop the database and copy the data directory

```
[omm@node1 ~]$ gs_ctl stop -D /opt/mogdb/data/ ==> this host uses a binary deployment; with a standard installation you can stop with gs_om -t stop
[2021-12-15 16:05:07.505][22522][][gs_ctl]: gs_ctl stopped ,datadir is /opt/mogdb/data
waiting for server to shut down........ done
server stopped
[omm@node1 mogdb]$ mkdir -p /opt/data ==> create the new data directory
[omm@node1 mogdb]$ cd /opt/data/
[omm@node1 data]$ cp -r /opt/mogdb/data/* /opt/data/ ==> copy the contents of the old data directory into the new one
[omm@node1 data]$ chmod 0700 /opt/data ==> set mode 0700 on the new directory, or the database will fail to restart
```
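Before restarting, it does no harm to confirm that the ownership and the 0700 mode actually landed on the new directory; an illustrative check (sizes and dates will differ):

```
[omm@node1 data]$ ls -ld /opt/data
drwx------ 23 omm dbgrp 4096 Dec 15 16:08 /opt/data
```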
## 5. Start the database and check the data directory location

```
[omm@node1 data]$ gs_ctl start -D /opt/data
[2021-12-15 16:09:17.271][22740][][gs_ctl]: gs_ctl started,datadir is /opt/data
[2021-12-15 16:09:17.569][22740][][gs_ctl]: waiting for server to start...
.0 LOG: [Alarm Module]can not read GAUSS_WARNING_TYPE env.
.......
.....
....
[2021-12-15 16:09:18.632][22740][][gs_ctl]: done
[2021-12-15 16:09:18.632][22740][][gs_ctl]: server started (/opt/data)
[omm@node1 data]$ gsql -d postgres -p26000 -r
gsql ((MogDB 2.0.1 build f892ccb7) compiled at 2021-07-09 16:12:59 commit 0 last mr )
Non-SSL connection (SSL connection is recommended when requiring high-security)
Type "help" for help.

postgres=# show data_directory ; ==> the change took effect
 data_directory
----------------
 /opt/data
(1 row)
```

## 6. Summary

That is how to change the data directory of a MogDB/opengauss database. The procedure is simple, but it is not recommended for production environments.

diff --git "a/content/zh/post/2022/centos7-\345\256\211\350\243\205openGauss\346\236\201\347\256\200\347\211\210\346\234\254.md" "b/content/zh/post/2022/centos7-\345\256\211\350\243\205openGauss\346\236\201\347\256\200\347\211\210\346\234\254.md"
new file mode 100644
index 0000000000000000000000000000000000000000..021d88a21bb39cc579d5d5e2cd43d23dcf47a251
--- /dev/null
+++ "b/content/zh/post/2022/centos7-\345\256\211\350\243\205openGauss\346\236\201\347\256\200\347\211\210\346\234\254.md"
@@ -0,0 +1,162 @@

+++

title = "Installing the openGauss minimal edition on CentOS 7"

date = "2021-12-14"

tags = [ "Installing the openGauss minimal edition on CentOS 7"]

archives = "2021-12"

author = "雪狼sunny"

summary = "Installing the openGauss minimal edition on CentOS 7"

img = "/zh/post/2022/title/img3.png"

times = "12:30"

+++

# Installing the openGauss minimal edition on CentOS 7

## 1. Base environment

- OS:

    ```
    [root@bogon ~]# cat /etc/redhat-release
    CentOS Linux release 7.9.2009 (Core)
    [root@bogon ~]#
    ```

- Required packages:

    Install net-tools for netstat, since CentOS 7 does not ship it by default:

    ```
    yum install net-tools -y
    ```

    Install bzip2; the official package is openGauss-x.x.x-openEuler-64bit.tar.bz2, and without bzip2 the extraction command fails:

    ```
    yum -y install bzip2 -y
    ```

    Adjust the kernel settings, because the install step otherwise fails with:

    ```
    On systemwide basis, the maximum number of SEMMNI is not correct. the current SEMMNI value is: 128. Please check it.
    ```

    **Fix:** add the line kernel.sem = 250 32000 100 999 to /etc/sysctl.conf, then run sysctl -p.

    Install wget to download the openGauss package:

    ```
    wget https://opengauss.obs.cn-south-1.myhuaweicloud.com/2.1.0/x86/openGauss-2.1.0-CentOS-64bit.tar.bz2
    ```

- Disable the firewall and SELinux:

    ```
    ## Disable the firewall

    systemctl status firewalld

    systemctl disable firewalld.service

    systemctl stop firewalld.service


    ## Disable SELinux

    sed -i '/SELINUX=/d' /etc/selinux/config

    echo "SELINUX=disabled" >> /etc/selinux/config

    cat /etc/selinux/config|grep -v ^#|grep -v '^$'
    ```

- This command turns SELinux off immediately, without a reboot:

    ```
    setenforce 0
    ```


## 2. Installation preparation

```
groupadd -g 1001 dbgrp
useradd -u 2001 -g dbgrp omm
mkdir -p /opt/software/openGauss
chown -R omm:dbgrp /opt
```

- Switch to the omm user and unpack:

    ```
    [root@db1 ~]# su - omm
    [omm@db1 ~]$ cd /opt/software/openGauss/
    [omm@db1 openGauss]$ tar -jxf openGauss-2.1.0-CentOS-64bit.tar.bz2 -C /opt/software/openGauss/
    ```

- Install:

    ```
    [omm@bogon ~]$ cd /opt/software/openGauss/simpleInstall/
    [omm@bogon simpleInstall]$ ls
    finance.sql install.sh README.md school.sql
    [omm@bogon simpleInstall]$ sh install.sh -w gauss#123
    -w: the initial database password (passed to gs_initdb); it must be set for security reasons.
    -p: the openGauss port number; defaults to 5432 if not given.
    -h|--help: print usage.
    ```

    After installation the deployed database node is named sgnode \(set by gs\_initdb\).

    If the run fails with "the maximum number of SEMMNI is not correct, the current SEMMNI is xxx. Please check it.", apply the kernel.sem fix from section 1 as a user with root privileges.

    A successful installation ends with the following screen:

    ![](figures/安装成功会出现如下界面.png)

- Start and connect:

    ```
    [omm@bogon ~]$ gs_ctl start -D $GAUSSHOME/data/single_node -Z single_node
    [2021-12-14 15:32:45.083][11887][][gs_ctl]: gs_ctl started,datadir is /opt/software/openGauss/data/single_node
    [2021-12-14 15:32:45.089][11887][][gs_ctl]: another server might be running; Please use the restart command
    [omm@bogon ~]$ gsql -d postgres -p -r
    failed to connect Unknown:-r.
    [omm@bogon ~]$ gsql -d postgres -p
    gsql: option requires an argument -- p
    Try "gsql --help" for more information.
    [omm@bogon ~]$ gsql -d postgres -p 5432 -r
    gsql ((openGauss 2.1.0 build 590b0f8e) compiled at 2021-09-30 14:29:04 commit 0 last mr )
    Non-SSL connection (SSL connection is recommended when requiring high-security)
    Type "help" for help.

    openGauss=# \l
                             List of databases
       Name    | Owner | Encoding |   Collate   |    Ctype    | Access privileges
    -----------+-------+----------+-------------+-------------+-------------------
     postgres  | omm   | UTF8     | en_US.UTF-8 | en_US.UTF-8 |
     template0 | omm   | UTF8     | en_US.UTF-8 | en_US.UTF-8 | =c/omm          +
               |       |          |             |             | omm=CTc/omm
     template1 | omm   | UTF8     | en_US.UTF-8 | en_US.UTF-8 | =c/omm          +
               |       |          |             |             | omm=CTc/omm
    (3 rows)

    openGauss=# \q
    ```

This write-up is rough, but the installation works without any problems.

Reference: https://opengauss.org/zh/docs/2.1.0/docs/installation/%E5%8D%95%E8%8A%82%E7%82%B9%E5%AE%89%E8%A3%85.html

opengauss

diff --git a/content/zh/post/2022/figures/0497eb639cb14b5182dc5b2aff97a757.png b/content/zh/post/2022/figures/0497eb639cb14b5182dc5b2aff97a757.png new file mode 100644 index 0000000000000000000000000000000000000000..c93afc07ec11bfe5abbec34781a1a3eb66c9ac17 Binary files /dev/null and b/content/zh/post/2022/figures/0497eb639cb14b5182dc5b2aff97a757.png differ diff --git a/content/zh/post/2022/figures/05476910e9e44c9fb0723d26b0f467f4.png b/content/zh/post/2022/figures/05476910e9e44c9fb0723d26b0f467f4.png new file mode 100644 index 0000000000000000000000000000000000000000..864e3b8fa9fd25d6932059fc55a8d11b81942ae4 Binary files /dev/null and b/content/zh/post/2022/figures/05476910e9e44c9fb0723d26b0f467f4.png differ diff --git a/content/zh/post/2022/figures/0bacb67d8b9d4ff6b786b2b734458b10.png b/content/zh/post/2022/figures/0bacb67d8b9d4ff6b786b2b734458b10.png new file mode 100644 index 0000000000000000000000000000000000000000..0d355ada50f4bcd081dbeb6b1cb4af7061f02187 Binary files /dev/null and b/content/zh/post/2022/figures/0bacb67d8b9d4ff6b786b2b734458b10.png differ diff --git a/content/zh/post/2022/figures/0feab0d29d324acc9c4e87ffc7a3e826.png b/content/zh/post/2022/figures/0feab0d29d324acc9c4e87ffc7a3e826.png new file mode 100644 index 0000000000000000000000000000000000000000..8971898569210edc1c45c92abb552934261c407a Binary files /dev/null and b/content/zh/post/2022/figures/0feab0d29d324acc9c4e87ffc7a3e826.png differ diff --git a/content/zh/post/2022/figures/128f20b65c554c85bbcda62acad5616e.png b/content/zh/post/2022/figures/128f20b65c554c85bbcda62acad5616e.png new file mode 100644 index 0000000000000000000000000000000000000000..7303cf2f06a86e61bb2fc3c9a2057cb04c826531 Binary files /dev/null and b/content/zh/post/2022/figures/128f20b65c554c85bbcda62acad5616e.png differ diff --git a/content/zh/post/2022/figures/17fb09d479354307b7e2a8b27cbd2f7e.png b/content/zh/post/2022/figures/17fb09d479354307b7e2a8b27cbd2f7e.png new file mode 100644 index 0000000000000000000000000000000000000000..f18a32d614e60fb688c2548479183c3234f62bd9 Binary files /dev/null and b/content/zh/post/2022/figures/17fb09d479354307b7e2a8b27cbd2f7e.png differ diff --git a/content/zh/post/2022/figures/1e185faf72d14f6bb07e527d753614ed.png b/content/zh/post/2022/figures/1e185faf72d14f6bb07e527d753614ed.png new file mode 100644 index 0000000000000000000000000000000000000000..43ff9ab8a42250d3894c24a05d70de149eafe466 Binary files /dev/null and b/content/zh/post/2022/figures/1e185faf72d14f6bb07e527d753614ed.png differ diff --git a/content/zh/post/2022/figures/1e1aea950edc44d99adc91c658a9e14a.png b/content/zh/post/2022/figures/1e1aea950edc44d99adc91c658a9e14a.png new file mode 100644 index
0000000000000000000000000000000000000000..838c524603941c7931801e6ed4f852b847381fc8 Binary files /dev/null and b/content/zh/post/2022/figures/1e1aea950edc44d99adc91c658a9e14a.png differ diff --git a/content/zh/post/2022/figures/20211015-225127-update.png b/content/zh/post/2022/figures/20211015-225127-update.png new file mode 100644 index 0000000000000000000000000000000000000000..481f898e5a451c2c1c502123a623cf0fddcc6a74 Binary files /dev/null and b/content/zh/post/2022/figures/20211015-225127-update.png differ diff --git a/content/zh/post/2022/figures/20211015-225510-fig-5-03.png b/content/zh/post/2022/figures/20211015-225510-fig-5-03.png new file mode 100644 index 0000000000000000000000000000000000000000..1d574e2e56d3cee1653f7b71038c3495c1fa32d0 Binary files /dev/null and b/content/zh/post/2022/figures/20211015-225510-fig-5-03.png differ diff --git a/content/zh/post/2022/figures/20211015-225511-fig-5-04.png b/content/zh/post/2022/figures/20211015-225511-fig-5-04.png new file mode 100644 index 0000000000000000000000000000000000000000..448edebc0ca928085083b41490c7f6ce5537c7d2 Binary files /dev/null and b/content/zh/post/2022/figures/20211015-225511-fig-5-04.png differ diff --git a/content/zh/post/2022/figures/20211015-225511-fig-5-05.png b/content/zh/post/2022/figures/20211015-225511-fig-5-05.png new file mode 100644 index 0000000000000000000000000000000000000000..ba5a91d3a741191fece09420d0e528d0b793661a Binary files /dev/null and b/content/zh/post/2022/figures/20211015-225511-fig-5-05.png differ diff --git a/content/zh/post/2022/figures/20211015-225511-fig-5-06.png b/content/zh/post/2022/figures/20211015-225511-fig-5-06.png new file mode 100644 index 0000000000000000000000000000000000000000..ec42630e6ab731d2cdff5c682c3b68f2858b5f41 Binary files /dev/null and b/content/zh/post/2022/figures/20211015-225511-fig-5-06.png differ diff --git a/content/zh/post/2022/figures/20211015-225512-72285f7db5051f38a7940e7f235f49df.png b/content/zh/post/2022/figures/20211015-225512-72285f7db5051f38a7940e7f235f49df.png new file mode 100644 index 0000000000000000000000000000000000000000..a31040b940a3ceb94c49369163d8032ce291b8a1 Binary files /dev/null and b/content/zh/post/2022/figures/20211015-225512-72285f7db5051f38a7940e7f235f49df.png differ diff --git a/content/zh/post/2022/figures/20211015-225512-d34f1a911a8804c0b1f8d791a65f175e.png b/content/zh/post/2022/figures/20211015-225512-d34f1a911a8804c0b1f8d791a65f175e.png new file mode 100644 index 0000000000000000000000000000000000000000..89b8d017aef8c37174214e17653996811cd43b77 Binary files /dev/null and b/content/zh/post/2022/figures/20211015-225512-d34f1a911a8804c0b1f8d791a65f175e.png differ diff --git a/content/zh/post/2022/figures/20211015-225513-64eaedd1d1501b104652b104bd3152b2.png b/content/zh/post/2022/figures/20211015-225513-64eaedd1d1501b104652b104bd3152b2.png new file mode 100644 index 0000000000000000000000000000000000000000..90415e2cefa209883e57263aa0a06970060faa41 Binary files /dev/null and b/content/zh/post/2022/figures/20211015-225513-64eaedd1d1501b104652b104bd3152b2.png differ diff --git a/content/zh/post/2022/figures/20211017-204222-dc83a9cc72803e849caa49dae027369f.png b/content/zh/post/2022/figures/20211017-204222-dc83a9cc72803e849caa49dae027369f.png new file mode 100644 index 0000000000000000000000000000000000000000..ce18bb6706250da0e6adabdf825ab2ada4cf242f Binary files /dev/null and b/content/zh/post/2022/figures/20211017-204222-dc83a9cc72803e849caa49dae027369f.png differ diff --git 
a/content/zh/post/2022/figures/20211017-210839-v2-58a3a0df18e1a92b9cc209036fb149ab_b.jpg b/content/zh/post/2022/figures/20211017-210839-v2-58a3a0df18e1a92b9cc209036fb149ab_b.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2eb470c3661d574785df9ae4c30d4c0c6c808097 Binary files /dev/null and b/content/zh/post/2022/figures/20211017-210839-v2-58a3a0df18e1a92b9cc209036fb149ab_b.jpg differ diff --git a/content/zh/post/2022/figures/20211203-7294cdd5-5b8a-41dd-9558-468c56d0e49d.png b/content/zh/post/2022/figures/20211203-7294cdd5-5b8a-41dd-9558-468c56d0e49d.png new file mode 100644 index 0000000000000000000000000000000000000000..6297ff5616df65adac566d3353e0fde2baa6b1aa Binary files /dev/null and b/content/zh/post/2022/figures/20211203-7294cdd5-5b8a-41dd-9558-468c56d0e49d.png differ diff --git a/content/zh/post/2022/figures/20211203-8632d683-5aa7-4e1f-907c-3952796968f4.png b/content/zh/post/2022/figures/20211203-8632d683-5aa7-4e1f-907c-3952796968f4.png new file mode 100644 index 0000000000000000000000000000000000000000..d16ccb9c68401d34c58af39b0a07bdf898d402fe Binary files /dev/null and b/content/zh/post/2022/figures/20211203-8632d683-5aa7-4e1f-907c-3952796968f4.png differ diff --git a/content/zh/post/2022/figures/20211204-00e0901d-e71f-46d3-95ed-9e14cb28b1ac.png b/content/zh/post/2022/figures/20211204-00e0901d-e71f-46d3-95ed-9e14cb28b1ac.png new file mode 100644 index 0000000000000000000000000000000000000000..4babb78798c586c2bd1305d1a7b5d31731623c7c Binary files /dev/null and b/content/zh/post/2022/figures/20211204-00e0901d-e71f-46d3-95ed-9e14cb28b1ac.png differ diff --git a/content/zh/post/2022/figures/20211204-10f40098-2578-4da8-83c9-dd493f7d3111.png b/content/zh/post/2022/figures/20211204-10f40098-2578-4da8-83c9-dd493f7d3111.png new file mode 100644 index 0000000000000000000000000000000000000000..0eca84c53c45a78748b2ac30441d3c2ae0a0625e Binary files /dev/null and b/content/zh/post/2022/figures/20211204-10f40098-2578-4da8-83c9-dd493f7d3111.png differ diff --git a/content/zh/post/2022/figures/20211204-17ff081b-5a00-4c19-974a-69a531902983.png b/content/zh/post/2022/figures/20211204-17ff081b-5a00-4c19-974a-69a531902983.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1317b2efd2630911aa1b6cae4f05c017440437 Binary files /dev/null and b/content/zh/post/2022/figures/20211204-17ff081b-5a00-4c19-974a-69a531902983.png differ diff --git a/content/zh/post/2022/figures/20211204-183e159b-ef0f-4134-b134-71f99ba6e89a.png b/content/zh/post/2022/figures/20211204-183e159b-ef0f-4134-b134-71f99ba6e89a.png new file mode 100644 index 0000000000000000000000000000000000000000..a604acac42f8692befc2b70a6314a4fd1f6177ed Binary files /dev/null and b/content/zh/post/2022/figures/20211204-183e159b-ef0f-4134-b134-71f99ba6e89a.png differ diff --git a/content/zh/post/2022/figures/20211204-25c40a97-f135-48be-af18-f1fe9986db5b.png b/content/zh/post/2022/figures/20211204-25c40a97-f135-48be-af18-f1fe9986db5b.png new file mode 100644 index 0000000000000000000000000000000000000000..b789b3c71b71b411361b06b72b11cba51b752cb8 Binary files /dev/null and b/content/zh/post/2022/figures/20211204-25c40a97-f135-48be-af18-f1fe9986db5b.png differ diff --git a/content/zh/post/2022/figures/20211204-32f1f188-106d-4627-8b7c-c939ddcb1c59.png b/content/zh/post/2022/figures/20211204-32f1f188-106d-4627-8b7c-c939ddcb1c59.png new file mode 100644 index 0000000000000000000000000000000000000000..f315d94a03a2ebb05fd0d3c91c5530385b98a7a0 Binary files /dev/null and 
b/content/zh/post/2022/figures/20211204-32f1f188-106d-4627-8b7c-c939ddcb1c59.png differ diff --git a/content/zh/post/2022/figures/20211204-41c59db9-f61d-4dae-b29d-7036223ba567.png b/content/zh/post/2022/figures/20211204-41c59db9-f61d-4dae-b29d-7036223ba567.png new file mode 100644 index 0000000000000000000000000000000000000000..487c06b27ef1fe62cb3c40c4245821e8c9b3430a Binary files /dev/null and b/content/zh/post/2022/figures/20211204-41c59db9-f61d-4dae-b29d-7036223ba567.png differ diff --git a/content/zh/post/2022/figures/20211204-7e5f33ac-8420-463d-9639-f67586ad76ed.png b/content/zh/post/2022/figures/20211204-7e5f33ac-8420-463d-9639-f67586ad76ed.png new file mode 100644 index 0000000000000000000000000000000000000000..0bb2ba7ea4b90d596a24c7689585895c01adddd1 Binary files /dev/null and b/content/zh/post/2022/figures/20211204-7e5f33ac-8420-463d-9639-f67586ad76ed.png differ diff --git a/content/zh/post/2022/figures/20211204-a73fa928-786e-406e-8289-c87c4275ab5f.png b/content/zh/post/2022/figures/20211204-a73fa928-786e-406e-8289-c87c4275ab5f.png new file mode 100644 index 0000000000000000000000000000000000000000..8305a75178d74b1c68fe0780afedb5feef90083a Binary files /dev/null and b/content/zh/post/2022/figures/20211204-a73fa928-786e-406e-8289-c87c4275ab5f.png differ diff --git a/content/zh/post/2022/figures/20211204-ae369c99-359e-419f-a4c2-9dba1f855cd5.png b/content/zh/post/2022/figures/20211204-ae369c99-359e-419f-a4c2-9dba1f855cd5.png new file mode 100644 index 0000000000000000000000000000000000000000..1fa6d5fd5a8424d17193f8e3b3b3d27220d83043 Binary files /dev/null and b/content/zh/post/2022/figures/20211204-ae369c99-359e-419f-a4c2-9dba1f855cd5.png differ diff --git a/content/zh/post/2022/figures/20211204-aec67dd0-2b24-4f75-8d74-9ea4b2a22edd.png b/content/zh/post/2022/figures/20211204-aec67dd0-2b24-4f75-8d74-9ea4b2a22edd.png new file mode 100644 index 0000000000000000000000000000000000000000..1dac8aa838a69cb66a31b4c373562e280854139f Binary files /dev/null and b/content/zh/post/2022/figures/20211204-aec67dd0-2b24-4f75-8d74-9ea4b2a22edd.png differ diff --git a/content/zh/post/2022/figures/20211204-b6e374da-906c-4f47-bc31-96f0ca3037fa.png b/content/zh/post/2022/figures/20211204-b6e374da-906c-4f47-bc31-96f0ca3037fa.png new file mode 100644 index 0000000000000000000000000000000000000000..e428d5b76562c8a83be7dbc84548bc7801b89a69 Binary files /dev/null and b/content/zh/post/2022/figures/20211204-b6e374da-906c-4f47-bc31-96f0ca3037fa.png differ diff --git a/content/zh/post/2022/figures/20211204-ba7b78a2-3978-45b4-b868-61334e4087f2.png b/content/zh/post/2022/figures/20211204-ba7b78a2-3978-45b4-b868-61334e4087f2.png new file mode 100644 index 0000000000000000000000000000000000000000..75a4863733bb6105fbfab011fdd7e5b1fe1d5c72 Binary files /dev/null and b/content/zh/post/2022/figures/20211204-ba7b78a2-3978-45b4-b868-61334e4087f2.png differ diff --git a/content/zh/post/2022/figures/20211204-c0cfe4c4-d76b-4a8c-bd04-7a2f81f603a6.png b/content/zh/post/2022/figures/20211204-c0cfe4c4-d76b-4a8c-bd04-7a2f81f603a6.png new file mode 100644 index 0000000000000000000000000000000000000000..3a39eb7614216719a8d22fd4b5e28d7176103804 Binary files /dev/null and b/content/zh/post/2022/figures/20211204-c0cfe4c4-d76b-4a8c-bd04-7a2f81f603a6.png differ diff --git a/content/zh/post/2022/figures/20211204-c6e730da-357c-4f03-92d7-95d47f015284.png b/content/zh/post/2022/figures/20211204-c6e730da-357c-4f03-92d7-95d47f015284.png new file mode 100644 index 0000000000000000000000000000000000000000..ac85a5955ef6ad9191312a316a794268f2412fef 
Binary files /dev/null and b/content/zh/post/2022/figures/20211204-c6e730da-357c-4f03-92d7-95d47f015284.png differ diff --git a/content/zh/post/2022/figures/20211204-c8674984-9927-4b9d-bdde-fb9725ea88ee.png b/content/zh/post/2022/figures/20211204-c8674984-9927-4b9d-bdde-fb9725ea88ee.png new file mode 100644 index 0000000000000000000000000000000000000000..48ade05b02fa82a89e09b316994df74fcb82baca Binary files /dev/null and b/content/zh/post/2022/figures/20211204-c8674984-9927-4b9d-bdde-fb9725ea88ee.png differ diff --git a/content/zh/post/2022/figures/20211204-cf9d6243-d31c-4e37-aa26-953e2822e0c1.png b/content/zh/post/2022/figures/20211204-cf9d6243-d31c-4e37-aa26-953e2822e0c1.png new file mode 100644 index 0000000000000000000000000000000000000000..c84230d44dba9cc68f2c5fd6e69ec5aff2cb5912 Binary files /dev/null and b/content/zh/post/2022/figures/20211204-cf9d6243-d31c-4e37-aa26-953e2822e0c1.png differ diff --git a/content/zh/post/2022/figures/20211204-cfc47e9a-4272-48e2-9fba-ab5a17c9b323.png b/content/zh/post/2022/figures/20211204-cfc47e9a-4272-48e2-9fba-ab5a17c9b323.png new file mode 100644 index 0000000000000000000000000000000000000000..a9ca333867019fe50cb57edf09a2af4926fb3ef7 Binary files /dev/null and b/content/zh/post/2022/figures/20211204-cfc47e9a-4272-48e2-9fba-ab5a17c9b323.png differ diff --git a/content/zh/post/2022/figures/20211204-eb905549-76da-4976-aaa6-dfef16877d00.png b/content/zh/post/2022/figures/20211204-eb905549-76da-4976-aaa6-dfef16877d00.png new file mode 100644 index 0000000000000000000000000000000000000000..dd724e0d88a9bef64bd751b6002aeda6152169e3 Binary files /dev/null and b/content/zh/post/2022/figures/20211204-eb905549-76da-4976-aaa6-dfef16877d00.png differ diff --git a/content/zh/post/2022/figures/20211204-ec617df5-639c-43a2-a45e-5d84738909c5.png b/content/zh/post/2022/figures/20211204-ec617df5-639c-43a2-a45e-5d84738909c5.png new file mode 100644 index 0000000000000000000000000000000000000000..be9ec6d3d2d82baad20fef36377952702c579549 Binary files /dev/null and b/content/zh/post/2022/figures/20211204-ec617df5-639c-43a2-a45e-5d84738909c5.png differ diff --git a/content/zh/post/2022/figures/20211204-f08b84a5-2be7-4bc4-826a-397c9ad77d79.png b/content/zh/post/2022/figures/20211204-f08b84a5-2be7-4bc4-826a-397c9ad77d79.png new file mode 100644 index 0000000000000000000000000000000000000000..e06701ead547264e1127a5d42b26dd2c0c1fa35e Binary files /dev/null and b/content/zh/post/2022/figures/20211204-f08b84a5-2be7-4bc4-826a-397c9ad77d79.png differ diff --git a/content/zh/post/2022/figures/20211204-fc1c14b8-f666-4600-b21e-b73aec582740.png b/content/zh/post/2022/figures/20211204-fc1c14b8-f666-4600-b21e-b73aec582740.png new file mode 100644 index 0000000000000000000000000000000000000000..5a87c69729318bfa7ddc75ddab30207bb2e02e09 Binary files /dev/null and b/content/zh/post/2022/figures/20211204-fc1c14b8-f666-4600-b21e-b73aec582740.png differ diff --git a/content/zh/post/2022/figures/20211204-ffad91b6-007a-441c-8af8-835a9c0e0597.png b/content/zh/post/2022/figures/20211204-ffad91b6-007a-441c-8af8-835a9c0e0597.png new file mode 100644 index 0000000000000000000000000000000000000000..7bd74a94450fa999a7c30d94b030df1f1284d101 Binary files /dev/null and b/content/zh/post/2022/figures/20211204-ffad91b6-007a-441c-8af8-835a9c0e0597.png differ diff --git a/content/zh/post/2022/figures/20211216-05611555-f74d-47d5-8057-a86a6fd5e38f.png b/content/zh/post/2022/figures/20211216-05611555-f74d-47d5-8057-a86a6fd5e38f.png new file mode 100644 index 
0000000000000000000000000000000000000000..70bdc3f06fc1d17ee17642c4866ac81dfe8d2e8d Binary files /dev/null and b/content/zh/post/2022/figures/20211216-05611555-f74d-47d5-8057-a86a6fd5e38f.png differ diff --git a/content/zh/post/2022/figures/20211216-2e9cd439-b92e-4fcd-8180-ef7096c80a16.png b/content/zh/post/2022/figures/20211216-2e9cd439-b92e-4fcd-8180-ef7096c80a16.png new file mode 100644 index 0000000000000000000000000000000000000000..54f565eafb318182382b5b0ae491739477b244d8 Binary files /dev/null and b/content/zh/post/2022/figures/20211216-2e9cd439-b92e-4fcd-8180-ef7096c80a16.png differ diff --git a/content/zh/post/2022/figures/20211216-b9c6b9ce-6a77-4ce0-a064-291015801db2.png b/content/zh/post/2022/figures/20211216-b9c6b9ce-6a77-4ce0-a064-291015801db2.png new file mode 100644 index 0000000000000000000000000000000000000000..ae10b7c699123e524d996fb2475763192a8b6517 Binary files /dev/null and b/content/zh/post/2022/figures/20211216-b9c6b9ce-6a77-4ce0-a064-291015801db2.png differ diff --git a/content/zh/post/2022/figures/20211216-cd0ca2d6-dd3c-41d5-9643-775edc3e9035.png b/content/zh/post/2022/figures/20211216-cd0ca2d6-dd3c-41d5-9643-775edc3e9035.png new file mode 100644 index 0000000000000000000000000000000000000000..b0efe89242bc30ca5904b71d3eff765eecffd614 Binary files /dev/null and b/content/zh/post/2022/figures/20211216-cd0ca2d6-dd3c-41d5-9643-775edc3e9035.png differ diff --git a/content/zh/post/2022/figures/20211223-01cf061e-a19f-4516-9ddf-d38eb5bbbc86.png b/content/zh/post/2022/figures/20211223-01cf061e-a19f-4516-9ddf-d38eb5bbbc86.png new file mode 100644 index 0000000000000000000000000000000000000000..ed878049a36441975138595a7ab0079c4baff4d2 Binary files /dev/null and b/content/zh/post/2022/figures/20211223-01cf061e-a19f-4516-9ddf-d38eb5bbbc86.png differ diff --git a/content/zh/post/2022/figures/20211223-453c2df5-151d-4333-a812-732e1a32313b.png b/content/zh/post/2022/figures/20211223-453c2df5-151d-4333-a812-732e1a32313b.png new file mode 100644 index 0000000000000000000000000000000000000000..e258154c1d0b52f79be87fd4570da1bb1721657a Binary files /dev/null and b/content/zh/post/2022/figures/20211223-453c2df5-151d-4333-a812-732e1a32313b.png differ diff --git a/content/zh/post/2022/figures/20211223-60e81928-181c-4964-b0ec-abdd2acc7da7.png b/content/zh/post/2022/figures/20211223-60e81928-181c-4964-b0ec-abdd2acc7da7.png new file mode 100644 index 0000000000000000000000000000000000000000..a43926f0230c4ba8a6523e5429e0dda4e5885908 Binary files /dev/null and b/content/zh/post/2022/figures/20211223-60e81928-181c-4964-b0ec-abdd2acc7da7.png differ diff --git a/content/zh/post/2022/figures/20211223-7afbf443-21c5-4855-8ed7-c264abaf9ff0.png b/content/zh/post/2022/figures/20211223-7afbf443-21c5-4855-8ed7-c264abaf9ff0.png new file mode 100644 index 0000000000000000000000000000000000000000..4fc27ca5c1058a159d5cd750b934ca1c468dd057 Binary files /dev/null and b/content/zh/post/2022/figures/20211223-7afbf443-21c5-4855-8ed7-c264abaf9ff0.png differ diff --git a/content/zh/post/2022/figures/20211223-83e9cf25-6bbc-4e0e-a24d-963d9050ae73.png b/content/zh/post/2022/figures/20211223-83e9cf25-6bbc-4e0e-a24d-963d9050ae73.png new file mode 100644 index 0000000000000000000000000000000000000000..727b87b2bbfaed3e445b8ac80a90d79681e94390 Binary files /dev/null and b/content/zh/post/2022/figures/20211223-83e9cf25-6bbc-4e0e-a24d-963d9050ae73.png differ diff --git a/content/zh/post/2022/figures/20211223-8c6710da-e8ba-4c22-a1dd-dc76ecaec07a.png b/content/zh/post/2022/figures/20211223-8c6710da-e8ba-4c22-a1dd-dc76ecaec07a.png 
new file mode 100644 index 0000000000000000000000000000000000000000..0b280a7401e075154afd043494ad3be826854d0a Binary files /dev/null and b/content/zh/post/2022/figures/20211223-8c6710da-e8ba-4c22-a1dd-dc76ecaec07a.png differ diff --git a/content/zh/post/2022/figures/20211223-8e28c064-237c-4c48-8d6d-7498b11f1c3b.png b/content/zh/post/2022/figures/20211223-8e28c064-237c-4c48-8d6d-7498b11f1c3b.png new file mode 100644 index 0000000000000000000000000000000000000000..d98e87e855ed813ce9b1dc511e7eefb14681f5f7 Binary files /dev/null and b/content/zh/post/2022/figures/20211223-8e28c064-237c-4c48-8d6d-7498b11f1c3b.png differ diff --git a/content/zh/post/2022/figures/20211223-92cb0889-6352-4ae6-a73f-1ec772e8a730.png b/content/zh/post/2022/figures/20211223-92cb0889-6352-4ae6-a73f-1ec772e8a730.png new file mode 100644 index 0000000000000000000000000000000000000000..2801ceb245952612ff8d74ab6b0f3320fa4bdce9 Binary files /dev/null and b/content/zh/post/2022/figures/20211223-92cb0889-6352-4ae6-a73f-1ec772e8a730.png differ diff --git a/content/zh/post/2022/figures/20211223-9c55c807-e30b-44a9-8810-4d2b70db10a9.png b/content/zh/post/2022/figures/20211223-9c55c807-e30b-44a9-8810-4d2b70db10a9.png new file mode 100644 index 0000000000000000000000000000000000000000..5b63821ac2a1bc0c7f714290fc67b9c826221a77 Binary files /dev/null and b/content/zh/post/2022/figures/20211223-9c55c807-e30b-44a9-8810-4d2b70db10a9.png differ diff --git a/content/zh/post/2022/figures/20211223-ae44972c-4cc6-49b7-94c5-5b507039a686.png b/content/zh/post/2022/figures/20211223-ae44972c-4cc6-49b7-94c5-5b507039a686.png new file mode 100644 index 0000000000000000000000000000000000000000..fad7bb7c8ea49982827d5d58f4f4e380147a0f03 Binary files /dev/null and b/content/zh/post/2022/figures/20211223-ae44972c-4cc6-49b7-94c5-5b507039a686.png differ diff --git a/content/zh/post/2022/figures/20211223-c49e9596-383a-41c4-8057-77cdfd9e8f5e.png b/content/zh/post/2022/figures/20211223-c49e9596-383a-41c4-8057-77cdfd9e8f5e.png new file mode 100644 index 0000000000000000000000000000000000000000..3c9a607823ebe007dc21c165618f1d321f48df73 Binary files /dev/null and b/content/zh/post/2022/figures/20211223-c49e9596-383a-41c4-8057-77cdfd9e8f5e.png differ diff --git a/content/zh/post/2022/figures/20211223-ef70cfd4-da07-4c1d-aabe-cc867cedbc80.png b/content/zh/post/2022/figures/20211223-ef70cfd4-da07-4c1d-aabe-cc867cedbc80.png new file mode 100644 index 0000000000000000000000000000000000000000..31ffa5881dc342ca0466ded20706002b17b637ab Binary files /dev/null and b/content/zh/post/2022/figures/20211223-ef70cfd4-da07-4c1d-aabe-cc867cedbc80.png differ diff --git a/content/zh/post/2022/figures/20220106-02524930-39ff-4c6d-898e-4070ab278009.png b/content/zh/post/2022/figures/20220106-02524930-39ff-4c6d-898e-4070ab278009.png new file mode 100644 index 0000000000000000000000000000000000000000..aebbd447a1e0820bdc56d66313358a0b9961a20f Binary files /dev/null and b/content/zh/post/2022/figures/20220106-02524930-39ff-4c6d-898e-4070ab278009.png differ diff --git a/content/zh/post/2022/figures/20220106-03097507-8fce-424c-8c74-969e1fb06f16.png b/content/zh/post/2022/figures/20220106-03097507-8fce-424c-8c74-969e1fb06f16.png new file mode 100644 index 0000000000000000000000000000000000000000..09afd5367d5e1a8e05ea157143908b81c0a5cd69 Binary files /dev/null and b/content/zh/post/2022/figures/20220106-03097507-8fce-424c-8c74-969e1fb06f16.png differ diff --git a/content/zh/post/2022/figures/20220106-36068d2a-eccf-45ff-89df-c994c6331802.png 
b/content/zh/post/2022/figures/20220106-36068d2a-eccf-45ff-89df-c994c6331802.png new file mode 100644 index 0000000000000000000000000000000000000000..f532a04bf18ba67f375e04f67e060fe2a5de1ef9 Binary files /dev/null and b/content/zh/post/2022/figures/20220106-36068d2a-eccf-45ff-89df-c994c6331802.png differ diff --git a/content/zh/post/2022/figures/20220106-a00fdef7-cefb-4775-bd75-f6bfb0952b8d.png b/content/zh/post/2022/figures/20220106-a00fdef7-cefb-4775-bd75-f6bfb0952b8d.png new file mode 100644 index 0000000000000000000000000000000000000000..b57fbe34f283ebc026f154c4e1b8719742d92690 Binary files /dev/null and b/content/zh/post/2022/figures/20220106-a00fdef7-cefb-4775-bd75-f6bfb0952b8d.png differ diff --git a/content/zh/post/2022/figures/20220106-a510566b-e8dc-4b21-b5df-974e4bac5cd4.png b/content/zh/post/2022/figures/20220106-a510566b-e8dc-4b21-b5df-974e4bac5cd4.png new file mode 100644 index 0000000000000000000000000000000000000000..d385047ab0c808c0287c97cdd9ebbbdd1fc51735 Binary files /dev/null and b/content/zh/post/2022/figures/20220106-a510566b-e8dc-4b21-b5df-974e4bac5cd4.png differ diff --git a/content/zh/post/2022/figures/20220106-e39ed7ff-add2-4ef5-9b4a-45edddfe74ff.png b/content/zh/post/2022/figures/20220106-e39ed7ff-add2-4ef5-9b4a-45edddfe74ff.png new file mode 100644 index 0000000000000000000000000000000000000000..cafc81772fe9e11227290703a20f6bc9f1738846 Binary files /dev/null and b/content/zh/post/2022/figures/20220106-e39ed7ff-add2-4ef5-9b4a-45edddfe74ff.png differ diff --git a/content/zh/post/2022/figures/20220106-fec3c37b-f253-4aa9-a986-96012785126a.png b/content/zh/post/2022/figures/20220106-fec3c37b-f253-4aa9-a986-96012785126a.png new file mode 100644 index 0000000000000000000000000000000000000000..5d29f51e53a200a7107138126c21fea57af716ae Binary files /dev/null and b/content/zh/post/2022/figures/20220106-fec3c37b-f253-4aa9-a986-96012785126a.png differ diff --git a/content/zh/post/2022/figures/20220107-154aa7ec-6a24-41aa-8fe4-0eee137d0982.png b/content/zh/post/2022/figures/20220107-154aa7ec-6a24-41aa-8fe4-0eee137d0982.png new file mode 100644 index 0000000000000000000000000000000000000000..c5b803e5863889bc79fa9c2651c4742c4bc40191 Binary files /dev/null and b/content/zh/post/2022/figures/20220107-154aa7ec-6a24-41aa-8fe4-0eee137d0982.png differ diff --git a/content/zh/post/2022/figures/20220107-2073c9b3-0749-4d3d-a577-cf9467225d37.png b/content/zh/post/2022/figures/20220107-2073c9b3-0749-4d3d-a577-cf9467225d37.png new file mode 100644 index 0000000000000000000000000000000000000000..997aad3e04ca89b0e750bfb5e2050ee0d95df7e4 Binary files /dev/null and b/content/zh/post/2022/figures/20220107-2073c9b3-0749-4d3d-a577-cf9467225d37.png differ diff --git a/content/zh/post/2022/figures/20220107-39368262-8b82-4c5d-973d-c268dab99042.png b/content/zh/post/2022/figures/20220107-39368262-8b82-4c5d-973d-c268dab99042.png new file mode 100644 index 0000000000000000000000000000000000000000..4a6ebf97eabe536833d4afbc63770db130de9e89 Binary files /dev/null and b/content/zh/post/2022/figures/20220107-39368262-8b82-4c5d-973d-c268dab99042.png differ diff --git a/content/zh/post/2022/figures/20220107-62164f26-2335-4465-ad23-47148ecae8a1.png b/content/zh/post/2022/figures/20220107-62164f26-2335-4465-ad23-47148ecae8a1.png new file mode 100644 index 0000000000000000000000000000000000000000..f9d2972e242530ecda89e2c1cc80d54897cb45c3 Binary files /dev/null and b/content/zh/post/2022/figures/20220107-62164f26-2335-4465-ad23-47148ecae8a1.png differ diff --git 
a/content/zh/post/2022/figures/20220107-6c96183e-8ed2-4eac-840d-6de2b6c9e746.png b/content/zh/post/2022/figures/20220107-6c96183e-8ed2-4eac-840d-6de2b6c9e746.png new file mode 100644 index 0000000000000000000000000000000000000000..517cae679afecb33b52b338e7e22834a9d2613ab Binary files /dev/null and b/content/zh/post/2022/figures/20220107-6c96183e-8ed2-4eac-840d-6de2b6c9e746.png differ diff --git a/content/zh/post/2022/figures/20220107-7b9036fd-66fb-44ff-9ad6-61a878d5940b.png b/content/zh/post/2022/figures/20220107-7b9036fd-66fb-44ff-9ad6-61a878d5940b.png new file mode 100644 index 0000000000000000000000000000000000000000..7ebb2bed99bdac96204c2cf30ce6f5b21ae87376 Binary files /dev/null and b/content/zh/post/2022/figures/20220107-7b9036fd-66fb-44ff-9ad6-61a878d5940b.png differ diff --git a/content/zh/post/2022/figures/20220107-ee45e332-8e56-4b07-a765-b1e5ce6df6b9.png b/content/zh/post/2022/figures/20220107-ee45e332-8e56-4b07-a765-b1e5ce6df6b9.png new file mode 100644 index 0000000000000000000000000000000000000000..d2651489047259b818fae49599920ae17522208f Binary files /dev/null and b/content/zh/post/2022/figures/20220107-ee45e332-8e56-4b07-a765-b1e5ce6df6b9.png differ diff --git a/content/zh/post/2022/figures/22b37a0e95ea4472b4d331064192382c.png b/content/zh/post/2022/figures/22b37a0e95ea4472b4d331064192382c.png new file mode 100644 index 0000000000000000000000000000000000000000..972721176606947baac4e685f09068ea3506520a Binary files /dev/null and b/content/zh/post/2022/figures/22b37a0e95ea4472b4d331064192382c.png differ diff --git a/content/zh/post/2022/figures/2775a3f24eb44c02931d63e302a4bf9c.png b/content/zh/post/2022/figures/2775a3f24eb44c02931d63e302a4bf9c.png new file mode 100644 index 0000000000000000000000000000000000000000..250d290fc508170e1ac9723bb4cf6088d100efbb Binary files /dev/null and b/content/zh/post/2022/figures/2775a3f24eb44c02931d63e302a4bf9c.png differ diff --git a/content/zh/post/2022/figures/27b944a22e1d45b39a0167b83e4d55a0.png b/content/zh/post/2022/figures/27b944a22e1d45b39a0167b83e4d55a0.png new file mode 100644 index 0000000000000000000000000000000000000000..3eb015ad3b77e040959a2e3e0db25dcb135393c9 Binary files /dev/null and b/content/zh/post/2022/figures/27b944a22e1d45b39a0167b83e4d55a0.png differ diff --git a/content/zh/post/2022/figures/2c62c125feb04ff89234abf76991601e.png b/content/zh/post/2022/figures/2c62c125feb04ff89234abf76991601e.png new file mode 100644 index 0000000000000000000000000000000000000000..301c2097e14c6982c8493543632391e08ecd8f39 Binary files /dev/null and b/content/zh/post/2022/figures/2c62c125feb04ff89234abf76991601e.png differ diff --git "a/content/zh/post/2022/figures/2\346\265\213\350\257\225\351\252\214\350\257\201.png" "b/content/zh/post/2022/figures/2\346\265\213\350\257\225\351\252\214\350\257\201.png" new file mode 100644 index 0000000000000000000000000000000000000000..77ad0591e79c4fd3a0863d0fd143abe569ed7873 Binary files /dev/null and "b/content/zh/post/2022/figures/2\346\265\213\350\257\225\351\252\214\350\257\201.png" differ diff --git a/content/zh/post/2022/figures/3507d173b3e24d9f94dd543947ae33ef.png b/content/zh/post/2022/figures/3507d173b3e24d9f94dd543947ae33ef.png new file mode 100644 index 0000000000000000000000000000000000000000..9629817a5cfe4f13b6f138cc4ecfecf513acb450 Binary files /dev/null and b/content/zh/post/2022/figures/3507d173b3e24d9f94dd543947ae33ef.png differ diff --git a/content/zh/post/2022/figures/356c385d615b442e951be7d27f00702e.png b/content/zh/post/2022/figures/356c385d615b442e951be7d27f00702e.png new file mode 
100644 index 0000000000000000000000000000000000000000..bfcf116a8b809e831ac311125c29b4f97e4ae7c8 Binary files /dev/null and b/content/zh/post/2022/figures/356c385d615b442e951be7d27f00702e.png differ diff --git a/content/zh/post/2022/figures/387c8fc827e34000936c977270c10f22.png b/content/zh/post/2022/figures/387c8fc827e34000936c977270c10f22.png new file mode 100644 index 0000000000000000000000000000000000000000..2a91b909d5369a398eabeb078d61bf6304bdbcc2 Binary files /dev/null and b/content/zh/post/2022/figures/387c8fc827e34000936c977270c10f22.png differ diff --git a/content/zh/post/2022/figures/480ae4bbdd664652af43663f061aae84.png b/content/zh/post/2022/figures/480ae4bbdd664652af43663f061aae84.png new file mode 100644 index 0000000000000000000000000000000000000000..dd7de840d5a3ad685b488d21d0e4d64ad85f87ee Binary files /dev/null and b/content/zh/post/2022/figures/480ae4bbdd664652af43663f061aae84.png differ diff --git a/content/zh/post/2022/figures/591c2725601c492cbccf312e9b2a7a11.png b/content/zh/post/2022/figures/591c2725601c492cbccf312e9b2a7a11.png new file mode 100644 index 0000000000000000000000000000000000000000..535a1dc77b07db4595203b1d22b35f7962279226 Binary files /dev/null and b/content/zh/post/2022/figures/591c2725601c492cbccf312e9b2a7a11.png differ diff --git a/content/zh/post/2022/figures/5d3d9f82ce164b08a6866a606fd7e03d.png b/content/zh/post/2022/figures/5d3d9f82ce164b08a6866a606fd7e03d.png new file mode 100644 index 0000000000000000000000000000000000000000..ea9ef563d157560da1046da4b0813307c7e5d949 Binary files /dev/null and b/content/zh/post/2022/figures/5d3d9f82ce164b08a6866a606fd7e03d.png differ diff --git a/content/zh/post/2022/figures/5e12f329abe74ed38ae99d8828adaa5d.png b/content/zh/post/2022/figures/5e12f329abe74ed38ae99d8828adaa5d.png new file mode 100644 index 0000000000000000000000000000000000000000..eb0f7f0ac649f2604727a2e1fa3ade2dadc775f6 Binary files /dev/null and b/content/zh/post/2022/figures/5e12f329abe74ed38ae99d8828adaa5d.png differ diff --git a/content/zh/post/2022/figures/61364d2741cc46f7802cb48cc75571fe.png b/content/zh/post/2022/figures/61364d2741cc46f7802cb48cc75571fe.png new file mode 100644 index 0000000000000000000000000000000000000000..02ad3da2b101e84845e7bbe583e7c2c1b94ce07d Binary files /dev/null and b/content/zh/post/2022/figures/61364d2741cc46f7802cb48cc75571fe.png differ diff --git a/content/zh/post/2022/figures/614036c6b5d84a0c86de61b3cbf88b78.png b/content/zh/post/2022/figures/614036c6b5d84a0c86de61b3cbf88b78.png new file mode 100644 index 0000000000000000000000000000000000000000..3b9462f3b7abe01bf2509cadc04c3b954c20e09f Binary files /dev/null and b/content/zh/post/2022/figures/614036c6b5d84a0c86de61b3cbf88b78.png differ diff --git a/content/zh/post/2022/figures/615c11832ab3f51d914222dd.png b/content/zh/post/2022/figures/615c11832ab3f51d914222dd.png new file mode 100644 index 0000000000000000000000000000000000000000..15653719bf489da0437c7da5b7e89d6f0cc8f7ce Binary files /dev/null and b/content/zh/post/2022/figures/615c11832ab3f51d914222dd.png differ diff --git a/content/zh/post/2022/figures/615c11832ab3f51d914222e9.png b/content/zh/post/2022/figures/615c11832ab3f51d914222e9.png new file mode 100644 index 0000000000000000000000000000000000000000..b54e5ca6e7aced964332ccffebd61fd47b631009 Binary files /dev/null and b/content/zh/post/2022/figures/615c11832ab3f51d914222e9.png differ diff --git a/content/zh/post/2022/figures/615c11832ab3f51d914222f4.png b/content/zh/post/2022/figures/615c11832ab3f51d914222f4.png new file mode 100644 index 
0000000000000000000000000000000000000000..c03cc7e87395a6da0108687d529dc20849c08ac6 Binary files /dev/null and b/content/zh/post/2022/figures/615c11832ab3f51d914222f4.png differ diff --git a/content/zh/post/2022/figures/615c11832ab3f51d91422301.png b/content/zh/post/2022/figures/615c11832ab3f51d91422301.png new file mode 100644 index 0000000000000000000000000000000000000000..30e8fdff5d050a36ced441b4ef515cb00df61e99 Binary files /dev/null and b/content/zh/post/2022/figures/615c11832ab3f51d91422301.png differ diff --git a/content/zh/post/2022/figures/615c13152ab3f51d91446977.png b/content/zh/post/2022/figures/615c13152ab3f51d91446977.png new file mode 100644 index 0000000000000000000000000000000000000000..d9272f7d33de4b4c7e5b8542808e63adb7278fc5 Binary files /dev/null and b/content/zh/post/2022/figures/615c13152ab3f51d91446977.png differ diff --git a/content/zh/post/2022/figures/615c14052ab3f51d9145c371.png b/content/zh/post/2022/figures/615c14052ab3f51d9145c371.png new file mode 100644 index 0000000000000000000000000000000000000000..c437be002001bc930fd48e01e2d178fbf51f9fbe Binary files /dev/null and b/content/zh/post/2022/figures/615c14052ab3f51d9145c371.png differ diff --git a/content/zh/post/2022/figures/615c14052ab3f51d9145c37e.png b/content/zh/post/2022/figures/615c14052ab3f51d9145c37e.png new file mode 100644 index 0000000000000000000000000000000000000000..d6803af00b4b54c2b5a13a403922aaa7e2ddcf83 Binary files /dev/null and b/content/zh/post/2022/figures/615c14052ab3f51d9145c37e.png differ diff --git a/content/zh/post/2022/figures/615c14052ab3f51d9145c394.png b/content/zh/post/2022/figures/615c14052ab3f51d9145c394.png new file mode 100644 index 0000000000000000000000000000000000000000..49ac43ab32753958e9ada19b8820b9154100ac1f Binary files /dev/null and b/content/zh/post/2022/figures/615c14052ab3f51d9145c394.png differ diff --git a/content/zh/post/2022/figures/615c14052ab3f51d9145c3a9.png b/content/zh/post/2022/figures/615c14052ab3f51d9145c3a9.png new file mode 100644 index 0000000000000000000000000000000000000000..c9223fbf01f4f981267768f0a46fb63fde0d4ff2 Binary files /dev/null and b/content/zh/post/2022/figures/615c14052ab3f51d9145c3a9.png differ diff --git a/content/zh/post/2022/figures/615c14a32ab3f51d9146b955.png b/content/zh/post/2022/figures/615c14a32ab3f51d9146b955.png new file mode 100644 index 0000000000000000000000000000000000000000..d71abeee3556049623218a9414e2c7a530abfe45 Binary files /dev/null and b/content/zh/post/2022/figures/615c14a32ab3f51d9146b955.png differ diff --git a/content/zh/post/2022/figures/615c14a32ab3f51d9146b960.png b/content/zh/post/2022/figures/615c14a32ab3f51d9146b960.png new file mode 100644 index 0000000000000000000000000000000000000000..f1f00ff4e7ed548b43e101b660e87d4624899762 Binary files /dev/null and b/content/zh/post/2022/figures/615c14a32ab3f51d9146b960.png differ diff --git a/content/zh/post/2022/figures/615c14a32ab3f51d9146b96f.png b/content/zh/post/2022/figures/615c14a32ab3f51d9146b96f.png new file mode 100644 index 0000000000000000000000000000000000000000..b2498e1aedf27793ae36e4758d8dcdde32e3779d Binary files /dev/null and b/content/zh/post/2022/figures/615c14a32ab3f51d9146b96f.png differ diff --git a/content/zh/post/2022/figures/615c14a32ab3f51d9146b994.png b/content/zh/post/2022/figures/615c14a32ab3f51d9146b994.png new file mode 100644 index 0000000000000000000000000000000000000000..03ccb8b753dcf761fa600ac6e1fc500ca240709c Binary files /dev/null and b/content/zh/post/2022/figures/615c14a32ab3f51d9146b994.png differ diff --git 
a/content/zh/post/2022/figures/615c15482ab3f51d9147a2aa.png b/content/zh/post/2022/figures/615c15482ab3f51d9147a2aa.png new file mode 100644 index 0000000000000000000000000000000000000000..f47be217bfa8fa59421baa8aa4435159595d0993 Binary files /dev/null and b/content/zh/post/2022/figures/615c15482ab3f51d9147a2aa.png differ diff --git a/content/zh/post/2022/figures/615c15482ab3f51d9147a2b3.png b/content/zh/post/2022/figures/615c15482ab3f51d9147a2b3.png new file mode 100644 index 0000000000000000000000000000000000000000..1a4b4ea30a0cf948ac6d7321af263b8d3e0eb015 Binary files /dev/null and b/content/zh/post/2022/figures/615c15482ab3f51d9147a2b3.png differ diff --git a/content/zh/post/2022/figures/615c15482ab3f51d9147a2ba.png b/content/zh/post/2022/figures/615c15482ab3f51d9147a2ba.png new file mode 100644 index 0000000000000000000000000000000000000000..33f0ad4668b3817e3147804996eca7fc2fbc7bab Binary files /dev/null and b/content/zh/post/2022/figures/615c15482ab3f51d9147a2ba.png differ diff --git a/content/zh/post/2022/figures/615c15c42ab3f51d91484e93.png b/content/zh/post/2022/figures/615c15c42ab3f51d91484e93.png new file mode 100644 index 0000000000000000000000000000000000000000..343eda20df9a14b2dc79c66e833ae3af2cb0f531 Binary files /dev/null and b/content/zh/post/2022/figures/615c15c42ab3f51d91484e93.png differ diff --git a/content/zh/post/2022/figures/615c15c42ab3f51d91484e9e.png b/content/zh/post/2022/figures/615c15c42ab3f51d91484e9e.png new file mode 100644 index 0000000000000000000000000000000000000000..f0ecb1dc524fe925c879ed344ce7f8250ae7f70f Binary files /dev/null and b/content/zh/post/2022/figures/615c15c42ab3f51d91484e9e.png differ diff --git a/content/zh/post/2022/figures/615c15c42ab3f51d91484ead.png b/content/zh/post/2022/figures/615c15c42ab3f51d91484ead.png new file mode 100644 index 0000000000000000000000000000000000000000..619671b7c931cc39da11eee4b5d9d3aedded25a9 Binary files /dev/null and b/content/zh/post/2022/figures/615c15c42ab3f51d91484ead.png differ diff --git a/content/zh/post/2022/figures/615c15c42ab3f51d91484ec6.png b/content/zh/post/2022/figures/615c15c42ab3f51d91484ec6.png new file mode 100644 index 0000000000000000000000000000000000000000..162882467f532bb80b153bd6c206f919c717437c Binary files /dev/null and b/content/zh/post/2022/figures/615c15c42ab3f51d91484ec6.png differ diff --git a/content/zh/post/2022/figures/615c15c42ab3f51d91484ed6.png b/content/zh/post/2022/figures/615c15c42ab3f51d91484ed6.png new file mode 100644 index 0000000000000000000000000000000000000000..bf4deb15222f2ef72f6393c3fe76bd9e72ca7686 Binary files /dev/null and b/content/zh/post/2022/figures/615c15c42ab3f51d91484ed6.png differ diff --git a/content/zh/post/2022/figures/615c16922ab3f51d914979b2.png b/content/zh/post/2022/figures/615c16922ab3f51d914979b2.png new file mode 100644 index 0000000000000000000000000000000000000000..d6ddb96060ab61f3361f332cc7216576f5911c42 Binary files /dev/null and b/content/zh/post/2022/figures/615c16922ab3f51d914979b2.png differ diff --git a/content/zh/post/2022/figures/615c16922ab3f51d914979bf.png b/content/zh/post/2022/figures/615c16922ab3f51d914979bf.png new file mode 100644 index 0000000000000000000000000000000000000000..48a7a21cad2c81ddc9379db0f3c8d92789da0b81 Binary files /dev/null and b/content/zh/post/2022/figures/615c16922ab3f51d914979bf.png differ diff --git a/content/zh/post/2022/figures/615c16922ab3f51d914979c5.png b/content/zh/post/2022/figures/615c16922ab3f51d914979c5.png new file mode 100644 index 
0000000000000000000000000000000000000000..7687af7b39dccb9eece9763f782fefcc73faf810 Binary files /dev/null and b/content/zh/post/2022/figures/615c16922ab3f51d914979c5.png differ diff --git a/content/zh/post/2022/figures/615c16932ab3f51d914979dd.png b/content/zh/post/2022/figures/615c16932ab3f51d914979dd.png new file mode 100644 index 0000000000000000000000000000000000000000..100c3f1d34c739439d7c458d30732852af1df008 Binary files /dev/null and b/content/zh/post/2022/figures/615c16932ab3f51d914979dd.png differ diff --git a/content/zh/post/2022/figures/615c16932ab3f51d914979e7.png b/content/zh/post/2022/figures/615c16932ab3f51d914979e7.png new file mode 100644 index 0000000000000000000000000000000000000000..b42168b735293fb2f4251ee28d98616cfd099e76 Binary files /dev/null and b/content/zh/post/2022/figures/615c16932ab3f51d914979e7.png differ diff --git a/content/zh/post/2022/figures/615c16f62ab3f51d914a1b6d.png b/content/zh/post/2022/figures/615c16f62ab3f51d914a1b6d.png new file mode 100644 index 0000000000000000000000000000000000000000..5bbcf3bb2d2866c28bad8c11f180706be246a311 Binary files /dev/null and b/content/zh/post/2022/figures/615c16f62ab3f51d914a1b6d.png differ diff --git a/content/zh/post/2022/figures/615c16f62ab3f51d914a1b7d.png b/content/zh/post/2022/figures/615c16f62ab3f51d914a1b7d.png new file mode 100644 index 0000000000000000000000000000000000000000..1c48312f6a679222b78da0b7bb23b14b3864ac07 Binary files /dev/null and b/content/zh/post/2022/figures/615c16f62ab3f51d914a1b7d.png differ diff --git a/content/zh/post/2022/figures/615c16f62ab3f51d914a1b92.png b/content/zh/post/2022/figures/615c16f62ab3f51d914a1b92.png new file mode 100644 index 0000000000000000000000000000000000000000..2df830b26a6325c7ac7935b38b49466e60326472 Binary files /dev/null and b/content/zh/post/2022/figures/615c16f62ab3f51d914a1b92.png differ diff --git a/content/zh/post/2022/figures/615c16f62ab3f51d914a1ba8.png b/content/zh/post/2022/figures/615c16f62ab3f51d914a1ba8.png new file mode 100644 index 0000000000000000000000000000000000000000..80c76ef7af4c163b07e637efbf03ce131a3c015a Binary files /dev/null and b/content/zh/post/2022/figures/615c16f62ab3f51d914a1ba8.png differ diff --git a/content/zh/post/2022/figures/615c183c2ab3f51d914bfbaf.png b/content/zh/post/2022/figures/615c183c2ab3f51d914bfbaf.png new file mode 100644 index 0000000000000000000000000000000000000000..209022bb80a6179af2b98a09b5ef0234f18b3575 Binary files /dev/null and b/content/zh/post/2022/figures/615c183c2ab3f51d914bfbaf.png differ diff --git a/content/zh/post/2022/figures/615c183c2ab3f51d914bfbb6.png b/content/zh/post/2022/figures/615c183c2ab3f51d914bfbb6.png new file mode 100644 index 0000000000000000000000000000000000000000..352339817226447df02352a3374a865a94d730af Binary files /dev/null and b/content/zh/post/2022/figures/615c183c2ab3f51d914bfbb6.png differ diff --git a/content/zh/post/2022/figures/615c191d2ab3f51d914d3f1b.png b/content/zh/post/2022/figures/615c191d2ab3f51d914d3f1b.png new file mode 100644 index 0000000000000000000000000000000000000000..cd87ece05a26e3786c758cd784a346698cd32eea Binary files /dev/null and b/content/zh/post/2022/figures/615c191d2ab3f51d914d3f1b.png differ diff --git a/content/zh/post/2022/figures/615c191d2ab3f51d914d3f25.png b/content/zh/post/2022/figures/615c191d2ab3f51d914d3f25.png new file mode 100644 index 0000000000000000000000000000000000000000..6b2d1cc7fdd5eaeee8bf36bbdf362dfcc0f477f1 Binary files /dev/null and b/content/zh/post/2022/figures/615c191d2ab3f51d914d3f25.png differ diff --git 
a/content/zh/post/2022/figures/615c191d2ab3f51d914d3f32.png b/content/zh/post/2022/figures/615c191d2ab3f51d914d3f32.png new file mode 100644 index 0000000000000000000000000000000000000000..ae45a4c5b3ea2e928e25f9ee41ccaf2ea752e430 Binary files /dev/null and b/content/zh/post/2022/figures/615c191d2ab3f51d914d3f32.png differ diff --git a/content/zh/post/2022/figures/615c191d2ab3f51d914d3f43.png b/content/zh/post/2022/figures/615c191d2ab3f51d914d3f43.png new file mode 100644 index 0000000000000000000000000000000000000000..5f9989edc0a4b61b0df65c28672004a27f656429 Binary files /dev/null and b/content/zh/post/2022/figures/615c191d2ab3f51d914d3f43.png differ diff --git a/content/zh/post/2022/figures/615c19272ab3f51d914d4e90.png b/content/zh/post/2022/figures/615c19272ab3f51d914d4e90.png new file mode 100644 index 0000000000000000000000000000000000000000..491c3c2dba52acdf764e71524307235728ac1563 Binary files /dev/null and b/content/zh/post/2022/figures/615c19272ab3f51d914d4e90.png differ diff --git a/content/zh/post/2022/figures/615c19272ab3f51d914d4e97.png b/content/zh/post/2022/figures/615c19272ab3f51d914d4e97.png new file mode 100644 index 0000000000000000000000000000000000000000..7b38491cafc1d387f2c40b2520c06eddaee04654 Binary files /dev/null and b/content/zh/post/2022/figures/615c19272ab3f51d914d4e97.png differ diff --git a/content/zh/post/2022/figures/615c19272ab3f51d914d4e9d.png b/content/zh/post/2022/figures/615c19272ab3f51d914d4e9d.png new file mode 100644 index 0000000000000000000000000000000000000000..004c5dcdcf6eac988077eb915a2405591374fc24 Binary files /dev/null and b/content/zh/post/2022/figures/615c19272ab3f51d914d4e9d.png differ diff --git a/content/zh/post/2022/figures/615c19272ab3f51d914d4ea8.png b/content/zh/post/2022/figures/615c19272ab3f51d914d4ea8.png new file mode 100644 index 0000000000000000000000000000000000000000..29987cbe2b7cf0d5878f481349870afe8b16ec93 Binary files /dev/null and b/content/zh/post/2022/figures/615c19272ab3f51d914d4ea8.png differ diff --git a/content/zh/post/2022/figures/615c19272ab3f51d914d4eaf.png b/content/zh/post/2022/figures/615c19272ab3f51d914d4eaf.png new file mode 100644 index 0000000000000000000000000000000000000000..051ecde9986a3bba65c06144f9d7690fc993ea5c Binary files /dev/null and b/content/zh/post/2022/figures/615c19272ab3f51d914d4eaf.png differ diff --git a/content/zh/post/2022/figures/615c19302ab3f51d914d5dd8.png b/content/zh/post/2022/figures/615c19302ab3f51d914d5dd8.png new file mode 100644 index 0000000000000000000000000000000000000000..08238474333c93fc84cc3b258621d0fee17d2c6d Binary files /dev/null and b/content/zh/post/2022/figures/615c19302ab3f51d914d5dd8.png differ diff --git a/content/zh/post/2022/figures/615c19302ab3f51d914d5de4.png b/content/zh/post/2022/figures/615c19302ab3f51d914d5de4.png new file mode 100644 index 0000000000000000000000000000000000000000..2aa479db903a2395fc8e3dc6fb78975125c27581 Binary files /dev/null and b/content/zh/post/2022/figures/615c19302ab3f51d914d5de4.png differ diff --git a/content/zh/post/2022/figures/615c19302ab3f51d914d5df7.png b/content/zh/post/2022/figures/615c19302ab3f51d914d5df7.png new file mode 100644 index 0000000000000000000000000000000000000000..a3a949ef10f22ffd6102470de7605a03cee48b02 Binary files /dev/null and b/content/zh/post/2022/figures/615c19302ab3f51d914d5df7.png differ diff --git a/content/zh/post/2022/figures/615c19302ab3f51d914d5e02.png b/content/zh/post/2022/figures/615c19302ab3f51d914d5e02.png new file mode 100644 index 
0000000000000000000000000000000000000000..8f88b2f335cd2f62b906e348081712c565b3690d Binary files /dev/null and b/content/zh/post/2022/figures/615c19302ab3f51d914d5e02.png differ diff --git a/content/zh/post/2022/figures/615c19302ab3f51d914d5e11.png b/content/zh/post/2022/figures/615c19302ab3f51d914d5e11.png new file mode 100644 index 0000000000000000000000000000000000000000..0f2e8cc62c89db71a928ba79e954630f8392860d Binary files /dev/null and b/content/zh/post/2022/figures/615c19302ab3f51d914d5e11.png differ diff --git a/content/zh/post/2022/figures/615c193f2ab3f51d914d72ba.png b/content/zh/post/2022/figures/615c193f2ab3f51d914d72ba.png new file mode 100644 index 0000000000000000000000000000000000000000..bc90ddd5235827dfa7250548ec42a6f34ee5c7f6 Binary files /dev/null and b/content/zh/post/2022/figures/615c193f2ab3f51d914d72ba.png differ diff --git a/content/zh/post/2022/figures/615c193f2ab3f51d914d72c2.png b/content/zh/post/2022/figures/615c193f2ab3f51d914d72c2.png new file mode 100644 index 0000000000000000000000000000000000000000..1994b49f03cdf89bfa7b0cc13ae12bb8309c56a9 Binary files /dev/null and b/content/zh/post/2022/figures/615c193f2ab3f51d914d72c2.png differ diff --git a/content/zh/post/2022/figures/615c193f2ab3f51d914d72e9.png b/content/zh/post/2022/figures/615c193f2ab3f51d914d72e9.png new file mode 100644 index 0000000000000000000000000000000000000000..349f61aced9156de9023b82fccce89edb6b7c2a5 Binary files /dev/null and b/content/zh/post/2022/figures/615c193f2ab3f51d914d72e9.png differ diff --git a/content/zh/post/2022/figures/615c193f2ab3f51d914d72fc.png b/content/zh/post/2022/figures/615c193f2ab3f51d914d72fc.png new file mode 100644 index 0000000000000000000000000000000000000000..f2fedb67cb1d2ceeb5105a36fd7c06e8d515f1fe Binary files /dev/null and b/content/zh/post/2022/figures/615c193f2ab3f51d914d72fc.png differ diff --git a/content/zh/post/2022/figures/615c19492ab3f51d914d811b.png b/content/zh/post/2022/figures/615c19492ab3f51d914d811b.png new file mode 100644 index 0000000000000000000000000000000000000000..b09136e5c909402c8e04fddecb64331b4ec8b493 Binary files /dev/null and b/content/zh/post/2022/figures/615c19492ab3f51d914d811b.png differ diff --git a/content/zh/post/2022/figures/615c19492ab3f51d914d8137.png b/content/zh/post/2022/figures/615c19492ab3f51d914d8137.png new file mode 100644 index 0000000000000000000000000000000000000000..8f896b99705a54eb554acc6568bef41c9ce08d61 Binary files /dev/null and b/content/zh/post/2022/figures/615c19492ab3f51d914d8137.png differ diff --git a/content/zh/post/2022/figures/615c19492ab3f51d914d8153.png b/content/zh/post/2022/figures/615c19492ab3f51d914d8153.png new file mode 100644 index 0000000000000000000000000000000000000000..d2c1072e2248dcd89006effe6b88d2b834d3f8bd Binary files /dev/null and b/content/zh/post/2022/figures/615c19492ab3f51d914d8153.png differ diff --git a/content/zh/post/2022/figures/615c19492ab3f51d914d8161.png b/content/zh/post/2022/figures/615c19492ab3f51d914d8161.png new file mode 100644 index 0000000000000000000000000000000000000000..dee08fb5bc1df54d112060aeec8374a9e1e050a2 Binary files /dev/null and b/content/zh/post/2022/figures/615c19492ab3f51d914d8161.png differ diff --git a/content/zh/post/2022/figures/615c533b2ab3f51d91a72523.jpg b/content/zh/post/2022/figures/615c533b2ab3f51d91a72523.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b54b725d38b5c112212ffd6e022548a88134a7df Binary files /dev/null and b/content/zh/post/2022/figures/615c533b2ab3f51d91a72523.jpg differ diff --git 
a/content/zh/post/2022/figures/615c53892ab3f51d91a7b1e6.png b/content/zh/post/2022/figures/615c53892ab3f51d91a7b1e6.png new file mode 100644 index 0000000000000000000000000000000000000000..bf5923c491b733359aef9a1806234efeb41f6a13 Binary files /dev/null and b/content/zh/post/2022/figures/615c53892ab3f51d91a7b1e6.png differ diff --git a/content/zh/post/2022/figures/615ffa572ab3f51d91af9b67.jpg b/content/zh/post/2022/figures/615ffa572ab3f51d91af9b67.jpg new file mode 100644 index 0000000000000000000000000000000000000000..20754d3e984a08d9c0dde9614e333fe60466b4e7 Binary files /dev/null and b/content/zh/post/2022/figures/615ffa572ab3f51d91af9b67.jpg differ diff --git a/content/zh/post/2022/figures/615ffb2b2ab3f51d91b0c00c.jpg b/content/zh/post/2022/figures/615ffb2b2ab3f51d91b0c00c.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f412a1cd789f19faf6383481b5715051a330a4a8 Binary files /dev/null and b/content/zh/post/2022/figures/615ffb2b2ab3f51d91b0c00c.jpg differ diff --git a/content/zh/post/2022/figures/615ffbbb2ab3f51d91b187c6.jpg b/content/zh/post/2022/figures/615ffbbb2ab3f51d91b187c6.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cd29bf8b4e78704a87bb6b6a97f0fcbaf738d6ea Binary files /dev/null and b/content/zh/post/2022/figures/615ffbbb2ab3f51d91b187c6.jpg differ diff --git a/content/zh/post/2022/figures/615ffdad2ab3f51d91b42898.jpg b/content/zh/post/2022/figures/615ffdad2ab3f51d91b42898.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5447098b2acbfb8066511f2ae695178ba07140dc Binary files /dev/null and b/content/zh/post/2022/figures/615ffdad2ab3f51d91b42898.jpg differ diff --git a/content/zh/post/2022/figures/615ffeef2ab3f51d91b5bb72.jpg b/content/zh/post/2022/figures/615ffeef2ab3f51d91b5bb72.jpg new file mode 100644 index 0000000000000000000000000000000000000000..46b53c33151eafdd08e8ce06c460bdb258a84c3d Binary files /dev/null and b/content/zh/post/2022/figures/615ffeef2ab3f51d91b5bb72.jpg differ diff --git a/content/zh/post/2022/figures/615fff622ab3f51d91b644eb.jpg b/content/zh/post/2022/figures/615fff622ab3f51d91b644eb.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9cf62be780a130c39f37b8b0002200e1f542a22f Binary files /dev/null and b/content/zh/post/2022/figures/615fff622ab3f51d91b644eb.jpg differ diff --git a/content/zh/post/2022/figures/641abf7f6c9642b188ade66b1c8d25ee.png b/content/zh/post/2022/figures/641abf7f6c9642b188ade66b1c8d25ee.png new file mode 100644 index 0000000000000000000000000000000000000000..c3b32759df73c016ee54b2da091d97db20b1615a Binary files /dev/null and b/content/zh/post/2022/figures/641abf7f6c9642b188ade66b1c8d25ee.png differ diff --git a/content/zh/post/2022/figures/721e491c70e948abadf18b2eda7ce76f.png b/content/zh/post/2022/figures/721e491c70e948abadf18b2eda7ce76f.png new file mode 100644 index 0000000000000000000000000000000000000000..22ff947110d2b5333cdb107521037d1b537f57f4 Binary files /dev/null and b/content/zh/post/2022/figures/721e491c70e948abadf18b2eda7ce76f.png differ diff --git a/content/zh/post/2022/figures/7294465883ce45ac80a371f63dfe9659.png b/content/zh/post/2022/figures/7294465883ce45ac80a371f63dfe9659.png new file mode 100644 index 0000000000000000000000000000000000000000..25091e59d461f1ee0983c6723b4d45c6c52140b2 Binary files /dev/null and b/content/zh/post/2022/figures/7294465883ce45ac80a371f63dfe9659.png differ diff --git a/content/zh/post/2022/figures/7a7b1fc98317411a9a18982e944ba5c2.png 
b/content/zh/post/2022/figures/7a7b1fc98317411a9a18982e944ba5c2.png new file mode 100644 index 0000000000000000000000000000000000000000..54a1d1951071d9b4ac9b1b8341dbba395bd85367 Binary files /dev/null and b/content/zh/post/2022/figures/7a7b1fc98317411a9a18982e944ba5c2.png differ diff --git a/content/zh/post/2022/figures/816de1e0a8c04796a4f3478eff37baed.png b/content/zh/post/2022/figures/816de1e0a8c04796a4f3478eff37baed.png new file mode 100644 index 0000000000000000000000000000000000000000..4448f7deec2eb697af199860dcdb83f25c70bc29 Binary files /dev/null and b/content/zh/post/2022/figures/816de1e0a8c04796a4f3478eff37baed.png differ diff --git a/content/zh/post/2022/figures/a662d9a9a96b40d089a6d9c68788bf3d.png b/content/zh/post/2022/figures/a662d9a9a96b40d089a6d9c68788bf3d.png new file mode 100644 index 0000000000000000000000000000000000000000..2558cc4508684ab3bd7c593cf1a30bbf68750bc2 Binary files /dev/null and b/content/zh/post/2022/figures/a662d9a9a96b40d089a6d9c68788bf3d.png differ diff --git a/content/zh/post/2022/figures/a6d0fc02a8c948f2b43e4ef47cecd731.png b/content/zh/post/2022/figures/a6d0fc02a8c948f2b43e4ef47cecd731.png new file mode 100644 index 0000000000000000000000000000000000000000..2ced577820ebc2b01a6c4e1e28cbfc5fe7ff2ab2 Binary files /dev/null and b/content/zh/post/2022/figures/a6d0fc02a8c948f2b43e4ef47cecd731.png differ diff --git a/content/zh/post/2022/figures/ba1ea7c4485b4830b21538d56ecac309.png b/content/zh/post/2022/figures/ba1ea7c4485b4830b21538d56ecac309.png new file mode 100644 index 0000000000000000000000000000000000000000..549ec34af5c51d2af09e1fcf61fe56085763ec23 Binary files /dev/null and b/content/zh/post/2022/figures/ba1ea7c4485b4830b21538d56ecac309.png differ diff --git a/content/zh/post/2022/figures/c726f71fc88c4015b1d89f4586dfe290.png b/content/zh/post/2022/figures/c726f71fc88c4015b1d89f4586dfe290.png new file mode 100644 index 0000000000000000000000000000000000000000..9543723be927f77404ea82abeb0aee472e56e1d2 Binary files /dev/null and b/content/zh/post/2022/figures/c726f71fc88c4015b1d89f4586dfe290.png differ diff --git a/content/zh/post/2022/figures/cb8039252a6b45e99d8ff682fb9df992.png b/content/zh/post/2022/figures/cb8039252a6b45e99d8ff682fb9df992.png new file mode 100644 index 0000000000000000000000000000000000000000..f03b1407774c9f30845331fb12310da9936b1fbe Binary files /dev/null and b/content/zh/post/2022/figures/cb8039252a6b45e99d8ff682fb9df992.png differ diff --git a/content/zh/post/2022/figures/cd094375c2b44a8383694267e492fc63.png b/content/zh/post/2022/figures/cd094375c2b44a8383694267e492fc63.png new file mode 100644 index 0000000000000000000000000000000000000000..bc5bd1795fb97edbe7a23c5371f388fea99cfb7f Binary files /dev/null and b/content/zh/post/2022/figures/cd094375c2b44a8383694267e492fc63.png differ diff --git a/content/zh/post/2022/figures/d21813079e7b40a1b9edde6b9298d2f3.png b/content/zh/post/2022/figures/d21813079e7b40a1b9edde6b9298d2f3.png new file mode 100644 index 0000000000000000000000000000000000000000..b17148ae18deac8611e111ff18c3c27e16f51103 Binary files /dev/null and b/content/zh/post/2022/figures/d21813079e7b40a1b9edde6b9298d2f3.png differ diff --git a/content/zh/post/2022/figures/dbc89373c5734638a51add74523f640c.png b/content/zh/post/2022/figures/dbc89373c5734638a51add74523f640c.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1f151cef0851276b848059877c33552f628fe1 Binary files /dev/null and b/content/zh/post/2022/figures/dbc89373c5734638a51add74523f640c.png differ diff --git 
a/content/zh/post/2022/figures/dc1c632c7c0f49f2ab7ebd57f78915d6.png b/content/zh/post/2022/figures/dc1c632c7c0f49f2ab7ebd57f78915d6.png new file mode 100644 index 0000000000000000000000000000000000000000..5900e1e846cd8422fbe466206be036b297c82ef5 Binary files /dev/null and b/content/zh/post/2022/figures/dc1c632c7c0f49f2ab7ebd57f78915d6.png differ diff --git a/content/zh/post/2022/figures/ee22045a1dca446b925881137106db5c.png b/content/zh/post/2022/figures/ee22045a1dca446b925881137106db5c.png new file mode 100644 index 0000000000000000000000000000000000000000..b5cd86b6651ab9d74de07e63db4e000e2f57cdfa Binary files /dev/null and b/content/zh/post/2022/figures/ee22045a1dca446b925881137106db5c.png differ diff --git a/content/zh/post/2022/figures/f569229a746940cba90ed0cda6fd1d2f.png b/content/zh/post/2022/figures/f569229a746940cba90ed0cda6fd1d2f.png new file mode 100644 index 0000000000000000000000000000000000000000..2b8f2a8381c9854c7ae1d8d60735b8c3c065b7cb Binary files /dev/null and b/content/zh/post/2022/figures/f569229a746940cba90ed0cda6fd1d2f.png differ diff --git a/content/zh/post/2022/figures/faa8002b28d94f5b9408f0e251daebc7.png b/content/zh/post/2022/figures/faa8002b28d94f5b9408f0e251daebc7.png new file mode 100644 index 0000000000000000000000000000000000000000..8c2cd7d12d8921b2212de6e2e8dc6d53dca4c916 Binary files /dev/null and b/content/zh/post/2022/figures/faa8002b28d94f5b9408f0e251daebc7.png differ diff --git "a/content/zh/post/2022/figures/openGauss\345\206\205\345\255\230\344\274\230\345\214\226\345\255\230\345\202\250\345\274\225\346\223\216\347\273\223\346\236\204\345\233\276.png" "b/content/zh/post/2022/figures/openGauss\345\206\205\345\255\230\344\274\230\345\214\226\345\255\230\345\202\250\345\274\225\346\223\216\347\273\223\346\236\204\345\233\276.png" new file mode 100644 index 0000000000000000000000000000000000000000..8d353fb3803fad934e57f5a228d3657235c5a608 Binary files /dev/null and "b/content/zh/post/2022/figures/openGauss\345\206\205\345\255\230\344\274\230\345\214\226\345\255\230\345\202\250\345\274\225\346\223\216\347\273\223\346\236\204\345\233\276.png" differ diff --git "a/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2431.png" "b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2431.png" new file mode 100644 index 0000000000000000000000000000000000000000..23ad767ae2f932f9a184926f27cb61ffb9e8ce9e Binary files /dev/null and "b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2431.png" differ diff --git "a/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2432.png" 
"b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2432.png" new file mode 100644 index 0000000000000000000000000000000000000000..512c758271d7bb825cd1c9bb95a6c7f17dcae390 Binary files /dev/null and "b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2432.png" differ diff --git "a/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2433.png" "b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2433.png" new file mode 100644 index 0000000000000000000000000000000000000000..ba00fa376623fb44e8d616f464f6c198907587f8 Binary files /dev/null and "b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2433.png" differ diff --git "a/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2434.png" "b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2434.png" new file mode 100644 index 0000000000000000000000000000000000000000..f746678f619a9d77affa9422a503047026461306 Binary files /dev/null and "b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2434.png" differ diff --git "a/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2435.png" "b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2435.png" new file mode 100644 index 0000000000000000000000000000000000000000..ba00fa376623fb44e8d616f464f6c198907587f8 Binary files /dev/null and "b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2435.png" differ diff --git 
"a/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2436.png" "b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2436.png" new file mode 100644 index 0000000000000000000000000000000000000000..5afce2a120e3ed5ee179a8e3a30f83f622971c09 Binary files /dev/null and "b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2436.png" differ diff --git "a/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2437.png" "b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2437.png" new file mode 100644 index 0000000000000000000000000000000000000000..21c98083278e967f21e43511d53cd3899106f727 Binary files /dev/null and "b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2437.png" differ diff --git "a/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2438.png" "b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2438.png" new file mode 100644 index 0000000000000000000000000000000000000000..5c84d48c50a7bf293d0e227f235858406d4434b0 Binary files /dev/null and "b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2438.png" differ diff --git "a/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2439.png" "b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2439.png" new file mode 100644 index 0000000000000000000000000000000000000000..37c44e0fd7b44df55031bae4ddbf070e53d0e78d Binary files /dev/null and 
"b/content/zh/post/2022/figures/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\2439.png" differ diff --git a/content/zh/post/2022/figures/outerBatch1.png b/content/zh/post/2022/figures/outerBatch1.png new file mode 100644 index 0000000000000000000000000000000000000000..2800c3a5a21a2181fe346b9c47430777a7c5f51a Binary files /dev/null and b/content/zh/post/2022/figures/outerBatch1.png differ diff --git a/content/zh/post/2022/figures/vi.png b/content/zh/post/2022/figures/vi.png new file mode 100644 index 0000000000000000000000000000000000000000..ca214d6c2b02532a6a1854465b32ac71a4099eff Binary files /dev/null and b/content/zh/post/2022/figures/vi.png differ diff --git a/content/zh/post/2022/figures/zh-cn_image_0000001197508006.png b/content/zh/post/2022/figures/zh-cn_image_0000001197508006.png new file mode 100644 index 0000000000000000000000000000000000000000..485878c43220a940a965b797966a4ebcacf81ccf Binary files /dev/null and b/content/zh/post/2022/figures/zh-cn_image_0000001197508006.png differ diff --git a/content/zh/post/2022/figures/zh-cn_image_0000001197720014.png b/content/zh/post/2022/figures/zh-cn_image_0000001197720014.png new file mode 100644 index 0000000000000000000000000000000000000000..b19a89f588b61a3f9a08e5ce016d9f3e7b821fdc Binary files /dev/null and b/content/zh/post/2022/figures/zh-cn_image_0000001197720014.png differ diff --git a/content/zh/post/2022/figures/zh-cn_image_0000001291302034.jpg b/content/zh/post/2022/figures/zh-cn_image_0000001291302034.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b620dd3fc2f0e3e7b3d65c2aee77e8b58cf69fca Binary files /dev/null and b/content/zh/post/2022/figures/zh-cn_image_0000001291302034.jpg differ diff --git "a/content/zh/post/2022/figures/\344\270\213\347\274\226\350\276\221pg_hba-conf\346\226\207\344\273\266.png" "b/content/zh/post/2022/figures/\344\270\213\347\274\226\350\276\221pg_hba-conf\346\226\207\344\273\266.png" new file mode 100644 index 0000000000000000000000000000000000000000..77b7fb411e79de94a1a219622108ce7fca8b5840 Binary files /dev/null and "b/content/zh/post/2022/figures/\344\270\213\347\274\226\350\276\221pg_hba-conf\346\226\207\344\273\266.png" differ diff --git "a/content/zh/post/2022/figures/\344\270\273\350\246\201\347\224\2613\344\270\252\346\225\260\347\273\204\345\234\250\350\241\250\347\244\272.png" "b/content/zh/post/2022/figures/\344\270\273\350\246\201\347\224\2613\344\270\252\346\225\260\347\273\204\345\234\250\350\241\250\347\244\272.png" new file mode 100644 index 0000000000000000000000000000000000000000..0334790b6035793d50ab3702ef616bd25e9a060c Binary files /dev/null and "b/content/zh/post/2022/figures/\344\270\273\350\246\201\347\224\2613\344\270\252\346\225\260\347\273\204\345\234\250\350\241\250\347\244\272.png" differ diff --git "a/content/zh/post/2022/figures/\344\277\256\346\224\271\345\246\202\344\270\213.png" "b/content/zh/post/2022/figures/\344\277\256\346\224\271\345\246\202\344\270\213.png" new file mode 100644 index 0000000000000000000000000000000000000000..5cb43620f8f7278111c24044ee5662640b95226c Binary files /dev/null and "b/content/zh/post/2022/figures/\344\277\256\346\224\271\345\246\202\344\270\213.png" differ diff --git "a/content/zh/post/2022/figures/\345\256\211\350\243\205\346\210\220\345\212\237\344\274\232\345\207\272\347\216\260\345\246\202\344\270\213\347\225\214\351\235\242.png" 
"b/content/zh/post/2022/figures/\345\256\211\350\243\205\346\210\220\345\212\237\344\274\232\345\207\272\347\216\260\345\246\202\344\270\213\347\225\214\351\235\242.png" new file mode 100644 index 0000000000000000000000000000000000000000..571a8e81fd7c118c18a5c3367138621cfca0e9b4 Binary files /dev/null and "b/content/zh/post/2022/figures/\345\256\211\350\243\205\346\210\220\345\212\237\344\274\232\345\207\272\347\216\260\345\246\202\344\270\213\347\225\214\351\235\242.png" differ diff --git "a/content/zh/post/2022/figures/\346\210\221\344\273\254\350\207\252\345\256\232\344\271\211\347\232\204\346\214\207\346\240\207\345\267\262\347\273\217\351\207\207\351\233\206\345\210\260.png" "b/content/zh/post/2022/figures/\346\210\221\344\273\254\350\207\252\345\256\232\344\271\211\347\232\204\346\214\207\346\240\207\345\267\262\347\273\217\351\207\207\351\233\206\345\210\260.png" new file mode 100644 index 0000000000000000000000000000000000000000..7078b664720dec9ac98d5018024e246818f8c958 Binary files /dev/null and "b/content/zh/post/2022/figures/\346\210\221\344\273\254\350\207\252\345\256\232\344\271\211\347\232\204\346\214\207\346\240\207\345\267\262\347\273\217\351\207\207\351\233\206\345\210\260.png" differ diff --git "a/content/zh/post/2022/figures/\346\210\221\350\243\205\347\232\204\346\230\2571-16\347\211\210\346\234\254.png" "b/content/zh/post/2022/figures/\346\210\221\350\243\205\347\232\204\346\230\2571-16\347\211\210\346\234\254.png" new file mode 100644 index 0000000000000000000000000000000000000000..aaa8af6a0439336d61d0018a46db907ec4b2f684 Binary files /dev/null and "b/content/zh/post/2022/figures/\346\210\221\350\243\205\347\232\204\346\230\2571-16\347\211\210\346\234\254.png" differ diff --git "a/content/zh/post/2022/figures/\346\265\213\350\257\225\351\252\214\350\257\201.png" "b/content/zh/post/2022/figures/\346\265\213\350\257\225\351\252\214\350\257\201.png" new file mode 100644 index 0000000000000000000000000000000000000000..52401301383786f79a7562aaf351c36f41f0b6fb Binary files /dev/null and "b/content/zh/post/2022/figures/\346\265\213\350\257\225\351\252\214\350\257\201.png" differ diff --git "a/content/zh/post/2022/figures/\346\265\213\350\257\225\351\252\214\350\257\2011.png" "b/content/zh/post/2022/figures/\346\265\213\350\257\225\351\252\214\350\257\2011.png" new file mode 100644 index 0000000000000000000000000000000000000000..a0141bfac0eadab7abd4fb5c22f116fad574bf87 Binary files /dev/null and "b/content/zh/post/2022/figures/\346\265\213\350\257\225\351\252\214\350\257\2011.png" differ diff --git "a/content/zh/post/2022/figures/\347\254\2541\351\230\266\346\256\265.png" "b/content/zh/post/2022/figures/\347\254\2541\351\230\266\346\256\265.png" new file mode 100644 index 0000000000000000000000000000000000000000..54c38384d8cf113ab5528885cfbcc329f520be91 Binary files /dev/null and "b/content/zh/post/2022/figures/\347\254\2541\351\230\266\346\256\265.png" differ diff --git "a/content/zh/post/2022/figures/\347\254\2542\351\230\266\346\256\265.png" "b/content/zh/post/2022/figures/\347\254\2542\351\230\266\346\256\265.png" new file mode 100644 index 0000000000000000000000000000000000000000..cd6bd728aa9f5aa374a21e0be2cbde45a102f252 Binary files /dev/null and "b/content/zh/post/2022/figures/\347\254\2542\351\230\266\346\256\265.png" differ diff --git "a/content/zh/post/2022/figures/\347\274\226\350\257\221\346\211\247\350\241\214\347\250\213\345\272\217\345\220\216.png" 
"b/content/zh/post/2022/figures/\347\274\226\350\257\221\346\211\247\350\241\214\347\250\213\345\272\217\345\220\216.png" new file mode 100644 index 0000000000000000000000000000000000000000..a175c0287ccf70f3c55edf63d99e354ba1a5a456 Binary files /dev/null and "b/content/zh/post/2022/figures/\347\274\226\350\257\221\346\211\247\350\241\214\347\250\213\345\272\217\345\220\216.png" differ diff --git "a/content/zh/post/2022/figures/\350\213\245\345\207\272\347\216\260\344\273\245\344\270\213\347\273\223\346\236\234\350\241\250\346\230\216\350\277\236\346\216\245\346\210\220\345\212\237.png" "b/content/zh/post/2022/figures/\350\213\245\345\207\272\347\216\260\344\273\245\344\270\213\347\273\223\346\236\234\350\241\250\346\230\216\350\277\236\346\216\245\346\210\220\345\212\237.png" new file mode 100644 index 0000000000000000000000000000000000000000..971ed5c1a70d56cc8fe57ca853b866db7621d4af Binary files /dev/null and "b/content/zh/post/2022/figures/\350\213\245\345\207\272\347\216\260\344\273\245\344\270\213\347\273\223\346\236\234\350\241\250\346\230\216\350\277\236\346\216\245\346\210\220\345\212\237.png" differ diff --git "a/content/zh/post/2022/figures/\350\277\220\350\241\214gsom\345\220\216\346\212\245\351\224\231\345\246\202\344\270\213.png" "b/content/zh/post/2022/figures/\350\277\220\350\241\214gsom\345\220\216\346\212\245\351\224\231\345\246\202\344\270\213.png" new file mode 100644 index 0000000000000000000000000000000000000000..bf0af9fbe22086f18b7f881a71668c04b348e7c8 Binary files /dev/null and "b/content/zh/post/2022/figures/\350\277\220\350\241\214gsom\345\220\216\346\212\245\351\224\231\345\246\202\344\270\213.png" differ diff --git "a/content/zh/post/2022/figures/\350\277\231\346\227\266\345\217\257\350\203\275\351\201\207\345\210\260failed-to.png" "b/content/zh/post/2022/figures/\350\277\231\346\227\266\345\217\257\350\203\275\351\201\207\345\210\260failed-to.png" new file mode 100644 index 0000000000000000000000000000000000000000..05d13029ea1094f5587bb7194b7f2e7f8699861a Binary files /dev/null and "b/content/zh/post/2022/figures/\350\277\231\346\227\266\345\217\257\350\203\275\351\201\207\345\210\260failed-to.png" differ diff --git "a/content/zh/post/2022/figures/\350\277\231\351\207\214\344\270\27226000.png" "b/content/zh/post/2022/figures/\350\277\231\351\207\214\344\270\27226000.png" new file mode 100644 index 0000000000000000000000000000000000000000..bcbacff4517158209f0c0e94d14b968194d7acf4 Binary files /dev/null and "b/content/zh/post/2022/figures/\350\277\231\351\207\214\344\270\27226000.png" differ diff --git "a/content/zh/post/2022/figures/\350\277\231\351\207\214\344\270\272260001.png" "b/content/zh/post/2022/figures/\350\277\231\351\207\214\344\270\272260001.png" new file mode 100644 index 0000000000000000000000000000000000000000..18cf6daa85b2fd6a06e46d3648bcd3abf4396a44 Binary files /dev/null and "b/content/zh/post/2022/figures/\350\277\231\351\207\214\344\270\272260001.png" differ diff --git "a/content/zh/post/2022/figures/\350\277\236\346\216\245\346\262\241\346\234\211\351\227\256\351\242\230.png" "b/content/zh/post/2022/figures/\350\277\236\346\216\245\346\262\241\346\234\211\351\227\256\351\242\230.png" new file mode 100644 index 0000000000000000000000000000000000000000..8e747ebb6b64fe95708b78a315e381307c7788fe Binary files /dev/null and "b/content/zh/post/2022/figures/\350\277\236\346\216\245\346\262\241\346\234\211\351\227\256\351\242\230.png" differ diff --git 
"a/content/zh/post/2022/figures/\351\207\207\347\224\250JDBC\345\274\200\345\217\221\345\272\224\347\224\250\347\250\213\345\272\217\347\232\204\346\265\201\347\250\213.png" "b/content/zh/post/2022/figures/\351\207\207\347\224\250JDBC\345\274\200\345\217\221\345\272\224\347\224\250\347\250\213\345\272\217\347\232\204\346\265\201\347\250\213.png" new file mode 100644 index 0000000000000000000000000000000000000000..884b9343d59ad1a297845d7c097bc6c9e3743f51 Binary files /dev/null and "b/content/zh/post/2022/figures/\351\207\207\347\224\250JDBC\345\274\200\345\217\221\345\272\224\347\224\250\347\250\213\345\272\217\347\232\204\346\265\201\347\250\213.png" differ diff --git "a/content/zh/post/2022/node_exporter-\350\207\252\345\256\232\344\271\211\347\233\221\346\216\247\346\214\207\346\240\207.md" "b/content/zh/post/2022/node_exporter-\350\207\252\345\256\232\344\271\211\347\233\221\346\216\247\346\214\207\346\240\207.md" new file mode 100644 index 0000000000000000000000000000000000000000..3065f36a2c3ed90ecf2fc9d039c3df26323e8d23 --- /dev/null +++ "b/content/zh/post/2022/node_exporter-\350\207\252\345\256\232\344\271\211\347\233\221\346\216\247\346\214\207\346\240\207.md" @@ -0,0 +1,127 @@ ++++ + +title = "node exporter 自定义监控指标" + +date = "2021-12-16" + +tags = [ "node exporter 自定义监控指标"] + +archives = "2021-12" + +author = "高云龙" + +summary = "node exporter 自定义监控指标" + +img = "/zh/post/2022/title/img10.png" + +times = "12:30" + ++++ + +# node\_exporter 自定义监控指标 + +## 概述 + +node\_exporter除了可以收集系统指标外,还可以采集我们自定义的监控指标。采集自定义监控指标是通过textfile模块来完成的,textfile模块默认会随着node\_exporter启动而启动,如果想要采集自定义指标,还需要在启动node\_exporter的时候,添加–collector.textfile.directory=""参数,这个参数是自定义的采集路径,所有自定义监控指标文件都放在这个目录下,且文件名都以.prom结尾。 + +## 自定义指标 + +- 启动node\_exporter + + ``` + --创建目录 + # mkdir -p /opt/node_exporter/prom + --以指定采集路径的方式启动 + # nohup /opt/node_exporter/node_exporter --collector.textfile.directory="/opt/node_exporter/prom" > /opt/node_exporter/node_exporter.log 2>&1 & + ``` + +- 创建监控指标文件 + + ``` + # cd /opt/node_exporter/prom + # vi db_heartbeat.prom + + --HELP 和 TYPE 如果没有制定,node_exporter会自动添加 + # HELP db_select Metric read from /opt/node_exporter/prom/db_heartbeat.prom + # TYPE db_select untyped + db_select{database="172.16.3.90:5432"} 1 + db_select{database="172.16.3.90:7432"} 0 + ``` + + 在浏览器中可以看到,我们自定义的指标已经采集到 + + ![](figures/我们自定义的指标已经采集到.png) + + +## 定时任务 + +自定义监控指标大多数需要与crontab结合,按着需求设置采集指标的时间。 + +- flock命令 + + 为了防止某个任务的执行时间超过了 crontab 中为此任务设定的执行周期,使用flock命令将crontab串行化: + + flock -xn /tmp/flock.lock -c ‘xxx.sh’ --如果/tmp/flock.lock不存在,flock会自动创建 + + ``` + Usage: + flock [options] [command args] + flock [options] -c + flock [options] + + Options: + -s --shared get a shared lock + -x --exclusive get an exclusive lock (default) + -u --unlock remove a lock + -n --nonblock fail rather than wait + -w --timeout wait for a limited amount of time + -E --conflict-exit-code exit code after conflict or timeout + -o --close close file descriptor before running command + -c --command run a single command string through the shell + + -h, --help display this help and exit + -V, --version output version information and exit + + For more details see flock(1). 
diff --git "a/content/zh/post/2022/openGauss-MOT\345\255\230\345\202\250\345\274\225\346\223\216.md" "b/content/zh/post/2022/openGauss-MOT\345\255\230\345\202\250\345\274\225\346\223\216.md"
new file mode 100644
index 0000000000000000000000000000000000000000..dca8a3f793fba23afa9b06fed18046576230e26f
--- /dev/null
+++ "b/content/zh/post/2022/openGauss-MOT\345\255\230\345\202\250\345\274\225\346\223\216.md"
@@ -0,0 +1,51 @@
++++
+
+title = "The openGauss MOT storage engine"
+
+date = "2022-01-07"
+
+tags = [ "openGauss MOT storage engine"]
+
+archives = "2022-01"
+
+author = "ORA-DBA"
+
+summary = "The openGauss MOT storage engine"
+
+img = "/zh/post/2022/title/img15.jpg"
+
+times = "12:30"
+
++++
+
+# The openGauss MOT storage engine
+
+## Introduction
+
+The MOT storage engine is a transactional row store optimized for many-core servers with large memory. MOT provides higher performance for transactional workloads.
+
+MOT supports the ACID properties, including strict durability and high-availability support. Enterprises can use MOT in mission-critical, performance-sensitive online transaction processing (OLTP) to achieve high performance, high throughput, predictably low latency, and high utilization of many-core servers.
+
+MOT is best suited to running on modern servers with multiple sockets and many cores.
+
+## Structure of the openGauss memory-optimized storage engine
+
+Structure of the openGauss memory-optimized storage engine:
+
+![](figures/openGauss内存优化存储引擎结构图.png)
+
+The openGauss memory-optimized storage engine component manages MOT tables and their transactions.
+
+MOT tables are created side by side with ordinary disk-based tables (a minimal creation example follows at the end of this post). MOT achieves almost complete SQL coverage and supports the full database feature set, such as stored procedures and user-defined functions.
+
+By storing data and indexes entirely in memory, together with a non-uniform memory access aware (NUMA-aware) design, algorithms that eliminate lock and latch contention, and native query compilation, MOT delivers faster data access and more efficient transaction execution.
+
+MOT's effective, almost lock-free design and highly tuned implementation give it excellent near-linear throughput scaling on many-core servers.
+
+**MOT fully supports the ACID properties:**
+
+- Atomicity: an atomic transaction is an indivisible series of database operations. After the transaction completes (commits or aborts, respectively), these operations either all take effect or none of them do.
+- Consistency: after a transaction ends, the database is in a consistent state that preserves data integrity.
+- Isolation: transactions cannot interfere with one another. MOT supports the repeatable-read and read-committed isolation levels; in the next release it will also support serializable isolation.
+- Durability: the effects of successfully completed (committed) transactions persist even across crashes and failures. MOT is fully integrated with the WAL-based logging of openGauss and supports both synchronous and asynchronous logging options. MOT also supports synchronous NUMA-optimized group commit.
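+
+As a small illustration of how MOT surfaces at the SQL level: on a MOT-enabled instance, a memory-optimized table is created with CREATE FOREIGN TABLE and then used like an ordinary table. This is only a minimal sketch; the port, database, and table name are placeholders, not part of the original post:
+
+```
+# create, populate and query a memory-optimized (MOT) table through gsql
+gsql -d postgres -p 15400 -c "create foreign table mot_test(id int primary key, info varchar(100));"
+gsql -d postgres -p 15400 -c "insert into mot_test values (1, 'in-memory row');"
+gsql -d postgres -p 15400 -c "select * from mot_test;"
+```
+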
diff --git "a/content/zh/post/2022/openGauss-MogDB-TPCH\346\200\247\350\203\275\346\265\213\350\257\225\346\255\245\351\252\244.md" "b/content/zh/post/2022/openGauss-MogDB-TPCH\346\200\247\350\203\275\346\265\213\350\257\225\346\255\245\351\252\244.md"
new file mode 100644
index 0000000000000000000000000000000000000000..b0e9a1296e2beabf316f592394c083140e5126da
--- /dev/null
+++ "b/content/zh/post/2022/openGauss-MogDB-TPCH\346\200\247\350\203\275\346\265\213\350\257\225\346\255\245\351\252\244.md"
@@ -0,0 +1,106 @@
++++
+
+title = "openGauss/MogDB TPCH performance test procedure"
+
+date = "2021-12-28"
+
+tags = [ "openGauss/MogDB TPCH performance test procedure"]
+
+archives = "2021-12"
+
+author = "Seven"
+
+summary = "openGauss/MogDB TPCH performance test procedure"
+
+img = "/zh/post/2022/title/img13.png"
+
+times = "12:30"
+
++++
+
+# openGauss/MogDB TPCH performance test procedure
+
+The package downloaded from the official TPC-H site is not directly compatible with openGauss/MogDB/PostgreSQL. The steps below adapt it to pg/og syntax and are summarized here for reference.
+
+- Create the tables
+
+    ```
+    gsql -p 26000 -d postgres -U tpch < createtab_og.sql
+    ```
+
+- Generate data
+
+    For example, generate 100GB of data:
+
+    ```
+    ./dbgen -s 100
+    ```
+
+    For example, generate 500GB of data with 8 threads:
+
+    ```
+    #!/bin/sh
+    ./dbgen -vf -s 500 -S 1 -C 8 &
+    ./dbgen -vf -s 500 -S 2 -C 8 &
+    ./dbgen -vf -s 500 -S 3 -C 8 &
+    ./dbgen -vf -s 500 -S 4 -C 8 &
+    ./dbgen -vf -s 500 -S 5 -C 8 &
+    ./dbgen -vf -s 500 -S 6 -C 8 &
+    ./dbgen -vf -s 500 -S 7 -C 8 &
+    ./dbgen -vf -s 500 -S 8 -C 8 &
+    ```
+
+- Convert the data
+
+    The generated data files are in tbl format; convert them to csv:
+
+    ```
+    for i in `ls *.tbl`;do sed 's/|$//' $i > ${i/tbl/csv};echo $i;done;
+    ```
+
+- Load the data
+
+    ```
+    dir=/TPCH/TPCH_gs/TPCH/dbgen/data
+    opts='-p 26000 -d postgres'
+    gsql $opts -c "COPY tpch.region FROM '$dir/region.csv' WITH (FORMAT csv,DELIMITER '|')"
+    gsql $opts -c "COPY tpch.nation FROM '$dir/nation.csv' WITH (FORMAT csv,DELIMITER '|')"
+    gsql $opts -c "COPY tpch.part FROM '$dir/part.csv' WITH (FORMAT csv,DELIMITER '|')"
+    gsql $opts -c "COPY tpch.supplier FROM '$dir/supplier.csv' WITH (FORMAT csv,DELIMITER '|')"
+    gsql $opts -c "COPY tpch.customer FROM '$dir/customer.csv' WITH (FORMAT csv,DELIMITER '|')"
+    gsql $opts -c "COPY tpch.partsupp FROM '$dir/partsupp.csv' WITH (FORMAT csv,DELIMITER '|')"
+    gsql $opts -c "COPY tpch.orders FROM '$dir/orders.csv' WITH (FORMAT csv,DELIMITER '|')"
+    gsql $opts -c "COPY tpch.lineitem FROM '$dir/lineitem.csv' WITH (FORMAT csv,DELIMITER '|')"
+    ```
+
+- Create the required functions
+
+    ```
+    create or replace function NUMTOYMINTERVAL(float8, text) returns interval as $$ select ($1||' '||$2)::interval;
+    $$ language sql strict immutable;
+
+    create or replace function NUMTODSINTERVAL(float8, text) returns interval as $$ select ($1||' '||$2)::interval;
+    $$ language sql strict immutable;
+    ```
+
+- Run the sql files under the SQL folder (a timing variant is sketched after this post)
+
+    ```
+    #!/bin/bash
+    opts="-p 26000 -d postgres -U tpch -W password"
+    for i in `seq 10 22`
+    do
+      echo $i"'s result"
+      gsql ${opts} -f ${i}.sql
+    done
+    ```
+
+
+The TPCH package, the table-creation script, and the query SQL files are available on the network drive:
+
+Link: https://pan.baidu.com/s/1Cg7neIxXGjDYS7BfZxl2IQ  Password: urkt
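+
+For comparing runs it helps to record each query's wall-clock time as well. The sketch below is a minimal variant of the loop above, assuming the standard TPC-H queries are saved as 1.sql through 22.sql in the current directory:
+
+```
+#!/bin/bash
+# run every TPCH query, keep its output, and log the elapsed seconds
+opts="-p 26000 -d postgres -U tpch"
+for i in $(seq 1 22)
+do
+  start=$(date +%s)
+  gsql ${opts} -f ${i}.sql > q${i}.out 2>&1
+  echo "Q${i}: $(( $(date +%s) - start ))s" >> tpch_times.log
+done
+```
+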
diff --git "a/content/zh/post/2022/openGauss-MogDB\345\244\247\345\257\271\350\261\241LargeObject\345\255\230\345\217\226\346\265\213\350\257\225.md" "b/content/zh/post/2022/openGauss-MogDB\345\244\247\345\257\271\350\261\241LargeObject\345\255\230\345\217\226\346\265\213\350\257\225.md"
new file mode 100644
index 0000000000000000000000000000000000000000..1aa789027640dd641c96d81fa7d8bed537b7c5e4
--- /dev/null
+++ "b/content/zh/post/2022/openGauss-MogDB\345\244\247\345\257\271\350\261\241LargeObject\345\255\230\345\217\226\346\265\213\350\257\225.md"
@@ -0,0 +1,238 @@
++++
+
+title = "openGauss/MogDB large object (LargeObject) storage test"
+
+date = "2021-12-17"
+
+tags = [ "openGauss/MogDB large object (LargeObject) storage test"]
+
+archives = "2021-12"
+
+author = "多米爸比"
+
+summary = "openGauss/MogDB large object (LargeObject) storage test"
+
+img = "/zh/post/2022/title/img14.png"
+
+times = "12:30"
+
++++
+
+# openGauss/MogDB large object (LargeObject) storage test
+
+In openGauss/MogDB the binary type bytea is limited by the segment size compile-time parameter and by default cannot exceed 1GB. If a column has to store more than 1GB of data, the lo (Large Object) extension type can be used.
+
+## The lo type requires the lo extension to be created first
+
+```
+$ gsql -p5432 -Uomm postgres -r
+gsql ((MogDB 2.0.1 build f892ccb7) compiled at 2021-07-09 16:15:21 commit 0 last mr )
+Non-SSL connection (SSL connection is recommended when requiring high-security)
+Type "help" for help.
+
+postgres=# create extension lo;
+CREATE EXTENSION
+```
+
+With the lo extension in place, we create a new table test\_lo whose info column uses the lo type.
+
+```
+postgres=# create table test_lo(id int,info lo);
+CREATE TABLE
+```
+
+Create a management trigger on test\_lo that applies the lo\_manage function on update and delete; otherwise orphaned large objects are left behind.
+
+```
+postgres=# create trigger test_lo before UPDATE OR DELETE ON test_lo FOR EACH ROW EXECUTE procedure lo_manage(info);
+WARNING: Trigger function with non-plpgsql type is not recommended.
+DETAIL: Non-plpgsql trigger function are not shippable by default.
+HINT: Unshippable trigger may lead to bad performance.
+CREATE TRIGGER
+```
+
+Generate a 2GB file with dd:
+
+```
+postgres=# \! dd if=/dev/zero of=test_lo bs=1M count=2048 && sync
+记录了2048+0 的读入
+记录了2048+0 的写出
+2147483648字节(2.1 GB,2.0 GiB)已复制,0.805435 s,2.7 GB/s
+```
+
+## Test importing data into the table with the lo\_import function
+
+```
+postgres=# insert into test_lo values(1,lo_import('/home/omm/test_lo'));
+INSERT 0 1
+```
+
+The data imports normally. Without the lo type, using bytea instead raises the following error:
+
+```
+ERROR: requested length too large
+```
+
+## Test exporting table data to a file with the lo\_export function
+
+```
+postgres=# select lo_export(test_lo.info,'/home/omm/test_ext_lo') from test_lo where id=1;
+ lo_export
+-----------
+ 1
+(1 row)
+```
+
+The data exports normally.
+
+Inspect the imported and exported data files; the diff command can also be used to compare them.
+
+```
+postgres=# \! ls -lh test_*
+-rw-r--r-- 1 omm dbgrp 2.0G 12月 17 13:00 test_ext_lo
+-rw------- 1 omm dbgrp 2.0G 12月 17 12:58 test_lo
+```
+
+## Checking the size of a large object column
+
+This takes two steps. First look up the oid of the large object column (an lo column stores only an oid reference pointer in the user table, not the actual data):
+
+```
+postgres=# select * from test_lo;
+ id | info
+----+-------
+  1 | 16392
+(1 row)
+```
+
+The actual data is stored as multiple bytea records in the pg\_largeobject table, so the column size can be aggregated by oid:
+
+```
+postgres=# select loid,pg_size_pretty(sum(octet_length(data)))
+from pg_largeobject
+where loid =16392
+group by loid;
+ loid  | pg_size_pretty
+-------+----------------
+ 16392 | 2048 MB
+(1 row)
+```
+
+The following function can also be used for the query:
+
+```
+create or replace function get_lo_size(oid)
+returns bigint
+volatile strict
+as $function$
+declare
+  fd integer;
+  sz bigint;
+begin
+  fd := lo_open($1, x'40000'::int);
+  perform lo_lseek64(fd, 0, 2);
+  sz := lo_tell64(fd);
+  perform lo_close(fd);
+  return sz;
+end;
+$function$ language plpgsql;
+```
+
+The query result is as follows:
+
+```
+postgres=# select pg_size_pretty(get_lo_size(16392));
+ pg_size_pretty
+----------------
+ 2048 MB
+(1 row)
+```
+
+Next, test usage from the JDBC application layer.
+
+## JDBC: writing a file into the database from Java
+
+```
+    public static void main(String[] args) throws Exception{
+        Class.forName("org.postgresql.Driver");
+
+        Connection conn = DriverManager.getConnection("jdbc:postgresql://ip:port/dbname","username","password");
+
+        // the large object API must be used inside a transaction
+        conn.setAutoCommit(false);
+
+        LargeObjectManager lobj = conn.unwrap(org.postgresql.PGConnection.class).getLargeObjectAPI();
+
+        // create a new large object and open it for writing
+        long oid = lobj.createLO(LargeObjectManager.READ | LargeObjectManager.WRITE);
+        LargeObject obj = lobj.open(oid, LargeObjectManager.WRITE);
+
+        File file = new File("c:/work/test_lo");
+        FileInputStream fis = new FileInputStream(file);
+
+        // stream the file into the large object in buffer-sized chunks
+        byte buf[] = new byte[10*1024*1024];
+        int s, tl = 0;
+        while ((s = fis.read(buf)) > 0)
+        {
+            obj.write(buf, 0, s);
+            tl += s;
+        }
+
+        obj.close();
+
+        // store the oid reference in the test_lo table
+        PreparedStatement ps = conn.prepareStatement("INSERT INTO test_lo VALUES (?, ?)");
+        ps.setInt(1, 100);
+        ps.setLong(2, oid);
+        ps.executeUpdate();
+        ps.close();
+        fis.close();
+
+        conn.commit();
+        conn.close();
+    }
+```
+
+## JDBC: reading the data back out to a file from Java
+
+```
+    public static void main(String[] args) throws Exception{
+        Class.forName("org.postgresql.Driver");
+
+        Connection conn = DriverManager.getConnection("jdbc:postgresql://ip:port/dbname","username","password");
+
+        conn.setAutoCommit(false);
+
+        LargeObjectManager lobj = conn.unwrap(org.postgresql.PGConnection.class).getLargeObjectAPI();
+
+        // look up the oid stored for id = 100
+        PreparedStatement ps = conn.prepareStatement("SELECT info FROM test_lo WHERE id = ?");
+        ps.setInt(1, 100);
+        ResultSet rs = ps.executeQuery();
+
+        File file = new File("c:/work/test_out_lo");
+        FileOutputStream fos = new FileOutputStream(file);
+
+        while (rs.next())
+        {
+            long oid = rs.getLong(1);
+            LargeObject obj = lobj.open(oid, LargeObjectManager.READ);
+
+            // stream the large object back out in buffer-sized chunks
+            byte buf[] = new byte[10*1024*1024];
+            int s, tl = 0;
+            while ((s = obj.read(buf, 0, buf.length)) > 0)
+            {
+                fos.write(buf, 0, s);
+                tl += s;
+            }
+
+            obj.close();
+        }
+        rs.close();
+        ps.close();
+        fos.close();
+
+        conn.commit();
+        conn.close();
+    }
+```
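+
+One operational footnote: the lo\_manage trigger above only fires for UPDATE and DELETE on this table, so a large object that was imported but never referenced (for example after a failed load) can linger in pg\_largeobject. A minimal manual-cleanup sketch, reusing the example oid 16392 from above:
+
+```
+# list the large-object oids currently stored
+gsql -p5432 -Uomm postgres -t -c "select distinct loid from pg_largeobject;"
+
+# remove an orphaned large object by oid (irreversible)
+gsql -p5432 -Uomm postgres -c "select lo_unlink(16392);"
+```
+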
diff --git "a/content/zh/post/2022/openGauss-gsql-\345\270\270\347\224\250\345\205\203\345\221\275\344\273\244-\344\270\200.md" "b/content/zh/post/2022/openGauss-gsql-\345\270\270\347\224\250\345\205\203\345\221\275\344\273\244-\344\270\200.md"
new file mode 100644
index 0000000000000000000000000000000000000000..f0d867db9fe532c6774b8067e018ca66bfe57042
--- /dev/null
+++ "b/content/zh/post/2022/openGauss-gsql-\345\270\270\347\224\250\345\205\203\345\221\275\344\273\244-\344\270\200.md"
@@ -0,0 +1,472 @@
++++
+
+title = "Common gsql meta-commands in openGauss (1)"
+
+date = "2022-01-10"
+
+tags = [ "Common gsql meta-commands in openGauss (1)"]
+
+archives = "2022-01"
+
+author = "晨辉"
+
+summary = "Common gsql meta-commands in openGauss (1)"
+
+img = "/zh/post/2022/title/img12.png"
+
+times = "12:30"
+
++++
+
+# Common gsql meta-commands in openGauss (1)
+
+## Connect to the database; the -E option shows the SQL each meta-command actually executes
+
+```
+[omm@og1 ~]$ gsql -d postgres -p15400 -E
+gsql ((openGauss 2.1.0 build 590b0f8e) compiled at 2021-09-30 14:29:04 commit 0 last mr )
+Non-SSL connection (SSL connection is recommended when requiring high-security)
+Type "help" for help.
+```
+
+## \\l shows the databases in the instance
+
+```
+openGauss=# \l
+********* QUERY **********
+SELECT d.datname as "Name",
+       pg_catalog.pg_get_userbyid(d.datdba) as "Owner",
+       pg_catalog.pg_encoding_to_char(d.encoding) as "Encoding",
+       d.datcollate as "Collate",
+       d.datctype as "Ctype",
+       pg_catalog.array_to_string(d.datacl, E'\n') AS "Access privileges"
+FROM pg_catalog.pg_database d
+ORDER BY 1;
+**************************
+
+                               List of databases
+   Name    | Owner | Encoding |  Collate   |   Ctype    | Access privileges
+-----------+-------+----------+------------+------------+-------------------
+ mydb      | omm   | UTF8     | en_US.utf8 | en_US.utf8 |
+ postgres  | omm   | UTF8     | en_US.utf8 | en_US.utf8 |
+ studentdb | omm   | UTF8     | en_US.utf8 | en_US.utf8 |
+ template0 | omm   | UTF8     | en_US.utf8 | en_US.utf8 | =c/omm          +
+           |       |          |            |            | omm=CTc/omm
+ template1 | omm   | UTF8     | en_US.utf8 | en_US.utf8 | =c/omm          +
+           |       |          |            |            | omm=CTc/omm
+(5 rows)
+```
+
+## \\du (same as \\dg) shows all users and roles in the database
+
+```
+openGauss=# \du
+********* QUERY **********
+SELECT r.rolname, r.rolsuper, r.rolinherit,
+  r.rolcreaterole, r.rolcreatedb, r.rolcanlogin,
+  r.rolconnlimit, r.rolvalidbegin, r.rolvaliduntil,
+  ARRAY(SELECT b.rolname
+        FROM pg_catalog.pg_auth_members m
+        JOIN pg_catalog.pg_roles b ON (m.roleid = b.oid)
+        WHERE m.member = r.oid) as memberof
+, r.rolreplication
+, r.rolauditadmin
+, r.rolsystemadmin
+, r.rolmonitoradmin
+, r.roloperatoradmin
+, r.rolpolicyadmin
+, r.roluseft
+, r.rolkind
+FROM pg_catalog.pg_roles r
+WHERE r.rolname not in ('gs_role_copy_files', 'gs_role_signal_backend', 'gs_role_tablespace', 'gs_role_replication', 'gs_role_account_lock', 'gs_role_pldebugger')
+ORDER BY 1;
+**************************
+
+                                                      List of roles
+ Role name |                                                    Attributes                                                     | Member of
+-----------+------------------------------------------------------------------------------------------------------------------+-----------
+ omm       | Sysadmin, Create role, Create DB, Replication, Administer audit, Monitoradmin, Operatoradmin, Policyadmin, UseFT | {}
+ student   | Sysadmin                                                                                                         | {}
+
+openGauss=# \dg
+********* QUERY **********
+SELECT r.rolname, r.rolsuper, r.rolinherit,
+  r.rolcreaterole, r.rolcreatedb, r.rolcanlogin,
+  r.rolconnlimit, r.rolvalidbegin, r.rolvaliduntil,
+  ARRAY(SELECT b.rolname
+        FROM pg_catalog.pg_auth_members m
+        JOIN pg_catalog.pg_roles b ON (m.roleid = b.oid)
+        WHERE m.member = r.oid) as memberof
+, r.rolreplication
+, r.rolauditadmin
+, r.rolsystemadmin
+, r.rolmonitoradmin
+, r.roloperatoradmin
+, r.rolpolicyadmin
+, r.roluseft
+, r.rolkind
+FROM pg_catalog.pg_roles r
+WHERE r.rolname not in ('gs_role_copy_files', 'gs_role_signal_backend', 'gs_role_tablespace', 'gs_role_replication', 'gs_role_account_lock', 'gs_role_pldebugger')
+ORDER BY 1;
+**************************
+
+                                                      List of roles
+ Role name |                                                    Attributes                                                     | Member of
+-----------+------------------------------------------------------------------------------------------------------------------+-----------
+ omm       | Sysadmin, Create role, Create DB, Replication, Administer audit, Monitoradmin, Operatoradmin, Policyadmin, UseFT | {}
+ student   | Sysadmin                                                                                                         | {}
+```
+
+## \\db shows all tablespaces in the database
+
+```
+openGauss=# \db
+********* QUERY **********
+SELECT spcname AS "Name",
+  pg_catalog.pg_get_userbyid(spcowner) AS "Owner",
+  pg_catalog.pg_tablespace_location(oid) AS "Location"
+FROM pg_catalog.pg_tablespace
+ORDER BY 1;
+**************************
+
+       List of tablespaces
+    Name    | Owner |        Location
+------------+-------+------------------------
+ pg_default | omm   |
+ pg_global  | omm   |
+ student_ts | omm   | tablespace/student_ts1
+(3 rows)
+```
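+
+The queries echoed by -E are plain catalog SQL, so they can be lifted straight into scripts. A minimal sketch (not part of the original post): reusing the pg_tablespace query behind \\db for unattended checks, with -t/-A stripping headers and alignment so the output is script-friendly:
+
+```
+gsql -d postgres -p15400 -t -A -c "SELECT spcname FROM pg_catalog.pg_tablespace ORDER BY 1;"
+```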
+-----------+------------------------------------------------------------------------------------------------------------------+----------- + omm | Sysadmin, Create role, Create DB, Replication, Administer audit, Monitoradmin, Operatoradmin, Policyadmin, UseFT | {} + student | Sysadmin | {} + +openGauss=# \dg +********* QUERY ********** +SELECT r.rolname, r.rolsuper, r.rolinherit, + r.rolcreaterole, r.rolcreatedb, r.rolcanlogin, + r.rolconnlimit, r.rolvalidbegin, r.rolvaliduntil, + ARRAY(SELECT b.rolname + FROM pg_catalog.pg_auth_members m + JOIN pg_catalog.pg_roles b ON (m.roleid = b.oid) + WHERE m.member = r.oid) as memberof +, r.rolreplication +, r.rolauditadmin +, r.rolsystemadmin +, r.rolmonitoradmin +, r.roloperatoradmin +, r.rolpolicyadmin +, r.roluseft +, r.rolkind +FROM pg_catalog.pg_roles r +WHERE r.rolname not in ('gs_role_copy_files', 'gs_role_signal_backend', 'gs_role_tablespace', 'gs_role_replication', 'gs_role_account_lock', 'gs_role_pldebugger') +ORDER BY 1; +************************** + + List of roles + Role name | Attributes | Member of +-----------+------------------------------------------------------------------------------------------------------------------+----------- + omm | Sysadmin, Create role, Create DB, Replication, Administer audit, Monitoradmin, Operatoradmin, Policyadmin, UseFT | {} + student | Sysadmin | {} +``` + +## \\db 显示数据库中所有表空间信息 + +``` +openGauss=# \db +********* QUERY ********** +SELECT spcname AS "Name", + pg_catalog.pg_get_userbyid(spcowner) AS "Owner", + pg_catalog.pg_tablespace_location(oid) AS "Location" +FROM pg_catalog.pg_tablespace +ORDER BY 1; +************************** + + List of tablespaces + Name | Owner | Location +------------+-------+------------------------ + pg_default | omm | + pg_global | omm | + student_ts | omm | tablespace/student_ts1 +(3 rows) +``` + +## \\dn 显示数据库中所有schema信息 + +``` +openGauss=# \dn +********* QUERY ********** +SELECT n.nspname AS "Name", + pg_catalog.pg_get_userbyid(n.nspowner) AS "Owner" +FROM pg_catalog.pg_namespace n +WHERE n.nspname !~ '^pg_' AND n.nspname <> 'information_schema' +ORDER BY 1; +************************** + + List of schemas + Name | Owner +----------------+--------- + blockchain | omm + cstore | omm + db4ai | omm + dbe_perf | omm + dbe_pldebugger | omm + pkg_service | omm + pmk | omm + public | omm + snapshot | omm + sqladvisor | omm + student | student +(11 rows) +``` + +## \\d 显示当前数据库下相关数据库对象信息\(包含表、视图、物化视图、序列、外部表、stream\\ contview\) + +``` +openGauss=# \d +********* QUERY ********** +SELECT n.nspname as "Schema", + c.relname as "Name", + CASE c.relkind WHEN 'r' THEN 'table' WHEN 'v' THEN 'view' WHEN 'i' THEN 'index' WHEN 'I' THEN 'global partition index' WHEN 'S' THEN 'sequence' WHEN 's' THEN 'special' WHEN 'f' THEN 'foreign table' WHEN 'm' THEN 'materialized view' WHEN 'e' THEN 'stream' WHEN 'o' THEN 'contview' END as "Type", + pg_catalog.pg_get_userbyid(c.relowner) as "Owner", + c.reloptions as "Storage" +FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace +WHERE c.relkind IN ('r','v','m','S','f','e','o','') + AND n.nspname <> 'pg_catalog' + AND n.nspname <> 'db4ai' + AND n.nspname <> 'information_schema' + AND n.nspname !~ '^pg_toast' + AND c.relname not like 'matviewmap_%' + AND c.relname not like 'mlog_%' + AND pg_catalog.pg_table_is_visible(c.oid) +ORDER BY 1,2; +************************** + + List of relations + Schema | Name | Type | Owner | Storage +--------+--------+-------+-------+---------------------------------- + public | test | 
table | omm | {orientation=row,compression=no} + public | v_test | view | omm | +(2 rows) +``` + +## \\d tablename 查看某个表的详细信息 + +``` +openGauss=# \d test +********* QUERY ********** +SELECT c.oid, + n.nspname, + c.relname +FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace +WHERE c.relname ~ '^(test)$' + AND pg_catalog.pg_table_is_visible(c.oid) +ORDER BY 2, 3; +************************** + +********* QUERY ********** +SELECT c.relchecks, c.relkind, c.relhasindex, c.relhasrules, c.relhastriggers, c.relhasoids, '', c.reltablespace, CASE WHEN c.reloftype = 0 THEN '' ELSE c.reloftype::pg_catalog.regtype::pg_catalog.text END, c.relpersistence,c.relhasclusterkey, c.relreplident, (select count(1) as haspolicy from pg_catalog.pg_class WHERE relname = 'pg_rlspolicy') +FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_class tc ON (c.reltoastrelid = tc.oid) +WHERE c.oid = '16575'; +************************** + +********* QUERY ********** +SELECT * FROM pg_catalog.pg_class WHERE relname = 'gs_encrypted_columns' AND relnamespace = 11; +************************** + +********* QUERY ********** +SELECT a.attname, + pg_catalog.format_type(a.atttypid, a.atttypmod), + (SELECT substring(pg_catalog.pg_get_expr(d.adbin, d.adrelid) for 176) + FROM pg_catalog.pg_attrdef d + WHERE d.adrelid = a.attrelid AND d.adnum = a.attnum AND a.atthasdef), + a.attnotnull, a.attnum, + (SELECT c.collname FROM pg_catalog.pg_collation c, pg_catalog.pg_type t + WHERE c.oid = a.attcollation AND t.oid = a.atttypid AND a.attcollation <> t.typcollation) AS attcollation, + NULL AS indexdef, + NULL AS attfdwoptions, + (SELECT pg_catalog.format_type (a.atttypmod, g.data_type_original_mod) AS clientlogic_original_type FROM gs_encrypted_columns g WHERE g.column_name = a.attname AND g.rel_id = 16575group by g.data_type_original_oid, g.data_type_original_mod), +(SELECT g.data_type_original_oid AS clientlogic_original_type_oid FROM gs_encrypted_columns g WHERE g.column_name = a.attname AND g.rel_id = 16575group by g.data_type_original_oid, g.data_type_original_mod), + (SELECT h.adgencol + FROM pg_catalog.pg_attrdef h + WHERE h.adrelid = a.attrelid AND h.adnum = a.attnum AND a.atthasdef) AS generated_column +FROM pg_catalog.pg_attribute a +WHERE a.attrelid = '16575' AND a.attnum > 0 AND NOT a.attisdropped AND a.attkvtype != 4 AND a.attname <> 'tableoid' AND a.attname <> 'tablebucketid' +ORDER BY a.attnum; +************************** + +********* QUERY ********** +SELECT c2.relname, i.indisprimary, i.indisunique, i.indisclustered, i.indisvalid, pg_catalog.pg_get_indexdef(i.indexrelid, 0, true), + pg_catalog.pg_get_constraintdef(con.oid, true), contype, condeferrable, condeferred, i.indisreplident, c2.reltablespace, i.indisusable +FROM pg_catalog.pg_class c, pg_catalog.pg_class c2, pg_catalog.pg_index i + LEFT JOIN pg_catalog.pg_constraint con ON (conrelid = i.indrelid AND conindid = i.indexrelid AND contype IN ('p','u','x')) +WHERE c.oid = '16575' AND c.oid = i.indrelid AND i.indexrelid = c2.oid +ORDER BY i.indisprimary DESC, i.indisunique DESC, c2.relname; +************************** + +********* QUERY ********** +SELECT pol.policyname, pol.policypermissive, trim(pol.policyroles::text, '{}'), pol.policyqual, pol.policycmd +FROM pg_catalog.pg_rlspolicies pol +LEFT JOIN pg_catalog.pg_namespace N on (N.nspname = pol.schemaname) +LEFT JOIN pg_catalog.pg_class C on (pol.tablename = C.relname and C.relnamespace = N.oid) +WHERE C.oid = '16575' ORDER BY 1; +************************** + +********* QUERY 
********** +SELECT c.oid::pg_catalog.regclass FROM pg_catalog.pg_class c, pg_catalog.pg_inherits i WHERE c.oid=i.inhparent AND i.inhrelid = '16575' ORDER BY inhseqno; +************************** + +********* QUERY ********** +SELECT c.oid::pg_catalog.regclass FROM pg_catalog.pg_class c, pg_catalog.pg_inherits i WHERE c.oid=i.inhrelid AND i.inhparent = '16575' ORDER BY c.oid::pg_catalog.regclass::pg_catalog.text; +************************** + +********* QUERY ********** +select partkey,partstrategy from pg_partition where parentid = 16575 order by partkey +************************** + + Table "public.test" + Column | Type | Modifiers +--------+---------+----------- + id | integer | +Indexes: + "idx_id_test" btree (id) TABLESPACE pg_default + +openGauss=# +``` + +## \\dt 显示当前数据库中所有的表 + +``` +openGauss=# \dt +********* QUERY ********** +SELECT n.nspname as "Schema", + c.relname as "Name", + CASE c.relkind WHEN 'r' THEN 'table' WHEN 'v' THEN 'view' WHEN 'i' THEN 'index' WHEN 'I' THEN 'global partition index' WHEN 'S' THEN 'sequence' WHEN 's' THEN 'special' WHEN 'f' THEN 'foreign table' WHEN 'm' THEN 'materialized view' WHEN 'e' THEN 'stream' WHEN 'o' THEN 'contview' END as "Type", + pg_catalog.pg_get_userbyid(c.relowner) as "Owner", + c.reloptions as "Storage" +FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace +WHERE c.relkind IN ('r','') + AND n.nspname <> 'pg_catalog' + AND n.nspname <> 'db4ai' + AND n.nspname <> 'information_schema' + AND n.nspname !~ '^pg_toast' + AND c.relname not like 'matviewmap_%' + AND c.relname not like 'mlog_%' + AND pg_catalog.pg_table_is_visible(c.oid) +ORDER BY 1,2; +************************** + + List of relations + Schema | Name | Type | Owner | Storage +--------+------+-------+-------+---------------------------------- + public | test | table | omm | {orientation=row,compression=no} +(1 row) +``` + +## \\dt+ 以扩展方式显示当前数据库所有表信息,比起\\dt 多了最后一列描述信息 + +``` +openGauss=# \dt+ +********* QUERY ********** +SELECT n.nspname as "Schema", + c.relname as "Name", + CASE c.relkind WHEN 'r' THEN 'table' WHEN 'v' THEN 'view' WHEN 'i' THEN 'index' WHEN 'I' THEN 'global partition index' WHEN 'S' THEN 'sequence' WHEN 's' THEN 'special' WHEN 'f' THEN 'foreign table' WHEN 'm' THEN 'materialized view' WHEN 'e' THEN 'stream' WHEN 'o' THEN 'contview' END as "Type", + pg_catalog.pg_get_userbyid(c.relowner) as "Owner", + pg_catalog.pg_size_pretty(pg_catalog.pg_table_size(c.oid)) as "Size", + c.reloptions as "Storage", + pg_catalog.obj_description(c.oid, 'pg_class') as "Description" +FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace +WHERE c.relkind IN ('r','') + AND n.nspname <> 'pg_catalog' + AND n.nspname <> 'db4ai' + AND n.nspname <> 'information_schema' + AND n.nspname !~ '^pg_toast' + AND c.relname not like 'matviewmap_%' + AND c.relname not like 'mlog_%' + AND pg_catalog.pg_table_is_visible(c.oid) +ORDER BY 1,2; +************************** + + List of relations + Schema | Name | Type | Owner | Size | Storage | Description +--------+------+-------+-------+---------+----------------------------------+------------- + public | test | table | omm | 0 bytes | {orientation=row,compression=no} | +(1 row) +``` + +## \\di 查看当前数据库中索引信息 + +``` +openGauss=# \di +********* QUERY ********** +SELECT n.nspname as "Schema", + c.relname as "Name", + CASE c.relkind WHEN 'r' THEN 'table' WHEN 'v' THEN 'view' WHEN 'i' THEN 'index' WHEN 'I' THEN 'global partition index' WHEN 'S' THEN 'sequence' WHEN 's' THEN 'special' WHEN 
'f' THEN 'foreign table' WHEN 'm' THEN 'materialized view' WHEN 'e' THEN 'stream' WHEN 'o' THEN 'contview' END as "Type", + pg_catalog.pg_get_userbyid(c.relowner) as "Owner", + c2.relname as "Table", + c.reloptions as "Storage" +FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace + LEFT JOIN pg_catalog.pg_index i ON i.indexrelid = c.oid + LEFT JOIN pg_catalog.pg_class c2 ON i.indrelid = c2.oid +WHERE c.relkind IN ('i','I','') + AND n.nspname <> 'pg_catalog' + AND n.nspname <> 'db4ai' + AND n.nspname <> 'information_schema' + AND n.nspname !~ '^pg_toast' + AND c.relname not like 'matviewmap_%' + AND c.relname not like 'mlog_%' + AND pg_catalog.pg_table_is_visible(c.oid) +ORDER BY 1,2; +************************** + + List of relations + Schema | Name | Type | Owner | Table | Storage +--------+-------------+-------+-------+-------+--------- + public | idx_id_test | index | omm | test | +(1 row) +``` + +## \\di indexname 查看当前数据库某个索引的信息 + +``` +openGauss=# \di idx_id_test +********* QUERY ********** +SELECT n.nspname as "Schema", + c.relname as "Name", + CASE c.relkind WHEN 'r' THEN 'table' WHEN 'v' THEN 'view' WHEN 'i' THEN 'index' WHEN 'I' THEN 'global partition index' WHEN 'S' THEN 'sequence' WHEN 's' THEN 'special' WHEN 'f' THEN 'foreign table' WHEN 'm' THEN 'materialized view' WHEN 'e' THEN 'stream' WHEN 'o' THEN 'contview' END as "Type", + pg_catalog.pg_get_userbyid(c.relowner) as "Owner", + c2.relname as "Table", + c.reloptions as "Storage" +FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace + LEFT JOIN pg_catalog.pg_index i ON i.indexrelid = c.oid + LEFT JOIN pg_catalog.pg_class c2 ON i.indrelid = c2.oid +WHERE c.relkind IN ('i','I','s','') + AND n.nspname !~ '^pg_toast' + AND c.relname not like 'matviewmap_%' + AND c.relname not like 'mlog_%' + AND c.relname ~ '^(idx_id_test)$' + AND pg_catalog.pg_table_is_visible(c.oid) +ORDER BY 1,2; +************************** + + List of relations + Schema | Name | Type | Owner | Table | Storage +--------+-------------+-------+-------+-------+--------- + public | idx_id_test | index | omm | test | +(1 row) +``` + +## \\dv 查看当前数据库视图信息 + +``` +openGauss=# \dv +********* QUERY ********** +SELECT n.nspname as "Schema", + c.relname as "Name", + CASE c.relkind WHEN 'r' THEN 'table' WHEN 'v' THEN 'view' WHEN 'i' THEN 'index' WHEN 'I' THEN 'global partition index' WHEN 'S' THEN 'sequence' WHEN 's' THEN 'special' WHEN 'f' THEN 'foreign table' WHEN 'm' THEN 'materialized view' WHEN 'e' THEN 'stream' WHEN 'o' THEN 'contview' END as "Type", + pg_catalog.pg_get_userbyid(c.relowner) as "Owner", + c.reloptions as "Storage" +FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace +WHERE c.relkind IN ('v','') + AND n.nspname <> 'pg_catalog' + AND n.nspname <> 'db4ai' + AND n.nspname <> 'information_schema' + AND n.nspname !~ '^pg_toast' + AND c.relname not like 'matviewmap_%' + AND c.relname not like 'mlog_%' + AND pg_catalog.pg_table_is_visible(c.oid) +ORDER BY 1,2; +************************** + + List of relations + Schema | Name | Type | Owner | Storage +--------+--------+------+-------+--------- + public | v_test | view | omm | +(1 row) +``` + +## \\ds 查看当前数据库序列信息 + +``` +openGauss=# \ds +********* QUERY ********** +SELECT n.nspname as "Schema", + c.relname as "Name", + CASE c.relkind WHEN 'r' THEN 'table' WHEN 'v' THEN 'view' WHEN 'i' THEN 'index' WHEN 'I' THEN 'global partition index' WHEN 'S' THEN 'sequence' WHEN 's' THEN 'special' WHEN 'f' THEN 
'foreign table' WHEN 'm' THEN 'materialized view' WHEN 'e' THEN 'stream' WHEN 'o' THEN 'contview' END as "Type", + pg_catalog.pg_get_userbyid(c.relowner) as "Owner", + c.reloptions as "Storage" +FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace +WHERE c.relkind IN ('S','') + AND n.nspname <> 'pg_catalog' + AND n.nspname <> 'db4ai' + AND n.nspname <> 'information_schema' + AND n.nspname !~ '^pg_toast' + AND c.relname not like 'matviewmap_%' + AND c.relname not like 'mlog_%' + AND pg_catalog.pg_table_is_visible(c.oid) +ORDER BY 1,2; +************************** + + List of relations + Schema | Name | Type | Owner | Storage +--------+------+----------+-------+--------- + public | sq1 | sequence | omm | +(1 row) +``` + diff --git "a/content/zh/post/2022/openGauss-gsql-\345\270\270\347\224\250\345\205\203\345\221\275\344\273\244-\344\272\214.md" "b/content/zh/post/2022/openGauss-gsql-\345\270\270\347\224\250\345\205\203\345\221\275\344\273\244-\344\272\214.md" new file mode 100644 index 0000000000000000000000000000000000000000..ac0e4ee6e47de9d96570da056b7a07f56cda69b7 --- /dev/null +++ "b/content/zh/post/2022/openGauss-gsql-\345\270\270\347\224\250\345\205\203\345\221\275\344\273\244-\344\272\214.md" @@ -0,0 +1,243 @@ ++++ + +title = "openGauss gsql 常用元命令 二" + +date = "2022-01-10" + +tags = ["openGauss gsql 常用元命令 二"] + +archives = "2022-01" + +author = "晨辉" + +summary = "openGauss gsql 常用元命令 二" + +img = "/zh/post/2022/title/img11.png" + +times = "12:30" + ++++ + +# openGauss gsql 常用元命令 二 + +## \\df 查看当前数据库函数信息 + +``` +openGauss=# \df +********* QUERY ********** +SELECT n.nspname as "Schema", + p.proname as "Name", + pg_catalog.pg_get_function_result(p.oid) as "Result data type", + pg_catalog.pg_get_function_arguments(p.oid) as "Argument data types", + CASE + WHEN p.proisagg THEN 'agg' + WHEN p.proiswindow THEN 'window' + WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger' + ELSE 'normal' +END as "Type" , + fencedmode as "fencedmode" + , + propackage as "propackage" + , + prokind as "prokind" + +FROM pg_catalog.pg_proc p + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace +WHERE pg_catalog.pg_function_is_visible(p.oid) + AND n.nspname <> 'pg_catalog' + AND n.nspname <> 'db4ai' + AND n.nspname <> 'information_schema' +ORDER BY 1, 2, 4; +************************** + + List of functions + Schema | Name | Result data type | Argument data types | Type | fencedmode | propackage | prokind +--------+-----------+------------------+--------------------------------------------------+--------+------------+------------+--------- + public | fuc_worth | numeric | price numeric, amount integer, OUT worth numeric | normal | f | f | f +(1 row) +``` + +## \\dx 查看已安装的扩展程序信息 + +``` +openGauss=# \dx +********* QUERY ********** +SELECT e.extname AS "Name", e.extversion AS "Version", n.nspname AS "Schema", c.description AS "Description" +FROM pg_catalog.pg_extension e LEFT JOIN pg_catalog.pg_namespace n ON n.oid = e.extnamespace LEFT JOIN pg_catalog.pg_description c ON c.objoid = e.oid AND c.classoid = 'pg_catalog.pg_extension'::pg_catalog.regclass +ORDER BY 1; +************************** + + List of installed extensions + Name | Version | Schema | Description +-----------------+---------+------------+-------------------------------------------------- + dist_fdw | 1.0 | pg_catalog | foreign-data wrapper for distfs access + file_fdw | 1.0 | pg_catalog | foreign-data wrapper for flat file access + hdfs_fdw | 1.0 | pg_catalog | foreign-data 
wrapper for flat file access + hstore | 1.1 | pg_catalog | data type for storing sets of (key, value) pairs + log_fdw | 1.0 | pg_catalog | Foreign Data Wrapper for accessing logging data + mot_fdw | 1.0 | pg_catalog | foreign-data wrapper for MOT access + plpgsql | 1.0 | pg_catalog | PL/pgSQL procedural language + security_plugin | 1.0 | pg_catalog | provides security functionality +(8 rows) +``` + +## \\x 语法 \\x\[on|off|auto\] 设置语句的输出模式,模式为行的方式输出,执行 \\x on 切换为以列的方式来显示 + +``` +openGauss=# \x +Expanded display is on. +openGauss=# \dx +********* QUERY ********** +SELECT e.extname AS "Name", e.extversion AS "Version", n.nspname AS "Schema", c.description AS "Description" +FROM pg_catalog.pg_extension e LEFT JOIN pg_catalog.pg_namespace n ON n.oid = e.extnamespace LEFT JOIN pg_catalog.pg_description c ON c.objoid = e.oid AND c.classoid = 'pg_catalog.pg_extension'::pg_catalog.regclass +ORDER BY 1; +************************** + +List of installed extensions +-[ RECORD 1 ]------------------------------------------------- +Name | dist_fdw +Version | 1.0 +Schema | pg_catalog +Description | foreign-data wrapper for distfs access +-[ RECORD 2 ]------------------------------------------------- +Name | file_fdw +Version | 1.0 +Schema | pg_catalog +Description | foreign-data wrapper for flat file access +-[ RECORD 3 ]------------------------------------------------- +``` + +## \\timing 语法 \\timing \[on|off\] 控制显示SQL的执行时间,默认为off, on 为显示SQL语句的执行时间 + +``` +openGauss=# select * from test; + id +---- + 1 +(1 row) + +openGauss=# \timing on +Timing is on. +openGauss=# select * from test; + id +---- + 1 +(1 row) + +Time: 0.352 ms +``` + +## \\h 用于获取SQL语句的帮助,例如 \\h merge + +``` +openGauss=# \h merge +Command: MERGE +Description: insert, update, or delete rows of a table based upon source data +Syntax: +MERGE [/*+ plan_hint */] INTO table_name [ [ AS ] alias ] +USING { { table_name | view_name } | subquery } [ [ AS ] alias ] +ON ( condition ) +[ + WHEN MATCHED THEN + UPDATE SET { column_name = { expression | DEFAULT } | + ( column_name [, ...] ) = ( { expression | DEFAULT } [, ...] ) } [, ...] + [ WHERE condition ] +] +[ + WHEN NOT MATCHED THEN + INSERT { DEFAULT VALUES | + [ ( column_name [, ...] ) ] VALUES ( { expression | DEFAULT } [, ...] ) [, ...] [ WHERE condition ] } +]; +``` + +## ? 获取gsql的元命令的帮助 + +``` +openGauss=# \? +General + \copyright show openGauss usage and distribution terms + \g [FILE] or ; execute query (and send results to file or |pipe) + \h(\help) [NAME] help on syntax of SQL commands, * for all commands + \parallel [on [num]|off] toggle status of execute (currently off) + \q quit gsql + +Query Buffer + \e [FILE] [LINE] edit the query buffer (or file) with external editor + \ef [FUNCNAME [LINE]] edit function definition with external editor + \p show the contents of the query buffer + \r reset (clear) the query buffer + \w FILE write query buffer to file + +Input/Output + \copy ... perform SQL COPY with data stream to the client host + \echo [STRING] write string to standard output + \i FILE execute commands from file + \i+ FILE KEY execute commands from encrypted file + \ir FILE as \i, but relative to location of current script + \ir+ FILE KEY as \i+, but relative to location of current script + \o [FILE] send all query results to file or |pipe + \qecho [STRING] write string to query output stream (see \o) +``` + +## ! os\_command 用于执行操作系统命令,同oracle的 !,mysql的 system + +``` +openGauss-# \! pwd +/home/omm +openGauss-# \! 
ls +1.sh create_db_tables.sql test.sql +``` + +## \\o filename 用于重定向输出到文件,注意这个不是简单的将屏幕的内容输出到文本,而是将SQL语句正确执行的结果输出到文本 + +``` +openGauss-# \o test.out +openGauss-# select * from test; +WARNING: Session unused timeout. +FATAL: terminating connection due to administrator command +could not send data to server: Broken pipe +The connection to the server was lost. Attempting reset: Succeeded. +openGauss=# select * from test; +openGauss=# \! cat test.out + id +---- + 1 +(1 row) + +openGauss=# select * from pg_tables; +openGauss=# \! cat test.out + id +---- + 1 +(1 row) + + schemaname | tablename | tableowner | tablespace | hasindexes | hasrules | hastriggers | tablecreator | created | last_ddl_time +--------------------+-------------------------------+------------+------------+------------+----------+-------------+--------------+-------------------------------+------------------------------- + pg_catalog | pg_statistic | omm | | t | f | f | | | + +\i file.sql +``` + +## \\conninfo 显示gsql中显示会话的连接信息 + +## \\c\[onnect\] \[DBNAME\] 切换数据库 + +``` +openGauss=# \conninfo +You are connected to database "postgres" as user "omm" via socket in "/opt/huawei/tmp" at port "15400". +openGauss=# \c mydb +Non-SSL connection (SSL connection is recommended when requiring high-security) +You are now connected to database "mydb" as user "omm". +mydb=# \conninfo +You are connected to database "mydb" as user "omm" via socket in "/opt/huawei/tmp" at port "15400". +``` + +## \\echo \[string\] 打印字符串 + +``` +mydb=# \echo Hello World! +Hello World! +``` + +## \\q 退出gsql + diff --git "a/content/zh/post/2022/openGauss\344\270\255\347\232\204SQL\345\274\225\346\223\216\344\273\213\347\273\215.md" "b/content/zh/post/2022/openGauss\344\270\255\347\232\204SQL\345\274\225\346\223\216\344\273\213\347\273\215.md" new file mode 100644 index 0000000000000000000000000000000000000000..ab04f5c4cd4eafbe886c8ef010a48e05917edfb1 --- /dev/null +++ "b/content/zh/post/2022/openGauss\344\270\255\347\232\204SQL\345\274\225\346\223\216\344\273\213\347\273\215.md" @@ -0,0 +1,46 @@ ++++ + +title = "openGauss中的SQL引擎介绍" + +date = "2021-12-23" + +tags = [ "openGauss中的SQL引擎介绍"] + +archives = "2021-12" + +author = "ccgo" + +summary = "openGauss中的SQL引擎介绍" + +img = "/zh/post/2022/title/img2.png" + +times = "12:30" + ++++ + +# openGauss中的SQL引擎介绍 + +![](figures/20211223-8c6710da-e8ba-4c22-a1dd-dc76ecaec07a.png) + +![](figures/20211223-01cf061e-a19f-4516-9ddf-d38eb5bbbc86.png) + +![](figures/20211223-92cb0889-6352-4ae6-a73f-1ec772e8a730.png) + +![](figures/20211223-453c2df5-151d-4333-a812-732e1a32313b.png) + +![](figures/20211223-60e81928-181c-4964-b0ec-abdd2acc7da7.png) + +![](figures/20211223-9c55c807-e30b-44a9-8810-4d2b70db10a9.png) + +![](figures/20211223-7afbf443-21c5-4855-8ed7-c264abaf9ff0.png) + +![](figures/20211223-ef70cfd4-da07-4c1d-aabe-cc867cedbc80.png) + +![](figures/20211223-ae44972c-4cc6-49b7-94c5-5b507039a686.png) + +![](figures/20211223-c49e9596-383a-41c4-8057-77cdfd9e8f5e.png) + +![](figures/20211223-83e9cf25-6bbc-4e0e-a24d-963d9050ae73.png) + +![](figures/20211223-8e28c064-237c-4c48-8d6d-7498b11f1c3b.png) + diff --git "a/content/zh/post/2022/openGauss\344\272\213\345\212\241\346\234\272\345\210\266\344\270\255MVCC\346\212\200\346\234\257\347\232\204\345\256\236\347\216\260\345\210\206\346\236\220.md" "b/content/zh/post/2022/openGauss\344\272\213\345\212\241\346\234\272\345\210\266\344\270\255MVCC\346\212\200\346\234\257\347\232\204\345\256\236\347\216\260\345\210\206\346\236\220.md" new file mode 100644 index 
0000000000000000000000000000000000000000..94e23564b066e59db4f776c63ed9a673ffd5b6ba --- /dev/null +++ "b/content/zh/post/2022/openGauss\344\272\213\345\212\241\346\234\272\345\210\266\344\270\255MVCC\346\212\200\346\234\257\347\232\204\345\256\236\347\216\260\345\210\206\346\236\220.md" @@ -0,0 +1,477 @@ ++++ + +title = "openGauss事务机制中MVCC技术的实现分析" + +date = "2021-12-27" + +tags = [ "openGauss事务机制中MVCC技术的实现分析"] + +archives = "2021-12" + +author = "luooofan" + +summary = "openGauss事务机制中MVCC技术的实现分析" + +img = "/zh/post/2022/title/img16.png" + +times = "12:30" + ++++ + +# openGauss事务机制中MVCC技术的实现分析 + +## 概述 + +1. **事务** + + 事务是为用户提供的最核心、最具吸引力的数据库功能之一。简单地说,事务是用户定义的一系列数据库操作\(如查询、插入、修改或删除等\)的集合,从数据库内部保证了该操作集合作为一个整体的原子性\(Atomicity\)、一致性\(Consistency\)、隔离性\(Isolation\)和持久性\(Durability\),这些特性统称事务的ACID特性。 + +2. **DBMS中的并发控制** + + 并发控制旨在针对数据库中对事务并行的场景,保证 ACID 中的一致性(Consistency)与隔离性(Isolation)。数据库技术中主流的三种并发控制技术分别是: Multi-version Concurrency Control \(MVCC\), Strict Two-Phase Locking \(S2PL\), 以及 Optimistic Concurrency Control \(OCC\),每种技术也都有很多的变种。 + +3. **MVCC** + + MVCC的基本机制是:写事务不会原地修改元组内容,每次写操作都会在旧的版本之上创建新的版本,并且会保留旧的版本。当某个事务需要读取数据时,数据库系统会从所有的版本中选取出符合该事务隔离级别要求的版本。 + + MVCC 的主要优点是读数据的锁请求与写数据的锁请求不冲突,以此来实现读不阻塞写,写也不阻塞读。 + + +- openGauss事务整体架构 + + ![](figures/20211017-210839-v2-58a3a0df18e1a92b9cc209036fb149ab_b.jpg) + + 在openGauss中,事务的实现与存储引擎的实现有很强关联,代码主要集中在src/gausskernel/storage/access/transam及src/gausskernel/storage/lmgr下,关键文件如图所示。 + + - (1) 事务管理器:事务系统的中枢,它的实现是一个有限循环状态机,通过接受外部系统的命令并根据当前事务所处的状态决定事务的下一步执行过程。 + - (2) 日志管理器:用来记录事务执行的状态以及数据变化的过程,包括事务提交日志\(CLOG\)、事务提交序列日志(CSNLOG)以及事务日志(XLOG)。其中CLOG日志只用来记录事务执行的结果状态,CSNLOG记录日志提交的顺序,用于可见性判断;XLOG是数据的redo日志,用于恢复及持久化。 + - (3) 线程管理机制:通过一片内存区域记录所有线程的事务信息,任何一个线程可以通过访问该区域获取其他事务的状态信息。 + - (4) MVCC机制:openGauss系统中,事务执行读流程结合各事务提交的CSN序列号,采用了多版本并发控制机制,实现了元组的读和写互不阻塞。 + - (5) 锁管理器:实现系统的写并发控制,通过锁机制来保证事务写流程的隔离性。 + + +## MVCC的实现 + +- 我们需要关注: + - 元组版本号的实现 + - 快照的实现 + - 判断数据有效性、可见性、可更新性的算法的实现 + - 不同的隔离级别的实现 + +- 多版本元组存储结构 + - src/include/access/htup.h + + 为了定义MVCC 中不同版本的数据,Opengauss在每个元组的头部信息HeapTupleHeaderData中引入了一些字段如下: + + ``` + typedef struct HeapTupleHeaderData { + union { + HeapTupleFields t_heap; /* 存储该元组的一些描述信息 */ + DatumTupleFields t_datum; + } t_choice; + + ItemPointerData t_ctid; /* (块号,块内偏移) 存储用来记录当前元组或新元组的物理位置 */ + + /* Fields below here must match MinimalTupleData! 
*/ + + uint16 t_infomask2; + + uint16 t_infomask; /* various flag bits, see below */ + + uint8 t_hoff; + + /* ^ - 23 bytes - ^ */ + + bits8 t_bits[FLEXIBLE_ARRAY_MEMBER]; + + /* MORE DATA FOLLOWS AT END OF STRUCT */ + } HeapTupleHeaderData; + typedef HeapTupleHeaderData* HeapTupleHeader + ``` + + - HeapTupleFields + + ``` + typedef struct HeapTupleFields { + ShortTransactionId t_xmin; /* 存放插入该 Tuple 时的 txid */ + ShortTransactionId t_xmax; /* 存放删除或者更新该 Tuple 时的 txid,如果还没更新或者删除,那么置 0,表示无效 */ + + union { + CommandId t_cid; /* 创建或更新/删除该 Tuple 的命令在该事务内执行的所有 SQL 命令中的编号 */ + ShortTransactionId t_xvac; /* old-style VACUUM FULL xact ID */ + } t_field3; + } HeapTupleFields; + ``` + + - t\_infomask + + ``` + #define HEAP_HASNULL 0x0001 /* has null attribute(s) */ + #define HEAP_HASVARWIDTH 0x0002 /* has variable-width attribute(s) */ + #define HEAP_HASEXTERNAL 0x0004 /* has external stored attribute(s) */ + #define HEAP_HASOID 0x0008 /* has an object-id field */ + #define HEAP_COMPRESSED 0x0010 /* has compressed data */ + #define HEAP_COMBOCID 0x0020 /* t_cid is a combo cid */ + #define HEAP_XMAX_EXCL_LOCK 0x0040 /* xmax is exclusive locker */ + #define HEAP_XMAX_SHARED_LOCK 0x0080 /* xmax is shared locker */ + /* if either LOCK bit is set, xmax hasn't deleted the tuple, only locked it */ + #define HEAP_IS_LOCKED (HEAP_XMAX_EXCL_LOCK | HEAP_XMAX_SHARED_LOCK) + #define HEAP_XMIN_COMMITTED 0x0100 /* t_xmin committed */ + #define HEAP_XMIN_INVALID 0x0200 /* t_xmin invalid/aborted */ + #define HEAP_XMIN_FROZEN (HEAP_XMIN_INVALID | HEAP_XMIN_COMMITTED) + #define HEAP_XMAX_COMMITTED 0x0400 /* t_xmax committed */ + #define HEAP_XMAX_INVALID 0x0800 /* t_xmax invalid/aborted */ + ... + ``` + + +- 插入、删除、更新元组 + - 元组在页中是如何存放的 + + ![](figures/20211015-225510-fig-5-03.png) + + ![](figures/20211015-225127-update.png) + + - 插入 + + 假设一个txid为99的事务插入一个元组 + + ![](figures/20211015-225511-fig-5-04.png) + + - 删除 + + 假设一个txid为111的事务删除一个元组 + + ![](figures/20211015-225511-fig-5-05.png) + + - 更新 + + 假设99号事务插入的元组被100号事务更新了两次 + + ![](figures/20211015-225511-fig-5-06.png) + + openGauss通过HeapTupleHeaderData 的几个特殊的字段,给元组设置了不同的版本号,元组的每次更新操作都会产生一条新版本的元组,版本之间从旧到新形成了一条版本链(旧的ctid指向新的元组)。 + + +- 事务快照的实现 + + 为了实现元组对事务的可见性判断,openGauss引入了事务快照SnapshotData + + 在openGauss中,有两种方式来实现快照。 + + - (1)活跃事务数组方法 + + 在数据库进程中,维护一个全局的数组,其中的成员为正在执行的事务信息,包括事务的事务号,该数组即活跃事务数组。 + + 在每个事务开始的时候,复制一份该数组内容。 + + 当事务执行过程中扫描到某个元组时,需要通过判断元组xmin和xmax这两个事务对于查询事务的可见性,来决定该元组是否对查询事务可见。 + + ![](figures/20211015-225512-d34f1a911a8804c0b1f8d791a65f175e.png) + + - (2)时间戳方法 + + ![](figures/20211015-225512-72285f7db5051f38a7940e7f235f49df.png) + + 在openGauss内部,使用一个全局自增的长整数作为逻辑的时间戳,模拟数据库内部的时序,该逻辑时间戳被称为提交顺序号(Commit Sequence Number,简称CSN)。 + + 每当一个事务提交的时候,在CSN日志中会记录该事务号 XID对应的逻辑时间戳 CSN 值。 + + ![](figures/20211015-225513-64eaedd1d1501b104652b104bd3152b2.png) + + ``` + #define COMMITSEQNO_INPROGRESS UINT64CONST(0x0) // 表示该事务还未提交或回滚 + #define COMMITSEQNO_ABORTED UINT64CONST(0x1) // 表示该事务已经回滚 + #define COMMITSEQNO_FROZEN UINT64CONST(0x2) // 表示该事务已提交,且对任何快照可见 + #define COMMITSEQNO_FIRST_NORMAL UINT64CONST(0x3) // 事务正常的CSN号起始值 + #define COMMITSEQNO_COMMIT_INPROGRESS (UINT64CONST(1) << 62) // 事务正在提交中 + ``` + + - 事务快照数据结构SnapshotData + + src/include/utils/snapshot.h + + 获取快照时会记录当前活跃的最小的xid,记为snapshot.xmin。当前最新提交的“事务id\(latestCompleteXid\) + 1”,记为snapshot.xmax。当前最新提交的“CSN号 + 1”\(NextCommitSeqNo\),记为snapshot.csn。 + + ``` + typedef struct SnapshotData { + SnapshotSatisfiesFunc satisfies; /* 判断可见性的函数;通常使用MVCC,即HeapTupleSatisfiesMVCC */ + TransactionId xmin; /*当前活跃事务最小值,小于该值的事务说明已结束 */ + 
TransactionId xmax; /*最新提交事务id(latestCompeleteXid)+1,大于等于改值说明事务还未开始,该事务id不可见 */ + TransactionId* xip; /*记录当前活跃事务链表,在CSN版本中该值无用 */ + TransactionId* subxip; /* 记录缓存子事务活跃链表,在CSN版本中该值无用 */ + uint32 xcnt; /* 记录活跃事务的个数(xip中元组数)在CSN版本中该值无用 */ + ... + + CommitSeqNo snapshotcsn; /* 快照的CSN号,一般为最新提交事务的CSN号+1(NextCommitSeqNo),CSN号严格小于该值的事务可见。 */ + ... + + CommandId curcid; /*事务块中的命令序列号,即同一事务中,前面插入的数据,后面可见。 */ + uint32 active_count; /* ActiveSnapshot stack的refcount */ + uint32 regd_count; /* RegisteredSnapshotList 的refcount*/ + void* user_data; /* 本地多版本快照使用,标记该快照还有线程使用,不能直接释放 */ + SnapshotType snapshot_type; /* openGauss单机无用 */ + } SnapshotData; + ``` + + - satisfies是openGauss提供的对于事务可见性判断的统一操作接口。 + + src/gausskernel/storage/access/heap/heapam\_visibility.c + + - HeapTupleSatisfiesMVCC:判断元组对某一快照版本是否有效 + - HeapTupleSatisfiesUpdate:判断元组是否可更新 + - HeapTupleSatisfiesDirty:判断当前元组是否已脏 + - HeapTupleSatisfiesSelf:判断tuple对自身信息是否有效 + - HeapTupleSatisfiesToast:用于TOAST表(参考文档)的判断 + - HeapTupleSatisfiesVacuum:用在VACUUM,判断某个元组是否对任何正在运行的事务可见,如果是,则该元组不能被VACUUM删除 + - HeapTupleSatisfiesAny:所有元组都可见 + - HeapTupleSatisfiesHistoricMVCC:用于CATALOG 表 + - …… + + - MVCC可见性判断机制 + + + + + + + + + + + + + + + + +

| 状态 | xmax对于查询可见 | xmax对于查询不可见 |
| :-- | :-- | :-- |
| xmin对于查询可见 | 记录不可见(先插入,后删除) | 记录可见(先插入,未删除) |
| xmin对于查询不可见 | 不可能发生 | 记录不可见(未插入,未删除) |
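为便于对照上表的组合结果,这里给出一段可独立编译的示意代码(仅为假设性的简化模型,并非 openGauss 源码,函数名 TupleVisibleSketch 为虚构;真实判断逻辑见下文的 XidVisibleInSnapshot 与 HeapTupleSatisfiesMVCC):

```
#include <cstdio>

/* 示意代码:xminVisible 表示插入事务对查询可见;xmaxValid 表示元组曾被
 * 删除/更新(xmax 有效);xmaxVisible 表示删除事务对查询可见 */
static bool TupleVisibleSketch(bool xminVisible, bool xmaxValid, bool xmaxVisible)
{
    if (!xminVisible)
        return false;   /* 未插入(插入对查询不可见):记录不可见 */
    if (!xmaxValid || !xmaxVisible)
        return true;    /* 先插入、未删除(或删除对查询不可见):记录可见 */
    return false;       /* 先插入、后删除,且删除对查询可见:记录不可见 */
}

int main(void)
{
    std::printf("%d\n", TupleVisibleSketch(true, true, true));    /* 0:先插入,后删除 */
    std::printf("%d\n", TupleVisibleSketch(true, false, false));  /* 1:先插入,未删除 */
    std::printf("%d\n", TupleVisibleSketch(false, false, false)); /* 0:未插入 */
    return 0;
}
```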
+ + - XidVisibleInSnapshot + + src/gausskernel/storage/access/heap/heapam\_visibility.c + + ``` + bool XidVisibleInSnapshot(TransactionId xid, Snapshot snapshot, TransactionIdStatus* hintstatus, Buffer buffer, bool* sync) + { + bool looped = false; + *hintstatus = XID_INPROGRESS; + if (GTM_MODE && TransactionIdFollowsOrEquals(xid, snapshot->xmax)) { + return false; + } + loop: + csn = TransactionIdGetCommitSeqNo(xid, false, true, false); + if (COMMITSEQNO_IS_COMMITTED(csn)) { + *hintstatus = XID_COMMITTED; + if (csn < snapshot->snapshotcsn) + return true; + else + return false; + } else if (COMMITSEQNO_IS_COMMITTING(csn)) { + ... + } else { + if (csn == COMMITSEQNO_ABORTED) + *hintstatus = XID_ABORTED; + return false; + } + } + ``` + + 如果xid事务正在执行: + + ``` + if (looped) { + ereport(DEBUG1, (errmsg("transaction id %lu's csn %ld is changed to ABORT after lockwait.", xid, csn))); + RecheckXidFinish(xid, csn); + CSNLogSetCommitSeqNo(xid, 0, NULL, COMMITSEQNO_ABORTED); + SetLatestFetchState(xid, COMMITSEQNO_ABORTED); + *hintstatus = XID_ABORTED; + return false; + } else { + if (!COMMITSEQNO_IS_SUBTRANS(csn)) { + ... + CommitSeqNo latestCSN = GET_COMMITSEQNO(csn); + if (latestCSN >= snapshot->snapshotcsn) { + ... + return false; + } + } else { + parentXid = (TransactionId)GET_PARENTXID(csn); + } + ... + if (TransactionIdIsValid(parentXid)) + SyncWaitXidEnd(parentXid, buffer); + else + SyncWaitXidEnd(xid, buffer); + looped = true; + parentXid = InvalidTransactionId; + goto loop; + } + ``` + + HeapTupleSatisfiesMVCC + + ``` + static bool HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer) + { + // 取元组头 + HeapTupleHeader tuple = htup->t_data; + ... + // 根据hint bit,若xmin没有被标记为已提交:可能被标记为回滚,或者还未标记 + if (!HeapTupleHeaderXminCommitted(tuple)) { + // 如果xmin已经被标记为invalid,说明插入该元组的事务已经回滚,直接返回不可见 + if (HeapTupleHeaderXminInvalid(tuple)) + return false; + // xmin还未标记,并且xmin为当前事务,说明是在同一个事务内的插入命令和扫描命令,则需要去判断CID + // 同一个事务内,后面的查询可以查到当前事务之前命令插入的并且未删除的结果 + if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(page, tuple))) { + if ((tuple->t_infomask & HEAP_COMBOCID) && CheckStreamCombocid(tuple, snapshot->curcid, page)) + return true; /* delete after stream producer thread scan started */ + + // 当前扫描命令之后的某条命令才插入 + if (HeapTupleHeaderGetCmin(tuple, page) >= snapshot->curcid) + return false; /* inserted after scan started */ + // 到这里说明当前扫描命令之前已经插入 + // 根据hint bit,xmax被标记为invalid + if (tuple->t_infomask & HEAP_XMAX_INVALID) + return true; + + ... + + // 当前扫描命令之后的某条命令删除了该元组 + if (HeapTupleHeaderGetCmax(tuple, page) >= snapshot->curcid) + return true; /* deleted after scan started */ + else + return false; /* deleted before scan started */ + } + // xmin还没打标记,并且不是当前事务 + else { + // 通过csnlog判断事务是否可见,并且返回该事务的最终提交状态 + visible = XidVisibleInSnapshot(HeapTupleHeaderGetXmin(page, tuple), snapshot, &hintstatus, buffer, NULL); + // 如果该事务提交,则打上提交的hint bit用于加速判断 + if (hintstatus == XID_COMMITTED) + SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, HeapTupleHeaderGetXmin(page, tuple)); + // 如果事务回滚,则打上回滚标记 + if (hintstatus == XID_ABORTED) { + ... + SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, InvalidTransactionId); + } + // 如果xmin不可见,则该元组不可见 + if (!visible) { + ... + return false; + } + } + } + // 根据hint bit,若xmin已经被标记为已提交,则通过函数接口CommittedXidVisibleInSnapshot判断是否对本次快照可见 + else { + /* xmin is committed, but maybe not according to our snapshot */ + if (!HeapTupleHeaderXminFrozen(tuple) && + !CommittedXidVisibleInSnapshot(HeapTupleHeaderGetXmin(page, tuple), snapshot, buffer)) { + if (...) 
{ + return false; /* treat as still in progress */ + } + } + } + // 到此为止认为xmin visible,继续判断xmax的可见性 + + recheck_xmax: + // 根据hint bit,xmax已经被标记为invalid,即已经回滚 + if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */ + return true; + + ... // 还有一些其他状态判断 + + // 根据hint bit,xmax没有被标记为commited + if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { + bool sync = false; + TransactionId xmax = HeapTupleHeaderGetXmax(page, tuple); + + // 如果xmax为当前事务 + if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(page, tuple))) { + // 如果删除该元组的命令后发生于快照扫描时刻 + if (HeapTupleHeaderGetCmax(tuple, page) >= snapshot->curcid) + return true; /* deleted after scan started */ + else + return false; /* deleted before scan started */ + } + + visible = XidVisibleInSnapshot(HeapTupleHeaderGetXmax(page, tuple), snapshot, &hintstatus, buffer, &sync); + /* + * If sync wait, xmax may be modified by others. So we need to check xmax again after acquiring the page lock. + */ + if (sync && (xmax != HeapTupleHeaderGetXmax(page, tuple))) { + goto recheck_xmax; + } + // 根据hintstatus在元组头部打标记 hint bit + if (hintstatus == XID_COMMITTED) { + SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, HeapTupleHeaderGetXmax(page, tuple)); + } + if (hintstatus == XID_ABORTED) { + ... + SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); + } + if (!visible) { + if (...) { + if (sync && (xmax != HeapTupleHeaderGetXmax(page, tuple))) { + goto recheck_xmax; + } + return true; /* treat as still in progress */ + } + } + } + // 根据hint bit,xmax被标记为commited + else { + /* xmax is committed, but maybe not according to our snapshot */ + if (!CommittedXidVisibleInSnapshot(HeapTupleHeaderGetXmax(page, tuple), snapshot, buffer)) { + if (...) { + return true; /* treat as still in progress */ + } + } + } + return false; + } + ``` + + + +- 隔离级别的实现 + + ![](figures/zh-cn_image_0000001197508006.png) + + - (1)脏写(dirty write):两个事务分别写入,两个事务分别提交或回滚,则事务的结果无法确定,即一个事务可以回滚另一个事务的提交。 + - (2)脏读(dirty read):一个事务可以读取另一个事务未提交的修改数据。 + - (3)不可重复读(fuzzy read):一个事务重复读取前面读取过的数据,数据的结果被另外的事务修改。 + - (4)幻读(phantom):一个事务重复执行范围查询,返回一组符合条件的数据,每次查询的结果集因为其他事务的修改发生改变\(条数\)。 + - (5)更新丢失\(lost update\):一个事务在读取元组并更新该元组的过程中,有另一个事务修改了该元组的值,导致最终这次修改丢失。 + - (6)读偏斜\(read skew\):假设数据x,y有隐式的约束x+y=100;事务一读取x=50;事务二写x=25并更新y=75保证约束成立,事务二提交,事务一再读取y=75,导致事务一中读取x+y=125,不满足约束。 + - (7)写偏斜\(write skew\):假设数据x,y有隐式的约束x+y<=100;事务一读取x=50,并写入y=50;事务二读取y=30并写入x=70,并提交;事务一再提交;最终导致x=70,y=50不满足x+y<=100的约束。 + + 隔离级别越高,在一个事务执行过程中,它能“感知”到的并发事务的影响越小。在最高的可串行化隔离级别下,任意一个事务的执行,均“感知”不到有任何其他并发事务执行的影响,并且所有事务执行的效果就和一个个顺序执行的效果完全相同。 + + 在openGauss中,隔离级别的实现基于MVCC和快照机制,因此这种隔离方式被称为快照隔离\(Snapshot Isolation,SI\)。目前,openGauss支持读已提交和可重复读这两种隔离级别。两者实现上的差别在于在一个事务中获取快照的次数。\(在实现上可重复读隔离级别无幻读问题,有A5B写偏斜问题\) + + 如果采用读已提交的隔离级别,那么在一个事务块中每条语句的执行开始阶段,都会去获取一次最新的快照,从而可以看到那些在本事务块开始以后、在前面语句执行过程中提交的并发事务的效果。 + + 如果采用可重复读的隔离级别,那么在一个事务块中,只会在第一条语句的执行开始阶段,获取一次快照,后面执行的所有语句都会采用这个快照,整个事务块中的所有语句均不会看到该快照之后提交的并发事务的效果。 + + ![](figures/20211017-204222-dc83a9cc72803e849caa49dae027369f.png) + + + +## 总结 + +- 元组版本号的实现:使用元组头部信息的字段来标示元组的版本号 +- 快照的实现:活跃事务数组方法和时间戳方法 +- 判断数据有效性、可见性、可更新性的算法的实现: XidVisibleInSnapshot和HeapTupleSatisfiesMVCC +- 不同隔离级别的实现:在一个事务中获取快照的次数 + diff --git "a/content/zh/post/2022/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\243.md" 
"b/content/zh/post/2022/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\243.md" new file mode 100644 index 0000000000000000000000000000000000000000..35842567bf43925fd3be800c9fe30df68b2fb206 --- /dev/null +++ "b/content/zh/post/2022/openGauss\345\206\205\346\240\270\345\210\206\346\236\220\357\274\210\344\270\200\357\274\211-\345\244\232\347\272\277\347\250\213\346\236\266\346\236\204\345\220\257\345\212\250\350\277\207\347\250\213\350\257\246\350\247\243.md" @@ -0,0 +1,311 @@ ++++ + +title = "openGauss内核分析(一):多线程架构启动过程详" + +date = "2022-05-17" + +tags = [ "内核分析"] + +archives = "2022-05" + +author = "酷哥" + +summary = "内核分析" + +img = "/zh/post/2022/title/img7.png" + +times = "12:30" + ++++ + +# openGauss内核分析(一):多线程架构启动过程详解" + +openGauss数据库自2020年6月30日开源以来,吸引了众多内核开发者的关注。那么openGauss的多线程是如何启动的,一条SQL语句在 SQL引擎,执行引擎和存储引擎的执行过程是怎样的,酷哥做了一些总结,第一期内容主要分析openGauss 多线程架构启动过程。 + +openGauss数据库是一个单进程多线程的数据库,客户端可以使用JDBC/ODBC/Libpq/Psycopg等驱动程序,向openGauss的主线程(Postmaster)发起连接请求。 + +![](figures/openGauss内核分析(一)-多线程架构启动过程详解1.png) + +## **01** **openGauss为什么要使用多线程架构** + +随着计算机领域多核技术的发展,如何充分有效的利用多核的并行处理能力,是每个服务器端应用程序都必须考虑的问题。由于数据库服务器的服务进程或线程间存在着大量数据共享和同步,而多线程可以充分利用多CPU来并行执行多个强相关任务,例如执行引擎可以充分的利用线程的并发执行以提供性能。在多线程的架构下,数据共享的效率更高,能提高服务器访问的效率和性能,同时维护开销和复杂度更低,这对于提高数据库系统的并行处理能力非常重要。 + +**多线程的三大主要优势:** + +**优势一:**线程启动开销远小于进程启动开销。与进程相比,它是一种非常“节俭”的多任务操作方式。在Linux系统下,启动一个新的进程必须分配给它独立的地址空间,建立众多的数据表来维护它的代码段、堆栈段和数据段,这是一种“昂贵”的多任务工作方式。而运行于一个进程中的多个线程,它们彼此之间使用相同的地址空间,共享大部分数据,启动一个线程所花费的空间远远小于启动一个进程所花费的空间。 + +**优势二:**线程间方便的通信机制:对不同进程来说,它们具有独立的数据空间,要进行数据的传递只能通过通信的方式进行,这种方式不仅费时,而且很不方便。线程则不然,由于同一进程下的线程之间共享数据空间,所以一个线程的数据可以直接为其他线程所用,这不仅快捷,而且方便。 + +**优势三:**线程切换开销小于进程切换开销,对于Linux系统来讲,进程切换分两步:1.切换页目录以使用新的地址空间;2.切换内核栈和硬件上下文。对线程切换,第1步是不需要做的,第2步是进程和线程都要做的,所以明显线程切换开销小。 + +## **02 openGauss主要线程有哪些** + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

| 后台线程 | 功能介绍 |
| :-- | :-- |
| Postmaster主线程 | 入口函数PostmasterMain,主要负责内存、全局信息、信号、线程池等的初始化,启动辅助线程并监控线程状态,循环监听接收新的连接 |
| Walwriter日志写线程 | 入口函数WalWriterMain,将内存的预写日志页数据刷新到预写日志文件中,保证已提交的事务永久记录,不会丢失 |
| Startup数据库启动线程 | 入口函数StartupProcessMain,数据库启动时Postmaster主线程拉起的第一个子线程,主要完成数据库的日志REDO(重做)操作,进行数据库的恢复。日志REDO操作结束、数据库完成恢复后,如果不是备机,Startup线程就退出了;如果是备机,那么Startup线程会一直运行,持续REDO备机接收到的新日志 |
| Bgwriter后台数据写线程 | 入口函数BackgroundWriterMain,对共享缓冲区的脏页数据进行下盘 |
| PageWriter | 入口函数ckpt_pagewriter_main,将脏页数据拷贝至双写区域并落盘 |
| Checkpointer检查点线程 | 入口函数CheckpointerMain,周期性地执行检查点:将数据脏页刷新到磁盘,确保所有数据文件更新到一致状态;崩溃恢复时,检查点之前的更改不需要再从预写日志中恢复 |
| StatCollector统计线程 | 入口函数PgstatCollectorMain,统计对象、SQL、会话、锁等信息,保存到pgstat.stat文件中,用于性能、故障、状态分析 |
| WalSender日志发送线程 | 入口函数WalSenderMain,主机发送预写日志 |
| WalReceiver日志接收线程 | 入口函数WalReceiverMain,备机接收预写日志 |
| Postgres业务处理线程 | 入口函数PostgresMain,处理客户端连接请求,执行相关SQL业务 |
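结合上表,可以用一个极简的多线程示例(示意代码,非 openGauss 源码;线程名均为假设值)直观感受“单进程多线程”的形态——所有线程共享同一个进程号,只是线程名不同:

```
#include <pthread.h>
#include <unistd.h>

/* 示意代码:单进程内拉起多个命名线程,模拟 Postmaster 启动各辅助线程后
 * 在操作系统层面呈现的"一个进程、多个线程"形态 */
static void* worker(void* arg)
{
    /* 设置线程名(glibc 扩展),可在 ps/top 中观察到 */
    pthread_setname_np(pthread_self(), (const char*)arg);
    sleep(2); /* 模拟各自的主循环 */
    return NULL;
}

int main(void)
{
    const char* roles[] = {"WalWriter", "BgWriter", "Checkpointer"};
    pthread_t tids[3];
    for (int i = 0; i < 3; i++)
        pthread_create(&tids[i], NULL, worker, (void*)roles[i]);
    /* 此时可用 ps -T -p <进程号> 观察到同一进程下的多个命名线程 */
    for (int i = 0; i < 3; i++)
        pthread_join(tids[i], NULL);
    return 0;
}
```

在 Linux 上用 g++ -pthread 编译运行后,通过 ps -T -p <进程号> 即可看到同一进程下的多个命名线程,与下文 gaussdb 的 ps 输出形态一致。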
+ +数据库启动后,可以通过操作系统命令ps查看线程信息\(进程号为17012\) + +![](figures/openGauss内核分析(一)-多线程架构启动过程详解2.png) + +## **03** **openGauss启动过程** + +下面主要介绍openGauss数据库的启动过程,包括主线程,辅助线程及业务处理线程的启动过程。 + +- **gs\_ctl启动数据库** + + gs\_ctl是openGauss提供的数据库服务控制工具,可以用来启停数据库服务和查询数据库状态。主要供数据库管理模块调用,启动数据库使用如下命令: + + ``` + + ``` + + gs\_ctl的入口函数在“src/bin/pg\_ctl/pg\_ctl.cpp”,gs\_ctl进程fork一个进程来运行 gaussdb进程,通过shell命令启动。 + + ![](figures/openGauss内核分析(一)-多线程架构启动过程详解3.png) + + 上图中的cmd为“**/opt/software/openGauss/bin/gaussdb -D /opt/software/openGauss/data”,进入到数据库运行调用的第一个函数是main函数,**在“src/gausskernel/process/main/main.cpp”文件中,在main.cpp文件中,主要完成实例Context(上下文)的初始化、本地化设置,根据main.cpp文件的入口参数调用BootStrapProcessMain函数、GucInfoMain函数、PostgresMain函数和PostmasterMain函数。BootStrapProcessMain函数和PostgresMain函数是在initdb场景下初始化数据库使用的。GucInfoMain函数作用是显示GUC(grand unified configuration,配置参数,在数据库中指的是运行参数)参数信息。正常的数据库启动会进入PostmasterMain函数。下面对这个函数进行更详细的介绍。 + + ![](figures/openGauss内核分析(一)-多线程架构启动过程详解4.png) + + 1.MemoryContextInit:内存上下文系统初始化,主要完成对ThreadTopMemoryContext,ErrorContext,AlignContext和ProfileLogging等全局变量的初始化。 + + 2.pg\_perm\_setlocale:设置程序语言环境相关的全局变量。 + + 3.check\_root: 确认程序运行者无操作系统的root权限,防止的意外文件覆盖等问题。 + + 4.如果gaussdb后的第一个参数是—boot,则进行数据库初始化,如果gaussdb后的第一个参数是--single,则调用PostgresMain\(\),进入(本地)单用户版服务端程序。之后,与普通服务器端线程类似,循环等待用户输入SQL语句,直至用户输入EOF(Ctrl+D),退出程序。如果没有指定额外启动选项,程序进入PostmasterMain函数,开始一系列服务器端的正常初始化工作。 + +- **PostmasterMain 函数** + + **下面具体介绍PostmasterMain。** + + ![](figures/openGauss内核分析(一)-多线程架构启动过程详解5.png) + + 1.设置线程号相关的全局变量MyProcPid、PostmasterPid、MyProgName和程序运行环境相关的全局变量IsPostmasterEnvironment。 + + 2.调用postmaster\_mem\_cxt = AllocSetContextCreate\(t\_thrd.top\_mem\_cxt,...\),在目前线程的top\_mem\_cxt下创建postmaster\_mem\_cxt全局变量和相应的内存上下文。 + + 3. MemoryContextSwitchTo\(postmaster\_mem\_cxt\)切换到postmaster\_mem\_cxt内存上下文。 + + 4.调用getInstallationPaths\(\),设置my\_exec\_path(一般即为gaussdb可执行文件所在路径)。 + + 5.调用InitializeGUCOptions\(\),根据代码中各个GUC参数的默认值生成ConfigureNamesBool、ConfigureNamesInt、ConfigureNamesReal、ConfigureNamesString、ConfigureNamesEnum等 GUC参数的全局变量数组,以及统一管理GUC参数的guc\_variables、num\_guc\_variables、size\_guc\_variables全局变量,并设置与具体操作系统环境相关的GUC参数。 + + 6. while \(opt = ...\) SetConfigOption, 若在启动gaussdb时用指定了非默认的GUC参数,则在此时加载至上一步中创建的全局变量中。 + + 7.调用checkDataDir\(\),确认数据库安装成功以及PGDATA目录的有效性。 + + 8.调用CreateDataDirLockFile\(\),创建数据目录的锁文件。 + + 9.调用process\_shared\_preload\_libraries\(\),处理预加载库。 + + 10.为每个ListenSocket创建监听。 + + 11. reset\_shared,设置共享内存和信号,主要包括页面缓存池、各种锁缓存池、WAL日志缓存池、事务日志缓存池、事务(号)概况缓存池、各后台线程(锁使用)概况缓存池、各后台线程等待和运行状态缓存池、两阶段状态缓存池、检查点缓存池、WAL日志复制和接收缓存池、数据页复制和接收缓存池等。在后续阶段创建出的客户端后台线程以及各个辅助线程均使用该共享内存空间,不再单独开辟。 + + 12.将启动时手动设置的GUC参数以文件形式保存下来,以供后续后台服务端线程启动时使用。 + + 13.为不同信号设置handler。 + + 14.调用pgstat\_init\(\),初始化状态收集子系统。 + + 15.调用load\_hba\(\),加载pg\_hba.conf文件,该文件记录了允许连接(指定或全部)数据库的客户端物理机的地址和端口;调用load\_ident\(\),加载pg\_ident.conf文件,该文件记录了操作系统用户名与数据库系统用户名的对应关系,以便后续处理客户端连接时的身份认证。 + + 16.调用 StartupPID = initialize\_util\_thread\(STARTUP\),进行数据一致性校验。对于服务端主机来说,查看pg\_control文件,若上次关闭状态为DB\_SHUTDOWNED且recovery.conf文件没有指定进行恢复,则认为数据一致性成立;否则,根据pg\_control中检查点的redo位置或者recovery.conf文件中指定的位置,读取WAL日志或归档日志进行replay(回放),直至数据达到预期的一致性状,主要函数StartupXLOG。 + + 17. 
最后进入ServerLoop\(\)函数,循环响应客户端连接请求。 + +- **ServerLoop函数** + + **下面来讲ServerLoop函数主流程** + + ![](figures/openGauss内核分析(一)-多线程架构启动过程详解6.png) + + 1.调用gs\_signal\_setmask\(&UnBlockSig, NULL\)和gs\_signal\_unblock\_sigusr2\(\),使得线程可以响应用户或其它线程的、指定的信号集。 + + 2.每隔PM\_POLL\_TIMEOUT\_MINUTE时间修改一次socket文件和socket锁文件的访问和修改时间,以免被操作系统淘汰。 + + 3.判断线程状态(pmState),若为PM\_WAIT\_DEAD\_END,则休眠100毫秒,并且不接收任何连接;否则,通过系统调用poll\(\)或select\(\)来阻塞地读取监听端口上传入的数据,最长阻塞时间PM\_POLL\_TIMEOUT\_SECOND。 + + 4.调用gs\_signal\_setmask\(&BlockSig, NULL\)和gs\_signal\_block\_sigusr2\(\)不再接收外源信号。 + + 5.判断poll\(\)或select\(\)函数的返回值,若小于零,监听出错,服务端进程退出;若大于零,则创建连接ConnCreate\(\),并进入后台服务线程启动流程BackendStartup\(\)。对于父线程,即postmaster线程,在结束BackendStartup\(\)的调用以后,会调用ConnFree\(\),清除连接信息;若poll\(\)或select\(\)的返回值为零,即没有信息传入,则不进行任何操作。 + + 6.调用ADIO\_RUN\(\)、ADIO\_END\(\) ,若AioCompleters没有启动,则启动之。 + + 7.检查各个辅助线程的线程号是否为零,若为零,则调用initialize\_util\_thread启动。 + + 以非线程池模式为例,介绍线程的启动逻辑。BackendStartup函数是通过调用initialize\_worker\_thread\(WORKE,port\)创建一个后台线程处理客户请求。后台线程的启动函数initialize\_util\_thread和工作线程的启动函数initialize\_worker\_thread,最后都是调用initialize\_thread函数完成线程的启动。 + + ![](figures/openGauss内核分析(一)-多线程架构启动过程详解7.png) + + 1.initialize\_thread函数调用gs\_thread\_create函数创建线程,调用InternalThreadFunc函数处理线程。 + + ``` + ThreadId initialize_thread(ThreadArg* thr_argv) + { + + + gs_thread_t thread; + int error_code = gs_thread_create(&thread, InternalThreadFunc, 1, (void*)thr_argv); + if (error_code != 0) { + ereport(LOG, + (errmsg("can not fork thread[%s], errcode:%d, %m", + GetThreadName(thr_argv->m_thd_arg.role), error_code))); + gs_thread_release_args_slot(thr_argv); + return InvalidTid; + } + + + return gs_thread_id(thread); + } + ``` + + 2.InternalThreadFunc函数根据角色调用GetThreadEntry函数,GetThreadEntry函数直接以角色为下标,返回对应GaussdbThreadEntryGate数组对应的元素。数组的元素是处理具体任务的回调函数指针,指针指向的函数为GaussDbThreadMain。 + + ``` + static void* InternalThreadFunc(void* args) + { + knl_thread_arg* thr_argv = (knl_thread_arg*)args; + gs_thread_exit((GetThreadEntry(thr_argv->role))(thr_argv)); + return (void*)NULL; + } + GaussdbThreadEntry GetThreadEntry(knl_thread_role role) + { + Assert(role > MASTER && role < THREAD_ENTRY_BOUND); + return GaussdbThreadEntryGate[role]; + } + static GaussdbThreadEntry GaussdbThreadEntryGate[] = {GaussDbThreadMain, + GaussDbThreadMain, + GaussDbThreadMain, + GaussDbThreadMain, + ......}; + ``` + + 3.在GaussDbThreadMain函数中,首先初始化线程基本信息,Context和信号处理函数,接着就是根据thread\_role角色的不同调用不同角色的处理函数,进入各个线程的main函数,角色为WORKER会进入PostgresMain函数,下面具体介绍PostgresMain函数。 + +- **PostgresMain函数** + + ![](figures/openGauss内核分析(一)-多线程架构启动过程详解8.png) + + 1.process\_postgres\_switches\(\),加载传入的启动选项和GUC参数。 + + 2.为不同信号设置handler。 + + 3.调用sigdelset\(&BlockSig, SIGQUIT\),允许响应SIGQUIT信号。 + + 4.调用BaseInit\(\),初始化存储管理系统和页面缓存池计数。 + + 5.调用on\_shmem\_exit\(\),设置线程退出前需要进行的内存清理动作。这些清理动作构成一个链表(on\_shmem\_exit\_list全局变量),每次调用该函数都向链表尾端添加一个节点,链表长度由on\_shmem\_exit\_index记录,且不可超过MAX\_ON\_EXITS宏。在线程退出时,从后往前调用各个节点中的动作(函数指针),完成清理工作。 + + 6.调用gs\_signal\_setmask \(&UnBlockSig\),设置屏蔽的信号类型。 + + 7.调用InitBackendWorker进行统计系统初始化、syscache初始化工作。 + + 8. BeginReportingGUCOptions如有需要则打印GUC参数。 + + 9.调用on\_proc\_exit\(\),设置线程退出前需要进行的线程清理动作。设置和调用机制与on\_shmem\_exit\(\)类似。 + + 10.调用process\_local\_preload\_libraries\(\),处理GUC参数设定后的预加载库。 + + 11. 
AllocSetContextCreate创建MessageContext、RowDescriptionContext、MaskPasswordCtx上下文。 + + 12.调用sigsetjmp\(\),设置longjump点,若后续查询执行中出错,在某些情况下可以返回此处重新开始。 + + 13.调用gs\_signal\_unblock\_sigusr2\(\),允许线程响应指定的信号集。 + + 14.然后进入for循环,进行查询执行。 + + ![](figures/openGauss内核分析(一)-多线程架构启动过程详解9.png) + + 1.调用pgstat\_report\_activity\(\)、pgstat\_report\_waitstatus\(\),告诉统计系统后台线程正处于idle状态。 + + 2.设置全局变量DoingCommandRead = true。 + + 3.调用ReadCommand\(\),读取客户端SQL语句。 + + 4.设置全局变量DoingCommandRead=false。 + + 5.若在上述过程中收到SIGHUP信号,表示线程需要重新加载修改过的postgresql.conf配置文件。 + + 6.进入switch \(firstchar\),根据接收到的信息进行分支判断。 + + +## **04思考如何新增一个辅助线程** + +**参考其他线程完成** + + + + + + + + + +

- 涉及修改文件:Postmaster.cpp
- 涉及修改函数:
  - GaussdbThreadEntryGate – 定义
  - ServerLoop – 启动线程
  - Reaper – 回收线程
  - GaussDbThreadMain – 入口函数
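参照上表,可用下面一段可独立编译的示意代码理解这几个扩展点(假设性的简化模型,并非 openGauss 源码;MY_NEW_AUX、entryGate 等名称均为虚构):新增一个辅助线程,本质上是新增一个线程角色,并在“角色 → 入口函数”的分发数组(对应源码中的 GaussdbThreadEntryGate)以及线程启动、回收逻辑中补充对应分支。

```
#include <cstdio>

/* 示意代码:模仿"角色 -> 入口函数"的分发数组,对应 Postmaster.cpp 中的
 * GaussdbThreadEntryGate;MY_NEW_AUX 为虚构的新增角色 */
typedef int (*ThreadEntry)(void*);

enum ThreadRole { WORKER = 0, WALWRITER, MY_NEW_AUX, ROLE_BOUND };

static int WorkerEntry(void*)    { std::printf("worker entry\n");    return 0; }
static int WalWriterEntry(void*) { std::printf("walwriter entry\n"); return 0; }
static int MyNewAuxEntry(void*)  { std::printf("new aux entry\n");   return 0; }

static ThreadEntry entryGate[ROLE_BOUND] = {WorkerEntry, WalWriterEntry, MyNewAuxEntry};

int main(void)
{
    /* ServerLoop 按角色拉起线程时,即按此方式取到对应入口函数 */
    entryGate[MY_NEW_AUX](NULL);
    return 0;
}
```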
+ diff --git "a/content/zh/post/2022/openGauss\345\215\225\346\234\272\351\203\250\347\275\262.md" "b/content/zh/post/2022/openGauss\345\215\225\346\234\272\351\203\250\347\275\262.md" new file mode 100644 index 0000000000000000000000000000000000000000..9c7a6290729a2791f56c8cce53f81468fde8633e --- /dev/null +++ "b/content/zh/post/2022/openGauss\345\215\225\346\234\272\351\203\250\347\275\262.md" @@ -0,0 +1,404 @@ ++++ + +title = "openGauss单机部署" + +date = "2021-12-11" + +tags = [ "openGauss单机部署"] + +archives = "2021-11" + +author = "可达" + +summary = "openGauss单机部署" + +img = "/zh/post/2022/title/img1.png" + +times = "12:30" + ++++ + + + +# openGauss单机部署 + +## 一、安装环境 + +1. 操作系统:虚拟机VMware、CentOS7.9 +2. 环境设置: + - (1)虚拟机内存3G、磁盘100G + - (2)系统版本修改 + + 一开始使用了centos8,无法安装,因此降低版本,选用7.9后依然存在一些问题,因此修改/etc/redhat-release文件中系统版本为CentOS Linux release 7.6\(Core\) + + - (3)配置YUM源 + + ①删除系统自带yum源 + + ``` + rm -rf /etc/yum.repos.d/* + ``` + + ②下载阿里云yum源 + + ``` + wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo + ``` + + ③生成仓库缓存 + + ``` + yum makecache + ``` + + - (4)安装依赖包 + + ``` + 1 yum install ‐y libaio‐devel flex bison ncurses‐devel glibc.devel patch lsb_release + 2 yum install ‐y openssl* python3 + ``` + + - (5)关闭SELINUX和Firewall + + ``` + 1 setenforce 0 + 2 systemctl disable firewalld.service + 3 systemctl stop firewalld.service + ``` + + - (6)关闭交换内存 + + ``` + swapoff -a + ``` + + - (7)关闭透明大页 + + ``` + 1 vim /etc/rc.d/rc.local + 2 if test ‐f /sys/kernel/mm/transparent_hugepage/enabled; + 3 then + 4 echo never > /sys/kernel/mm/transparent_hugepage/enabled + 5 fi + 6 if test ‐f /sys/kernel/mm/transparent_hugepage/defrag; + 7 then + 8 echo never > /sys/kernel/mm/transparent_hugepage/defrag + 9 fi + ``` + + - (8)修改主机名 + + ``` + 1 echo "node1" > /etc/hostname + 2 echo “ 192.168.17.129 node1” >>/etc/hosts + ``` + + + +## 二、安装详细步骤 + +1. Opengauss安装 + - (1)下载opengauss安装包及创建用户组和目录 + + ``` + 1 groupadd dbgrp + 2 useradd -g dbgrp -d /home/omm -m -s /bin/bash omm + 3 echo "omm" | passwd -‐stdin omm + 4 mkdir -p /opt/software/openGauss + 5 chmod 755 -R /opt/software + 6 chown -R omm:dbgrp /opt/software/openGauss + cd /opt/software/openGauss/ + 7 wget https://opengauss.obs.cn-south-1.myhuaweicloud.com/2.0.0/x86/openGauss-2.0.0-CentOS-64bit-all.tar.gz + 8 tar -zxvf openGauss-2.0.0-CentOS-64bit-all.tar.gz + 9 tar -zxvf openGauss-2.0.0-CentOS-64bit-om.tar.gz + ``` + + - (2)单机xml配置文件 + + 首先从如下地址复制文件至当前位置 + + ``` + cp script/gspylib/etc/conf/cluster_config_template.xml . + ``` + + 修改配置文件具体如下,配置文件中要注意配置一下几个参数:nodeNAMES、backips + + ``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + ``` + + - (3)设置lib库 + + +``` +vim .bashrc +添加 +export GPHOME=/opt/huawei/install/om +export PATH=$GPHOME/script/gspylib/pssh/bin:$GPHOME/script:$PATH +export LD_LIBRARY_PATH=$GPHOME/lib:$LD_LIBRARY_PATH +export PYTHONPATH=$GPHOME/lib +export GAUSSHOME=/opt/huawei/install/app +export PATH=$GAUSSHOME/bin:$PATH +export LD_LIBRARY_PATH=$GAUSSHOME/lib:$LD_LIBRARY_PATH +export S3_CLIENT_CRT_FILE=$GAUSSHOME/lib/client.crt +export GAUSS_VERSION=2.0.0 +export PGHOST=/opt/huawei/tmp +export GAUSSLOG=/opt/huawei/log/omm +umask 077 +export GAUSS_ENV=2 +export GS_CLUSTER_NAME=singlenode +``` + +1. (4)执行交互式初始化 + - ①预安装,操作如下: + + ``` + 1 cd /opt/software/openGauss/script + 2 root@node1 script]#python3 gs_preinstall -U omm -G dbgrp -X /opt/software/openGauss/cluster_config_template.xml + Parsing the configuration file. + Successfully parsed the configuration file. + Installing the tools on the local node. 
+ Successfully installed the tools on the local node. + Setting pssh path + Successfully set core path. + Are you sure you want to create the user[omm] and create trust for it (yes)? yes + Preparing SSH service. + Successfully prepared SSH service. + Checking OS software. + Successfully check os software. + Checking OS version. + Successfully checked OS version. + Creating cluster's path. + Successfully created cluster's path. + Setting SCTP service. + Successfully set SCTP service. + Set and check OS parameter. + Setting OS parameters. + Successfully set OS parameters. + Warning: Installation environment contains some warning messages. + Please get more details by "/opt/software/openGauss/script/gs_checkos -i A -h node1 --detail". + Set and check OS parameter completed. + Preparing CRON service. + Successfully prepared CRON service. + Setting user environmental variables. + Successfully set user environmental variables. + Setting the dynamic link library. + Successfully set the dynamic link library. + Setting Core file + Successfully set core path. + Setting pssh path + Successfully set pssh path. + Set ARM Optimization. + No need to set ARM Optimization. + Fixing server package owner. + Setting finish flag. + Successfully set finish flag. + Preinstallation succeeded. + ``` + + 当出现“Preinstallation succeeded.”时,预安装成功。 + + ②安装 + + 进入script目录后进行正式安装,命令如下,其中“/opt/software/openGauss/cluster\_config\_template.xml”为前几步中编辑的配置文件。 + + 此过程需要输入密码,且设置的密码要符合复杂度要求如下: + + 最少包含8个字符; + + 不能和用户名和当前密码(ALTER)相同,或和当前密码反序; + + 至少包含大写字母(A-Z),小写字母(a-z),数字,非字母数字字符(限定为\~!@\#$%^&\*\(\)-\_=+|\[\{\}\];:,<.\>/?)四类字符中的三类字符: + + ``` + [omm@node1 openGauss]$ cd script/ + [omm@node1 script]$ gs_install -X /opt/software/openGauss/cluster_config_template.xml + Parsing the configuration file. + Check preinstall on every node. + Successfully checked preinstall on every node. + Creating the backup directory. + Successfully created the backup directory. + begin deploy.. + Installing the cluster. + begin prepare Install Cluster.. + Checking the installation environment on all nodes. + begin install Cluster.. + Installing applications on all nodes. + Successfully installed APP. + begin init Instance.. + encrypt cipher and rand files for database. + Please enter password for database: + Please repeat for database: + begin to create CA cert files + The sslcert will be generated in /opt/huawei/install/app/sslcert/om + Cluster installation is completed. + Configuring. + Deleting instances from all nodes. + Successfully deleted instances from all nodes. + Checking node configuration on all nodes. + Initializing instances on all nodes. + Updating instance configuration on all nodes. + Check consistence of memCheck and coresCheck on database nodes. + Configuring pg_hba on all nodes. + Configuration is completed. + Successfully started cluster. + Successfully installed application. + end deploy.. + ``` + + 测试安装是否成功,首先需要使数据库处于开启状态,然后输入”gsql -d postgres -p 26000”命令使数据库在本地运行,其中-p 为数据库端口dataPortBase,具体数值在前述过程中xml配置文件中确定,这里为26000。 + + ![](figures/这里为26000.png) + + ![](figures/这里为260001.png) + +2. 
Opengauss连接设置 + - (1)安装java,确认jdk版本为1.8 + - (2)从官网下载jdbc压缩包后,将其解压至路径/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.302.b08-0.el7\_9.x86\_64/jre/lib/ext下 + - (3)配置数据库服务器中的白名单与监听名单 + + - ①以操作系统用户omm登录数据库主节点 + - ②执行如下命令增加对外提供服务的网卡IP或者主机名(英文逗号分隔),其中NodeName为当前节点名称,如: + + ``` + gs_guc reload -N NodeName -I all -c "listen_addresses='localhost,192.168.17.129'" + ``` + + - ③执行如下命令在数据库主节点配置文件中增加一条认证规则。(这里假设客户端IP地址为192.168.17.129,即远程连接的机器的IP地址) + + ``` + gs_guc reload -N all -I all -h "host all yushan 192.168.17.129/32 sha256" + - -N all表示openGauss中的所有主机。 + - -I all表示主机中的所有实例。 + - -h表示指定需要在“pg_hba.conf”增加的语句。 + - all表示允许客户端连接到任意的数据库。 + - yushan表示连接数据库的用户。 + - 192.168.17.129/32表示只允许IP地址为192.168.17.129的主机连接。在使用过程中,请根据用户的网络进行配置修改。32表示子网掩码为1的位数,即255.255.255.255 + - sha256表示连接时jack用户的密码使用sha256算法加密。 + ``` + + 与之效果相同的代替操作: + + 在/opt/huawei/install/data/db1路径(创建的节点名叫db1)下编辑pg\_hba.conf文件 + + ![](figures/下编辑pg_hba-conf文件.png) + + (4)通过编写java程序即可连接,example如下 + + ``` + import java.sql.Connection; + import java.sql.DriverManager; + import java.sql.PreparedStatement; + import java.sql.SQLException; + import java.sql.Statement; + import java.sql.CallableStatement; + public class test{ + public static Connection getConnect(String username, String passwd) + { + //驱动类。 + String driver = "org.postgresql.Driver"; + //数据库连接描述符。 + String sourceURL = "jdbc:postgresql://127.0.0.1:26000/postgres"; + Connection conn = null; + + try + { + //加载驱动。 + Class.forName(driver); + } + catch( Exception e ) + { + e.printStackTrace(); + return null; + } + + try + { + //创建连接。 + conn = DriverManager.getConnection(sourceURL, username, passwd); + System.out.println("Connection succeed!"); + } + catch(Exception e) + { + e.printStackTrace(); + return null; + } + + return conn; + }; + public static void main(String[] args) { + // TODO Auto-generated method stub + Connection conn = getConnect("yushan", "1qaz@wsx"); + //BatchInsertData(conn); + try { + conn.close(); + } catch (SQLException e) { + e.printStackTrace(); + } + } + } + ``` + + 编译执行程序后,如图,连接成功。 + + ![](figures/编译执行程序后.png) + + + +## 三、安装过程中碰到的问题与解决办法 + +1. 问题1 + + 在安装结束后,准备运行后,发现gsom 无法启动。 + + **解决过程:** + + 猜测可能是内存不足,虚拟机一开始设置的内存为1G,查阅相关博客发现,1G内存对于企业版不足,后将虚拟机内存设置为3G。 + +2. 问题2 + + 运行gsom后报错如下 + + ![](figures/运行gsom后报错如下.png) + + **解决过程:** + + 检查发现pg\_hba.conf文件配置出错,修改如下 + + ![](figures/修改如下.png) + +3. 
问题3 + + 一开始安装的虚拟机为centos8,进行预安装后发现不支持该版本操作系统。 + + **解决过程:** + + 切换为centos7.9,但依然报错,因此修改/etc/redhat-release文件中系统版本为CentOS Linux release 7.6\(Core\) + + diff --git "a/content/zh/post/2022/openGauss\345\220\221\351\207\217\345\214\226\345\274\225\346\223\216--hash-join.md" "b/content/zh/post/2022/openGauss\345\220\221\351\207\217\345\214\226\345\274\225\346\223\216--hash-join.md" new file mode 100644 index 0000000000000000000000000000000000000000..59879cda175bba51bc992ceef315819c22396d40 --- /dev/null +++ "b/content/zh/post/2022/openGauss\345\220\221\351\207\217\345\214\226\345\274\225\346\223\216--hash-join.md" @@ -0,0 +1,181 @@ ++++ + +title = "openGauss向量化引擎--hash join" + +date = "2022-01-07" + +tags = [ "openGauss向量化引擎--hash join"] + +archives = "2022-01" + +author = "yanzongshuaiDBA" + +summary = "openGauss向量化引擎--hash join" + +img = "/zh/post/2022/title/img1.png" + +times = "12:30" + ++++ + +# openGauss向量化引擎--hash join + +传统的行执行器采用一次一个元组的执行模式,执行过程中CPU大部分时间没有用了处理数据,都用在了遍历执行树等操作,导致CPU的有效利用率较低。面向OLAP场景大量函数调用次数,需要巨大开销,为解决次问题,openGauss中开发了向量化引擎。采用一次一批元组的执行模式,可大幅减少遍历执行节点及调用函数的开销。 + +本文主要介绍hash join如何进行向量化的。 + +## 算子之间数据传递结构 + +算子之间数据组织及传递结构是VectorBatch: + +``` +class VectorBatch : public BaseObject { + +public: + // number of rows in the batch. + int m_rows; + // number of columns in the batch. + int m_cols; + // Shall we check the selection vector. + bool m_checkSel; + // Selection vector; + bool* m_sel; + // ScalarVector + ScalarVector* m_arr; + // SysColumns + SysColContainer* m_sysColumns; + // Compress buffer + StringInfo m_pCompressBuf; +... +} +``` + +![](figures/主要由3个数组在表示.png) + +主要由3个数组在表示:m\_vals为列值数组,m\_flag为对应列的行值是否为NULL,m\_sel为该行是否满足过滤条件。 + +## VecHashJoin + +向量化hash join的算子是VecHashJoin。其执行函数是ExecVecHashJoin,分为2个阶段:HASH\_BUILD和HASH\_PROBE。 + +``` +VectorBatch* ExecVecHashJoin(VecHashJoinState* node) +{ + int64 rows = 0; + for (;;) { + switch (node->joinState) { + case HASH_BUILD: { + if (node->hashTbl == NULL) + node->hashTbl = New(CurrentMemoryContext) HashJoinTbl(node); + ((HashJoinTbl*)(node->hashTbl))->Build();//构建hash表 + rows = ((HashJoinTbl*)(node->hashTbl))->getRows(); + } break; + case HASH_PROBE: { + result = ((HashJoinTbl*)(node->hashTbl))->Probe();//进行hash探测并构建join结果 + return result; + } + default: + break; + } + } +} +``` + +## HASH\_BUILD + +其中build的阶段又分为2个小阶段:1)获取内表的batch,然后通过m\_funBuild:申请hashCell \*cell\_arr连续内存,每个节点是一个hashCell,大小是m\_cellSize,共有batch中记录的行数个。然后将其接入m\_cache链表。然后将batch中列值依次存入cell\_arr中。2)通过PrepareProbe函数构建Hash表,并将cell\_arr中值放到hash表中。 + +``` +void HashJoinTbl::Build() +{ + for (;;) { + batch = VectorEngine(inner_node);//获取内表batch + if (unlikely(BatchIsNull(batch))) + break; + RuntimeBinding(m_funBuild, m_strategy)(batch); + } + PushDownFilterIfNeed(); + PrepareProbe(); + ... 
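+    // 补充注释,对应上文两个小阶段:
+    // 1) m_funBuild:为batch中的每行申请hashCell(组成连续内存cell_arr)并挂入m_cache链表,
+    //    再把batch中的列值依次物化到cell_arr;
+    // 2) PrepareProbe():按join key列计算哈希值,把cell_arr中的元组放入m_data哈希表。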
+} +``` + +第1阶段: + +![](figures/第1阶段.png) + +第2阶段: + +![](figures/第2阶段.png) + +第2阶段,通过m\_keyIdx数组得到哪一列是join key,将cell\_arr中该列值拿出来通过m\_innerHashFuncs函数计算hash值,将其保存到m\_cacheLoc\[\]数组中,作为m\_data数组下标,通过这种方式将内表列值放到hash表中。 + +## HASH\_PROBE + +通过probeHashTable进行探测,并join。也分为2个小阶段:1)外表hash阶段:首先获取外表的batch,通过m\_outerHashFuncs hash函数将外表的join key列hash出的值放到m\_cacheLoc数组中,作为hash表数组的下标:m\_hashTbl-\>m\_data\[m\_cacheLoc\[i\]\]。2)join阶段:定位到的m\_hashTbl-\>m\_data\[m\_cacheLoc\[i\]\]中列值和外表中列值是否相同,若相等则通过m\_keyMatch\[\]数组标记。最后将m\_keyMatch\[\]数组标记为1的列值构建成向量batch,并返回。 + +``` +VectorBatch* HashJoinTbl::probeHashTable(hashSource* probSource) +{ + VectorBatch* res_batch = NULL; + while (true) { + switch (m_probeStatus) { + case PROBE_FETCH: + //获取外表batch + m_outRawBatch = probSource->getBatch(); + if (BatchIsNull(m_outRawBatch)) { + } else { + int row = m_outRawBatch->m_rows; + int mask = m_hashTbl->m_size - 1; +hashBatch(m_outRawBatch, m_outKeyIdx, m_cacheLoc, m_outerHashFuncs); + for (int i = 0; i < row; i++) { + m_cacheLoc[i] = m_outRawBatch->m_arr[icol].m_vals[i] & mask; + m_cellCache[i] = m_hashTbl->m_data[m_cacheLoc[i]]; + m_match[i] = false; /* flag all the row no match */ + m_keyMatch[i] = true; + } + ... + } + break; + case PROBE_DATA: + res_batch = (this->*m_joinFun)(m_outRawBatch); + if (!BatchIsNull(res_batch)) + return res_batch; + break; + case PROBE_FINAL: + return endJoin(); + default: + break; + } + } +} +// +VectorBatch* HashJoinTbl::innerJoinT(VectorBatch* batch)//外部batch +{ + while (m_doProbeData) { + last_build_idx = 0; + RuntimeBinding(m_matchKeyFunction, i)(&batch->m_arr[m_outKeyIdx[i]], row, m_keyIdx[i], i); + for (row_idx = last_build_idx; row_idx < row; row_idx++) { + if (m_keyMatch[row_idx]) { + val = m_cellCache[row_idx]->m_val; + for (i = 0; i < m_innerBatch->m_cols; i++) { + p_vector = &m_innerBatch->m_arr[i]; + + p_vector->m_vals[result_row] = val[i].val; + p_vector->m_flag[result_row] = val[i].flag; + } + for (i = 0; i < m_outerBatch->m_cols; i++) { + p_vector = &m_outerBatch->m_arr[i]; + p_vector->m_vals[result_row] = batch->m_arr[i].m_vals[row_idx]; + p_vector->m_flag[result_row] = batch->m_arr[i].m_flag[row_idx]; + } + result_row++; + } + } + } + return buildResult(m_innerBatch, m_outerBatch, true); +} +``` + +![](figures/outerBatch1.png) + diff --git "a/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-PITR\346\201\242\345\244\215.md" "b/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-PITR\346\201\242\345\244\215.md" new file mode 100644 index 0000000000000000000000000000000000000000..c1ba9f3b49cfadcd82f4e330ab978138dffd842f --- /dev/null +++ "b/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-PITR\346\201\242\345\244\215.md" @@ -0,0 +1,116 @@ ++++ + +title = "opengauss数据库-PITR恢复" + +date = "2022-01-05" + +tags = [ "opengauss数据库-PITR恢复"] + +archives = "2022-01" + +author = "周琦放" + +summary = "opengauss数据库-PITR恢复" + +img = "/zh/post/2022/title/img17.png" + +times = "12:30" + ++++ + +# opengauss数据库-PITR恢复 + +当数据库崩溃或希望回退到数据库之前的某一状态时,openGauss的即时恢复功能(Point-In-Time Recovery,简称PITR)可以支持恢复到备份归档数据之后的任意时间点 + +## 前提条件 + +全库备份文件:base.tar.gz; 归档的wal日志文件,归档备份目录/ogarchive + +## 备份数据文件 + +根据环境变量找到当前的数据文件目录,并重命名数据文件目录 + +请注意案例中的 /opt/huawei/install/data/ 为本示例中的数据文件目录,请根据实际情况修改此输入值 + +``` +[omm@ogsta ~]$ echo $DATADIR +/opt/huawei/install/data/dn +[omm@ogsta ~]$ cd /opt/huawei/install/data/ +[omm@ogsta data]$ mv dn/ dn_bak +[omm@ogsta data]$ ll +total 4 +drwx------ 23 omm dbgrp 4096 Jan 4 13:10 dn_bak +[omm@ogsta data]$ mkdir dn +``` + +- 
全量备份解压 + + base.tar.gz压缩文件是通过gs\_basebackup 压缩,因此需要采用两次解压,gunzip和gs\_tar + + ``` + [omm@ogsta ogarchive]$ gunzip base.tar.gz + [omm@ogsta ogarchive]$ gs_tar -D /opt/huawei/install/data/dn -F base.tar + [omm@ogsta ogarchive]$ cd /opt/huawei/install/data/dn + [omm@ogsta dn]$ ls + asp_data cacert.pem mot.conf pg_hba.conf pg_multixact pg_snapshots pg_xlog server.crt term_file + backup_label full_backup_label pg_clog pg_hba.conf.bak pg_notify pg_stat_tmp postgresql.conf server.key undo + backup_label.old global pg_csnlog pg_hba.conf.lock pg_perf pg_tblspc postgresql.conf.bak server.key.cipher + base gs_profile pg_ctl.lock pg_ident.conf pg_replslot pg_twophase postgresql.conf.lock server.key.rand + build_completed.done gswlm_userinfo.cfg pg_errorinfo pg_llog pg_serial PG_VERSION rewind_lable sql_monitor + [omm@ogsta dn]$ + ``` + + +## 清空pg\_xlog + +该目录在数据文件目录中 + +``` +[omm@ogsta pg_xlog]$ pwd +/opt/huawei/install/data/dn/pg_xlog +[omm@ogsta pg_xlog]$ ll +total 32768 +-rw------- 1 omm dbgrp 16777216 Jan 4 13:38 000000010000000000000013 +-rw------- 1 omm dbgrp 16777216 Jan 4 13:38 000000010000000000000014 +drwx------ 2 omm dbgrp 80 Jan 4 13:38 archive_status +[omm@ogsta pg_xlog]$ rm -rf * +[omm@ogsta pg_xlog]$ ll +total 0 +``` + +## 配置recovery.conf文件 + +该配置文件请在数据文件目录中修改 + +``` +[omm@ogsta dn]$ pwd +/opt/huawei/install/data/dn +[omm@ogsta dn]$ cat recovery.conf +restore_command = 'cp /ogarchive/%f %p' +``` + +## 数据库启动 + +``` +gs_ctl -D /opt/huawei/install/data/dn start +``` + +这时可能遇到failed to translate name to xlog in GetOldestXLOGSegNo报错,需要如下的解决方法 + +![](figures/这时可能遇到failed-to.png) + +## 复制归档日志文件 + +以上的报错,是由于日志文件问题,根据recovery.conf文件内容,只是把归档目录中的文件自动复制到pg\_xlog目录中,不如直接手动把归档日志文件复制到pg\_xlog目录中 + +``` +cd /ogarchive/ +cp * /opt/huawei/install/data/dn/pg_xlog/ +``` + +## 重启 + +``` +gs_ctl -D /opt/huawei/install/data/dn start +``` + diff --git "a/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-\344\270\273\344\273\216\346\220\255\345\273\272.md" "b/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-\344\270\273\344\273\216\346\220\255\345\273\272.md" new file mode 100644 index 0000000000000000000000000000000000000000..487ebe8a49dedcb03292584b60381c9c4e005d87 --- /dev/null +++ "b/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-\344\270\273\344\273\216\346\220\255\345\273\272.md" @@ -0,0 +1,421 @@ ++++ + +title = "opengauss数据库-主从搭建" + +date = "2021-12-31" + +tags = [ "opengauss数据库-主从搭建"] + +archives = "2021-12" + +author = "周琦放" + +summary = "opengauss数据库-主从搭建" + +img = "/zh/post/2022/title/img22.png" + +times = "12:30" + ++++ + +# opengauss数据库-主从搭建 + +## 环境说明 + +![](figures/zh-cn_image_0000001197720014.png) + +## 软件下载 + +opengauss 下载地址:https://opengauss.org/zh/download.html + +## 环境准备 + +- 关闭 SELINUX + +修改 /etc/selinux/config文件中的“SELINUX”值为“disabled + +``` +[root@ogpri openGauss]# more /etc/selinux/config +# This file controls the state of SELinux on the system. +# SELINUX= can take one of these three values: +# enforcing - SELinux security policy is enforced. +# permissive - SELinux prints warnings instead of enforcing. +# disabled - No SELinux policy is loaded. +SELINUX=disabled +# SELINUXTYPE= can take one of three values: +# targeted - Targeted processes are protected, +# minimum - Modification of targeted policy. Only selected processes are protected. +# mls - Multi Level Security protection. 
+SELINUXTYPE=targeted
+```
+
+- 关闭防火墙并禁止开机自启
+
+    ```
+    systemctl disable firewalld.service
+    systemctl stop firewalld.service
+    ```
+
+- 设置root用户远程登录
+
+    将PermitRootLogin改为yes。
+
+    ```
+    vim /etc/ssh/sshd_config
+    PermitRootLogin yes
+    ```
+
+    修改后重启sshd使其生效
+
+    ```
+    systemctl restart sshd.service
+    ```
+
+- 关闭透明大页
+
+    ```
+    echo never > /sys/kernel/mm/transparent_hugepage/enabled
+    ```
+
+
+## 数据库安装包依赖
+
+root用户下执行,所有节点都要安装,建议配置本地yum源
+
+```
+yum -y install libaio-devel flex bison ncurses-devel glibc-devel patch redhat-lsb-core readline-devel
+```
+
+- Python3依赖包
+
+    ```
+    yum install bzip2-devel db4-devel gdbm-devel libpcap-devel openssl openssl-devel sqlite-devel tk-devel xz-devel zlib* gcc*
+    ```
+
+- 安装python3
+
+    ```
+    tar -xvf Python-3.6.8.tar.xz
+    cd Python-3.6.8/
+    ./configure --prefix=/usr/local/python3 --enable-shared CFLAGS=-fPIC
+    make && make install
+    ```
+
+- 设置python3软链接
+
+    ```
+    ln -s /usr/local/python3/bin/python3 /usr/bin/python3
+    ln -s /usr/local/python3/bin/pip3 /usr/bin/pip3
+    ```
+
+- 设置python3环境变量
+
+    ```
+    vi /etc/profile
+    ```
+
+- 在文件末尾追加如下内容,保存退出后使其生效(PYTHON\_HOME需与上面的安装前缀/usr/local/python3保持一致)
+
+    ```
+    export PYTHON_HOME=/usr/local/python3
+    export PATH=$PATH:$PYTHON_HOME/bin
+    :wq
+    source /etc/profile
+    ```
+
+- 验证安装结果
+
+    ```
+    [root@ogpri ~]# python3
+    Python 3.6.8 (default, Dec 27 2021, 21:52:53)
+    [GCC 4.8.5 20150623 (Red Hat 4.8.5-36)] on linux
+    Type "help", "copyright", "credits" or "license" for more information.
+    >>>
+    ```
+
+
+## 安装openGauss数据库软件
+
+数据库软件安装,如没有特殊说明,均在主节点执行,执行用户为root
+
+- 创建数据库软件目录
+
+    ```
+    mkdir -p /opt/software/openGauss
+    chmod 755 -R /opt/software
+    ```
+
+
+- 创建XML配置文件
+
+    如果是验证安装测试使用,建议只修改其中的ip地址和hostname。
+
+    ```
+    vi /opt/software/openGauss/cluster_config.xml
+    ```
+
+    (cluster\_config.xml的具体内容此处从略,可参考安装包自带的模板cluster\_config\_template.xml,将节点名改为ogpri、ogsta并填入对应IP)
+
+- 预安装
+
+    在安装过程中请输入root,omm两个用户的密码
+
+    ```
+    /opt/software/openGauss/script/gs_preinstall -U omm -G dbgrp -X /opt/software/openGauss/cluster_config.xml
+    ```
+
+    安装日志如下:
+
+    ```
+    [root@ogpri script]# /opt/software/openGauss/script/gs_preinstall -U omm -G dbgrp -X /opt/software/openGauss/cluster_config.xml
+    Parsing the configuration file.
+    Successfully parsed the configuration file.
+    Installing the tools on the local node.
+    Successfully installed the tools on the local node.
+    Are you sure you want to create trust for root (yes/no)? yes
+    Please enter password for root.
+    Password:
+
+    Creating SSH trust for the root permission user.
+    Checking network information.
+    All nodes in the network are Normal.
+    Successfully checked network information.
+    Creating SSH trust.
+    Creating the local key file.
+    Successfully created the local key files.
+    Appending local ID to authorized_keys.
+    Successfully appended local ID to authorized_keys.
+    Updating the known_hosts file.
+    Successfully updated the known_hosts file.
+    Appending authorized_key on the remote node.
+    Successfully appended authorized_key on all remote node.
+    Checking common authentication file content.
+    Successfully checked common authentication content.
+    Distributing SSH trust file to all node.
+    Successfully distributed SSH trust file to all node.
+    Verifying SSH trust on all hosts.
+    Successfully verified SSH trust on all hosts.
+    Successfully created SSH trust.
+    Successfully created SSH trust for the root permission user.
+    Setting pssh path
+    Successfully set core path.
+    Distributing package.
+    Begin to distribute package to tool path.
+    Successfully distribute package to tool path.
+    Begin to distribute package to package path.
+    Successfully distribute package to package path.
+    Successfully distributed package.
+    Are you sure you want to create the user[omm] and create trust for it (yes/no)? yes
+    Preparing SSH service.
+    Successfully prepared SSH service.
+    Installing the tools in the cluster.
+    Successfully installed the tools in the cluster.
+    Checking hostname mapping.
+    Successfully checked hostname mapping.
+    Creating SSH trust for [omm] user.
+    Please enter password for current user[omm].
+    Password:
+
+    Checking network information.
+    All nodes in the network are Normal.
+    Successfully checked network information.
+    Creating SSH trust.
+    Creating the local key file.
+    Successfully created the local key files.
+    Appending local ID to authorized_keys.
+    Successfully appended local ID to authorized_keys.
+    Updating the known_hosts file.
+    Successfully updated the known_hosts file.
+    Appending authorized_key on the remote node.
+    Successfully appended authorized_key on all remote node.
+    Checking common authentication file content.
+    Successfully checked common authentication content.
+    Distributing SSH trust file to all node.
+    Successfully distributed SSH trust file to all node.
+    Verifying SSH trust on all hosts.
+    Successfully verified SSH trust on all hosts.
+    Successfully created SSH trust.
+    Successfully created SSH trust for [omm] user.
+    Checking OS software.
+    Successfully check os software.
+    Checking OS version.
+    Successfully checked OS version.
+    Creating cluster's path.
+    Successfully created cluster's path.
+    Set and check OS parameter.
+    Setting OS parameters.
+    Successfully set OS parameters.
+    Warning: Installation environment contains some warning messages.
+    Please get more details by "/opt/software/openGauss/script/gs_checkos -i A -h ogpri,ogsta --detail".
+    Set and check OS parameter completed.
+    Preparing CRON service.
+    Successfully prepared CRON service.
+    Setting user environmental variables.
+    Successfully set user environmental variables.
+    Setting the dynamic link library.
+    Successfully set the dynamic link library.
+    Setting Core file
+    Successfully set core path.
+    Setting pssh path
+    Successfully set pssh path.
+    Setting Cgroup.
+    Successfully set Cgroup.
+    Set ARM Optimization.
+    No need to set ARM Optimization.
+    Fixing server package owner.
+    Setting finish flag.
+    Successfully set finish flag.
+    Preinstallation succeeded.
+    ```
+
+- 切换用户执行安装
+
+    ```
+    su - omm
+    gs_install -X /opt/software/openGauss/cluster_config.xml
+    ```
+
+    安装日志如下:
+
+    ```
+    Parsing the configuration file.
+    Check preinstall on every node.
+    Successfully checked preinstall on every node.
+    Creating the backup directory.
+    Successfully created the backup directory.
+    begin deploy..
+    Installing the cluster.
+    begin prepare Install Cluster..
+    Checking the installation environment on all nodes.
+    begin install Cluster..
+    Installing applications on all nodes.
+    Successfully installed APP.
+    begin init Instance..
+    encrypt cipher and rand files for database.
+    Please enter password for database:
+    Please repeat for database:
+    begin to create CA cert files
+    The sslcert will be generated in /opt/huawei/install/app/share/sslcert/om
+    Cluster installation is completed.
+    Configuring.
+    Deleting instances from all nodes.
+    Successfully deleted instances from all nodes.
+    Checking node configuration on all nodes.
+    Initializing instances on all nodes.
+    Updating instance configuration on all nodes.
+    Check consistence of memCheck and coresCheck on database nodes.
+ Successful check consistence of memCheck and coresCheck on all nodes. + Configuring pg_hba on all nodes. + Configuration is completed. + Successfully started cluster. + Successfully installed application. + end deploy.. + ``` + +- 登陆验证 + + ``` + [omm@ogpri dn]$ gsql -d postgres -p 26000 + gsql ((openGauss 2.1.0 build 590b0f8e) compiled at 2021-09-30 14:29:04 commit 0 last mr ) + Non-SSL connection (SSL connection is recommended when requiring high-security) + Type "help" for help. + openGauss=# + ``` + +- 查看主从状态 + + ``` + gs_ctl -D /opt/huawei/install/data/dn/ query + ``` + + 信息如下: + + ``` + [2021-12-29 14:41:33.751][21110][][gs_ctl]: gs_ctl query ,datadir is /opt/huawei/install/data/dn + HA state: + local_role : Primary + static_connections : 1 + db_state : Normal + detail_information : Normal + + Senders info: + sender_pid : 9716 + local_role : Primary + peer_role : Standby + peer_state : Normal + state : Streaming + sender_sent_location : 0/401A080 + sender_write_location : 0/401A080 + sender_flush_location : 0/401A080 + sender_replay_location : 0/401A080 + receiver_received_location : 0/401A080 + receiver_write_location : 0/401A080 + receiver_flush_location : 0/401A080 + receiver_replay_location : 0/401A080 + sync_percent : 100% + sync_state : Quorum + sync_priority : 1 + sync_most_available : Off + channel : 192.168.56.227:26001-->192.168.56.228:35144 + Receiver info: + No information + ``` + + diff --git "a/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-\345\217\202\346\225\260\344\274\230\345\214\226.md" "b/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-\345\217\202\346\225\260\344\274\230\345\214\226.md" new file mode 100644 index 0000000000000000000000000000000000000000..f0acf8db759cebbb49c178d0e29f9441db487f0e --- /dev/null +++ "b/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-\345\217\202\346\225\260\344\274\230\345\214\226.md" @@ -0,0 +1,162 @@ ++++ + +title = "opengauss数据库-参数优化" + +date = "2021-12-31" + +tags = [ "opengauss数据库-参数优化"] + +archives = "2021-12" + +author = "周琦放" + +summary = "opengauss数据库-参数优化" + +img = "/zh/post/2022/title/img18.png" + +times = "12:30" + ++++ + +# opengauss数据库-参数优化 + + 2021/12/31 + +## 参数调整 + +``` +cat opt_params.sh +#!/bin/bash +source ~/.bashrc +memory=`free|awk '{print $2}' |sed -n 2p` +if [[ $memory -lt 10*1024*1024 ]] +then +max_process_memory=2GB +shared_buffers=128MB +max_connections=500 +work_mem=4MB +maintenance_work_mem=256MB +echo "If the database fails to start, lower the parameters max_process_memory and shared_buffers" +elif [[ $memory -gt 4*1024*1024 ]] && [[ $memory -lt 8*1024*1024 ]] +then +max_process_memory=5GB +shared_buffers=1GB +max_connections=1000 +work_mem=16MB +maintenance_work_mem=1GB +else +max_process_memory=$((memory*6/10/1024/1024)) +shared_buffers=$((memory*3/10/1024/1024)) +max_connections=3000 +work_mem=64MB +maintenance_work_mem=2GB +fi + +##内存相关参数 +gs_guc set -I all -N all -c "max_process_memory=${max_process_memory}" +gs_guc set -I all -N all -c "shared_buffers=${shared_buffers}" +gs_guc set -I all -N all -c "work_mem=${work_mem}" +gs_guc set -I all -N all -c "maintenance_work_mem=${maintenance_work_mem}" +gs_guc set -I all -N all -c "cstore_buffers=16MB" +gs_guc set -I all -N all -c "wal_buffers=1GB" +gs_guc set -I all -N all -c "local_syscache_threshold=32MB" +gs_guc set -I all -N all -c "standby_shared_buffers_fraction=1" + +##连接访问相关参数 +gs_guc set -I all -N all -c "max_connections=${max_connections}" +gs_guc set -I all -N all -c 
"max_prepared_transactions=${max_connections}" +gs_guc set -I all -N all -c "listen_addresses = '*'" +gs_guc set -I all -N all -c "remote_read_mode=non_authentication" +gs_guc set -I all -N all -c "password_encryption_type=1" +gs_guc set -I all -N all -c "password_reuse_time=0" +gs_guc set -I all -N all -c "password_lock_time=0" +gs_guc set -I all -N all -c "password_effect_time=0" +gs_guc set -I all -N all -c "session_timeout=0" + +##wal相关参数 +gs_guc set -I all -N all -c "wal_level=logical" +gs_guc set -I all -N all -c "full_page_writes=off" +gs_guc set -I all -N all -c "wal_log_hints=off" +gs_guc set -I all -N all -c "xloginsert_locks=48" +gs_guc set -I all -N all -c "advance_xlog_file_num=10" + +##复制相关参数 +gs_guc set -I all -N all -c "synchronous_commit=on" +gs_guc set -I all -N all -c "wal_keep_segments=1024" +gs_guc set -I all -N all -c "max_wal_senders=16" +gs_guc set -I all -N all -c "recovery_max_workers=4" +gs_guc set -I all -N all -c "most_available_sync=on" +gs_guc set -I all -N all -c "max_size_for_xlog_prune=104857600" +gs_guc set -I all -N all -c "catchup2normal_wait_time=0" +gs_guc set -I all -N all -c "enable_slot_log=on" +gs_guc set -I all -N all -c "max_replication_slots=32" +gs_guc set -I all -N all -c "wal_receiver_timeout=60s" +gs_guc set -I all -N all -c "sync_config_strategy=none_node" + +##日志相关参数 +gs_guc set -I all -N all -c "logging_collector=on" +gs_guc set -I all -N all -c "log_duration=on" +gs_guc set -I all -N all -c "log_line_prefix='%m %u %d %r %p %S'" +gs_guc set -I all -N all -c "log_checkpoints=on" +gs_guc set -I all -N all -c "plog_merge_age=0" + +gs_guc set -I all -N all -c "archive_dest='/ogarchive'" + + + +##性能统计相关参数 +gs_guc set -I all -N all -c "vacuum_cost_limit=1000" +gs_guc set -I all -N all -c "autovacuum_max_workers=10" +gs_guc set -I all -N all -c "autovacuum_naptime=20s" +gs_guc set -I all -N all -c "autovacuum_vacuum_cost_delay=10" +gs_guc set -I all -N all -c "autovacuum_vacuum_scale_factor=0.05" +gs_guc set -I all -N all -c "autovacuum_analyze_scale_factor=0.02" +gs_guc set -I all -N all -c "autovacuum_vacuum_threshold=200" +gs_guc set -I all -N all -c "autovacuum_analyze_threshold=200" +gs_guc set -I all -N all -c "autovacuum_io_limits=104857600" +gs_guc set -I all -N all -c "instr_unique_sql_count=20000" +gs_guc set -I all -N all -c "enable_save_datachanged_timestamp=off" +gs_guc set -I all -N all -c "track_sql_count=off" +gs_guc set -I all -N all -c "enable_instr_rt_percentile=off" +gs_guc set -I all -N all -c "enable_instance_metric_persistent=off" +gs_guc set -I all -N all -c "enable_logical_io_statistics=off" +gs_guc set -I all -N all -c "enable_user_metric_persistent=off" +gs_guc set -I all -N all -c "enable_mergejoin=on" +gs_guc set -I all -N all -c "enable_nestloop=on" +gs_guc set -I all -N all -c "enable_pbe_optimization=off" +gs_guc set -I all -N all -c "enable_resource_track=on" +gs_guc set -I all -N all -c "enable_wdr_snapshot=on" +gs_guc set -I all -N all -c "instr_unique_sql_count=5000" + +##客户端白名单 +gs_guc set -I all -N all -h "host all all 0.0.0.0/0 md5" + +##其他参数 +gs_guc set -I all -N all -c "checkpoint_segments=1024" +gs_guc set -I all -N all -c "checkpoint_completion_target=0.8" +gs_guc set -I all -N all -c "pagewriter_sleep=200" + +gs_guc set -I all -N all -c "enable_alarm=off" +gs_guc set -I all -N all -c "enable_codegen=off" +gs_guc set -I all -N all -c "audit_enabled=on" +gs_guc set -I all -N all -c "enable_asp=off" + +gs_guc set -I all -N all -c "lc_messages='en_US.UTF-8'" +gs_guc set -I all -N all -c 
"lc_monetary='en_US.UTF-8'" +gs_guc set -I all -N all -c "lc_numeric='en_US.UTF-8'" +gs_guc set -I all -N all -c "lc_time='en_US.UTF-8'" + +gs_guc set -I all -N all -c "update_lockwait_timeout=1min" +gs_guc set -I all -N all -c "lockwait_timeout=1min" + +gs_guc set -I all -N all -c "max_files_per_process=100000" +gs_guc set -I all -N all -c "behavior_compat_options='display_leading_zero'" +gs_guc set -I all -N all -c "enable_thread_pool=off" +``` + +## 重启生效 + +``` +gs_om -t stop && gs_om -t start +``` + diff --git "a/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-\345\234\260\347\220\206\345\235\220\346\240\207gis\345\256\211\350\243\205.md" "b/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-\345\234\260\347\220\206\345\235\220\346\240\207gis\345\256\211\350\243\205.md" new file mode 100644 index 0000000000000000000000000000000000000000..e1fba9d7b865aa9c3c4d930d8071273b4c31065c --- /dev/null +++ "b/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-\345\234\260\347\220\206\345\235\220\346\240\207gis\345\256\211\350\243\205.md" @@ -0,0 +1,88 @@ ++++ + +title = "pengauss数据库-地理坐标gis安装" + +date = "2021-12-31" + +tags = [ "pengauss数据库-地理坐标gis安装"] + +archives = "2021-12" + +author = "周琦放 " + +summary = "pengauss数据库-地理坐标gis安装" + +img = "/zh/post/2022/title/img19.png" + +times = "12:30" + ++++ + +# opengauss数据库-地理坐标gis安装 + + 如无特殊说明,请在所有节点执行,执行用户为root + +## 组件下载 + +下载gis组件:Yukon-1.0-Alpha-openGauss2.1.0-CentOS\_x64.tar.gz,请注意下载的组件版本一定要和数据库版本严格一致 + +下载地址https://gitee.com/opengauss/Yukon/releases/v1.0-alpha + +## 环境配置要求 + +本次安装使用的是root 用户进行安装,而opengauss数据库的系统用户为omm,需要把omm用户的环境变量配置到root下的/etc/profile中, + +如下:加粗部分为omm的环境变量,请根据实际情况修改,omm用户的环境变量一般在/home/omm/.bashrc + +``` +more /etc/profile +...... +export ORACLE_HOME=/root/ora2pg/instantclient_11_2 +export PATH=$PATH:$ORACLE_HOME +export LD_LIBRARY_PATH=$ORACLE_HOME:$LD_LIBRARY_PATH +export TNS_ADMIN=$ORACLE_HOME +export PYTHON_HOME=/usr/local/python3 +export PATH=$PATH:$PYTHON_HOME/bin +export PATH=/root/gauss_om/omm/script:$PATH +export GPHOME=/opt/huawei/install/om +export PATH=$GPHOME/script/gspylib/pssh/bin:$GPHOME/script:$PATH +export LD_LIBRARY_PATH=$GPHOME/lib:$LD_LIBRARY_PATH +export PYTHONPATH=$GPHOME/lib +export GAUSSHOME=/opt/huawei/install/app +export PATH=$GAUSSHOME/bin:$PATH +export LD_LIBRARY_PATH=$GAUSSHOME/lib:$LD_LIBRARY_PATH +export S3_CLIENT_CRT_FILE=$GAUSSHOME/lib/client.crt +export GAUSS_VERSION=2.1.0 +export PGHOST=/opt/huawei/tmp +export GAUSSLOG=/var/log/omm/omm +umask 077 +export GAUSS_ENV=2 +export GS_CLUSTER_NAME=Cluster01 +``` + +## 安装Yukon gis组件 + +``` +tar -zxvf Yukon-1.0-Alpha-openGauss2.1.0-CentOS_x64.tar.gz +cd Yukon-1.0-Alpha-openGauss2.1.0/Yukon-1.0/ +sh install_yukon.sh -i +``` + +## 验证测试 + +``` +[omm@ogpri ~]$ gsql -d postgres -p 26000 +gsql ((openGauss 2.1.0 build 590b0f8e) compiled at 2021-09-30 14:29:04 commit 0 last mr ) +Non-SSL connection (SSL connection is recommended when requiring high-security) +Type "help" for help. + +openGauss=# create database test; +CREATE DATABASE +openGauss=# \c test; +Non-SSL connection (SSL connection is recommended when requiring high-security) +You are now connected to database "test" as user "omm". 
+test=# create extension postgis; +CREATE EXTENSION +test=# +``` + diff --git "a/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-\351\253\230\345\217\257\347\224\250jdbc\344\275\277\347\224\250\346\226\271\346\263\225.md" "b/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-\351\253\230\345\217\257\347\224\250jdbc\344\275\277\347\224\250\346\226\271\346\263\225.md" new file mode 100644 index 0000000000000000000000000000000000000000..dca4506e79a040bb92a5436f00a20cfa464a2f10 --- /dev/null +++ "b/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-\351\253\230\345\217\257\347\224\250jdbc\344\275\277\347\224\250\346\226\271\346\263\225.md" @@ -0,0 +1,201 @@ ++++ + +title = "opengauss数据库-高可用jdbc使用方法" + +date = "2021-12-31" + +tags = [ "opengauss数据库-高可用jdbc使用方法"] + +archives = "2021-12" + +author = "周琦放" + +summary = "opengauss数据库-高可用jdbc使用方法" + +img = "/zh/post/2022/title/img20.png" + +times = "12:30" + ++++ + +# opengauss数据库-高可用jdbc使用方法 + +## 驱动下载 + +下载jdbc驱动 openGauss-2.1.0-JDBC.tar.gz + +下载地址:https://opengauss.org/zh/download.html + +## 表demo 案例 + +``` +create database test; +create schema demo; +CREATE TABLE demo.websites ( + id int NOT NULL, + name char(20) NOT NULL DEFAULT '', + url varchar(255) NOT NULL DEFAULT '', + PRIMARY KEY (id) +); +COMMENT ON COLUMN demo.websites.name IS '站点名称'; +INSERT INTO demo.websites VALUES + +('1', 'openGauss', 'https://opengauss.org/zh/'), +('2', '华为云', 'https://www.huaweicloud.com/'), +('3', 'openEuler', 'https://openeuler.org/zh/'), +('4', '华为support中心', 'https://support.huaweicloud.com/'); +``` + +## vip 使用方法 + +请注意192.168.56.229 为mogha组件中的vip 地址 + +``` +package com.company; +import java.sql.*; + +public class openGaussSelect { + static final String JDBC_DRIVER = "org.postgresql.Driver"; + static final String DB_URL = "jdbc:postgresql://192.168.56.229:26000/test"; + // 数据库的用户名与密码,需要根据自己的设置 + static final String USER = "test"; + static final String PASS = "zhou0815FANG"; + public static void main(String[] args) { + Connection conn = null; + Statement stmt = null; + try{ + // 注册 JDBC 驱动 + Class.forName(JDBC_DRIVER); + // 打开链接 + System.out.println("连接数据库..."); + conn = DriverManager.getConnection(DB_URL,USER,PASS); + + // 执行查询 + System.out.println(" 实例化Statement对象..."); + stmt = conn.createStatement(); + String sql; + sql = "SELECT id, name, url FROM demo.websites"; + ResultSet rs = stmt.executeQuery(sql); + // 展开结果集数据库 + while(rs.next()){ + // 通过字段检索 + int id = rs.getInt("id"); + String name = rs.getString("name"); + String url = rs.getString("url"); + // 输出数据 + System.out.print("ID: " + id); + System.out.print(", 站点名称: " + name); + System.out.print(", 站点 URL: " + url); + System.out.print("\n"); + } + // 完成后关闭 + rs.close(); + stmt.close(); + conn.close(); + }catch(SQLException se){ + + // 处理 JDBC 错误 + se.printStackTrace(); + }catch(Exception e){ + // 处理 Class.forName 错误 + e.printStackTrace(); + }finally{ + // 关闭资源 + try{ + if(stmt!=null) stmt.close(); + }catch(SQLException se2){ + }// 什么都不做 + try{ + if(conn!=null) conn.close(); + }catch(SQLException se){ + se.printStackTrace(); + } + } + System.out.println("Goodbye!"); + } +} +``` + +## 测试验证 + +![](figures/测试验证.png) + +## JDBC轮询 + +使用jdbc中targetServerType=master属性 + +``` +package com.company; + +import java.sql.*; +public class openGaussSelect { + static final String JDBC_DRIVER = "org.postgresql.Driver"; + static final String DB_URL = "jdbc:postgresql://192.168.56.227:26000,192.168.56.228:26000/test?targetServerType=master"; + // 数据库的用户名与密码,需要根据自己的设置 + 
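+    // 补充说明:targetServerType=master时,驱动会在URL列出的227/228两个节点中
+    // 只选择当前角色为主库的节点建立连接;主备切换后,重新获取连接即可指向新主库。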
static final String USER = "test"; + static final String PASS = "zhou0815FANG"; + public static void main(String[] args) { + Connection conn = null; + Statement stmt = null; + try{ + // 注册 JDBC 驱动 + Class.forName(JDBC_DRIVER); + + // 打开链接 + System.out.println("连接数据库..."); + conn = DriverManager.getConnection(DB_URL,USER,PASS); + + // 执行查询 + System.out.println(" 实例化Statement对象..."); + stmt = conn.createStatement(); + String sql; + sql = "SELECT id, name, url FROM demo.websites"; + ResultSet rs = stmt.executeQuery(sql); + + // 展开结果集数据库 + while(rs.next()){ + // 通过字段检索 + int id = rs.getInt("id"); + String name = rs.getString("name"); + String url = rs.getString("url") + + // 输出数据 + System.out.print("ID: " + id); + System.out.print(", 站点名称: " + name); + + System.out.print(", 站点 URL: " + url); + System.out.print("\n"); + } + // 完成后关闭 + rs.close(); + stmt.close(); + conn.close(); + }catch(SQLException se){ + // 处理 JDBC 错误 + se.printStackTrace(); + }catch(Exception e){ + // 处理 Class.forName 错误 + e.printStackTrace(); + }finally{ + // 关闭资源 + try{ + if(stmt!=null) stmt.close(); + }catch(SQLException se2){ + }// 什么都不做 + try{ + if(conn!=null) conn.close(); + }catch(SQLException se){ + se.printStackTrace(); + } + } + System.out.println("Goodbye!"); + } +} +``` + +## 测试验证 + +![](figures/测试验证1.png) + +![](figures/2测试验证.png) + diff --git "a/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-\351\253\230\345\217\257\347\224\250\351\203\250\347\275\262mogha.md" "b/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-\351\253\230\345\217\257\347\224\250\351\203\250\347\275\262mogha.md" new file mode 100644 index 0000000000000000000000000000000000000000..4013994f685722734f1a0a709317c849291d6722 --- /dev/null +++ "b/content/zh/post/2022/opengauss\346\225\260\346\215\256\345\272\223-\351\253\230\345\217\257\347\224\250\351\203\250\347\275\262mogha.md" @@ -0,0 +1,234 @@ ++++ + +title = "opengauss数据库-高可用部署mogha" + +date = "2021-12-31" + +tags = [ "opengauss数据库-高可用部署mogha"] + +archives = "2021-12" + +author = "周琦放" + +summary = "opengauss数据库-高可用部署mogha" + +img = "/zh/post/2022/title/img21.png" + +times = "12:30" + ++++ + +# opengauss数据库-高可用部署mogha + +## 高可用 + +- 组件下载 + + 如无特殊说明,请在所有节点执行,执行用户为root + + 下载高可用组件 mogha-2.3.0-CentOS-x86\_64.tar.gz + + 下载地址:https://docs.mogdb.io/zh/mogha/v2.3/release-notes/ + + +## 环境配置要求 + +由于 MogHA 需要自动挂虚拟IP的操作,内部需要通过 ifconfig 指令来操作网卡,MogHA 是通过数据库安装用户进行启动的,要想执行网卡操作就需要 sudo 权限,在安装期间脚本会检查 /etc/sudoers 配置中是否配置了运行用户的权限,如果存在就跳过配置,如果没有,会尝试自动的将 omm 用户添加到 /etc/sudoers 中,并赋予 ifconfig 的权限。 + +所以建议在部署 MogHA 服务之前,先检查一下 /etc/sudoers 中是否成功配置了 运行用户的 sudo 权限,配置参考如下: + +``` +# 追加下列 1 行到文件末尾 +omm ALL=(ALL) NOPASSWD: /usr/sbin/ifconfig +``` + +数据库配置要求 + +数据库要求至少为一主一备,数据库模式为同步。postgresql.conf中参数要求如下: + +``` +synchronous_commit = on +``` + +## 安装mogha + +建议存放在用户的家目录下,如omm用户,建议存放在/home/omm目录下 + +``` +su - root +tar -zxvf mogha-2.3.0-CentOS-x86_64.tar.gz +cd mogha/ +./install.sh omm /opt/huawei/install/data/dn +``` + +## 配置文件 + +安装完成后,会生成一个node.conf文件,修改此配置文件 + +请注意需要修改的内容 + +``` +# docs: https://docs.mogdb.io/zh/mogha/v2.3/overview +[config] + +# 数据库端口 + +db_port=26000 + +# 数据库的操作系统用户,通常为omm +db_user=omm +# 数据库的数据目录 +db_datadir=/opt/huawei/install/data/dn +# 本地主库元数据存储路径 +# primary_info=/root/mogha/primary_info +# 本地备库元数据存储路径 +# standby_info=/root/mogha/standby_info +# 是否使用 lite 模式,可选值:True / False +lite_mode=True +# HA节点之间通信端口,如果有防火墙,需要配置互通 +agent_port=8081 +# [2.3.0新增] +# HA节点间HTTP API 请求超时时间(秒) +# http_req_timeout=3 +# 心跳间隔时间 +# heartbeat_interval=3 +# 主库丢失的探测时间 +# 
primary_lost_timeout=10 +# 主库的孤单时间 +# primary_lonely_timeout=10 +# 双主确认超时时间 +# double_primary_timeout=10 +# 本地元数据文件类型,支持 json/bin +# meta_file_type=json +# 是否为数据库实例进程限制cpu +# taskset=False +# 设置输出的日志格式 +# logger_format=%(asctime)s %(levelname)s [%(filename)s:%(lineno)d]: %(message)s +# [2.3.0新增]设置日志存储目录 +# log_dir=/root/mogha +# [2.3.0新增] 日志文件最大字节数(接近该值时,将发生日志滚动) +# 支持的单位:KB, MB, GB (忽略大小写) +# log_max_size=512MB +# [2.3.0新增] 日志保留的文件个数 +# log_backup_count=10 +# 设置除了主备相关的机器,允许可以访问到web接口的IP列表, 多个IP时逗号分隔 +# allow_ips= +# [2.1新增] 主实例进程未启动时,是否需要 HA 进行拉起或切换 +# 搭配 primary_down_handle_method 使用 +# handle_down_primary=True +# [2.1新增] 备库进程未启动时,是否需要 HA 进行拉起 +# handle_down_standby=True +# [2.1新增] 主库实例进程未启动时,如何处理 +# 支持两种处理方式: +# - restart: 尝试重启,尝试次数在 restart_strategy 参数中设定 +# - failover: 直接切换 +# primary_down_handle_method=restart +# [2.1新增] 重启实例最大尝试条件: times/minutes +# 例如: 10/3 最多尝试10次或者3分钟,任何一个条件先满足就不再尝试。 +# restart_strategy=10/3 +# [2.1.1新增] UCE(uncorrected error)故障感知功能,默认开启 +# uce_error_detection=True +# [2.1.1新增] UCE检查时,读取最后多少行日志数据进行判断 +# uce_detect_max_lines=200 +# [2.2.1新增] +# debug_mode=False +# (选填) 元数据库的连接参数(openGauss类数据库) +# [meta] +# ha_name= # HA集群的名称,全局唯一,禁止两套HA集群共用一个名字 +# host= # 机器IP +# port= # 端口 +# db= # 数据库名 +# user= # 用户名 +# password= # 密码 +# connect_timeout=3 # 连接超时,单位秒 +# host1-9,每个代表一个机器(最多支持1主8备) +# (lite模式需仅配置 host1 和 host2 即可,) +# - ip: 业务IP +# - heartbeat_ips: 心跳网络ip,允许配置多个心跳网络,以逗号隔开 +[host1] +ip=192.168.56.227 +heartbeat_ips= +[host2] +ip=192.168.56.228 +heartbeat_ips= +# [host3] +# ip= +# heartbeat_ips= +# [host4] +# ip= +# heartbeat_ips= +# [host5] +# ip= +# heartbeat_ips= +# [host6] +# ip= +# heartbeat_ips= +# [host7] +# ip= +# heartbeat_ips= +# [host8] +# ip= +# heartbeat_ips= +# [host9] +# ip= +# heartbeat_ips= + +# zone1~3 用于定义机房,不同机房配置独立虚拟IP, +# 切换不会切过去,作为异地保留项目 +# - vip: 机房虚拟IP (没有不填) +# - hosts: 本机房内机器列表, 填写机器在配置文件中对应的配置模块名 host1~9,示例:host1,host2 + +# - ping_list: 用于检查网络是否通畅的仲裁节点,例如网关,支持填写多个IP (逗号分隔) + +# - cascades: 机房内的级联机器列表 (配置方式同 hosts, 没有不填) +# - arping: (选填) 机房的 arping 地址 + +[zone1] +## 该VIP为虚拟ip地址,请根据实际情况填写 +vip=192.168.56.229 + +hosts=host1,host2 +ping_list=192.168.56.1 +cascades= +arping= +# [zone2] +# vip= +# hosts= +# ping_list= +# cascades= +# arping= +# [zone3] +# vip= +# hosts= +# ping_list= +# cascades= +# arping= +``` + +## 启动 + +``` +systemctl start mogha.service +``` + +## 查看日志信息 + +- 主库心跳日志信息 + + ``` + 2021-12-29 13:20:49,211 INFO [__init__.py:59]: ping result: {'192.168.56.1': True, '192.168.56.228': True} + 2021-12-29 13:20:49,323 INFO [__init__.py:84]: local instance is alive Primary, state: Normal + 2021-12-29 13:20:54,593 INFO [__init__.py:59]: ping result: {'192.168.56.1': True, '192.168.56.228': True} + 2021-12-29 13:20:54,719 INFO [__init__.py:84]: local instance is alive Primary, state: Normal + ``` + +- 备库心跳日志信息 + + ``` + 2021-12-29 13:32:07,774 INFO [__init__.py:59]: ping result: {'192.168.56.1': True, '192.168.56.227': True} + 2021-12-29 13:32:07,890 INFO [__init__.py:84]: local instance is alive Standby, state: Normal + 2021-12-29 13:32:13,109 INFO [__init__.py:59]: ping result: {'192.168.56.1': True, '192.168.56.227': True} + 2021-12-29 13:32:13,219 INFO [__init__.py:84]: local instance is alive Standby, state: Normal + ``` + + diff --git a/content/zh/post/2022/public_sys-resources/icon-caution.gif b/content/zh/post/2022/public_sys-resources/icon-caution.gif new file mode 100644 index 0000000000000000000000000000000000000000..6e90d7cfc2193e39e10bb58c38d01a23f045d571 Binary files /dev/null and 
b/content/zh/post/2022/public_sys-resources/icon-caution.gif differ diff --git a/content/zh/post/2022/public_sys-resources/icon-danger.gif b/content/zh/post/2022/public_sys-resources/icon-danger.gif new file mode 100644 index 0000000000000000000000000000000000000000..6e90d7cfc2193e39e10bb58c38d01a23f045d571 Binary files /dev/null and b/content/zh/post/2022/public_sys-resources/icon-danger.gif differ diff --git a/content/zh/post/2022/public_sys-resources/icon-note.gif b/content/zh/post/2022/public_sys-resources/icon-note.gif new file mode 100644 index 0000000000000000000000000000000000000000..6314297e45c1de184204098efd4814d6dc8b1cda Binary files /dev/null and b/content/zh/post/2022/public_sys-resources/icon-note.gif differ diff --git a/content/zh/post/2022/public_sys-resources/icon-notice.gif b/content/zh/post/2022/public_sys-resources/icon-notice.gif new file mode 100644 index 0000000000000000000000000000000000000000..86024f61b691400bea99e5b1f506d9d9aef36e27 Binary files /dev/null and b/content/zh/post/2022/public_sys-resources/icon-notice.gif differ diff --git a/content/zh/post/2022/public_sys-resources/icon-tip.gif b/content/zh/post/2022/public_sys-resources/icon-tip.gif new file mode 100644 index 0000000000000000000000000000000000000000..93aa72053b510e456b149f36a0972703ea9999b7 Binary files /dev/null and b/content/zh/post/2022/public_sys-resources/icon-tip.gif differ diff --git a/content/zh/post/2022/public_sys-resources/icon-warning.gif b/content/zh/post/2022/public_sys-resources/icon-warning.gif new file mode 100644 index 0000000000000000000000000000000000000000..6e90d7cfc2193e39e10bb58c38d01a23f045d571 Binary files /dev/null and b/content/zh/post/2022/public_sys-resources/icon-warning.gif differ diff --git a/content/zh/post/2022/title/img1.png b/content/zh/post/2022/title/img1.png new file mode 100644 index 0000000000000000000000000000000000000000..2af578504062e5fa7a7aaf7e1c2014531e51e9c2 Binary files /dev/null and b/content/zh/post/2022/title/img1.png differ diff --git a/content/zh/post/2022/title/img10.png b/content/zh/post/2022/title/img10.png new file mode 100644 index 0000000000000000000000000000000000000000..ce35c3cd313c8e4ed939ae18b91b9a64767ab504 Binary files /dev/null and b/content/zh/post/2022/title/img10.png differ diff --git a/content/zh/post/2022/title/img11.png b/content/zh/post/2022/title/img11.png new file mode 100644 index 0000000000000000000000000000000000000000..7ebe22cb03c6ee1e735b29bce766c1e10d334f0c Binary files /dev/null and b/content/zh/post/2022/title/img11.png differ diff --git a/content/zh/post/2022/title/img12.png b/content/zh/post/2022/title/img12.png new file mode 100644 index 0000000000000000000000000000000000000000..0ec8535146c6a1d5e0b78ee6c1a6b3a8ede1cdf3 Binary files /dev/null and b/content/zh/post/2022/title/img12.png differ diff --git a/content/zh/post/2022/title/img13.png b/content/zh/post/2022/title/img13.png new file mode 100644 index 0000000000000000000000000000000000000000..86a420b92fb8289658d807d49f137b6d13862f6d Binary files /dev/null and b/content/zh/post/2022/title/img13.png differ diff --git a/content/zh/post/2022/title/img14.png b/content/zh/post/2022/title/img14.png new file mode 100644 index 0000000000000000000000000000000000000000..1da9e55bd25cbc7cfc6fdef1800b4c95b077829b Binary files /dev/null and b/content/zh/post/2022/title/img14.png differ diff --git a/content/zh/post/2022/title/img15.jpg b/content/zh/post/2022/title/img15.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..7ebe22cb03c6ee1e735b29bce766c1e10d334f0c Binary files /dev/null and b/content/zh/post/2022/title/img15.jpg differ diff --git a/content/zh/post/2022/title/img16.png b/content/zh/post/2022/title/img16.png new file mode 100644 index 0000000000000000000000000000000000000000..2af578504062e5fa7a7aaf7e1c2014531e51e9c2 Binary files /dev/null and b/content/zh/post/2022/title/img16.png differ diff --git a/content/zh/post/2022/title/img17.png b/content/zh/post/2022/title/img17.png new file mode 100644 index 0000000000000000000000000000000000000000..b903c7f8d5a3ba8b66b2d6be883a4bac7230915e Binary files /dev/null and b/content/zh/post/2022/title/img17.png differ diff --git a/content/zh/post/2022/title/img18.png b/content/zh/post/2022/title/img18.png new file mode 100644 index 0000000000000000000000000000000000000000..1697caef6995dd16977bb9aa96af762e19fb7102 Binary files /dev/null and b/content/zh/post/2022/title/img18.png differ diff --git a/content/zh/post/2022/title/img19.png b/content/zh/post/2022/title/img19.png new file mode 100644 index 0000000000000000000000000000000000000000..5537c95b900978a3020269be7ec52ce914224844 Binary files /dev/null and b/content/zh/post/2022/title/img19.png differ diff --git a/content/zh/post/2022/title/img2.png b/content/zh/post/2022/title/img2.png new file mode 100644 index 0000000000000000000000000000000000000000..5537c95b900978a3020269be7ec52ce914224844 Binary files /dev/null and b/content/zh/post/2022/title/img2.png differ diff --git a/content/zh/post/2022/title/img20.png b/content/zh/post/2022/title/img20.png new file mode 100644 index 0000000000000000000000000000000000000000..ce35c3cd313c8e4ed939ae18b91b9a64767ab504 Binary files /dev/null and b/content/zh/post/2022/title/img20.png differ diff --git a/content/zh/post/2022/title/img21.png b/content/zh/post/2022/title/img21.png new file mode 100644 index 0000000000000000000000000000000000000000..b71bb7d740d0f375bbea6116ffde9175c0dbcacf Binary files /dev/null and b/content/zh/post/2022/title/img21.png differ diff --git a/content/zh/post/2022/title/img22.png b/content/zh/post/2022/title/img22.png new file mode 100644 index 0000000000000000000000000000000000000000..31e776c19ddc9b62b4b88171d015b1b94ff2b022 Binary files /dev/null and b/content/zh/post/2022/title/img22.png differ diff --git a/content/zh/post/2022/title/img3.png b/content/zh/post/2022/title/img3.png new file mode 100644 index 0000000000000000000000000000000000000000..b903c7f8d5a3ba8b66b2d6be883a4bac7230915e Binary files /dev/null and b/content/zh/post/2022/title/img3.png differ diff --git a/content/zh/post/2022/title/img4.png b/content/zh/post/2022/title/img4.png new file mode 100644 index 0000000000000000000000000000000000000000..6b7b474933a31c6a20d0d1708e8909163293b4ad Binary files /dev/null and b/content/zh/post/2022/title/img4.png differ diff --git a/content/zh/post/2022/title/img5.png b/content/zh/post/2022/title/img5.png new file mode 100644 index 0000000000000000000000000000000000000000..830c8bc490a1b830e759df1f04b453909a097406 Binary files /dev/null and b/content/zh/post/2022/title/img5.png differ diff --git a/content/zh/post/2022/title/img6.png b/content/zh/post/2022/title/img6.png new file mode 100644 index 0000000000000000000000000000000000000000..b71bb7d740d0f375bbea6116ffde9175c0dbcacf Binary files /dev/null and b/content/zh/post/2022/title/img6.png differ diff --git a/content/zh/post/2022/title/img7.png b/content/zh/post/2022/title/img7.png new file mode 100644 index 
0000000000000000000000000000000000000000..830c8bc490a1b830e759df1f04b453909a097406 Binary files /dev/null and b/content/zh/post/2022/title/img7.png differ diff --git a/content/zh/post/2022/title/img8.png b/content/zh/post/2022/title/img8.png new file mode 100644 index 0000000000000000000000000000000000000000..31e776c19ddc9b62b4b88171d015b1b94ff2b022 Binary files /dev/null and b/content/zh/post/2022/title/img8.png differ diff --git a/content/zh/post/2022/title/img9.png b/content/zh/post/2022/title/img9.png new file mode 100644 index 0000000000000000000000000000000000000000..1da9e55bd25cbc7cfc6fdef1800b4c95b077829b Binary files /dev/null and b/content/zh/post/2022/title/img9.png differ diff --git "a/content/zh/post/2022/zabbix-\347\233\221\346\216\247-MogDB-openGauss-\344\271\213-\351\207\207\351\233\206prometheus\346\225\260\346\215\256.md" "b/content/zh/post/2022/zabbix-\347\233\221\346\216\247-MogDB-openGauss-\344\271\213-\351\207\207\351\233\206prometheus\346\225\260\346\215\256.md" new file mode 100644 index 0000000000000000000000000000000000000000..edecaa106677cea3bc43caff0bd2c1702f875234 --- /dev/null +++ "b/content/zh/post/2022/zabbix-\347\233\221\346\216\247-MogDB-openGauss-\344\271\213-\351\207\207\351\233\206prometheus\346\225\260\346\215\256.md" @@ -0,0 +1,140 @@ ++++ + +title = "zabbix 监控 MogDB/openGauss 之 采集prometheus数据" + +date = "2021-12-16" + +tags = [ "zabbix 监控 MogDB/openGauss 之 采集prometheus数据"] + +archives = "2021-12" + +author = "高云龙" + +summary = "zabbix 监控 MogDB/openGauss 之 采集prometheus数据" + +img = "/zh/post/2022/title/img3.png" + +times = "12:30" + ++++ + +# zabbix 监控 MogDB/openGauss 之 采集prometheus数据 + +## 前言 + +市场上比较的监控方式有两种:zabbix和prometheus架构,对于MogDB/openGauss数据库来说,已经通过[grafana + prometheus + opengauss\_exporter](https://www.modb.pro/db/173483)的方式完成了监控部署,如何通过zabbix完成对MogDB/openGauss数据库完成监控呢,通过zabbix官网我们知道从zabbix 4.2版本开始支持了Prometheus 数据源,那本篇文章先实现通过zabbix采集prometheus数据,zabbix底层的数据存储采用MogDB数据库。 + +## 软件信息 + +- OS: CentOS 7.9 on x86 +- database:MogDB 2.0.1 +- prometheus:2.31.1 +- opengauss\_exporter: 0.0.9 + +本环境已经安装好MogDB数据库、prometheus和opengauss\_exporter,这里主要介绍zabbix安装及与prometheus适配。 + +--安装依赖包 + +``` +yum -y install gcc gcc-c++ curl curl-devel net-snmp net-snmp-devel readline.x86_64 readline-devel.x86_64 zlib.x86_64 zlib-devel.x86_64 libevent.x86_64 libevent-devel.x86_64 postgresql-devel.x86_64 golang.x86_64 libmcrypt-devel mhash-devel libxslt-devel libjpeg libjpeg-devel libpng libpng-devel freetype freetype-devel libxml2 libxml2-devel zlib zlib-devel glibc glibc-devel glib2 glib2-devel bzip2 bzip2-devel ncurses ncurses-devel curl curl-devel e2fsprogs e2fsprogs-devel krb5 krb5-devel libidn libidn-devel openssl openssl-devel sqlite-devel.x86_64 sqlite.x86_64 oniguruma-devel oniguruma +``` + +## zabbix安装部署 + +[参考zabbix官网快速部署](https://www.zabbix.com/cn/download?zabbix=5.0&os_distribution=centos&os_version=7&db=postgresql&ws=nginx) + +- 安装准备 + + --安装zabbix源 + + ``` + # rpm -Uvh https://repo.zabbix.com/zabbix/5.0/rhel/7/x86_64/zabbix-release-5.0-1.el7.noarch.rpm + # yum clean all + + --安装zabbix server 和 agent + # yum install zabbix-server-pgsql zabbix-agent + + --配置Zabbix前端 + # yum install centos-release-scl + + --编辑zabbix.repo + vim /etc/yum.repos.d/zabbix.repo + [zabbix-frontend] + ... + enabled=1 + ... + Install Zabbix frontend packages. 
+
+    --安装pgsql和nginx
+    # yum install zabbix-web-pgsql-scl zabbix-nginx-conf-scl
+
+    --为Zabbix前端配置PHP
+    vim /etc/opt/rh/rh-nginx116/nginx/conf.d/zabbix.conf
+
+    listen 80;
+    server_name 172.16.3.90;
+
+    ***
+    vim /etc/opt/rh/rh-php72/php-fpm.d/zabbix.conf
+
+    listen.acl_users = apache,nginx
+    php_value[date.timezone] = Europe/Riga
+    ```
+
+
+- MogDB数据库配置
+
+    ```
+    --创建数据库
+    postgres=# create database zabbix DBCOMPATIBILITY='PG';
+
+    --创建用户
+    postgres=# \c zabbix
+    zabbix=# create user zabbix encrypted password 'zabbix@123';
+
+    --修改pg_hba.conf
+    host all zabbix 172.16.3.90/32 md5
+
+    --导入数据
+    $ zcat /usr/share/doc/zabbix-server-pgsql*/create.sql.gz | gsql -h 172.16.3.90 -U zabbix zabbix
+
+    ```
+
+
+- 启动Zabbix server和agent进程
+
+    ```
+    --启动Zabbix server和agent进程,并为它们设置开机自启:
+
+    # systemctl restart zabbix-server zabbix-agent rh-nginx116-nginx rh-php72-php-fpm
+    # systemctl enable zabbix-server zabbix-agent rh-nginx116-nginx rh-php72-php-fpm
+    ```
+
+- 展示Zabbix前端
+
+    连接到新安装的Zabbix前端:直接在浏览器输入172.16.3.90,如出现下图页面,说明zabbix启动成功。配置完前端界面后,zabbix初始账号为Admin,密码为zabbix
+
+    ![](figures/20211203-7294cdd5-5b8a-41dd-9558-468c56d0e49d.png)
+
+    ![](figures/20211203-8632d683-5aa7-4e1f-907c-3952796968f4.png)
+
+
+## zabbix配置prometheus
+
+- 配置监控项
+
+    在zabbix界面:Configuration --> Hosts --> Items --> Create Item
+
+    ![](figures/20211216-cd0ca2d6-dd3c-41d5-9643-775edc3e9035.png)
+
+- 添加监控项信息
+
+    ![](figures/20211216-05611555-f74d-47d5-8057-a86a6fd5e38f.png)
+
+    ![](figures/20211216-2e9cd439-b92e-4fcd-8180-ef7096c80a16.png)
+
+- 查看监控项
+
+    ![](figures/20211216-b9c6b9ce-6a77-4ce0-a064-291015801db2.png)
+
+ diff --git "a/content/zh/post/2022/\345\215\216\344\270\272openGauss\346\225\260\346\215\256\345\272\223\345\256\211\350\243\205\344\270\216\344\275\277\347\224\250.md" "b/content/zh/post/2022/\345\215\216\344\270\272openGauss\346\225\260\346\215\256\345\272\223\345\256\211\350\243\205\344\270\216\344\275\277\347\224\250.md" new file mode 100644 index 0000000000000000000000000000000000000000..ef94d00e41b1721b8055688718145dec1f89b9e2 --- /dev/null +++ "b/content/zh/post/2022/\345\215\216\344\270\272openGauss\346\225\260\346\215\256\345\272\223\345\256\211\350\243\205\344\270\216\344\275\277\347\224\250.md" @@ -0,0 +1,851 @@
++++
+
+title = "华为openGauss数据库安装与使用"
+
+date = "2021-12-29"
+
+tags = [ "华为openGauss数据库安装与使用"]
+
+archives = "2021-12"
+
+author = "Vector "
+
+summary = "华为openGauss数据库安装与使用"
+
+img = "/zh/post/2022/title/img6.png"
+
+times = "12:30"
+
++++
+
+# 华为openGauss数据库安装与使用
+
+主要参考博客:[opengauss单机部署-墨天轮](https://www.modb.pro/doc/4705)
+
+[企业版安装 | openGauss](https://opengauss.org/zh/docs/2.0.0/docs/installation/%E4%BC%81%E4%B8%9A%E7%89%88%E5%AE%89%E8%A3%85.html)
+
+## 1. 
虚拟机安装 + +先做安装准备,点击链接[下载](https://download3.vmware.com/software/wkst/file/VMware-workstation-full-16.1.2-17966106.exe)VMware Workstation Pro16,此处为Windows 10使用。 + +- 1.1 VMware安装 + + 打开下载好的exe文件,即开始安装: + + ![](figures/615c183c2ab3f51d914bfbb6.png) + + 安装位置默认在C盘,点击更改可以修改安装位置,我安装到了E:\\VMware\\下,安装路径尽量不要有中文,记得勾选PATH按钮,这样不用自己再添加环境变量,可勾选增强型键盘驱动程序,此功能可更好地处理国际键盘和带有额外按键的键盘: + + ![](figures/615c15c42ab3f51d91484e93.png) + + 一直点击下一步: + + ![](figures/615c11832ab3f51d914222f4.png) + + ![](figures/615c11832ab3f51d914222dd.png) + + ![](figures/615c11832ab3f51d914222e9.png) + + ![](figures/615c11832ab3f51d91422301.png) + + 点击输入许可证,密钥可以自己购买,或者百度搜索以下,多尝试几个,下面是我当时安装使用的密钥,不知道现在失效没有: + + ![](figures/615c183c2ab3f51d914bfbaf.png) + + 安装后可能要求重启系统,重启后进入软件。依次点击导航栏中的 帮助 -\> 关于 VMware Workstation ,查看许可证信息的状态,如下图所示即为激活成功。 + + ![](figures/615c15c42ab3f51d91484e9e.png) + +- 1.2 虚拟机部署centos + + 可以在官方网站下载centos7,只有centos7.6支持安装opengauss,如果找不到7.6版本的centos,也可安装稍高版本的centos,安装完之后需要在系统文件中做相关修改,我下载的是centos7.9,文件太大了,需要下一段时间,记得更改下载保存的位置,我放在了E:\\Linux\\下。我第一次安装时不知道必须安装centos7,安装成了centos8,而重新安装时部分截图忘记保存,所以下面部分截图出现的centos8,大家视为centos7就好。 + + ![](figures/615c15c42ab3f51d91484ead.png) + + 下载完成,打开VMware选择新建虚拟机: + + ![](figures/615c191d2ab3f51d914d3f1b.png) + + ![](figures/615c191d2ab3f51d914d3f25.png) + + 浏览文件,选择centos7的下载目录,选择镜像文件: + + ![](figures/615c191d2ab3f51d914d3f32.png) + + 设置虚拟机的名称和账户名,以及密码: + + ![](figures/615c191d2ab3f51d914d3f43.png) + + 选择虚拟机的安装位置: + + ![](figures/615c19272ab3f51d914d4e90.png) + + 设置磁盘的容量,默认为20GB,我修改为了40GB,点击下一步即可: + + ![](figures/615c19272ab3f51d914d4e97.png) + + 自定义硬件可以根据自己的需求,修改centos的设置: + + ![](figures/615c19272ab3f51d914d4e9d.png) + + 内存大小默认为1GB,我设置为了2GB: + + ![](figures/615c19272ab3f51d914d4ea8.png) + + 网络适配器选择NAT模式,设置完成之后点击确定: + + ![](figures/615c19272ab3f51d914d4eaf.png) + + ![](figures/615c19302ab3f51d914d5de4.png) + + 等待安装: + + ![](figures/615c19302ab3f51d914d5dd8.png) + + ![](figures/615c19302ab3f51d914d5df7.png) + + ![](figures/615c19302ab3f51d914d5e02.png) + + ![](figures/615c19302ab3f51d914d5e11.png) + + ![](figures/615c193f2ab3f51d914d72c2.png) + + 中间会出现这个页面让你设置,如果你没赶快进行操作,就跳过去了,设置不设置都没有关系,安装完成之后也可以设置: + + ![](figures/615c193f2ab3f51d914d72ba.png) + + 如下是,点击各个按钮进行时间、显示、输入法的设置: + + ![](figures/615c193f2ab3f51d914d72fc.png) + + ![](figures/615c19492ab3f51d914d811b.png) + + ![](figures/615c19492ab3f51d914d8137.png) + + ![](figures/615c19492ab3f51d914d8153.png) + + 设置完成之后继续安装,安装完毕,输入设置的密码之后,回车: + + ![](figures/615c193f2ab3f51d914d72e9.png) + + 安装成功! 
+
+    ![](figures/615c19492ab3f51d914d8161.png)
+
+- 1.3 centos配置
+    - 1.3.1 设置系统版本
+
+        因为openGauss要求的CentOS版本是7.6,因此我们需要修改/etc/redhat-release文件:
+
+        ![](figures/615c15c42ab3f51d91484ed6.png)
+
+        ```
+        #进入管理员模式
+        su
+        #打开文件,进行编辑
+        vi /etc/redhat-release
+        ```
+
+        修改成如下内容CentOS Linux release 7.6 \(Core\):
+
+        ![](figures/615c15c42ab3f51d91484ec6.png)
+
+    - 1.3.2 网络设置
+
+        使用ifconfig或者ip addr可以查看自己的ip地址
+
+        ![](figures/615c16922ab3f51d914979b2.png)
+
+        我的网卡名字为ens33。接下来给网卡增加DNS:echo 'DNS1=114.114.114.114'>>/etc/sysconfig/network-scripts/ifcfg-ens33
+
+        重启网卡:systemctl restart network,测试是否可以访问:ping www.baidu.com
+
+        ![](figures/615c16922ab3f51d914979bf.png)
+
+        如上图所示,则可以访问。
+
+    - 1.3.3 修改主机名
+
+        ```
+        echo "vector" > /etc/hostname
+        echo "192.168.48.128 vector" >>/etc/hosts
+        ```
+
+        最后系统重启后记得查看主机名是否修改成功:
+
+        ```
+        cat /etc/hostname
+        ```
+
+    - 1.3.4 配置YUM源
+
+        删除系统自带的yum源
+
+        ```
+        rm -rf /etc/yum.repos.d/*
+        ```
+
+        下载阿里云yum源
+
+        ```
+        wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos7.repo
+        ```
+
+        生成仓库缓存
+
+        ```
+        yum makecache
+        ```
+
+        安装python3.6,一定要装3.6版本
+
+        ```
+        sudo yum install epel-release
+        sudo yum install python36
+        ```
+
+    - 1.3.5 关闭SELINUX
+
+        ```
+        sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
+        ```
+
+    - 1.3.6 设置字符集
+
+        ```
+        cat >>/etc/profile<<EOF
+        export LANG=en_US.UTF-8
+        EOF
+        ```
+
+## 2. openGauss安装
+
+- 2.1 安装前准备
+
+    我下载的安装包是[企业版2.0.0版本](https://opengauss.obs.cn-south-1.myhuaweicloud.com/2.0.0/x86/openGauss-2.0.0-CentOS-64bit-all.tar.gz)。刚开始装的是极简版,但极简版缺少所需的安装包和工具,最后回归企业版。安装过程参考了[官方文档](https://opengauss.org/zh/docs/2.0.0/docs/installation/%E4%BC%81%E4%B8%9A%E7%89%88%E5%AE%89%E8%A3%85.html)。
+
+    将下载好的安装包解压,我放在了目录/opt/software/openGauss/:
+
+    ```
+    #先创建文件夹
+    mkdir -p /opt/software/openGauss
+    #设置访问权限
+    chmod 755 -R /opt/software
+    ```
+
+    - 不建议把安装包的存放目录规划到openGauss用户的根目录或其子目录下,可能导致权限问题。
+    - openGauss用户须具有/opt/software/openGauss目录的读写权限。
+
+    在安装包所在的目录下,解压安装包openGauss-2.0.0-CentOS-64bit-all.tar.gz。安装包解压后,会有om安装包和server安装包。继续解压om安装包,会在/opt/software/openGauss路径下自动生成script子目录,并且在script目录下生成gs\_preinstall等各种om工具脚本。
+
+    建议跟我的目录放的一样,不然容易出问题,解压命令如下:
+
+    ```
+    cd /opt/software/openGauss
+    tar -zxvf openGauss-2.0.0-CentOS-64bit-all.tar.gz
+    tar -zxvf openGauss-2.0.0-CentOS-64bit-om.tar.gz
+    ```
+
+    ![](figures/615c16932ab3f51d914979dd.png)
+
+    在该目录下获取XML文件script/gspylib/etc/conf/cluster\_config\_template.xml,重命名为cluster\_config.xml放在/opt/software/openGauss/下,并将模板修改为自己的信息。模板中dataPortBase参数的15400表示设置的数据库端口号,整个模板只需要更改两点:ip地址和主机名。
+
+    (模板内容略,按本文环境修改后的完整示例见本节末尾的参考配置)
+
+    根据我的ip地址192.168.48.128和我的主机名vector更改之后,文件内容即如该参考配置所示。
+
+    执行以下命令准备安装环境:
+
+    ```
+    cd /opt/software/openGauss/script
+    ./gs_preinstall -U omm -G dbgrp -L -X /opt/software/openGauss/cluster_config.xml
+    ```
+
+    ![](figures/615c14052ab3f51d9145c394.png)
+
+    如上表示预安装成功!
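+
+    下面给出上文提到的参考配置:一个与本文环境(主机名vector、IP 192.168.48.128、端口15400、数据目录/opt/huawei/install/data/dn)对应的单机cluster\_config.xml示意,字段名请以所用版本自带的cluster\_config\_template.xml为准:
+
+    ```
+    <?xml version="1.0" encoding="UTF-8"?>
+    <ROOT>
+      <!-- 集群整体信息 -->
+      <CLUSTER>
+        <PARAM name="clusterName" value="dbCluster" />
+        <PARAM name="nodeNames" value="vector" />
+        <PARAM name="backIp1s" value="192.168.48.128" />
+        <PARAM name="gaussdbAppPath" value="/opt/huawei/install/app" />
+        <PARAM name="gaussdbLogPath" value="/var/log/omm" />
+        <PARAM name="tmpMppdbPath" value="/opt/huawei/tmp" />
+        <PARAM name="gaussdbToolPath" value="/opt/huawei/install/om" />
+        <PARAM name="corePath" value="/opt/huawei/corefile" />
+      </CLUSTER>
+      <!-- 每台服务器上的节点部署信息 -->
+      <DEVICELIST>
+        <DEVICE sn="1000001">
+          <PARAM name="name" value="vector" />
+          <PARAM name="azName" value="AZ1" />
+          <PARAM name="azPriority" value="1" />
+          <PARAM name="backIp1" value="192.168.48.128" />
+          <PARAM name="sshIp1" value="192.168.48.128" />
+          <PARAM name="dataNum" value="1" />
+          <!-- 数据库端口,即正文提到的dataPortBase -->
+          <PARAM name="dataPortBase" value="15400" />
+          <PARAM name="dataNode1" value="/opt/huawei/install/data/dn" />
+        </DEVICE>
+      </DEVICELIST>
+    </ROOT>
+    ```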
+ +- 2.2 执行安装 + - 2.2.1 检查 + + 检查安装包和openGauss配置文件在规划路径下是否已存在,如果没有,重新执行预安装,确保预安装成功,再执行以下步骤。 + + - 2.2.2 切换用户 + + 登录到openGauss的主机,并切换到omm用户: + + ``` + su omm + ``` + + omm指的是前置脚本gs\_preinstall中-U参数指定的用户。 + + 安装脚本gs\_install必须以前置脚本中指定的omm执行,否则,脚本执行会报错。 + + - 2.2.3 安装 + + 使用gs\_install安装openGauss。 + + ``` + gs_install -X /opt/software/openGauss/cluster_config.xml + ``` + + /opt/software/openGauss/cluster\_config.xml为openGauss配置文件的路径。在执行过程中,用户需根据提示输入数据库的密码,密码具有一定的复杂度,为保证用户正常使用该数据库,请记住输入的数据库密码。 + + 设置的密码要符合复杂度要求: + + - 最少包含8个字符。 + - 不能和用户名、当前密码(ALTER)、或当前密码反序相同。 + - 至少包含大写字母(A-Z),小写字母(a-z),数字,非字母数字字符(限定为\~!@\#$%^&\*\(\)-\_=+|\[\{\}\];:,<.\>/?)四类字符中的三类字符。 + + ![](figures/615c14052ab3f51d9145c3a9.png) + + 执行如下命令检查数据库状态是否正常: + + ``` + gs_om -t status + ``` + + ![](figures/615c16932ab3f51d914979e7.png) + + cluster\_state 显示“Normal”表示数据库可正常使用。 + + ![](figures/615c14a32ab3f51d9146b96f.png) + + 如首次安装数据库不成功,则卸载后重新安装,卸载方式如下: + + ``` + gs_uninstall ‐‐delete‐data + ``` + + - 2.2.4 初始化数据库 + + 使用SQL语句创建数据库database时,指定数据库的字符集为GBK。 + + ``` + #后面跟的是端口号,我的是15400 + gsql -d postgres -p 15400 + ``` + + ``` + CREATE DATABASE mydb WITH ENCODING 'GBK' template = template0; + ``` + + ![](figures/615c14a32ab3f51d9146b994.png) + + 显示如下信息: + + ``` + CREATE DATABASE + ``` + + 创建schema: + + ``` + CREATE SCHEMA tpcds; + ``` + + 创建表: + + ``` + CREATE TABLE tpcds.warehouse_t1 + ( + W_WAREHOUSE_SK INTEGER NOT NULL, + W_WAREHOUSE_ID CHAR(16) NOT NULL, + W_WAREHOUSE_NAME VARCHAR(20) , + W_WAREHOUSE_SQ_FT INTEGER , + W_STREET_NUMBER CHAR(10) , + W_STREET_NAME VARCHAR(60) , + W_STREET_TYPE CHAR(15) , + W_SUITE_NUMBER CHAR(10) , + W_CITY VARCHAR(60) , + W_COUNTY VARCHAR(30) , + W_STATE CHAR(2) , + W_ZIP CHAR(10) , + W_COUNTRY VARCHAR(20) , + W_GMT_OFFSET DECIMAL(5,2) + ); + ``` + + ![](figures/615ffa572ab3f51d91af9b67.jpg) + + 查看表信息: + + ![](figures/615ffb2b2ab3f51d91b0c00c.jpg) + + ``` + insert into tpcds.warehouse_t1(w_warehouse_sk,w_warehouse_id) values(12,'000001'); + insert into tpcds.warehouse_t1(w_warehouse_sk,w_warehouse_id) values(25,'000002'); + select w_warehouse_sk, w_warehouse_id from tpcds.warehouse_t1; + ``` + + 向数据库中添加数据之后查看: + + ![](figures/615ffbbb2ab3f51d91b187c6.jpg) + + 如果不知道自己的端口号,可根据以下方式查看: + + 查看自己的cluster\_config.xml文件,查看自己将端口号设置为了多少. + + 使用如下命令查看: + + ``` + gs_om -t status --detail + cd /opt/huawei/install/data/dn + ``` + + ![](figures/615c14a32ab3f51d9146b960.png) + + +- 2.3 JDBC连接数据库 + - 2.3.1 准备java环境 + + 查看centos的java环境,centos自带java1.8,需要安装配套的javac,注意要是1.8.0版。 + + ``` + yum install java-1.8.0-openjdk-devel.x86_64 + ``` + + 下载驱动包2.0.0版本[postgresql.jar](https://opengauss.obs.cn-south-1.myhuaweicloud.com/2.0.0/x86/openGauss-2.0.0-JDBC.tar.gz),放在路径/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.302.b08-0.el7\_9.x86\_64/jre/lib/ext下: + + ``` + cp postgresql.jar /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.302.b08-0.el7_9.x86_64/jre/lib/ext + ``` + + ![](figures/615c16f62ab3f51d914a1b92.png) + + 测试是否具备运行java代码的环境: + + ``` + java -version + javac -version + ``` + + ![](figures/615c16f62ab3f51d914a1ba8.png) + + 已具备运行环境! 
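+
+        顺带一提:把postgresql.jar复制到jre/lib/ext是利用了JDK 8的扩展目录加载机制;如果不想改动JDK目录,也可以在编译和运行时通过-cp显式指定驱动路径,效果相同(示意,假设jar位于当前目录):
+
+        ```
+        javac test.java
+        java -cp .:postgresql.jar test
+        ```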
+
+    -   2.3.2 准备好连接的java代码
+
+        记得替换成你设置的用户名、密码、端口号。如果你是按照我前面的操作,用户名应该是omm,密码是安装时设置的数据库密码。
+
+        ```
+        import java.sql.Connection;
+        import java.sql.DriverManager;
+        import java.sql.SQLException;
+
+        public class test{
+            public static Connection getConnect(String username, String passwd)
+            {
+                //驱动类。
+                String driver = "org.postgresql.Driver";
+                //数据库连接描述符。将15400替换为自己的端口号
+                String sourceURL = "jdbc:postgresql://127.0.0.1:15400/postgres";
+                Connection conn = null;
+
+                try
+                {
+                    //加载驱动。
+                    Class.forName(driver);
+                }
+                catch( Exception e )
+                {
+                    e.printStackTrace();
+                    return null;
+                }
+
+                try
+                {
+                    //创建连接。
+                    conn = DriverManager.getConnection(sourceURL, username, passwd);
+                    System.out.println("Connection succeed!");
+                }
+                catch(Exception e)
+                {
+                    e.printStackTrace();
+                    return null;
+                }
+
+                return conn;
+            };
+
+            //try to connect
+            public static void main(String[] args)
+            {
+                Connection conn = getConnect("user", "password");//replace by my user and password
+                //连接失败时getConnect返回null,直接退出,避免空指针
+                if (conn == null)
+                {
+                    return;
+                }
+                try
+                {
+                    conn.close();
+                }
+                catch (SQLException e)
+                {
+                    e.printStackTrace();
+                }
+            }
+        }
+        ```
+
+    -   2.3.3 配置服务端远程连接
+
+        以操作系统用户omm登录数据库。
+
+        配置listen\_addresses,即远程客户端连接使用的数据库主节点ip或者主机名。
+
+        使用如下命令查看数据库主节点目前的listen\_addresses配置。
+
+        ```
+        gs_guc check -I all -c "listen_addresses"
+        ```
+
+        使用如下命令把要添加的ip追加到listen\_addresses后面,多个配置项之间用英文逗号分隔。例如,追加ip地址10.11.12.13。
+
+        ```
+        gs_guc set -I all -c "listen_addresses='localhost,10.11.12.13'"
+        ```
+
+        执行如下命令重启openGauss:
+
+        ```
+        gs_om -t stop && gs_om -t start
+        ```
+
+        ![](figures/615c15482ab3f51d9147a2ba.png)
+
+    -   2.3.4 连接
+
+        首先需要启动数据库:
+
+        ```
+        su omm
+        gs_om -t start
+        ```
+
+        运行java代码:
+
+        ```
+        javac test.java
+        java test
+        ```
+
+        ![](figures/615c15482ab3f51d9147a2b3.png)
+
+    -   2.3.5 操纵数据
+
+        使用如下java代码访问并对表中数据进行查询(记得替换用户、密码和端口):
+
+        ```
+        import java.sql.Connection;
+        import java.sql.DriverManager;
+        import java.sql.ResultSet;
+        import java.sql.SQLException;
+        import java.sql.Statement;
+
+        public class gausstest{
+            public static Connection getConnect(String username, String passwd)
+            {
+                //驱动类。
+                String driver = "org.postgresql.Driver";
+                //数据库连接描述符。
+                String sourceURL = "jdbc:postgresql://127.0.0.1:15400/postgres";
+                Connection conn = null;
+
+                try
+                {
+                    //加载驱动。
+                    Class.forName(driver);
+                }
+                catch( Exception e )
+                {
+                    e.printStackTrace();
+                    return null;
+                }
+
+                try
+                {
+                    //创建连接。
+                    conn = DriverManager.getConnection(sourceURL, username, passwd);
+                    System.out.println("Connection succeed!");
+                }
+                catch(Exception e)
+                {
+                    e.printStackTrace();
+                    return null;
+                }
+
+                return conn;
+            };
+
+            //try to connect
+            public static void main(String[] args) throws SQLException
+            {
+                Connection conn = getConnect("user", "password");//replace by my user and password
+                //连接失败时直接退出,避免空指针
+                if (conn == null)
+                {
+                    return;
+                }
+                Statement st = conn.createStatement();
+                String sql = "select w_warehouse_sk,w_warehouse_id from tpcds.warehouse_t1";
+                ResultSet rs = st.executeQuery(sql);
+                while(rs.next()) {
+                    int w_warehouse_sk = rs.getInt("w_warehouse_sk");
+                    String w_warehouse_id = rs.getString("w_warehouse_id");
+                    System.out.println("w_warehouse_sk = " + w_warehouse_sk + "; w_warehouse_id = " + w_warehouse_id);
+                }
+                try
+                {
+                    //按rs、st、conn的顺序依次关闭资源
+                    rs.close();
+                    st.close();
+                    conn.close();
+                }
+                catch (SQLException e)
+                {
+                    e.printStackTrace();
+                }
+            }
+        }
+        ```
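+        编译并运行(假设文件名为gausstest.java,驱动已按2.3.1放入ext目录):
+
+        ```
+        javac gausstest.java
+        java gausstest
+        ```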
+
+        ![](figures/615ffdad2ab3f51d91b42898.jpg)
+
+
+## 3. 遇到的问题
+
+我感觉我把所有能遇到的问题都遇到了,最后一次成功反而是重装之后一遍跑通,什么问题都没遇到。
+
+-   3.1 使用gs\_ctl提示找不到命令
+
+    如下图所示:
+
+    ![](figures/615c13152ab3f51d91446977.png)
+
+    参看博客[Linux下解决命令未找到的问题 - ML。 - 博客园 \(cnblogs.com\)](https://www.cnblogs.com/mnote/p/8832806.html),对于本问题主要使用的命令是:
+
+    ```
+    #进入管理员模式
+    su
+    which gs_ctl
+    ```
+
+    ![](figures/615c16f62ab3f51d914a1b6d.png)
+
+    接下来需要做的是把查找出的路径直接链接到/usr/bin下,操作如下:
+
+    ```
+    ln -s xxx/xxx /usr/bin
+    ```
+
+    以上xxx代表你查出来的路径。
+
+    ![](figures/615c533b2ab3f51d91a72523.jpg)
+
+-   3.2 gs\_om命令找不到
+
+    不得不说,极简版安装包下没有gs\_om文件,我搜遍了也没有。我因为懒得重装,把同学下载的企业版中gs\_开头的工具文件全拷了过来,但后来遇到了其他问题,最终还是重装了,不确定这个操作会带来什么影响。
+
+-   3.3 sudo和su都用不了
+
+    起因是sudo用不了,提示sudo: must be setuid root,我进入根目录下把某个文件改成了777(类似sudo chmod -R 777 /的操作),直接导致su也用不了。这下好了,要用su让我先用sudo修改相关文件,要用sudo又让我先用su修改文件!修复过程参考了[关于不小心777导致没法sudo权限后的修改解决办法\_空木格子的博客-CSDN博客](https://blog.csdn.net/qq_39543212/article/details/84107240)。
+
+    解决这个问题需要先进入安全模式,进入方法为:在开机的过程中按shift或ESC键(好像在系统中按F1或F2也可以)。
+
+    此时已经进入到具有root权限的字符界面,输入以下命令即可解决:
+
+    ```
+    ls -l /usr/bin/sudo
+    chown root:root /usr/bin/sudo
+    chmod 4755 /usr/bin/sudo
+    ```
+
+-   3.4 预安装失败
+
+    ![](figures/615c53892ab3f51d91a7b1e6.png)
+
+    本问题先参考了链接[openGaussDB 初体验(上) - 云+社区 - 腾讯云 \(tencent.com\)](https://cloud.tencent.com/developer/article/1675265)中的以下内容,但是没有解决。
+
+    ![](figures/615c14052ab3f51d9145c37e.png)
+
+    我解决这个问题的过程是这样的:找到虚拟网络编辑器,电脑连了自己的热点(我听我同学说她用校园网就不行),然后还原默认设置:
+
+    ![](figures/615c16f62ab3f51d914a1b7d.png)
+
+    ![](figures/615c14a32ab3f51d9146b955.png)
+
+    然后配置了静态的ip地址,参考了[CentOS 7 连接不到网络解决方法\(设置静态ip\)_gaokcl的博客-CSDN博客_centos7无法连接网络](https://blog.csdn.net/gaokcl/article/details/82834925?utm_medium=distribute.pc_relevant.none-task-blog-2~default~CTRLIST~default-2.no_search_link&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2~default~CTRLIST~default-2.no_search_link)。神奇的是,这样就可以了。不过后来还是重装了。
+
+-   3.5 重装openGauss时端口被占用
+
+    报错:\[GAUSS-50601\] : The port \[15400\] is occupied or the ip address is incorrectl。有两种解决方法:
+
+    -   修改xml文件中的端口号;
+    -   杀掉占用端口的进程。
+
+-   3.6 右上角网络连接图标消失
+
+    参考了[centos7右上角网络连接图标消失_shuest的博客-CSDN博客_centos7右上角没有网络图标](https://blog.csdn.net/zs391077005/article/details/106885104?utm_medium=distribute.pc_relevant.none-task-blog-2~default~CTRLIST~default-1.no_search_link&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2~default~CTRLIST~default-1.no_search_link)
+
+    ```
+    chkconfig network off
+    chkconfig network on
+    service NetworkManager stop
+    service NetworkManager start
+    ```
+
+    但是有可能遇到后两条命令用不了的情况,我又去查了怎么办,最后也没解决,还是重装了。累了累了。
+
+-   3.7 循环显示登录界面无法进入
+
+    我最后又是进安全模式解决的:修改/etc/selinux/config配置,将SELINUX选项由SELINUX=enforcing改成SELINUX=disabled,重启系统后就可以正常登录系统了:
+
+    ![](figures/615c14052ab3f51d9145c371.png)
+
+-   3.8 Connection refused
+
+    数据库未启动时会出现如下错误;未设置服务端远程连接(见2.3.3)也会出现同样的问题:
+
+    ![](figures/615c15482ab3f51d9147a2aa.png)
+
+-   3.9 加载驱动出现问题
+
+    以下是开发流程:
+
+    ![](figures/采用JDBC开发应用程序的流程.png)
+
+    驱动需要按照2.3.1所说,放在指定文件夹下,不然在加载驱动的时候会出现问题。
+
+-   3.10 unreported exception SQLException
+
+    在本地编译java服务的时候,编译报错:未报告的异常错误;必须对其进行捕获或声明以便抛出。
+
+    ![](figures/615fff622ab3f51d91b644eb.jpg)
+
+    在方法声明上添加throws SQLException(或用try-catch捕获该异常)即可:
+
+    ![](figures/615ffeef2ab3f51d91b5bb72.jpg)
+
+
+diff --git "a/content/zh/post/2022/\345\233\275\344\272\247\345\274\200\346\272\220\346\225\260\346\215\256\345\272\223openGauss\347\232\204\345\256\211\350\243\205\350\277\220\350\241\214.md"
"b/content/zh/post/2022/\345\233\275\344\272\247\345\274\200\346\272\220\346\225\260\346\215\256\345\272\223openGauss\347\232\204\345\256\211\350\243\205\350\277\220\350\241\214.md" new file mode 100644 index 0000000000000000000000000000000000000000..0dfb1bd4d3d1f11a57815166552857383961676a --- /dev/null +++ "b/content/zh/post/2022/\345\233\275\344\272\247\345\274\200\346\272\220\346\225\260\346\215\256\345\272\223openGauss\347\232\204\345\256\211\350\243\205\350\277\220\350\241\214.md" @@ -0,0 +1,547 @@ ++++ + +title = "国产开源数据库openGauss的安装运行" + +date = "2021-12-27" + +tags = [ "国产开源数据库openGauss的安装运行"] + +archives = "2021-12" + +author = "adadaadadade" + +summary = "国产开源数据库openGauss的安装运行" + +img = "/zh/post/2022/title/img5.png" + +times = "12:30" + ++++ + +# 国产开源数据库openGauss的安装运行 + +## 步骤一:OpenGauss的安装 + +- 环境 + + OS:openEuler 20.03 64bit with ARM + + 架构:arm64 + + 部署:单机 + + +- 安装过程 + - 1、环境配置 + + 安装依赖包: + + ``` + yum install libaio-devel flex bison ncurses-devel glibc-devel patch readline-devel + ``` + + - 2、创建xml配置文件 + + 创建cluster\_config.xml配置文件并进行配置 + + ``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ``` + + 注意节点hostname应与/etc/hostname中保持一致 + + - 3、初始化安装环境 + + 1.以root用户登录待安装openGauss的任意主机,并按规划创建存放安装包的目录。 + + ``` + mkdir -p /opt/software/openGauss + chmod 755 -R /opt/software + ``` + + 2.下载安装包并将配置文件“cluster\_config.xml”都上传至上一步所创建的目录中。 + + ``` + wget https://opengauss.obs.cn-south-1.myhuaweicloud.com/2.0.1/arm/openGauss-2.0.1-openEuler-64bit-all.tar.gz + ``` + + 3.解压安装包。 + + ``` + tar -zxvf openGauss-2.0.1-openEuler-64bit-all.tar.gz + tar jxvf openGauss-2.0.1-openEuler-64bit.tar.bz2 + tar -zxvf openGauss-2.0.1-openEuler-64bit-om.tar.gz + tar -zxvf upgrade_sql.tar.gz + ``` + + 4.进入到工具脚本存放目录下。 + + ``` + cd /opt/software/openGauss/script + ``` + + 5.如果是openEuler的操作系统为确保适配python版本,执行如下命令打开gspylib/common/CheckPythonVersion.py文件,将if not pythonVersion = = \(3, 6\):修改为if not pythonVersion \> = \(3, 6\):,键入“ESC”键进入指令模式,执行\*\*:wq\*\*保存并退出修改。(我在实际操作中进入后发现无需修改) + + ``` + vi gspylib/common/CheckPythonVersion.py + ``` + + 6.如果是openEuler的操作系统,执行如下命令打开performance.sh文件,用\#注释sysctl -w vm.min\_free\_kbytes=112640 &\> /dev/null,键入“ESC”键进入指令模式,执行\*\*:wq\*\*保存并退出修改。 + + ``` + vi /etc/profile.d/performance.sh + ``` + + 7.为确保openssl版本正确,执行预安装前请加载安装包中lib库。执行命令如下,其中\_\{packagePath\}\_为用户安装包放置的路径,本示例中为/opt/software/openGauss。 + + ``` + export LD_LIBRARY_PATH=/opt/software/openGauss/script/gspylib/clib:$LD_LIBRARY_PATH + ``` + + 8.为确保成功安装,检查 hostname 与 /etc/hostname 是否一致。预安装过程中,会对hostname进行检查。 + + 9.使用gs\_preinstall准备好安装环境。若为共用环境需加入–sep-env-file=ENVFILE参数分离环境变量,避免与其他用户相互影响,ENVFILE为用户自行指定的环境变量分离文件的路径。 + + 采用交互模式执行前置,并在执行过程中自动创建操作系统root用户互信和omm用户互信: + + ./gs\_preinstall -U omm -G dbgrp -X /opt/software/openGauss/cluster\_config.xml + + 在执行中会要求输入omm用户的密码。 + + 运行结果应类似: + + ``` + plat1:/opt/software/openGauss/script # ./gs_preinstall -U omm -G dbgrp -X /opt/software/openGauss/cluster_config.xml + Parsing the configuration file. + Successfully parsed the configuration file. + Installing the tools on the local node. + Successfully installed the tools on the local node. + Are you sure you want to create trust for root (yes/no)? yes + Please enter password for root. + Password: + Creating SSH trust for the root permission user. + Checking network information. + All nodes in the network are Normal. + Successfully checked network information. + Creating SSH trust. + Creating the local key file. + Successfully created the local key files. 
+ Appending local ID to authorized_keys. + Successfully appended local ID to authorized_keys. + Updating the known_hosts file. + Successfully updated the known_hosts file. + Appending authorized_key on the remote node. + Successfully appended authorized_key on all remote node. + Checking common authentication file content. + Successfully checked common authentication content. + Distributing SSH trust file to all node. + Successfully distributed SSH trust file to all node. + Verifying SSH trust on all hosts. + Successfully verified SSH trust on all hosts. + Successfully created SSH trust. + Successfully created SSH trust for the root permission user. + Setting pssh path + Successfully set core path. + Distributing package. + Begin to distribute package to tool path. + Successfully distribute package to tool path. + Begin to distribute package to package path. + Successfully distribute package to package path. + Successfully distributed package. + Are you sure you want to create the user[omm] and create trust for it (yes/no)? yes + Please enter password for cluster user. + Password: + Please enter password for cluster user again. + Password: + Successfully created [omm] user on all nodes. + Preparing SSH service. + Successfully prepared SSH service. + Installing the tools in the cluster. + Successfully installed the tools in the cluster. + Checking hostname mapping. + Successfully checked hostname mapping. + Creating SSH trust for [omm] user. + Checking network information. + All nodes in the network are Normal. + Successfully checked network information. + Creating SSH trust. + Creating the local key file. + Successfully created the local key files. + Appending local ID to authorized_keys. + Successfully appended local ID to authorized_keys. + Updating the known_hosts file. + Successfully updated the known_hosts file. + Appending authorized_key on the remote node. + Successfully appended authorized_key on all remote node. + Checking common authentication file content. + Successfully checked common authentication content. + Distributing SSH trust file to all node. + Successfully distributed SSH trust file to all node. + Verifying SSH trust on all hosts. + Successfully verified SSH trust on all hosts. + Successfully created SSH trust. + Successfully created SSH trust for [omm] user. + Checking OS software. + Successfully check os software. + Checking OS version. + Successfully checked OS version. + Creating cluster's path. + Successfully created cluster's path. + Setting SCTP service. + Successfully set SCTP service. + Set and check OS parameter. + Setting OS parameters. + Successfully set OS parameters. + Preparing CRON service. + Successfully prepared CRON service. + Setting user environmental variables. + Successfully set user environmental variables. + Setting the dynamic link library. + Successfully set the dynamic link library. + Setting Core file + Successfully set core path. + Setting pssh path + Successfully set pssh path. + Set ARM Optimization. + No need to set ARM Optimization. + Fixing server package owner. + Setting finish flag. + Successfully set finish flag. + Preinstallation succeeded. 
+ ``` + + - 4、执行安装 + + 内存小于安装要求的32G应该做一些配置修改: + + ``` + # vim /etc/sysctl.conf + kernel.shmall = 1125899906842624 + kernel.shmmax = 1351079888211149 + + # vim /opt/huawei/install/data/db1/postgresql.conf + cstore_buffers=16MB + bulk_write_ring_size=128MB + shared_buffers=128MB + max_process_memory=2GB + max_connections=10 + ``` + + 切换到omm用户下执行安装: + + ``` + su - omm + gs_install -X /opt/software/openGauss/cluster_config.xml + ``` + + + +## 步骤二 数据库的简单链接与使用 + +- 1、创建新用户,新数据库并赋予权限 + + 使用gsql 用omm 管理账号登陆,创建新用户jack,创建新数据库testjack,赋予权限,执行 + + ``` + CREATE USER jack PASSWORD 'Gaussdba@Mpp'; + CREATE DATABASE testjack OWNER jack; + GRANT SELECT ON pg_catalog.pg_roles to jack; + GRANT SELECT ON pg_catalog.pg_user_status to jack; + GRANT ALL PRIVILEGES on TABLESPACE pg_default,pg_global TO jack; + ``` + + 然后退出,使用jack用户登录gsql + + ``` + gsql -U jack -d testjack -p "Gaussdba@Mpp" + ``` + + 创建 SCHEMA + + ``` + CREATE SCHEMA jack AUTHORIZATION jack; + ``` + + 退出gsql,赋予jack权限,这里client\_address是客户端的地址 + + ``` + gs_guc set -N all -I all -h "host all jack client_address/32 sha256 + ``` + + 或者也可以修改pg\_hba.conf,添加 + + ``` + host all jack client_address/32 sha256 + ``` + +- 2、允许客户端访问数据库 + + 执行,这里的client\_address是要客户端的地址, listen\_addresses是参数名。 + + ``` + gs_guc set -I all -c "listen_addresses='client_address'" + ``` + + 或在使用omm账号在gsql中 + + ``` + ALTER SYSTEM SET listen_addresses TO "client_address"; + ``` + + 之后重启数据库 + +- 3、关闭防火墙,打开端口 +- 4、使用Data Studio 访问数据库 + + 可在opengauss官网下载DATA STUDIO应用 + + 填入对应参数,注意这里应去掉启用SSL的选项,因为SSL还需要配置证书或密钥。 + + 连接后的界面 + +- 5、使用JDBC访问数据库 + + 我这里使用windows系统作为客户端连接数据库。 + + 在opengauss网站下载对应的JDBC包,并解压。 + + 创建Gauss.java文件 + + ``` + import java.sql.Connection; + import java.sql.DriverManager; + import java.sql.PreparedStatement; + import java.sql.ResultSet; + import java.sql.ResultSetMetaData; + import java.sql.SQLException; + import java.sql.Statement; + import java.sql.CallableStatement; + import java.sql.Types; + import java.util.Collections; + import java.util.Properties; + + public class Gauss { + + public static void main(String[] args) { + Connection connection; + ResultSet resultSet; + String url = "jdbc:postgresql://address:port/testjack"; //address 地址 port 端口 testjack 数据库名 + String user = "××××"; // 数据库用户名 + String password = "××××"; // 对应密码 + String sql; + if(args.length > 0) + { + sql = args[0]; + } + else + { + System.out.println("输入一条sql语句"); + return; + } + + if ((connection = getConnect(user, password, url)) != null) { + System.out.println(connection.toString()); + } + + if ((resultSet = execSql(connection, sql)) != null) + { + + + } + } + + // 以下代码将获取数据库连接操作封装为一个接口,可通过给定用户名和密码来连接数据库。 + public static Connection getConnect(String username, String passwd, String url) { + // 驱动类。 + String driver = "org.postgresql.Driver"; + // 数据库连接描述符。 + String sourceURL = url; + Connection conn = null; + + try { + // 加载驱动。 + Class.forName(driver); + } catch (Exception e) { + e.printStackTrace(); + return null; + } + + try { + // 创建连接。 + conn = DriverManager.getConnection(sourceURL, username, passwd); + System.out.println("Connection succeed!"); + } catch (Exception e) { + e.printStackTrace(); + return null; + } + + return conn; + }; + + // 以下代码将使用Properties对象作为参数建立连接 + public static Connection getConnectUseProp(String username, String passwd, String url) { + // 驱动类。 + String driver = "org.postgresql.Driver"; + // 数据库连接描述符。 + String sourceURL = url + "?"; + Connection conn = null; + Properties info = new Properties(); + + try { + // 加载驱动。 + Class.forName(driver); + } catch (Exception e) { + 
e.printStackTrace(); + return null; + } + + try { + info.setProperty("user", username); + info.setProperty("password", passwd); + // 创建连接。 + conn = DriverManager.getConnection(sourceURL, info); + System.out.println("Connection succeed!"); + } catch (Exception e) { + e.printStackTrace(); + return null; + } + + return conn; + }; + + public static ResultSet execSql(Connection conn, String sql) { + Statement stmt = null; + ResultSet rs = null; + SQLWarning sqlw = null; + try { + stmt = conn.createStatement(); + // 执行普通SQL语句。 + stmt.execute(sql); + if((sqlw = stmt.getWarnings()) != null) + System.out.println(sqlw.toString()); + if((rs = stmt.getResultSet()) != null) + printResultSet(rs); + + stmt.close(); + } catch (SQLException e) { + if (stmt != null) { + try { + stmt.close(); + } catch (SQLException e1) { + e1.printStackTrace(); + } + } + e.printStackTrace(); + } + return rs; + } + + + private static void printResultSet(ResultSet rs) + { + String line = ""; + try { + ResultSetMetaData rsmd = rs.getMetaData(); + for(int i = 1; i <= rsmd.getColumnCount(); i ++) + { + String label = rsmd.getColumnLabel(i).toString(); + System.out.print(label + "\t"); + line += String.join("", Collections.nCopies(label.length(), "-")) + "\t"; + } + System.out.println("\n" + line); + + while(rs.next()) + { + for(int i = 1; i <= rsmd.getColumnCount(); i ++) + { + System.out.print(rs.getObject(i).toString() + "\t"); + } + System.out.println(""); + + } + } catch (Exception e) { + e.printStackTrace(); + } + } + + } + ``` + + 编译 + + ``` + javac .\Gauss.java -encoding "utf-8" + ``` + + 运行,我这里将postgresql.jar放在同一目录下,创建一个表nt作为测试 + + ``` + java -cp ".;postgresql.jar" Gauss "CREATE TABLE nt(id INTEGER, name VARCHAR(20))" + java -cp ".;postgresql.jar" Gauss "INSERT into nt(id, name) VALUES (1,'n1'),(2,'n2'),(3,'n3');" + java -cp ".;postgresql.jar" Gauss "SELECT * FROM nt;" + ``` + + 最后一句输出结果为,可以看到成功进行了连接和操作。 + + ``` + 九月 13, 2021 11:58:25 上午 org.postgresql.core.v3.ConnectionFactoryImpl openConnectionImpl + 信息: [75000bb7-1475-4579-94cb-f53a01bec9eb] Try to connect. IP: *.*.*.*:**** + 九月 13, 2021 11:58:26 上午 org.postgresql.core.v3.ConnectionFactoryImpl openConnectionImpl + 信息: [*.*.*.*:****/*.*.*.*:****] Connection is established. ID: 75000bb7-1475-4579-94cb-f53a01bec9eb + 九月 13, 2021 11:58:26 上午 org.postgresql.core.v3.ConnectionFactoryImpl openConnectionImpl + 信息: Connect complete. ID: 75000bb7-1475-4579-94cb-f53a01bec9eb + Connection succeed! 
+ id name + -- ---- + 1 n1 + 2 n2 + 3 n3 + ``` + + diff --git "a/content/zh/post/2022/\345\237\272\344\272\216openGauss\345\255\246\344\271\240Docker.md" "b/content/zh/post/2022/\345\237\272\344\272\216openGauss\345\255\246\344\271\240Docker.md" new file mode 100644 index 0000000000000000000000000000000000000000..a95c4c20775eb13c02216c8a41f79983428a2705 --- /dev/null +++ "b/content/zh/post/2022/\345\237\272\344\272\216openGauss\345\255\246\344\271\240Docker.md" @@ -0,0 +1,700 @@ ++++ + +title = "基于openGauss学习Docker" + +date = "2022-01-07" + +tags = [ "基于openGauss学习Docker"] + +archives = "2022-01" + +author = "张玉龙 " + +summary = "基于openGauss学习Docker" + +img = "/zh/post/2022/title/img7.png" + +times = "12:30" + ++++ + +# 基于openGauss学习Docker + +学习了一些开源产品,在潜移默化的发现,是时候该学习下 Docker 了,很多产品都制作了 Docker 镜像,想测试这些产品的时候,使用 Docker 安装就会显得特别方便。 + +## 简单介绍 + +openGauss 是一款高性能、高安全、高可靠的企业级开源关系型数据库。 + +opengauss 在开源后,云和恩墨第一时间制造了docker版本。 + +![](figures/20211204-10f40098-2578-4da8-83c9-dd493f7d3111.png) + +Docker 是基于 Go 语言开发的,开源项目 + +- 官网: [https://www.docker.com/](https://www.docker.com/) +- 文档: [https://docs.docker.com/](https://docs.docker.com/) +- 仓库: [https://hub.docker.com/](https://hub.docker.com/) + +![](figures/20211204-17ff081b-5a00-4c19-974a-69a531902983.png) + +## 安装 Docker + +需要注意 Docker 支持的平台:[https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/) + +![](figures/20211204-00e0901d-e71f-46d3-95ed-9e14cb28b1ac.png) + +![](figures/20211204-32f1f188-106d-4627-8b7c-c939ddcb1c59.png) + +我的测试环境是 CentOS 7.6 + +``` +# 1. Uninstall old versions 卸载旧版本 +yum remove docker \ + docker-client \ + docker-client-latest \ + docker-common \ + docker-latest \ + docker-latest-logrotate \ + docker-logrotate \ + docker-engine + +# 2. 需要的安装包,默认是国外的地址,很慢,这里使用阿里云的 +yum install -y yum-utils + +# 3. 设置镜像的仓库 +yum-config-manager \ + --add-repo \ + https://mirrors.tuna.tsinghua.edu.cn/docker-ce/linux/centos/docker-ce.repo + +# 4. 更新软件包的索引 +yum makecache fast + +# 5. 
安装 dokcer docker-ce是指社区版 +yum install -y docker-ce docker-ce-cli containerd.io +``` + +安装的时候遇到一个小插曲,安装了python3没有修改yum-config-manager文件 + +``` +[root@mogdb ~]# yum-config-manager \ +> --add-repo \ +> https://mirrors.tuna.tsinghua.edu.cn/docker-ce/linux/centos/docker-ce.repo + File "/usr/bin/yum-config-manager", line 135 + except yum.Errors.RepoError, e: + ^ +SyntaxError: invalid syntax + +[root@mogdb ~]# which yum-config-manager +/usr/bin/yum-config-manager +[root@mogdb ~]# vi /usr/bin/yum-config-manager # 换成 python2 +#!/usr/bin/python2 -tt + +[root@mogdb ~]# yum-config-manager \ + --add-repo \ + https://mirrors.tuna.tsinghua.edu.cn/docker-ce/linux/centos/docker-ce.repo + +Loaded plugins: fastestmirror +adding repo from: https://mirrors.tuna.tsinghua.edu.cn/docker-ce/linux/centos/docker-ce.repo +grabbing file https://mirrors.tuna.tsinghua.edu.cn/docker-ce/linux/centos/docker-ce.repo to /etc/yum.repos.d/docker-ce.repo +repo saved to /etc/yum.repos.d/docker-ce.repo +``` + +启动并测试 Docker + +``` +# 启动dokcer +[root@mogdb ~]# systemctl start docker +[root@mogdb ~]# docker version +Client: Docker Engine - Community + Version: 20.10.11 + API version: 1.41 + Go version: go1.16.9 + Git commit: dea9396 + Built: Thu Nov 18 00:38:53 2021 + OS/Arch: linux/amd64 + Context: default + Experimental: true + +Server: Docker Engine - Community + Engine: + Version: 20.10.11 + API version: 1.41 (minimum version 1.12) + Go version: go1.16.9 + Git commit: 847da18 + Built: Thu Nov 18 00:37:17 2021 + OS/Arch: linux/amd64 + Experimental: false + containerd: + Version: 1.4.12 + GitCommit: 7b11cfaabd73bb80907dd23182b9347b4245eb5d + runc: + Version: 1.0.2 + GitCommit: v1.0.2-0-g52b36a2 + docker-init: + Version: 0.19.0 + GitCommit: de40ad0 + +# 测试 hello-world +[root@mogdb ~]# docker run hello-world + +# 查看下载的 hello-world 镜像 +[root@mogdb ~]# docker images +REPOSITORY TAG IMAGE ID CREATED SIZE +hello-world latest feb5d9fea6a5 2 months ago 13.3kB +``` + +## 卸载 Docker + +``` +# 1. 卸载 +yum remove docker-ce docker-ce-cli containerd.io +# 2. 删除 +rm -rf /var/lib/docker +rm -rf /var/lib/containerd +``` + +## 配置阿里云镜像加速 + +``` +mkdir -p /etc/docker +tee /etc/docker/daemon.json <<-'EOF' +{ + "registry-mirrors": ["https://xe6vk78x.mirror.aliyuncs.com"] +} +EOF +systemctl daemon-reload +systemctl restart docker +``` + +## Docker为什么比 VM 快 + +![](figures/20211204-c6e730da-357c-4f03-92d7-95d47f015284.png) + +Docker 有着比虚拟机更少的抽象层 + +Docker 利用的是宿主机的内核,vm需要虚拟机操作系统 + +## Docker的常用命令 + +![](figures/20211204-a73fa928-786e-406e-8289-c87c4275ab5f.png) + +## 基于openGauss 学习这些命令 + +- 基础命令 + + ``` + docker version # 显示 docker 的版本信息 + docker info # 显示 docker 的系统信息,包括镜像和容器的数量 + docker command --help # 帮助 + ``` + +- 镜像命令 + - 搜索镜像,搜索 opengauss 的镜像 + + ``` + [root@mogdb ~]# docker search opengauss + NAME DESCRIPTION STARS OFFICIAL AUTOMATED + enmotech/opengauss openGauss latest images created by Enmotech 12 + ... ... 
+ # 可选项 + --filter=STARS=3000 # 搜索出来的镜像就是STARS(收藏数)大于3000的 + ``` + + - 下载镜像, 下载 opengauss 的镜像 + + ``` + # Usage: docker pull [OPTIONS] NAME[:TAG|@DIGEST] + [root@mogdb ~]# docker pull enmotech/opengauss + Using default tag: latest # 如果不写tag, 默认就是latest + latest: Pulling from enmotech/opengauss + 284055322776: Pull complete # 分成下载,docker images的核心,联合文件系统 + a7ca82b898d7: Pull complete + 2f93c23d8eb5: Pull complete + 3842013b7685: Pull complete + 6bc7e92855e3: Pull complete + 39c9c4e5b487: Pull complete + 1f9d76df94b5: Pull complete + 44db1c59ef84: Pull complete + 63ab02376fd3: Pull complete + cf751b0b3be9: Pull complete + 9dc428e2c8b4: Pull complete + Digest: sha256:d5a3e38fa2553a44e7fa1cd5cad0b4f0845a679858764067d7b0052a228578a0 # 签名 + Status: Downloaded newer image for enmotech/opengauss:latest + docker.io/enmotech/opengauss:latest # 真实地址 + + # 指定版本下载 + [root@mogdb ~]# docker pull enmotech/opengauss:2.0.1 + ``` + + - 查看镜像 + + ``` + [root@mogdb ~]# docker images + REPOSITORY TAG IMAGE ID CREATED SIZE + enmotech/opengauss latest b4dd24d09223 2 months ago 383MB + enmotech/opengauss 2.0.1 c3860afd8014 3 months ago 404MB + + # 可选项 + -a, --all Show all images (default hides intermediate images) + -q, --quiet Only show image IDs + ``` + + - 删除镜像 + + ``` + [root@mogdb ~]# docker rmi -f c3860afd8014 + Untagged: enmotech/opengauss:2.0.1 + Untagged: enmotech/opengauss@sha256:d156596b2900f7eda102aadfd951daad97412b610b96d3dd97d2cdd9d5b70024 + Deleted: sha256:c3860afd80148a6cfbb50269ef47f87257f2ed3fbf143f52b861303b98834833 + Deleted: sha256:193b45dffb62df01fa6c74bef9cf21774fdd550c5995f02bef28f30070db6859 + Deleted: sha256:32ba36efbf27ac2f485d1915fea35ec2a17c9d1b19d373d1edf49fd0f4b6a8de + Deleted: sha256:ca0fd6097e9cf0aae5a1d5047f9b6bda30305a13396313b5cd021530be69bc9d + Deleted: sha256:5d7f04d4882448ed954afc8d797069f3aede8ccc65a650086b9436f75fa11700 + Deleted: sha256:40a09fca023bf0d0353606c2684ba47d73979ffc6cae2dd4a4953d5796c8cb0d + Deleted: sha256:8828e1e7978fba035a5305d8684b94ed322842ed095eb46bffcdef17ad2e091a + Deleted: sha256:e7c2553c8389d79197d6c3ba7c731292cd772588d252683cf706cb660c6e46f0 + Deleted: sha256:d2292dd078208e84e70124123ffc4ebac5c304816a753db61da04f1e7d8a3663 + Deleted: sha256:8d78df12722212e140ae7ba4441c7f9a36365074779b6ca880f097c6e237f9e3 + Deleted: sha256:4f785c07c19d588e80313fc0ee644a19ac6e17a550a9e694c22babc355152367 + Deleted: sha256:21639b09744fc39b4e1fe31c79cdf54470afe4d7239a517c4060bd181f8e3039 + + # 通过ID删除全部容器 + [root@mogdb ~]# docker rmi -f $(docker images -aq) + ``` + + + +- 容器命令 + - 新建容器并启动 + + ``` + # Usage: docker run [OPTIONS] IMAGE [COMMAND] [ARG...] + # 参数说明 + --name # 容器名字,区分容器 + --privileged=true # 使用该参数,容器内的 root 拥有真正的 root 权限。 + -d # 后台方式运行 + -e # 设置环境变量 + -it # 使用交互方式运行,进入容器查看内容 + -p(小) # 容器和宿主机的端口映射 + -p ip:主机端口:容器端口 + -p 主机端口:容器端口 (常用) + -p 容器端口 + -P(大) # 随机指定端口 + + # 新建 opengauss 的容器并后台启动,配置密码 Enmo@123,端口映射 + # docker run --name opengauss --privileged=true -d -e GS_PASSWORD=Enmo@123 -p 5432:5432 enmotech/opengauss:latest + ad8892ff8b45fc3329ed76afd634de136ec7b67fb2ba02221a0ee8886ee932b8 + ``` + + - 列出所有运行的容器 + + ``` + # docker ps + -a # 列出当前正在运行的容器 + 历史运行过的容器 + -n=? 
# 显示最近创建的容器 + -q # 只显示容器的编号 + + [root@mogdb ~]# docker ps + CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES + ad8892ff8b45 enmotech/opengauss:latest "entrypoint.sh gauss…" 5 minutes ago Up 5 minutes 0.0.0.0:5432->5432/tcp, :::5432->5432/tcp opengauss + ``` + + - 启停容器 + + ``` + docker start 容器id + docker restart 容器id + docker stop 容器id + docker kill 容器id + ``` + + - 进入当前正在运行的容器 + + ``` + docker exec -it 容器id /bin/bash # 进入容器后开启一个新的终端,可以在里面操作(常用) + docker attach 容器id # 进入容器正在执行的终端,不会启动新的进程 + + # 进入 opengauss 的容器内,在数据库中创建普通用户,测试外部连接 + [root@mogdb ~]# docker exec -it ad8892ff8b45 /bin/bash + root@ad8892ff8b45:/# su - omm + omm@ad8892ff8b45:~$ gsql + gsql ((openGauss 2.1.0 build 590b0f8e) compiled at 2021-09-30 14:29:04 commit 0 last mr ) + Non-SSL connection (SSL connection is recommended when requiring high-security) + Type "help" for help. + + omm=# CREATE USER tpcc_usr WITH PASSWORD "tpcc@1234"; + NOTICE: The encrypted password contains MD5 ciphertext, which is not secure. + CREATE ROLE + omm=# alter user tpcc_usr sysadmin; + ALTER ROLE + omm=# GRANT ALL ON schema public TO tpcc_usr; + GRANT + ``` + + ![](figures/20211204-fc1c14b8-f666-4600-b21e-b73aec582740.png) + + - 退出容器 + + ``` + exit # 退出并停止容器,后台运行的容器不会停止 + Ctrl + P + Q # 容器不停止的退出 + ``` + + - 删除容器 + + ``` + docker rm 容器ID # 删除指定的容器,不能删除正在运行的容器,如果强制删除 rm -f + docker rm -f $(docker ps -aq) # 删除所有容器 + docker ps -aq|xargs docker rm # 删除所有容器 + ``` + + +- 其他命令 + - 查看日志 + + ``` + [root@mogdb ~]# docker logs -f -t --tail 10 ad8892ff8b45 + 2021-12-04T12:24:31.809995352Z 2021-12-04 12:24:31.809 [unknown] [unknown] localhost 140460925998016 0[0:0#0] 0 [BACKEND] LOG: the configure file /usr/local/opengauss/etc/gscgroup_omm.cfg doesn't exist or the size of configure file has changed. Please create it by root user! + 2021-12-04T12:24:31.810007421Z 2021-12-04 12:24:31.809 [unknown] [unknown] localhost 140460925998016 0[0:0#0] 0 [BACKEND] LOG: Failed to parse cgroup config file. + 2021-12-04T12:24:31.831906329Z 2021-12-04 12:24:31.831 [unknown] [unknown] localhost 140460925998016 0[0:0#0] 0 [EXECUTOR] WARNING: Failed to obtain environment value $GAUSSLOG! + 2021-12-04T12:24:31.831931488Z 2021-12-04 12:24:31.831 [unknown] [unknown] localhost 140460925998016 0[0:0#0] 0 [EXECUTOR] DETAIL: N/A + 2021-12-04T12:24:31.831934584Z 2021-12-04 12:24:31.831 [unknown] [unknown] localhost 140460925998016 0[0:0#0] 0 [EXECUTOR] CAUSE: Incorrect environment value. + 2021-12-04T12:24:31.831936999Z 2021-12-04 12:24:31.831 [unknown] [unknown] localhost 140460925998016 0[0:0#0] 0 [EXECUTOR] ACTION: Please refer to backend log for more details. + 2021-12-04T12:24:31.833046968Z 2021-12-04 12:24:31.832 [unknown] [unknown] localhost 140460925998016 0[0:0#0] 0 [EXECUTOR] WARNING: Failed to obtain environment value $GAUSSLOG! + 2021-12-04T12:24:31.833057677Z 2021-12-04 12:24:31.832 [unknown] [unknown] localhost 140460925998016 0[0:0#0] 0 [EXECUTOR] DETAIL: N/A + 2021-12-04T12:24:31.833060758Z 2021-12-04 12:24:31.832 [unknown] [unknown] localhost 140460925998016 0[0:0#0] 0 [EXECUTOR] CAUSE: Incorrect environment value. + 2021-12-04T12:24:31.833063164Z 2021-12-04 12:24:31.832 [unknown] [unknown] localhost 140460925998016 0[0:0#0] 0 [EXECUTOR] ACTION: Please refer to backend log for more details. + ``` + + - 查看容器内的进程信息 + + ``` + [root@mogdb ~]# docker top ad8892ff8b45 + UID PID PPID C STIME TTY TIME CMD + 70 26782 26762 2 20:24 ? 
00:00:21 gaussdb + ``` + + - 查询所有容器的资源使用信息 + + ``` + [root@mogdb ~]# docker stats + CONTAINER ID NAME CPU % MEM USAGE / LIMIT MEM % NET I/O BLOCK I/O PIDS + ad8892ff8b45 opengauss 2.65% 374.9MiB / 7.62GiB 4.80% 10.2kB / 58.2kB 6.77MB / 38.1MB 34 + ``` + + - 查看容器的元数据 + + ``` + [root@mogdb ~]# docker inspect ad8892ff8b45 + ``` + + - 从容器内复制文件到宿主机上 + + ``` + docker cp 容器id:PWD/file /home + ``` + + + +## Docker镜像加载原理 + +[https://blog.csdn.net/pjsdsg/article/details/90445128](https://blog.csdn.net/pjsdsg/article/details/90445128) + +- 容器的快照 commit + + ``` + docker commit # 保存容器成为一个新的副本 + docker commit -m="提交的描述信息" -a="作者” 容器id 目标镜像名:[TAG] + ``` + + 测试 commit,前面为了测试外部连接数据库,在数据库中创建了一个普通用户 tpcc\_usr ,下面保存这个容器成为一个新的副本 + + ``` + [root@mogdb ~]# docker commit -a="create database user tpcc" ad8892ff8b45 opengauss:1.0 + sha256:1e760f8f2f3ddf707cb661bdd8549728bdb0ecd83d1166c6f9f350880464c654 + [root@mogdb ~]# docker images + REPOSITORY TAG IMAGE ID CREATED SIZE + opengauss 1.0 1e760f8f2f3d 15 seconds ago 1.21GB #<<<<<<< + enmotech/opengauss latest b4dd24d09223 2 months ago 383MB + ``` + + 使用新创建的副本来新建一个容器,查看用户是否存在 + + ``` + # 容器名opengauss2,端口映射为5433,使用镜像ID是1e760f8f2f3d + [root@mogdb ~]# docker run --name opengauss2 --privileged=true -d -e GS_PASSWORD=Enmo@123 -p 5433:5432 1e760f8f2f3d + 0a1c49aaa9114f19e33fef20753be092f923ffe558aa1d4251c55d3948dff486 + [root@mogdb ~]# docker ps + CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES + 0a1c49aaa911 1e760f8f2f3d "entrypoint.sh gauss…" 8 seconds ago Up 8 seconds 0.0.0.0:5433->5432/tcp, :::5433->5432/tcp opengauss2 #<<<<<<< + ad8892ff8b45 enmotech/opengauss:latest "entrypoint.sh gauss…" 46 minutes ago Up 37 minutes 0.0.0.0:5432->5432/tcp, :::5432->5432/tcp opengauss + + [root@mogdb ~]# docker exec -it 0a1c49aaa911 /bin/bash + root@0a1c49aaa911:/# su - omm + omm@0a1c49aaa911:~$ gsql + gsql ((openGauss 2.1.0 build 590b0f8e) compiled at 2021-09-30 14:29:04 commit 0 last mr ) + Non-SSL connection (SSL connection is recommended when requiring high-security) + Type "help" for help. + + omm=# \du + List of roles + Role name | Attributes | Member of + -----------+------------------------------------------------------------------------------------------------------------------+----------- + gaussdb | Sysadmin | {} + omm | Sysadmin, Create role, Create DB, Replication, Administer audit, Monitoradmin, Operatoradmin, Policyadmin, UseFT | {} + tpcc_usr | Sysadmin | {} + + omm=# + ``` + + tpcc\_usr 用户存在,使用外部工具测试连接 + + ![](figures/20211204-ba7b78a2-3978-45b4-b868-61334e4087f2.png) + +- 容器数据卷,持久化数据 + + ![](figures/20211204-f08b84a5-2be7-4bc4-826a-397c9ad77d79.png) + + 查看 openGauss 的数据文件路径 + + ``` + omm@0a1c49aaa911:~$ gsql + gsql ((openGauss 2.1.0 build 590b0f8e) compiled at 2021-09-30 14:29:04 commit 0 last mr ) + Non-SSL connection (SSL connection is recommended when requiring high-security) + Type "help" for help. 
+ + omm=# show data_directory; + data_directory + ------------------------- + /var/lib/opengauss/data + (1 row) + ``` + + 指定路径挂载 + + ``` + # 指定路径挂载,ro只读,rw读写,设置 ro ,只能在宿主机上执行写操作,容器内部是只读的 + docker run -it -v 主机目录:容器内目录:ro/rw + + # 测试 + # mkdir /root/data + docker run --name opengauss03 --privileged=true -d \ + -e GS_PASSWORD=Enmo@123 -p 5434:5432 \ + -v /root/data:/var/lib/opengauss/data \ + 1e760f8f2f3d + + ee6e88d47dd90c7efbae1d85e33cedee3d649e33518a64f3ae99d6b8575bffb8 + + [root@mogdb ~]# docker inspect ee6e88d47dd9 + + "Mounts": [ + { + "Type": "bind", + "Source": "/root/data", + "Destination": "/var/lib/opengauss/data", + "Mode": "", + "RW": true, + "Propagation": "rprivate" + } + ], + + [root@mogdb ~]# cd /root/data/ + [root@mogdb data]# ll + total 4900 + drwx------ 3 70 70 21 Dec 4 11:51 asp_data + drwx------ 7 70 70 71 Dec 4 11:51 base + -rw------- 1 70 70 72 Dec 4 21:16 gaussdb.state + drwx------ 2 70 70 4096 Dec 4 21:17 global + -rw------- 1 70 70 354 Dec 4 11:50 gs_gazelle.conf + drwx------ 3 70 70 21 Dec 4 11:51 gs_profile + -rw------- 1 70 70 4915200 Dec 4 11:51 gswlm_userinfo.cfg + -rw------- 1 70 70 20238 Dec 4 11:51 mot.conf + drwx------ 3 70 70 50 Dec 4 11:51 pg_audit + drwx------ 2 70 70 26 Dec 4 11:50 pg_clog + drwx------ 2 70 70 26 Dec 4 11:50 pg_csnlog + -rw------- 1 70 70 0 Dec 4 11:51 pg_ctl.lock + drwx------ 2 70 70 6 Dec 4 11:50 pg_errorinfo + -rw------- 1 70 70 4553 Dec 4 11:51 pg_hba.conf + -rw------- 1 70 70 1636 Dec 4 11:50 pg_ident.conf + drwx------ 4 70 70 39 Dec 4 11:50 pg_llog + drwx------ 2 70 70 6 Dec 4 11:50 pg_location + drwx------ 2 70 70 126 Dec 4 21:16 pg_log + drwx------ 4 70 70 36 Dec 4 11:50 pg_multixact + drwx------ 2 70 70 26 Dec 4 21:16 pg_notify + drwx------ 3 70 70 21 Dec 4 11:51 pg_perf + drwx------ 2 70 70 6 Dec 4 11:50 pg_replslot + drwx------ 2 70 70 6 Dec 4 11:50 pg_serial + drwx------ 2 70 70 6 Dec 4 11:50 pg_snapshots + drwx------ 2 70 70 25 Dec 4 22:10 pg_stat_tmp + drwx------ 2 70 70 6 Dec 4 11:50 pg_tblspc + drwx------ 2 70 70 6 Dec 4 11:50 pg_twophase + -rw------- 1 70 70 4 Dec 4 11:50 PG_VERSION + drwx------ 3 70 70 4096 Dec 4 11:51 pg_xlog + -rw------- 1 70 70 31669 Dec 4 11:51 postgresql.conf + -rw------- 1 70 70 1024 Dec 4 11:50 postgresql.conf.lock + -rw------- 1 70 70 33 Dec 4 21:16 postmaster.opts + -rw------- 1 70 70 68 Dec 4 21:16 postmaster.pid + drwx------ 3 70 70 21 Dec 4 11:51 sql_monitor + drwx------ 5 70 70 67 Dec 4 21:16 undo + ``` + + 具名和匿名挂载 + + ``` + # 匿名挂载 -v 容器内的路径 + docker run --name opengauss04 --privileged=true -d \ + -e GS_PASSWORD=Enmo@123 -p 5435:5432 \ + -v /var/lib/opengauss/data \ + 1e760f8f2f3d + + "Mounts": [ + { + "Type": "volume", + "Name": "e1f39b76c16ef76392b3a3a8312edc0f8c3e033c8c59d6ab60a6429c20236f62", + "Source": "/var/lib/docker/volumes/e1f39b76c16ef76392b3a3a8312edc0f8c3e033c8c59d6ab60a6429c20236f62/_data", + "Destination": "/var/lib/opengauss/data", + "Driver": "local", + "Mode": "", + "RW": true, + "Propagation": "" + } + ], + + # 查看所有 volume 的情况 + [root@mogdb ~]# docker volume ls + DRIVER VOLUME NAME + local 6cede63c42f882b1044b13c0aa20dd788eda6764940b9b8054db9e15087569a3 + local 20df1e593053e108028cd2ada3084042b2f0d96827f236ea809f1b6663d90ef4 + local a1601a649c6828db873110887ade959f86fdf18ccfd6e25c972a4edde661fd35 + local a20478a2a42c64f4ac332f7067acdd5dd72e67ab7b3d8a85e609aaa4cc35d4bf + local df1f97eda08c32d45f11a0faff8522e564ed2442274e6e0609fed30c3947b06b + local e1f39b76c16ef76392b3a3a8312edc0f8c3e033c8c59d6ab60a6429c20236f62 + + # 具名挂载 + docker run --name opengauss05 
--privileged=true -d \ + -e GS_PASSWORD=Enmo@123 -p 5436:5432 \ + -v juming:/var/lib/opengauss/data \ + 1e760f8f2f3d + + "Mounts": [ + { + "Type": "volume", + "Name": "juming", + "Source": "/var/lib/docker/volumes/juming/_data", + "Destination": "/var/lib/opengauss/data", + "Driver": "local", + "Mode": "z", + "RW": true, + "Propagation": "" + } + ], + + # 查看挂载的具体位置 + [root@mogdb ~]# docker volume inspect juming + [ + { + "CreatedAt": "2021-12-04T22:17:29+08:00", + "Driver": "local", + "Labels": null, + "Mountpoint": "/var/lib/docker/volumes/juming/_data", + "Name": "juming", + "Options": null, + "Scope": "local" + } + ] + + [root@mogdb ~]# docker volume ls + DRIVER VOLUME NAME + local 6cede63c42f882b1044b13c0aa20dd788eda6764940b9b8054db9e15087569a3 + local 20df1e593053e108028cd2ada3084042b2f0d96827f236ea809f1b6663d90ef4 + local a1601a649c6828db873110887ade959f86fdf18ccfd6e25c972a4edde661fd35 + local a20478a2a42c64f4ac332f7067acdd5dd72e67ab7b3d8a85e609aaa4cc35d4bf + local df1f97eda08c32d45f11a0faff8522e564ed2442274e6e0609fed30c3947b06b + local e1f39b76c16ef76392b3a3a8312edc0f8c3e033c8c59d6ab60a6429c20236f62 + local juming + ``` + + +## DockerFile 制作 openGauss 镜像,源码中已经提供制作脚本,可以直接用 + +- 下载安装包: [https://opengauss.org/zh/download.html](https://opengauss.org/zh/download.html) + + ![](figures/20211204-eb905549-76da-4976-aaa6-dfef16877d00.png) + +- 下载源码包: [https://gitee.com/opengauss/openGauss-server?\_from=gitee\_search](https://gitee.com/opengauss/openGauss-server?_from=gitee_search) + + ![](figures/20211204-7e5f33ac-8420-463d-9639-f67586ad76ed.png) + +- 将安装包和源码包上传 Docker 服务器 + + ``` + [root@mogdb ~]# ll openGauss* + -rw-r--r-- 1 root root 100623501 Dec 4 22:35 openGauss-2.1.0-CentOS-64bit-all.tar.gz + -rw-r--r-- 1 root root 193144438 Dec 4 22:40 openGauss-server-master.zip + ``` + + ``` + # 解压源码包和安装包 + [root@mogdb ~]# unzip openGauss-server-master.zip + [root@mogdb ~]# tar -zxvf openGauss-2.1.0-CentOS-64bit-all.tar.gz + + # 准备目录和文件 + [root@mogdb ~]# cd /root/openGauss-server-master/docker/dockerfiles + [root@mogdb dockerfiles]# mkdir 2.1.0 + [root@mogdb dockerfiles]# cp 1.1.0/* 2.1.0/ + [root@mogdb dockerfiles]# cp /root/openGauss-2.1.0-CentOS-64bit.tar.bz2 2.1.0/ + + # 修改 dockerfile_amd 文件 + [root@mogdb dockerfiles]# sed -i "s/openGauss-1.1.0-CentOS-64bit.tar.bz2/openGauss-2.1.0-CentOS-64bit.tar.bz2/g" 2.1.0/dockerfile_amd + + # 创建 opengauss 镜像 + [root@mogdb dockerfiles]# sh buildDockerImage.sh -v 2.1.0 -i + + Successfully built e336672f2857 + Successfully tagged opengauss:2.1.0 + + + openGauss Docker Image 2.1.0 is ready to be extended: + + --> opengauss:2.1.0 + + Build completed in 42 seconds. 
+ + # 生成opengauss容器 + [root@mogdb ~]# docker images + REPOSITORY TAG IMAGE ID CREATED SIZE + opengauss 2.1.0 e336672f2857 46 seconds ago 616MB + opengauss 1.0 1e760f8f2f3d 3 hours ago 1.21GB + enmotech/opengauss latest b4dd24d09223 2 months ago 383MB + centos 7.6.1810 f1cb7c7d58b7 2 years ago 202MB + + [root@mogdb ~]# docker run --name opengauss10 --privileged=true -d -e GS_PASSWORD=Enmo@123 -p 5866:5432 -v /var/lib/opengauss opengauss:2.1.0 + 30124a1b285a6fe92b4ea55bc340603148e5ba52db481aacf23354e242cfaa9c + [root@mogdb ~]# docker ps + CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES + 30124a1b285a opengauss:2.1.0 "entrypoint.sh gauss…" 11 seconds ago Up 10 seconds 0.0.0.0:5866->5432/tcp, :::5866->5432/tcp opengauss10 + + # 登录opengauss容器,创建用户 + ​```sh + [root@mogdb ~]# docker exec -it 30124a1b285a /bin/bash + [root@72094285e528 /]# su - omm + [omm@72094285e528 ~]$ gsql + gsql ((openGauss 2.1.0 build 590b0f8e) compiled at 2021-09-30 14:29:04 commit 0 last mr ) + Non-SSL connection (SSL connection is recommended when requiring high-security) + Type "help" for help. + + omm=# CREATE USER tpcc_usr WITH PASSWORD "tpcc@1234"; + NOTICE: The encrypted password contains MD5 ciphertext, which is not secure. + CREATE ROLE + omm=# alter user tpcc_usr sysadmin; + ALTER ROLE + omm=# GRANT ALL ON schema public TO tpcc_usr; + GRANT + ``` + + ![](figures/20211204-ae369c99-359e-419f-a4c2-9dba1f855cd5.png) + + +先写到这吧,其实还有好多内容,后面有时间再写续篇 + diff --git "a/content/zh/post/2022/\345\273\272\350\256\256\346\224\266\350\227\217-\345\234\250openGauss\344\270\212\351\201\207\345\210\260\346\205\242SQL\350\257\245\346\200\216\344\271\210\345\212\236.md" "b/content/zh/post/2022/\345\273\272\350\256\256\346\224\266\350\227\217-\345\234\250openGauss\344\270\212\351\201\207\345\210\260\346\205\242SQL\350\257\245\346\200\216\344\271\210\345\212\236.md" new file mode 100644 index 0000000000000000000000000000000000000000..8f3275139edb77ebaa931d512fb66ada93114253 --- /dev/null +++ "b/content/zh/post/2022/\345\273\272\350\256\256\346\224\266\350\227\217-\345\234\250openGauss\344\270\212\351\201\207\345\210\260\346\205\242SQL\350\257\245\346\200\216\344\271\210\345\212\236.md" @@ -0,0 +1,654 @@ ++++ + +title = "建议收藏 | 在openGauss上遇到慢SQL该怎么办?" + +date = "2022-06-28" + +tags = [ "慢SQL"] + +archives = "2022-05" + +author = "王天庆" + +summary = "openGauss上遇到慢SQL该怎么办" + +img = "/zh/post/2022/title/img7.png" + +times = "12:30" + ++++ + + + +# 建议收藏 | 在openGauss上遇到慢SQL该怎么办? + ++++ + +在数据库的日常使用中,难免会遇到慢SQL。 遇到慢SQL本身并不可怕,困难之处在于如何识别慢SQL并对其优化,使它不至于拖慢整个系统的性能,避免危害到日常业务的正常进行。 + +对不同的数据库来说,由于其系统架构的差异、代码实现的不同,很多慢SQL解决“套路”往往是无法直接复用的。而由于历史原因,很多国内互联网公司的关系型数据库系统都是MySQL, 这也导致网络上关于MySQL数据库的慢SQL分析文章很多,而其他数据库就少得可怜了。为此,我们梳理了在openGauss上进行慢SQL分析的经验,并总结了下来,希望能给openGauss的用户一些启发。openGauss的数据库自治运维系统DBMind也已经初步具备了慢SQL根因分析的能力,感兴趣的读者也可以尝试一下。 + +首先,我们可以通过设置GUC参数log\_min\_duration\_statement 来指定openGauss系统监控的慢SQL阈值。同时,我们也应调大instr\_unique\_sql\_count的数值,以免出现“missing SQL statement, GUC instr\_unique\_sql\_count is too small.”的提示。这里以设置慢SQL检测阈值为5秒(默认数值单位是毫秒)为例: + +``` +gs_guc reload -D $PGDATA -c 'log_min_duration_statement = 5000' -c 'instr_unique_sql_count = 2000' +``` + +然后执行一个慢SQL,可以在dbe\_perf.statement\_history视图中查看到结果: + +``` +select pg_sleep(6); -- 构造的慢SQL +select * from dbe_perf.statement_history order by start_time desc; +``` + +有了上述方法,我们就可以轻易在openGauss数据库中监控到慢SQL了,接下来可以通过下文的方法来分析慢SQL的产生原因。 + +## 索引原因导致的慢SQL + +由索引原因引起的慢SQL在绝大多数数据库系统中都是十分常见的,甚至可以列为第一大慢SQL问题来源。简单来说,大致存在以下几种情况: + +1. 缺乏有效索引 +2. 执行计划没有选择索引扫描,即索引失效 +3. 
冗余索引 + +## 缺乏有效索引 + +对于缺乏有效索引的场景,在解决问题时,可以先从SQL语句本身入手,绝大多数此类SQL语句都是SELECT语句,且该类SQL语句涉及到的表数据量较多,且谓词上没有创建索引,导致数据库系统需要通过全盘扫描来获取数据。对于该情况,一般的做法往往比较“暴力”,即直接在WHERE子句、JOIN子句等涉及到的字段上创建索引。一般存在于WHERE子句中的简单比较都是可以使用索引扫描的,因此在该涉及到的字段上创建索引可能是有效的。但是,索引也并非是创建得越多越好(后面我们会提到冗余索引的情况),在创建索引时需要在选择度较高、数据量不是特别少的字段上创建索引,否则该索引收益不大。 + +对于单语句的索引推荐,openGauss数据库已经内置了该功能,用户可以通过调用系统函数gs\_index\_advise\(\) 进行推荐,例如: + +``` +select * from gs_index_advise('select * from t1 where a > 1'); +``` + +单语句索引推荐的核心逻辑可以表示为: + +1. 提取JOIN类算子中的连接条件,保存为连接关系; +2. 提取Filter类算子中的过滤条件,保存为过滤关系; +3. 分析过滤关系中涉及字段的选择度和数据量,将评估适合创建索引的字段加入到候选索引列表中; +4. 分析连接关系,根据表的结果集大小确定驱动表,根据连接关系,将被驱动表中涉及的字段加入到候选索引列表中; +5. 提取Aggregate类算子涉及的字段,将该字段加入到候选索引列表中; +6. 提取Sort算子涉及的字段,将该字段加入到候选索引列表中; +7. 评估候选索引列表中的全部字段,过滤重复索引,合并相关索引; +8. 输出最终索引推荐的结果。 + + 对于推荐出来的候选索引,用户可以自行决策是否创建,也可以通过openGauss的虚拟索引功能来评估索引收益,进行辅助决策。 + + 对于单语句的索引推荐,业内也有不少开源的工具。不过,该类工具多数基于MySQL数据库实现(如美团开源的SQL Advisor)。同时,在索引推荐的层次上,该类工具使用的是对SQL语句进行语法解析后的结果,即根据SQL语句的抽象语法树(Abstract Syntax Tree, AST)进行索引推荐。然而,openGauss的索引推荐功能还可以建立在查询解析之后的查询树(Query Tree)的基础上进行索引推荐,也就是说,openGauss的索引推荐是建立在算子粒度上的。这样,某些被优化器改写的SQL语句(如exists, in 子查询),也可以被轻易地捕获并进行索引推荐,而前文提到的基于AST进行索引推荐的工具是很难实现的。 + + +## 索引失效 + +就索引失效而言,一般存在以下六种情况: + +1. 联合索引(又叫复合索引、多列索引)的最左匹配原则失效:同MySQL类似,openGauss的联合索引也满足最左匹配原则,如果查询不满足最左匹配原则,数据库优化器会倾向于放弃选择该索引扫描; +2. 使用了SELECT \*: 除了老生常谈的可能扫描到不需要的字段之外,使用该写法还有可能导致openGauss的IndexOnlyScan 失效(在MySQL中称为CoveringIndex),也可能导致索引扫描后进行不必要的回表; +3. 谓词中的索引列参与了运算:这个问题一般不会出现在openGauss数据库中,这是因为openGauss的rewrite过程可以将该写法进行改写。但是openGauss的rewrite过程是基于规则进行的,某些情况下会存在改写匹配不上的情况,例如把WHERE子句的中谓词变得复杂一点就可能出现改写失效,进而导致索引失效,例如select a from t1 where b - 0 \> 1 and c < 100; 语句中的减0与否会产生两种截然不同的执行计划; +4. 索引列涉及函数计算:对于openGauss来说,函数计算结果往往是“不可预测”的,故该索引有可能是失效的;不过openGauss支持函数索引(Functional Index),对于必须在字段上执行函数的情况可以选择使用该索引,只不过该索引的维护代价会比较大;同时,如果定义的函数可以被rewrite过程改写,该索引仍然可能是有效的,这点可能与某些数据库的行为不同; +5. 谓词中使用like: 对于字符串类型(如varchar, text)的字段,在使用like进行模糊查询时,在openGauss中默认是不走索引的,这点与MySQL在默认情况下不太一致;openGauss对字符串类型的字段,一般在进行等值查询时会选择使用索引,如果对于该字段更多地进行模糊查询(如like或正则),则需要在创建索引时显式地添加text\_pattern\_ops参数,如create index on movies \(title text\_pattern\_ops\); 同时,同MySQL等数据库一样,该B+ Tree索引也只仅支持前缀匹配查询,如果希望利用B+ Tree进行后缀匹配,可以使用字符串翻转小技巧;对于全文检索,可以使用openGauss支持的tsquery特性,并通过创建GIN或GiST索引加速查询; +6. 
SQL语义上不应走索引:这种情况的类型有很多,比较典型的是谓词中对同一张表的两列进行比较、不等值比较(如!=, not in, not exists, is not null)、全量排序、类型转换(如字段的类型是varchar, 在谓词中与bigint进行比较时发生了隐式转换)等。 + +## 冗余索引 + +上面我们提到了创建索引的一般情况,对于绝大多数慢SQL场景,创建一个合适的索引就可以使得性能突飞猛进。但是,索引是不是就可以越多越好呢?显然不是。我们日常创建的索引中,使用最多的是B+ Tree索引,因此我们以B+ Tree为例,简单解释一下缘由。 + +众所周知,B+ Tree是一个多叉树,它的每一个子节点都是父节点的一个子“范围”。记录(或记录的位置)最终存储在B+ Tree的叶子节点中。因此,在进行数据检索时,只需要扫描匹配的子节点中的指定“范围”即可。但是,对于数据的删除,也需要付出相同的时间开销,进行B+ Tree节点的调整;如果被索引的数据修改了,还需要调整B+ Tree中原有的节点结构。由于B+ Tree的插入、删除、检索的算法时间复杂度都是相同的,因此当业务系统中的插入和删除操作更多时,索引维护的代价就会更大,甚至超过索引检索时带来的收益。与此同时,索引页也需要占用额外的磁盘空间,被索引数据量越大,索引页占据的空间就越大。而且,当前openGauss中的B+ Tree的实现仍然是有锁的,更多的索引页面有可能涉及更多的锁维护操作。 + +在openGauss数据库中,可以通过下述语句简单识别没有被使用过的索引: + +``` +SELECT s.schemaname, + s.relname AS tablename, + s.indexrelname AS indexname, + pg_relation_size(s.indexrelid) AS index_size +FROM pg_catalog.pg_stat_user_indexes s + JOIN pg_catalog.pg_index i ON s.indexrelid = i.indexrelid +WHERE s.idx_scan = 0 -- has never been scanned + AND 0 <>ALL (i.indkey) + AND NOT i.indisunique + AND NOT EXISTS + (SELECT 1 FROM pg_catalog.pg_constraint c + WHERE c.conindid = s.indexrelid) +ORDER BY pg_relation_size(s.indexrelid) DESC; +``` + +可以修改上述SQL语句中的idx\_scan 条件中的阈值,来调整返回结果。 + +对于workload中全量SQL语句进行索引创建其实是非常困难的,因为需要权衡全量SQL中增删查改语句的占比情况,同时需要估计索引的检索收益和维护代价,这个权衡过程十分复杂,一般的人工操作其实是很难的。因此,在日常数据库使用中,当需要创建索引时,最好进行全局业务的评估,衡量是否会干扰到其他业务,以及创建的总体收益是否为正,以免后期难以维护。 + +不过,对于openGauss数据库来说,可以使用系统级别的索引推荐功能来解决上述痛点问题,可以通过下述命令查看使用说明: + +``` +gs_dbmind component index_advisor --help +``` + +## 系统配置原因导致的慢SQL + +在系统配置中,最常见的配置项就是对资源的配置。这包括允许使用的最大资源(主要是内存)、以及资源的使用方式等。除了调整资源配置,有些情况下还需要配置数据库优化器Cost Model的代价值。下面我们重点看几个会影响SQL语句成为慢SQL的系统参数: + +**max\_process\_memory**: 该参数与enable\_memory\_limit配合使用,用于限制一个openGauss实例可用的最大内存。需要将该参数值与宿主机系统的内存总量进行匹配,将宿主机用于操作系统正常运行所需的内存刨除后,剩下的内存空间就可以尽可能多地划分给openGauss实例使用了。否则,openGauss为了避免造成OOM问题,会通过该参数限制数据库允许使用的最大内存。因此,如果在客户端或者日志中出现类似“memory usage reach the max\_dynamic\_memory”的报错时,一般是由于该参数值太小导致的。 + +**shared\_buffers**: 数据库系统使用的缓存池大小。一般来说,综合来看对数据库影响最大的参数就是它了,因为如果该参数设置得过小,会导致缓存不足,从而产生大量的磁盘I/O. 该参数在openGauss上的默认值很小,只有32MB,对于绝大多数的生产场景是不够的。一般的经验值是设置为系统内存的25%, 甚至在某些场景中还可以再大一点。不过openGauss的buffer没有通过DirectIO实现,仍然使用了系统缓存(cache),所以一般认为超过系统内存的40%也起不到再好的效果了。与此同时,checkpoint\_segments 参数也需要随着shared\_buffers的调大跟着变大一些。 + +**work\_mem**: 显式指定内排序和哈希表能使用的内存空间大小,如果该值设得比较小,会向磁盘写入更多的临时文件。因此,我们可以适当地增加该值的大小。但是需要注意的是,业务系统可能存在并行执行的复杂语句,如果这些语句都占用非常多的work\_mem大小的资源,则可能会导致内存使用占满(如前文所述,openGauss存在内存管控机制,一般不至于由于OOM导致系统重启)。故而,该值设置得很大的时候要关注系统的并发问题。该参数对ORDER BY, DISTINCT, JOIN \(merge join, hash join\), HASH Agg, 基于hash的IN子查询都有影响。 + +**enable\_nestloop**: 开启该参数可以让优化器使用Nest Loop Join\(NLJ\), 但是关闭该参数也不会完全压制优化器选择NLJ. 对于某些复杂查询(如在TPC-H benchmark中的语句)来说,不应该选择NLJ, 但是优化器往往会出现规划错误。那么,在此场景下,可以通过禁用该参数来鼓励优化器选择使用其他JOIN方法。 + +**random\_page\_cost**: 一般与seq\_page\_cost配合调整。该参数调整数据库的CBO优化器中随机扫描的代价。该值设置得越大,数据库越认为随机扫描不可取,也就越不倾向于使用索引。该参数的默认值是4,对于机械硬盘来说,是合适的。但是,如果业务系统的磁盘是固态硬盘的话,就应该适当调小一下该参数值,一般的经验是调整为1. + +**default\_statistics\_target**: 当前openGauss的默认优化器是CBO, 它高度依赖数据的统计信息。因此,对于复杂查询来说,更优质的统计信息往往可以获得更好的执行计划。通过增大该参数的值,可以获得更准确的统计信息,但是也会增加ANALYZE的时间。因此,对于复杂语句较多的场景,可以适当增加该参数值。 + +除了上述列出来的可能会影响SQL语句执行表现的系统参数外,还有很多参数可能会产生影响。不过,影响概率会小很多。如果用户希望检查一下数据库的参数配置是否合理,可以通过DBMind的参数推荐功能测试一下(该功能依赖当前正在运行的业务量,故不同时刻执行的效果可能会不同,建议在业务高峰时使用),相关使用帮助是: + +``` +gs_dbmind component xtuner recommend –help +``` + +如果用户希望针对自己的业务试探出最合适的参数,也可以使用离线模式(tune或train模式)。不过该场景一般是对未上线的业务系统进行初始调参,因为执行该功能可能会影响业务运行,故称之为离线模式。 + +## 资源竞争导致的慢SQL + +当系统同时执行某些SQL语句的时候,它们可能会互相影响,进而导致某些SQL语句变为慢SQL, 这就是典型的资源竞争导致的慢SQL. 
同时,不仅数据库中的语句们可能会进行资源竞争。在混合部署的环境中,操作系统上的其他任务也可能会影响数据库系统的表现。 + +对于一般的等待事件(wait event)来说,openGauss具备等待事件的记录视图,用户可以通过下列方法从宏观上查看Top级别的等待事件: + +``` +select * from dbe_perf.wait_events order by total_wait_time desc; +``` + +一般来说,对于数据库外部原因导致的资源竞争包括CPU、内存、IO的竞争,最典型的情况是IO风暴(Freeze IO)、CPU的计算资源的占用等。对于这种情况,一般不要将数据库与其他业务系统混合部署即可避免。 + +比较困难的是,数据库自己的某些任务之间互相影响,例如锁竞争、IO竞争等。 + +数据库中的不同SQL语句对锁资源进行占用,阻塞了其他语句的正常执行,导致SQL语句变慢了,甚至还会触发死锁检测。比较简单的排查当前锁占用情况的SQL语句是: + +``` + SELECT c.relkind, + d.datname, + c.relname, + l.mode, + s.query, + extract(epoch + FROM pg_catalog.now() - s.xact_start) AS holding_time + FROM pg_locks AS l + INNER JOIN pg_database AS d ON l.database = d.oid + INNER JOIN pg_class AS c ON l.relation = c.oid + INNER JOIN pg_stat_activity AS s ON l.pid = s.pid + WHERE s.pid != pg_catalog.pg_backend_pid(); +``` + +值得一提的是,openGauss并不支持pg\_blocking\_pids 函数。所以,通过该函数是无法查看到锁等待情况的。 + +下图展示了通过DBMind提供的openGauss-exporter监控到的数据库持锁情况:![](figures/zh-cn_image_0000001291302034.jpg) + +还有一种情况是IO使用受到影响,例如系统正在进行IO操作时,执行某条SQL语句,该SQL语句对磁盘的访问被阻塞了。典型的数据库系统IO操作包括Analyze, Vacuum以及checkpoint 等。这些问题在PostgreSQL类数据库上是非常令人头疼的问题,这会导致系统运行时出现比较大的性能波动。不过,对于openGauss来说,相比PostgreSQL做了很多优化,例如增量checkpoint, 使用更大的版本号等(可以避免大量的autovacuum for prevent wrap)。 + +当然,除了上面列出的情况外,还存在并发量接近或超过系统负荷导致的性能下降和拒绝服务。例如,大量复杂查询语句对CPU资源的竞争、大并发情况下引起数据库的响应时间变慢等。 + +就资源竞争引起的慢SQL来说,基本都可以通过系统指标来发现。例如监控慢SQL发生时刻的CPU、内存、IO、锁、网络等的使用情况,根据该慢SQL发生的背景信息即可推断出该慢SQL是否由资源竞争导致的,以及是何资源短缺导致的。对于openGauss来说,DBMind提供了非常强大的数据库指标采集功能,即DBMind与Prometheus平台适配的exporter. 用户可以直接通过下述命令查看exporter的启动参数: + +openGauss-exporter: 用于采集数据库指标,除常规指标外,还能监控慢SQL、系统配置等。 + +``` +gs_dbmind component opengauss_exporter --help +``` + +reprocessing-exporter: 可以对Prometheus中已经采集到的指标进行聚合,例如计算QPS、内存使用率等。 + +``` +gs_dbmind component reprocessing_exporter --help +``` + +___注意:openGauss__对于采集指标也进行了权限隔离,必须要求openGauss-expoter__连接的用户具有sysadmin, monadmin __权限才可以获取某些监控表的指标。_ + +## 表本身包含大量数据 + +尽管openGauss对于大的行存表处理性能非常优秀,但表本身的数据情况依然是导致慢SQL的重要原因。一般来说,具有以下几种情况: + +1. 表的数据量很大,且很少被缓存,导致语句需要扫描的元组很多; +2. 表的数据量很大,在修改、删除数据时需要修改较多的元组; +3. 向表中插入的数据量很大; +4. 业务上需要检索出的数据量很多; +5. 频繁的数据修改,导致表中存在很多死元组(dead tuple),影响扫描性能; + + 表的数据量较大导致的慢SQL问题,一般需要从业务上进行入手,直接通过修改数据库来达到优化慢SQL的目的是很难实现的。因此,需要用户分析具体的业务,对业务数据进行冷热分离、分库分表、使用分布式中间件等。如果希望在数据库层进行优化,则可以通过增加宿主机的内存,进而增加max\_process\_memory、shared\_buffers、work\_mem等的大小;使用性能更佳的磁盘;适当创建索引;使用表空间调整磁盘布局等。 + + +## SQL语句写得很差 + +由SQL语句写法问题导致的慢SQL也相对多见,这类写得比较差的慢SQL也被俗称为“烂SQL”。多数情况都下,由“烂SQL”导致的索引失效的问题较多,对于这种情况,可参考前面的描述对SQL语句进行改写,使其能够使用到索引。 + +除了修改慢SQL使其能够使用索引,下面还列出了几种比较常见的、可能优化openGauss数据库性能的SQL改写规则: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+| 改写规则 | 改写条件 | 改写说明 | 原始查询语句示例 | 改写后语句示例 |
+| --- | --- | --- | --- | --- |
+| 将'select distinct \*'改写为'select \*' | 所查询表格含唯一列或主键 | 通过确定tuple无重复,去掉distinct,从而省去去重步骤,提升效率 | select distinct * from bmsql_customer limit 10; | select * from bmsql_customer limit 10; |
+| 将having子句中条件放到where子句中 | - | 将谓词表达式提前,可有效缩减group时的数据集 | select cfg_name from bmsql_config group by cfg_name having cfg_name='1' | select cfg_name from bmsql_config where cfg_name = '1' group by cfg_name |
+| 简化where子句中谓词表达式 | - | 某些复杂谓词无法有效触发openGauss内的rewrite逻辑,无法使用索引扫描 | select o_w_id, o_d_id, o_id, o_c_id from bmsql_oorder where o_w_id + 1 > 3 | select o_w_id, o_d_id, o_id, o_c_id from bmsql_oorder where o_w_id > 2 |
+| 将order by或group by中的无用列去掉 | group by或order by涉及的列包含在where子句中的等值表达式中 | 去掉无用字段,SQL更为简洁 | select cfg_name from bmsql_config where cfg_name='2' group by cfg_name order by cfg_name, cfg_value | select cfg_name from bmsql_config where cfg_name = '2' order by cfg_value |
+| 去掉where子句中永为真的表达式 | - | 去掉无用表达式,SQL更为简洁 | select * from bmsql_config where 1=1 and 2=2 limit 10 | select * from bmsql_config limit 10 |
+| 将union转换为union all | - | 避免了去重带来的执行代价 | select * from bmsql_config union select * from bmsql_config | select * from bmsql_config union all select * from bmsql_config |
+| 将delete语句转换为truncate语句 | 无where子句 | 将DML语句转换为DDL语句,一次性回收表空间,执行速度更快 | delete from bmsql_config | truncate table bmsql_config |
+| 将where子句中'or'连接的等式转换为'in'结构 | - | 'in'结构可加快过滤速度 | select * from bmsql_stock where s_w_id=10 or s_w_id=1 or s_w_id=100 or s_i_id=1 or s_i_id=10 | select * from bmsql_stock where s_w_id in (1,10,100) or s_i_id in (1,10) |
+| 将self join查询拆分为效率更高的两个子查询 | ① self join查询;② where子句包含相同列差值的范围查询,例如 1 < a.id - b.id < 10,其中a、b为同一个表的两个alias | 通过等值谓词加快查询速度 | select a.c_id from bmsql_customer a, bmsql_customer b where a.c_id - b.c_id <= 20 and a.c_id > b.c_id | select * from (select a.c_id from bmsql_customer as a, bmsql_customer as b where trunc((a.c_id) / 20) = trunc(b.c_id / 20) and a.c_id > b.c_id union all select a.c_id from bmsql_customer as a, bmsql_customer as b where trunc((a.c_id) / 20) = trunc(b.c_id / 20 + 1) and a.c_id - b.c_id <= 20) |
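+
+这些规则是否真的带来收益,可以用EXPLAIN对比改写前后的执行计划来验证。下面以union改写为union all为例给出一个示意(假设上表中的bmsql_config表已存在,代价数值因环境而异):
+
+```
+-- 改写前:union需要额外的去重(HashAggregate/Unique)步骤
+EXPLAIN select * from bmsql_config union select * from bmsql_config;
+-- 改写后:union all直接拼接结果,计划中没有去重算子,代价更低
+EXPLAIN select * from bmsql_config union all select * from bmsql_config;
+```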
+ +对于业务系统,SQL语句上线之前的审计工作基本都可以覆盖上述的场景,业内也具备很多对SQL语句进行改写的工具,不过这些工具的一些改写规则并不是绝对意义上的等值改写。而且,很多改写条件对于openGauss来说不见得有效,因为openGauss在数据库内部也存在rewrite逻辑。 + +DBMind平台会进一步演进SQL语句的智能改写功能,提供给用户在线的交互式智能查询改写能力,预计在未来的版本中与用户见面。 + +## 总结 + +我们在上面已经列出了能够导致慢SQL的原因,基本覆盖了在openGauss上造成慢SQL的所有原因。不过,one-by-one 手动地进行慢SQL检查对于用户来说工作量确实太大。故而,openGauss的DBMind功能本身已经集成了对慢SQL进行智能根因识别的能力,用户可以通过运行下述命令在后台启动慢SQL根因分析功能(需要首先部署Prometheus以及expoter,以便能够采集到监控指标): + +``` +gs_dbmind service start -c confpath --only-run slow_query_diagnosis +``` + +_注:显式指定--only-run __参数可以仅启动被选择的DBMind__服务项_ + +被诊断后的慢SQL会存储在元数据库(存放诊断结果的数据库)中,用户可以通过下述命令查看: + +``` +gs_dbmind component slow_query_diagnosis show -c confpath --query SQL --start-time timestamps0 --end-time timestamps1 +``` + +也可以通过与Grafana联合来展示慢SQL的分析结果,DBMind也提供了简单的Grafana配置模板,可供用户参考: + +``` +https://github.com/opengauss-mirror/openGauss-server/blob/master/src/gausskernel/dbmind/tools/misc/grafana-template-slow-query-analysis.json +``` + +由于openGauss官方网站的发行包中的DBMind可能滞后于代码托管平台(gitee或github)上的最新代码,直接编译openGauss又需要花费很多的时间。故而,如果用户只是想单纯提取最新的DBMind功能,可以通过下面的Linux命令来实现: + +``` +git clone -b master --depth 1 [https://gitee.com/opengauss/openGauss-server.git](https://gitee.com/opengauss/openGauss-server.git) +cd openGauss-server/src/gausskernel/dbmind/ +mv tools dbmind +tar zcf dbmind.tar.gz gs_dbmind dbmind +``` + +将生成的dbmind.tar.gz 压缩包在合适的部署位置解压即可。 + +当然,如果用户希望手动检查一下慢SQL的原因,也可以根据附表的检查项来检查慢SQL的产生原因。 + +**附表:慢SQL检查列表** + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

**Appendix: slow SQL checklist**

| Check item | Check source (system catalog or view) | Check method |
| --- | --- | --- |
| Lock contention during statement execution | dbe_perf.statement_history(start_time, finish_time, query), pg_locks(pid, mode, locktype, grant), pg_stat_activity(xact_start, query_start, query, pid) | Check whether the query was blocked while the statement was executing. |
| Dead-tuple ratio in a table exceeds the threshold | dbe_perf.statement_history(query, dbname, schemaname), pg_stat_user_tables(relname, schemaname, n_live_tup, n_dead_tup) | n_dead_tup / n_live_tup; a ratio above the threshold means the table is considered bloated (default threshold: 0.2). |
| Statement scans many rows | dbe_perf.statement_history(n_tuples_fetched, n_tuples_returned, n_live_tup, n_dead_tup) | n_tuples_fetched + n_tuples_returned; above the threshold it is considered excessive (default threshold: 10000). |
| Low cache hit ratio for the statement | dbe_perf.statement_history(n_blocks_fetched, n_blocks_hit) | n_blocks_hit / n_blocks_fetched; below the threshold it is considered low (default threshold: 0.95). |
| Tables touched by slow DML (delete, insert, update) carry redundant indexes | dbe_perf.statement_history(dbname, schemaname, query), pg_stat_user_indexes(schemaname, relname, indexrelname, idx_scan, idx_tup_read, idx_tup_fetch), pg_indexes(schemaname, tablename, indexname, indexdef) | An index on a table referenced by the SQL is considered redundant if (1) it is not a unique index, (2) (idx_scan, idx_tup_read, idx_tup_fetch) = (0, 0, 0), and (3) it is not under the 'pg_catalog', 'information_schema', 'snapshot' or 'dbe_pldeveloper' schemas; otherwise it is considered effective. |
| Large update volume | dbe_perf.statement_history(query, n_tuples_updated), pg_stat_user_tables(n_live_tup, n_dead_tup) | n_tuples_updated above the threshold means a large update volume (default threshold: 1000). |
| Large insert volume | dbe_perf.statement_history(query, n_tuples_inserted), pg_stat_user_tables(n_live_tup, n_dead_tup) | n_tuples_inserted above the threshold means a large insert volume (default threshold: 1000). |
| Large delete volume | dbe_perf.statement_history(query, n_tuples_deleted), pg_stat_user_tables(n_live_tup, n_dead_tup) | n_tuples_deleted above the threshold means a large delete volume (default threshold: 1000). |
| Tables involved carry many indexes | pg_stat_user_indexes(relname, schemaname, indexrelname) | If the index count on a table exceeds the threshold and the index-to-column ratio exceeds its threshold, the table is considered over-indexed (index count threshold: 3; ratio threshold: 0.6). |
| Statement spills to disk (external sort) | dbe_perf.statement(sort_count, sort_spilled_count, sort_mem_used, hash_count, hash_spilled_count, hash_mem_used, n_calls) | Judge whether hashing or sorting caused spilling: (1) if sort_count or hash_count is non-zero while sort_mem_used or hash_mem_used is 0, the SQL definitely spilled to disk; (2) if sort_spilled_count or hash_spilled_count is non-zero, the execution may have spilled. |
| AUTOVACUUM or AUTOANALYZE was running on the tables during execution | dbe_perf.statement_history(start_time, finish_time, query), pg_stat_user_tables(last_autovacuum, last_autoanalyze) | A vacuum or analyze operation was in progress while the SQL executed. |
| High database TPS | dbe_perf.statement_history(start_time, finish_time), pg_stat_database(datname, xact_commit, xact_rollback) | If the current TPS has grown substantially relative to normal business TPS, it is considered high; an abnormal short-term surge is treated as a workload storm. |
| IOWait above the threshold | Abnormally high system IOWait | IOWait exceeds the user-defined threshold (default threshold: 10%). |
| IOPS above the threshold | Abnormal system IOPS | IOPS exceeds the user-defined threshold (default threshold: 1000). |
| load average above the threshold | Abnormal system load average | The ratio of load average to the number of logical cores exceeds the user-defined threshold (default threshold: 0.6). |
| CPU usage above the threshold | Abnormal system CPU usage | CPU usage exceeds the user-defined threshold (default threshold: 0.6). |
| IOUTILS above the threshold | Abnormal system IOUTILS | IOUTILS (disk utilization) exceeds the user-defined threshold (default threshold: 0.5). |
| IOCAPACITY above the threshold | Abnormal system IO capacity | IOCAPACITY (I/O throughput) exceeds the user-defined threshold (default threshold: 50 MB/s). |
| IODELAY above the threshold | Abnormal system IO delay | IODELAY (I/O latency) exceeds the user-defined threshold (default threshold: 50 ms). |
| NIC packet-drop rate | Abnormal system NIC drop rate | NETWORK DROP RATE exceeds the user-defined threshold (default threshold: 0.01). |
| NIC error rate | Abnormal system NIC error rate | NETWORK ERROR RATE exceeds the user-defined threshold (default threshold: 0.01). |
| Abnormal thread-pool usage | dbe_perf.global_threadpool_status | Database thread-pool usage exceeds the threshold (default threshold: 0.95). |
| Abnormal connection-pool usage | pg_settings.max_connections, pg_stat_activity | Database connection-pool usage exceeds the threshold (default threshold: 0.8). |
| High double-write latency | dbe_perf.wait_events | Double-write latency exceeds the threshold (default threshold: 100 us). |
| Table not updated for a long time | pg_stat_user_tables | The table has gone without updates for longer than the threshold (default threshold: 60 s). |
| Low checkpoint efficiency (rough heuristic only) | pg_stat_bgwriter | The ratio of buffers_backend to (buffers_clean + buffers_checkpoint) is below the threshold (default threshold: 1000). |
| Low primary/standby replication efficiency | pg_stat_replication | write_diff, replay_diff or sent_diff between primary and standby exceeds the threshold (default threshold: 500000). |
| Abnormal seqscan operator in the execution plan | Execution plan | The seqscan operator's share of total plan cost exceeds the threshold (default threshold: 0.3); this check also determines whether a suitable index is missing. |
| Abnormal nestloop operator in the execution plan | Execution plan | The nestloop operator's cost share exceeds the threshold (default threshold: 0.3) and the result set fed through the nestloop exceeds the row threshold (default threshold: 10000). |
| Abnormal hashjoin operator in the execution plan | Execution plan | The hashjoin operator's cost share exceeds the threshold (default threshold: 0.3) and the hashjoin result set is smaller than the threshold (default threshold: 10000). |
| Abnormal groupagg operator in the execution plan | Execution plan | The groupagg operator's cost share exceeds the threshold (default threshold: 0.3) and the number of rows grouped exceeds the threshold (default threshold: 10000). |
| Poorly written SQL | SQL text, pg_stat_user_tables | The SQL is written in a way that degrades execution performance. |
| SQL execution affected by scheduled jobs | pg_job, dbe_perf.statement_history | A scheduled job affected the SQL's execution performance; consider moving the job to another time window to avoid the interference. |
| Long plan-generation time | dbe_perf.statement_history | Generating the execution plan for the SQL took a long time. |
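As a worked example of the dead-tuple row above, the check can be run by hand along the following lines; a sketch using the default 0.2 threshold mentioned in the table:

```
-- Sketch: tables whose dead-tuple ratio exceeds the default 0.2 threshold.
select schemaname,
       relname,
       n_live_tup,
       n_dead_tup,
       round(n_dead_tup::numeric / nullif(n_live_tup, 0), 2) as dead_ratio
from pg_stat_user_tables
where n_live_tup > 0
  and n_dead_tup > 0.2 * n_live_tup
order by dead_ratio desc;
```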
References

\[1\]. [https://www.2ndquadrant.com/en/blog/managing-freezing/](https://www.2ndquadrant.com/en/blog/managing-freezing/)

\[2\]. [http://mysql.taobao.org/monthly/2016/06/03/](http://mysql.taobao.org/monthly/2016/06/03/)

\[3\]. [https://www.2ndquadrant.com/en/blog/basics-of-tuning-checkpoints/](https://www.2ndquadrant.com/en/blog/basics-of-tuning-checkpoints/)

\[4\]. [https://lwn.net/Articles/591723/](https://lwn.net/Articles/591723/)

\[5\]. [https://dev.mysql.com/doc/refman/8.0/en/glossary.html](https://dev.mysql.com/doc/refman/8.0/en/glossary.html)

\[6\]. [https://github.com/opengauss-mirror/openGauss-server/tree/master/src/gausskernel/dbmind](https://github.com/opengauss-mirror/openGauss-server/tree/master/src/gausskernel/dbmind)

diff --git "a/content/zh/post/2022/\345\276\252\345\272\217\346\270\220\350\277\233-openGauss-GUC-\345\217\202\346\225\260\347\232\204\345\256\232\344\271\211-\345\274\225\345\257\274\345\222\214\345\210\227\350\241\250.md" "b/content/zh/post/2022/\345\276\252\345\272\217\346\270\220\350\277\233-openGauss-GUC-\345\217\202\346\225\260\347\232\204\345\256\232\344\271\211-\345\274\225\345\257\274\345\222\214\345\210\227\350\241\250.md"
new file mode 100644
index 0000000000000000000000000000000000000000..52d3459dbb5052059bc9149572a184361f764c0c
--- /dev/null
+++ "b/content/zh/post/2022/\345\276\252\345\272\217\346\270\220\350\277\233-openGauss-GUC-\345\217\202\346\225\260\347\232\204\345\256\232\344\271\211-\345\274\225\345\257\274\345\222\214\345\210\227\350\241\250.md"
@@ -0,0 +1,727 @@

+++

title = "Step by Step openGauss: GUC Parameter Definition, Bootstrapping, and Listing"

date = "2021-12-24"

tags = [ "Step by Step openGauss: GUC Parameter Definition, Bootstrapping, and Listing"]

archives = "2021-12"

author = "eygle"

summary = "Step by Step openGauss: GUC Parameter Definition, Bootstrapping, and Listing"

img = "/zh/post/2022/title/img16.png"

times = "12:30"

+++

# Step by Step openGauss: GUC Parameter Definition, Bootstrapping, and Listing

When adding a GUC parameter, be aware of which category your parameter belongs to. For example, if ordinary users should be able to modify it at any time, set its level to PGC\_USERSET; if superusers should be able to modify it online, set it to PGC\_SUSET; if it should take effect by modifying the configuration file and sending a signal, set it to PGC\_SIGHUP.

In openGauss, the GUC-related code lives in

[src/common/backend/utils/misc/guc.cpp](https://gitee.com/opengauss/openGauss-server/blob/master/src/common/backend/utils/misc/guc.cpp)

## Parameter levels

```
/*
 * Displayable names for context types (enum GucContext)
 *
 * Note: these strings are deliberately not localized.
 */
const char* const GucContext_Names[] = {
    /* PGC_INTERNAL */ "internal",
    /* PGC_POSTMASTER */ "postmaster",
    /* PGC_SIGHUP */ "sighup",
    /* PGC_BACKEND */ "backend",
    /* PGC_SUSET */ "superuser",
    /* PGC_USERSET */ "user"};
```

These categories mean the following:

- PGC\_INTERNAL: can only be set internally; users cannot set it.
- PGC\_POSTMASTER: can only be configured when the postmaster starts, by reading the configuration file or processing command-line arguments.
- PGC\_SIGHUP: can only be configured at postmaster startup, or by changing the configuration file and sending the SIGHUP signal to notify the postmaster or postgres process.
- PGC\_BACKEND: can only be set from the configuration file at postmaster startup, or by the client when it requests a connection; backends that are already running ignore changes to such parameters.
- PGC\_SUSET: can only be set at postmaster startup, or by a superuser through SQL (the SET command).
- PGC\_USERSET: can be set by any user at any time.
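A quick way to see these levels from a live session; a sketch using `work_mem` (a user-level parameter) and `shared_buffers` (a postmaster-level parameter), both of which appear in the listing at the end of this post:

```
-- USERSET: any session may read and change it on the fly.
show work_mem;
set work_mem = '64MB';

-- POSTMASTER: readable, but cannot be changed at runtime; uncommenting the
-- SET below is expected to fail with a "cannot be changed" style error.
show shared_buffers;
-- set shared_buffers = '2GB';
```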
## Parameter sources

In [master/src/include/utils/guc.h](https://gitee.com/opengauss/openGauss-server/blob/master/src/include/utils/guc.h), the following data structure defines where a GUC parameter's value can come from:

```
typedef enum {
    PGC_S_DEFAULT,         /* hard-wired default ("boot_val") */
    PGC_S_DYNAMIC_DEFAULT, /* default computed during initialization */
    PGC_S_ENV_VAR,         /* postmaster environment variable */
    PGC_S_FILE,            /* postgresql.conf */
    PGC_S_ARGV,            /* postmaster command line */
    PGC_S_DATABASE,        /* per-database setting */
    PGC_S_USER,            /* per-user setting */
    PGC_S_DATABASE_USER,   /* per-user-and-database setting */
    PGC_S_CLIENT,          /* from client connection request */
    PGC_S_OVERRIDE,        /* special case to forcibly set default */
    PGC_S_INTERACTIVE,     /* dividing line for error reporting */
    PGC_S_TEST,            /* test per-database or per-user setting */
    PGC_S_SESSION          /* SET command */
} GucSource;
```

## Parameter bootstrapping

At database startup, parameter bootstrapping proceeds in three steps.

- **Initialize the GUC parameters**

    The postmaster first calls the InitializeGUCOptions function to set every parameter to its default value:

    1) It first calls build\_guc\_variables to count the parameters and allocate guc\_variables, a global array of config\_generic pointers that holds the address of each parameter struct, and to sort that array. Because the parameters are stored in the global static arrays ConfigureNamesBool, ConfigureNamesInt, ConfigureNamesReal, ConfigureNamesString, and ConfigureNamesEnum, build\_guc\_variables only needs to walk those arrays, count the parameters, and set the vartype field in each parameter's config\_generic to the corresponding type. After all parameters have been visited, it allocates the config\_generic pointer array guc\_vars according to the total count and walks the static arrays again, saving the address of each parameter struct into guc\_vars (the array is sized at 1.25 times the current parameter count, mainly to leave room for adding parameters later). It then points the global variable guc\_variables at guc\_vars and finally quicksorts guc\_variables by parameter name.

    2) Next, every parameter is set to its default value. For each entry in guc\_variables, InitializeGUCOptions sets status in the config\_generic field to 0; sets reset\_source, tentative\_source, and source to PGC\_S\_DEFAULT to mark the value as a default; sets stack and sourcefile to NULL; and then, depending on vartype, calls the parameter's assign\_hook function if one is defined, which establishes boot\_val. Finally boot\_val is assigned to reset\_val and to the variable that the variable pointer refers to. Through this sequence every parameter ends up at its default value.

    3) The environment variables PGPORT, PGDATESTYLE, and PGCLIENTENCODING are read via the getenv system call; when a value is non-empty, SetConfigOption is called to set the corresponding parameter.

    4) Finally, the system's maximum safe stack depth is probed; if that value is greater than 100 KB and no more than 2 MB, it is used to set the max\_stack\_depth parameter.

- **Apply GUC parameters from the command line**

    If the user specified GUC values on the command line when starting the postmaster, the postmaster must parse those values out of the command-line arguments and apply them to the corresponding parameters. This is done mainly through the getopt and SetConfigOption functions.

    For every option and value returned by getopt, a switch statement on the option calls SetConfigOption to set the corresponding parameter.

    SetConfigOption's first argument is the parameter name; its second is the parameter value, taken from the optarg string returned by getopt; its third is the parameter context, and the last is the parameter source. Since the postmaster is only processing command-line arguments here, the context and source are set to PGC\_POSTMASTER and PGC\_S\_ARGV respectively.

    SetConfigOption is implemented by calling set\_config\_option\(const char \*name, const char \*value, GucContext context, GucSource source, bool isLocal, bool changeVal\), with the last two arguments fixed to false and true. That function first searches the parameter array pointed to by guc\_variables for the parameter named name, raising an error if it is not found. Otherwise it compares the GucContext value in the parameter struct with the incoming context to decide whether the parameter may be set in the current context, raising an error if not; it then compares the GucSource in the struct with the incoming source to check whether the current operation's priority is greater than or equal to the previous one, and if so converts value according to the parameter's value type and fills in the corresponding fields of the parameter struct.

- **Read the configuration file**

    Once the command-line settings are in place, the configuration file is read and the parameters are configured again. Note that parameters set in the configuration file can never override those already set on the command line, because their priority is lower than that of command-line settings.

    This step is driven mainly by SelectConfigFiles\(const char \*userDoption, const char \*progname\): the first argument is the user's data directory as given on the command line (located via the PGDATA environment variable if not set), and the second is the program name, used mainly for error handling.

    The function first locates the configuration file under the data directory and then runs the lexer to parse it. For every parameter and value it parses, it calls SetConfigOption to apply the change.

    After parameters have been set through the three steps above, their validity is checked. For example: the data directory's owner user ID must equal the current process's effective user ID; the data directory must forbid all access by group and other users; the number of buffers must be at least twice the number of allowed connections and at least 16; and so on. If everything is valid, the process changes into the data directory and proceeds with the subsequent work.
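The source that actually supplied each parameter's current value can be inspected through pg_settings; a sketch, assuming pg_settings exposes the source, sourcefile and sourceline columns as in PostgreSQL:

```
-- Sketch: parameters whose value did not come from the built-in default,
-- with the configuration file (if any) that set them.
select name, setting, context, source, sourcefile, sourceline
from pg_settings
where source <> 'default'
order by source, name;
```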
## How to view all parameter levels

```
omm=# \pset pager
Pager usage is off.
omm=# select context,name,short_desc from pg_settings order by context,category,name;
  context   | name | short_desc
------------+------+-----------
 backend | local_preload_libraries | Lists shared libraries to preload into each backend.
 backend | remotetype | Sets the type of Postgres-XC remote connection
 backend | ignore_system_indexes | Disables reading from system indexes.
 backend | post_auth_delay | Waits N seconds on connection startup after authentication.
 backend | log_connections | Logs each successful connection.
 backend | log_disconnections | Logs end of a session, including duration.
 internal | lc_collate | Shows the collation order locale.
 internal | lc_ctype | Shows the character classification and case conversion locale.
 internal | server_encoding | Sets the server (database) character set encoding.
 internal | instr_unique_sql_track_type | unique sql track type
 internal | block_size | Shows the size of a disk block.
 internal | integer_datetimes | Datetimes are integer based.
 internal | percentile | Sets the percentile of sql responstime that DBA want to know.
 internal | enable_adio_function | Enable adio function.
 internal | max_function_args | Shows the maximum number of function arguments.
 internal | max_identifier_length | Shows the maximum identifier length.
 internal | max_index_keys | Shows the maximum number of index keys.
 internal | segment_size | Shows the number of pages per disk file.
 internal | server_version | Shows the server version.
 internal | server_version_num | Shows the server version as an integer.
 internal | wal_block_size | Shows the block size in the write ahead log.
 internal | wal_segment_size | Shows the number of pages per write ahead log segment.
 internal | update_process_title | Updates the process title to show the active SQL command.
 internal | current_logic_cluster | Shows current logic cluster.
 internal | sql_compatibility | Choose which SQL format to adapt.
 postmaster | audit_data_format | Sets the data format for audit files.
 postmaster | audit_directory | Sets the destination directory for audit files.
 postmaster | available_zone | Sets the available zone of current instance.
 postmaster | elastic_search_ip_addr | Controls elastic search IP address in the system.
 postmaster | use_elastic_search | Enables elastic search in the system.
 postmaster | autovacuum_freeze_max_age | Age at which to autovacuum a table.
 postmaster | autovacuum_max_workers | Sets the maximum number of simultaneously running autovacuum worker processes.
 postmaster | comm_tcp_mode | Whether use tcp commucation mode for stream
 postmaster | enable_global_plancache | enable to use global plan cache.
+ postmaster | enable_thread_pool | enable to use thread pool. + postmaster | thread_pool_attr | Spare Cpu that can not be used in thread pool. + postmaster | cn_send_buffer_size | Sets the send buffer size used in CN, unit in KB. + postmaster | asp_sample_num | Sets the active session profile max sample nums in buff + postmaster | comm_control_port | Sets the stream control port the server listens on. + postmaster | comm_max_receiver | Maximum number of internal receiver threads. + postmaster | comm_memory_pool | Sets the memory pool size for communication(in kB). + postmaster | comm_memory_pool_percent | Sets the percent of comm_memory_pool for dynamic workload. + postmaster | comm_quota_size | Sets the stream quota size in kB. + postmaster | comm_sctp_port | Sets the STCP port the server listens on. + postmaster | comm_usable_memory | Sets the total usable memory for communication(in kB). + postmaster | listen_addresses | Sets the host name or IP address(es) to listen to. + postmaster | local_bind_address | Sets the host name or IP address(es) to connect to for sctp. + postmaster | max_connections | Sets the maximum number of concurrent connections for clients. + postmaster | max_inner_tool_connections | Sets the maximum number of concurrent connections for inner tools. + postmaster | port | Sets the TCP port the server listens on. + postmaster | unix_socket_group | Sets the owning group of the Unix-domain socket. + postmaster | unix_socket_permissions | Sets the access permissions of the Unix-domain socket. + postmaster | enableSeparationOfDuty | Enables the user's separation of privileges. + postmaster | sysadmin_reserved_connections | Sets the number of connection slots reserved for system admin. + postmaster | unix_socket_directory | Sets the directory where the Unix-domain socket will be created. + postmaster | ssl | Enables SSL connections. + postmaster | ssl_ca_file | Location of the SSL certificate authority file. + postmaster | ssl_cert_file | Location of the SSL server certificate file. + postmaster | ssl_ciphers | Sets the list of allowed SSL ciphers. + postmaster | ssl_crl_file | Location of the SSL certificate revocation list file. + postmaster | ssl_key_file | Location of the SSL server private key file. + postmaster | pgxc_node_name | The Coordinator or Datanode name. + postmaster | enable_stateless_pooler_reuse | Pooler stateless reuse mode. + postmaster | allow_system_table_mods | Allows modifications of the structure of system tables. + postmaster | comm_sender_buffer_size | The libcomm sender's buffer size in every interaction between DN and CN, or DN and DN, unit(KB) + postmaster | lastval_supported | Enable functionality of lastval() function. + postmaster | support_extended_features | Enables unofficial supported extended features. + postmaster | data_sync_retry | Whether to continue running after a failure to sync data files. + postmaster | config_file | Sets the server's main configuration file. + postmaster | data_directory | Sets the server's data directory. + postmaster | enable_default_cfunc_libpath | Enable check for c function lib path. + postmaster | external_pid_file | Writes the postmaster PID to the specified file. + postmaster | hba_file | Sets the server's "hba" configuration file. + postmaster | ident_file | Sets the server's "ident" configuration file. + postmaster | mot_config_file | Sets mot main configuration file. + postmaster | job_queue_processes | Number of concurrent jobs, optional: [1...1000], default: 10. 
+ postmaster | max_locks_per_transaction | Sets the maximum number of locks per transaction. + postmaster | max_pred_locks_per_transaction | Sets the maximum number of predicate locks per transaction. + postmaster | enable_delta_store | Enable delta for column store. + postmaster | string_hash_compatible | Enables the hash compatibility of char() and varchar() datatype + postmaster | enable_orc_cache | Enable orc metadata cache. + postmaster | enable_mix_replication | All the replication log sent by the wal streaming. + postmaster | data_replicate_buffer_size | Sets the buffer size of data replication. + postmaster | max_replication_slots | Sets the maximum number of simultaneously defined replication slots. + postmaster | max_wal_senders | Sets the maximum number of simultaneously running WAL sender processes. + postmaster | catchup2normal_wait_time | The maximal allowed duration for waiting from catchup to normal state. + postmaster | hot_standby | Allows connections and queries during recovery. + postmaster | wal_receiver_buffer_size | Sets the buffer size to receive data from master. + postmaster | asp_log_directory | Sets the destination directory for asp log files. + postmaster | event_source | Sets the application name used to identify PostgreSQL messages in the event log. + postmaster | logging_collector | Starts a subprocess to capture stderr output and/or csvlogs into log files. + postmaster | perf_directory | Sets the destination directory for perf json files. + postmaster | query_log_directory | Sets the destination directory for slow query log files. + postmaster | numa_distribute_mode | Sets the NUMA node distribution mode. + postmaster | max_files_per_process | Sets the maximum number of simultaneously open files for each server process. + postmaster | shared_preload_libraries | Lists shared libraries to preload into server. + postmaster | cstore_buffers | Sets the number of CStore buffers used by the server. + postmaster | enable_memory_limit | Using memory protect feature. + postmaster | local_syscache_threshold | Sets the maximum threshold for cleaning cache. + postmaster | max_compile_functions | max compile results in postmaster + postmaster | max_prepared_transactions | Sets the maximum number of simultaneously prepared transactions. + postmaster | max_process_memory | Sets the maximum number of memory used by the process. + postmaster | memorypool_enable | Using memory pool. + postmaster | memorypool_size | Sets the number of memory pool used by the server. + postmaster | shared_buffers | Sets the number of shared memory buffers used by the server. + postmaster | track_activity_query_size | Sets the size reserved for pg_stat_activity.query, in bytes. + postmaster | udf_memory_limit | Sets the maximum number of memory used by UDF Master and UDF Workers. + postmaster | UDFWorkerMemHardLimit | Sets the hard memory limit to be used for fenced UDF. + postmaster | walsender_max_send_size | Size of walsender max send size. + postmaster | recovery_max_workers | The max number of recovery threads allowed to run in parallel. + postmaster | recovery_parallelism | The actual number of recovery threads running in parallel. + postmaster | recovery_parse_workers | The number of recovery threads to do xlog parse. + postmaster | recovery_redo_workers | The number belonging to one parse worker to do xlog redo. + postmaster | bbox_blanklist_items | List of names of bbox blanklist items. + postmaster | enable_ffic_log | Enables First Failure Info Capture. 
+ postmaster | max_concurrent_autonomous_transactions | Maximum number of concurrent autonomous transactions processes. + postmaster | alarm_component | Sets the component for alarm function. + postmaster | enable_alarm | Enables alarm or not. + postmaster | enable_nonsysadmin_execute_direct | Enables non-sysadmin users execute direct on CN/DN. + postmaster | max_cached_tuplebufs | how many memory reorderbuffer can use. + postmaster | max_changes_in_memory | how many memory a transaction can use in reorderbuffer. + postmaster | max_resource_package | The maximum number of the resource package(RP) for DN in the compute pool. + postmaster | remote_read_mode | decide way of remote read + postmaster | transparent_encrypted_string | The encrypted string to test the transparent encryption key. + postmaster | transparent_encrypt_kms_region | The region to get transparent encryption key. + postmaster | transparent_encrypt_kms_url | The URL to get transparent encryption key. + postmaster | enable_page_lsn_check | Enable check page lsn when redo + postmaster | force_promote | Enable master update min recovery point. + postmaster | bgwriter_thread_num | Sets the number of background writer threads with incremental checkpoint on. + postmaster | enable_double_write | Enable master double write. + postmaster | enable_incremental_checkpoint | Enable master incremental checkpoint. + postmaster | pagewriter_thread_num | Sets the number of page writer threads. + postmaster | advance_xlog_file_num | Sets the number of xlog files to be initialized in advance. + postmaster | replication_type | Sets the dn's HA mode. + postmaster | sync_config_strategy | Synchronization strategy for configuration files between host and standby. + postmaster | wal_buffers | Sets the number of disk-page buffers in shared memory for WAL. + postmaster | wal_file_init_num | Sets the number of xlog segment files that WAL writer auxiliary thread creates at one time. + postmaster | wal_level | Sets the level of information written to the WAL. + postmaster | wal_log_hints | Writes full pages to WAL when first modified after a checkpoint, even for a non-critical modifications. + postmaster | wal_writer_cpu | Sets the binding CPU number for the WAL writer thread. + postmaster | xlog_idle_flushes_before_sleep | Number of idle xlog flushes before xlog flusher goes to sleep. + postmaster | xloginsert_locks | Sets the number of locks used for concurrent xlog insertions. + sighup | audit_copy_exec | audit copy execution. + sighup | audit_database_process | audit database start, stop, recover and switchover. + sighup | audit_dml_state | audit DML operation. + sighup | audit_dml_state_select | audit DML select operation. + sighup | audit_enabled | Starts a subprocess to capture audit output into audit files. + sighup | audit_file_remain_threshold | audit file remain threshold. + sighup | audit_file_remain_time | the days of the audit files can be remained + sighup | audit_function_exec | audit function execution. + sighup | audit_grant_revoke | audit grant and revoke privilege. + sighup | audit_login_logout | audit user login logout. + sighup | audit_resource_policy | the policy is used to determine how to cleanup the audit files; True means to cleanup the audit files based on space limitation and False means to cleanup the audit files when the remained time is arriving. + sighup | audit_rotation_interval | Automatic audit file rotation will occur after N minutes. 
+ sighup | audit_rotation_size | Automatic audit file rotation will occur after N kilobytes. + sighup | audit_set_parameter | audit set operation. + sighup | audit_space_limit | audit data space limit in MB unit + sighup | audit_system_object | audit DDL operation on system object. + sighup | audit_user_locked | audit lock and unlock user. + sighup | audit_user_violation | audit user violation. + sighup | autoanalyze_timeout | Sets the timeout for auto-analyze action. + sighup | autovacuum | Starts the autovacuum subprocess. + sighup | autovacuum_analyze_scale_factor | Number of tuple inserts, updates, or deletes prior to analyze as a fraction of reltuples. + sighup | autovacuum_analyze_threshold | Minimum number of tuple inserts, updates, or deletes prior to analyze. + sighup | autovacuum_mode | Sets the behavior of autovacuum + sighup | autovacuum_naptime | Time to sleep between autovacuum runs. + sighup | autovacuum_vacuum_cost_delay | Vacuum cost delay in milliseconds, for autovacuum. + sighup | autovacuum_vacuum_cost_limit | Vacuum cost amount available before napping, for autovacuum. + sighup | autovacuum_vacuum_scale_factor | Number of tuple updates or deletes prior to vacuum as a fraction of reltuples. + sighup | autovacuum_vacuum_threshold | Minimum number of tuple updates or deletes prior to vacuum. + sighup | enable_router | enable to use router. + sighup | track_stmt_retention_time | The longest retention time of full SQL and slow query in statement_ history + sighup | support_batch_bind | Sets to use batch bind-execute for PBE. + sighup | max_cn_temp_file_size | Sets the maximum tempfile size used in CN, unit in MB. + sighup | asp_flush_rate | every Nth sample to disk, MOD(sample_id, N) = 0 will flush to dist + sighup | asp_retention_days | set max retention days for pg_asp + sighup | asp_sample_interval | Sets the active session profile max sample nums in buff + sighup | enable_asp | Enable active session profile + sighup | enable_instr_cpu_timer | Enables instruments cpu timer functionality. + sighup | enable_instr_rt_percentile | Calculate percentile info of sql responstime. + sighup | enable_instr_track_wait | Collects information about wait status. + sighup | enable_slow_query_log | Write slow query log. + sighup | enable_stmt_track | Enable full/slow sql feature + sighup | enable_wdr_snapshot | Enable wdr snapshot + sighup | instr_rt_percentile_interval | Sets the interval for calculating percentile in pgstat thread, in seconds + sighup | instr_unique_sql_count | Sets the number of entries collected in gs_instr_unique_sql. + sighup | track_stmt_session_slot | Sets the number of entries collected for full sql/slow sql in each session. + sighup | wdr_snapshot_interval | Sets the interval for wdr snapshot in snapshot thread, in min + sighup | wdr_snapshot_query_timeout | Sets the timeout for wdr snapshot query, in seconds + sighup | wdr_snapshot_retention_days | Sets the max time span for wdr snapshot, in seconds + sighup | authentication_timeout | Sets the maximum allowed time to complete client authentication. + sighup | auth_iteration_count | The iteration count used in RFC5802 authenication. + sighup | failed_login_attempts | max number of login attempts. + sighup | krb_srvname | Sets the name of the Kerberos service. + sighup | krb_caseins_users | Sets whether Kerberos and GSSAPI user names should be treated as case-insensitive. + sighup | krb_server_keyfile | Sets the location of the Kerberos server key file. 
+ sighup | password_encryption_type | The encryption method of password. + sighup | password_lock_time | password lock time + sighup | modify_initial_password | modify the initial password of the initial user. + sighup | password_effect_time | password effective time. + sighup | password_max_length | max length of password. + sighup | password_min_digital | min number of digital character in password. + sighup | password_min_length | min length of password. + sighup | password_min_uppercase | min number of upper character in password. + sighup | password_notify_time | password deadline notice time. + sighup | password_policy | The password complexity-policy of the database system. + sighup | password_reuse_max | max times password can reuse. + sighup | password_reuse_time | max days password can reuse. + sighup | password_min_lowercase | min number of lower character in password. + sighup | password_min_special | min number of special character in password. + sighup | require_ssl | Requires SSL connections. + sighup | ssl_cert_notify_time | Alarm days before ssl cert expires. + sighup | pre_auth_delay | Waits N seconds on connection startup before authentication. + sighup | trace_recovery_messages | Enables logging of recovery-related debugging information. + sighup | wait_dummy_time | Wait for dummy starts or bcm file list received when catchup. + sighup | enable_debug_vacuum | This parameter is just used for logging some vacuum info. + sighup | restart_after_crash | Reinitializes server after backend crashes. + sighup | defer_csn_cleanup_time | Sets the interval time to push cut off csn num. + sighup | enable_prevent_job_task_startup | enable control whether the job task thread can be started. + sighup | enable_security_policy | enable security policy features. + sighup | most_available_sync | Enables master to continue when sync standbys failure. + sighup | synchronous_standby_names | List of names of potential synchronous standbys. + sighup | vacuum_defer_cleanup_age | Number of transactions by which VACUUM and HOT cleanup should be deferred, if any. + sighup | recovery_time_target | The target redo time in seconds for recovery + sighup | replconninfo2 | Sets the replconninfo2 of the HA to listen and authenticate. + sighup | replconninfo3 | Sets the replconninfo3 of the HA to listen and authenticate. + sighup | replconninfo1 | Sets the replconninfo1 of the HA to listen and authenticate. + sighup | replconninfo4 | Sets the replconninfo4 of the HA to listen and authenticate. + sighup | replconninfo5 | Sets the replconninfo5 of the HA to listen and authenticate. + sighup | replconninfo6 | Sets the replconninfo6 of the HA to listen and authenticate. + sighup | replconninfo7 | Sets the replconninfo7 of the HA to listen and authenticate. + sighup | replconninfo8 | Sets the replconninfo8 of the HA to listen and authenticate. + sighup | time_to_target_rpo | The time to the target recovery point in seconds + sighup | wal_keep_segments | Sets the number of WAL files held for standby servers. + sighup | wal_sender_timeout | Sets the maximum time to wait for WAL replication. + sighup | enable_incremental_catchup | Enable incremental searching bcm files when catchup. + sighup | enable_stream_replication | Allows stream replication to standby or secondary. + sighup | hot_standby_feedback | Allows feedback from a hot standby to the primary that will avoid query conflicts. 
+ sighup | max_standby_archive_delay | Sets the maximum delay before canceling queries when a hot standby server is processing archived WAL data. + sighup | recovery_min_apply_delay | Sets the minimum delay for applying changes during recovery. + sighup | wal_receiver_connect_retries | Sets the maximum retries to connect master. + sighup | max_standby_streaming_delay | Sets the maximum delay before canceling queries when a hot standby server is processing streamed WAL data. + sighup | primary_slotname | Set the primary slot name. + sighup | wal_receiver_connect_timeout | Sets the maximum wait time to connect master. + sighup | wal_receiver_status_interval | Sets the maximum interval between WAL receiver status reports to the primary. + sighup | wal_receiver_timeout | Sets the maximum wait time to receive data from master. + sighup | debug_print_parse | Logs each query's parse tree. + sighup | debug_print_plan | Logs each query's execution plan. + sighup | debug_print_rewritten | Logs each query's rewritten parse tree. + sighup | log_autovacuum_min_duration | Sets the minimum execution time above which autovacuum actions will be logged. + sighup | log_checkpoints | Logs each checkpoint. + sighup | log_hostname | Logs the host name in the connection logs. + sighup | log_line_prefix | Controls information prefixed to each log line. + sighup | log_pagewriter | Logs pagewriter thread. + sighup | log_timezone | Sets the time zone to use in log messages. + sighup | asp_flush_mode | Sets the active session profile flush mode:file/table/all. + sighup | asp_log_filename | Sets the file name pattern for asp data files. + sighup | bbox_dump_path | Sets the path of core dump created by bbox_handler. + sighup | log_destination | Sets the destination for server log output. + sighup | log_filename | Sets the file name pattern for log files. + sighup | log_rotation_age | Automatic log file rotation will occur after N minutes. + sighup | log_rotation_size | Automatic log file rotation will occur after N kilobytes. + sighup | log_directory | Sets the destination directory for log files. + sighup | log_file_mode | Sets the file permissions for log files. + sighup | log_truncate_on_rotation | Truncates existing log files of same name during log rotation. + sighup | syslog_ident | Sets the program name used to identify PostgreSQL messages in syslog. + sighup | query_log_file | Sets the file name pattern for slow query log files. + sighup | syslog_facility | Sets the syslog "facility" to be used when syslog enabled. + sighup | cache_connection | pooler cache connection + sighup | bgwriter_delay | Background writer sleep time between rounds. + sighup | bgwriter_flush_after | Number of pages after which previously performed writes are flushed to disk. + sighup | bgwriter_lru_maxpages | Background writer maximum number of LRU pages to flush per round. + sighup | bgwriter_lru_multiplier | Multiple of the average buffer usage to free per round. + sighup | candidate_buf_percent_target | Sets the candidate buffers percent. + sighup | dirty_page_percent_max | Sets the dirty buffers percent. + sighup | enable_memory_context_control | check the max space size of memory context. + sighup | session_history_memory | Sets the maximum number of session history memory used by the process. + sighup | standby_shared_buffers_fraction | The max fraction of shared_buffers usage to standby. + sighup | autovacuum_io_limits | Sets io_limit for autovacum. 
+ sighup | session_statistics_memory | Sets the maximum number of session statistics memory used by the process. + sighup | cpu_collect_timer | Sets the maximum cpu collect time. + sighup | enable_bbox_dump | Enables bbox_handler to create core dump. + sighup | enable_instance_metric_persistent | enable instance resource info persistent function. + sighup | enable_logical_io_statistics | enable logical io statistics function. + sighup | enable_resource_record | enable insert the session info into the user table. + sighup | enable_resource_track | enable resources tracking and recording functionality in the system. + sighup | enable_user_metric_persistent | enable user resource info persistent function. + sighup | instance_metric_retention_time | the instance resource info retention time. + sighup | io_control_unit | Sets the io control unit for reading or writing row tuple. + sighup | topsql_retention_time | the retention time of TopSql + sighup | unique_sql_retention_time | the retention time of unique sql text + sighup | user_metric_retention_time | the user resource info retention time. + sighup | use_workload_manager | Enables workload manager in the system. + sighup | fault_mon_timeout | how many miniutes to monitor lwlock. 0 will disable that + sighup | stats_temp_directory | Writes temporary statistics files to the specified directory. + sighup | alarm_report_interval | Sets the interval time between two alarm report. + sighup | connection_alarm_rate | Reports alarm if connection rate overload. + sighup | enable_access_server_directory | enable sysadmin to create directory + sighup | enable_copy_server_files | enable sysadmin to copy from/to file + sighup | enable_online_ddl_waitlock | Enable ddl wait advisory lock in online expansion. + sighup | operation_mode | Sets the operation mode. + sighup | upgrade_mode | Indicate the upgrade mode: inplace upgrade mode, grey upgrade mode or not in upgrade. + sighup | enable_cbm_tracking | Turn on cbm tracking function. + sighup | enable_xlog_prune | Enable xlog prune when not all standys connected and xlog size is largger than max_xlog_size + sighup | max_io_capacity | The I/O upper limit of batch flush dirty page every second. + sighup | max_redo_log_size | max redo log size. + sighup | max_size_for_xlog_prune | This param set by user is used for xlog to be recycled when not all are connected and the param enable_xlog_prune is on. + sighup | archive_command | Sets the shell command that will be called to archive a WAL file. + sighup | archive_dest | Sets the path that will be used to archive a WAL file. + sighup | archive_mode | Allows archiving of WAL files using archive_command. + sighup | archive_timeout | Forces a switch to the next xlog file if a new file has not been started within N seconds. + sighup | checkpoint_completion_target | Time spent flushing dirty buffers during checkpoint, as fraction of checkpoint interval. + sighup | checkpoint_flush_after | Number of pages after which previously performed writes are flushed to disk. + sighup | checkpoint_segments | Sets the maximum distance in log segments between automatic WAL checkpoints. + sighup | checkpoint_timeout | Sets the maximum time between automatic WAL checkpoints. + sighup | checkpoint_wait_timeout | Sets the maximum wait timeout for checkpointer to start. + sighup | checkpoint_warning | Enables warnings if checkpoint segments are filled more frequently than this. + sighup | datanode_heartbeat_interval | Sets the heartbeat interval of the standby nodes. 
+ sighup | incremental_checkpoint_timeout | Sets the maximum time between automatic WAL checkpoints. + sighup | pagewriter_sleep | PageWriter sleep time. + sighup | fsync | Forces synchronization of updates to disk. + sighup | full_page_writes | Writes full pages to WAL when first modified after a checkpoint. + sighup | wal_sync_method | Selects the method used for forcing WAL updates to disk. + sighup | wal_writer_delay | WAL writer sleep time between WAL flushes. + superuser | lc_messages | Sets the language in which messages are displayed. + superuser | dynamic_library_path | Sets the path for dynamically loadable modules. + superuser | session_replication_role | Sets the session's behavior for triggers and rewrite rules. + superuser | pljava_vmoptions | Options sent to the JVM when it is created + superuser | enable_adio_debug | Enable log debug adio function. + superuser | ignore_checksum_failure | Continues processing after a checksum failure. + superuser | zero_damaged_pages | Continues processing past damaged page headers. + superuser | exit_on_error | Terminates session on any error. + superuser | deadlock_timeout | Sets the time to wait on a lock before checking for deadlock. + superuser | lockwait_timeout | Sets the max time to wait on a lock acquire. + superuser | update_lockwait_timeout | Sets the max time to wait on a lock acquire when concurrently update same tuple. + superuser | enable_extrapolation_stats | Enable extrapolation stats for date datatype. + superuser | enable_fast_numeric | Enable numeric optimize. + superuser | enable_global_stats | Enable global stats for analyze. + superuser | enable_kill_query | Enables cancelling a query that locks some relations owned by a user when the user is dropped. + superuser | enable_change_hjcost | Enable change hash join cost + superuser | enable_csqual_pushdown | Enables colstore qual push down. + superuser | log_duration | Logs the duration of each completed SQL statement. + superuser | log_error_verbosity | Sets the verbosity of logged messages. + superuser | log_lock_waits | Logs long lock waits. + superuser | log_statement | Sets the type of statements logged. + superuser | log_temp_files | Logs the use of temporary files larger than this number of kilobytes. + superuser | raise_errors_if_no_files | raise errors if no files to be imported. + superuser | backtrace_min_messages | Sets the message levels for print backtrace that are logged. + superuser | log_min_duration_statement | Sets the minimum execution time above which statements will be logged. + superuser | log_min_error_statement | Causes all statements generating error at or above this level to be logged. + superuser | log_min_messages | Sets the message levels that are logged. + superuser | temp_file_limit | Limits the total size of all temporary files used by each session. + superuser | fast_extend_file_size | Set fast extend file size used by async dirct IO interface for row store. + superuser | max_stack_depth | Sets the maximum stack depth, in kilobytes. + superuser | autoanalyze | Enable auto-analyze when querying tables with no statistic. + superuser | enable_analyze_check | Enable check if table is analyzed when querying. + superuser | log_executor_stats | Writes executor performance statistics to the server log. + superuser | log_parser_stats | Writes parser performance statistics to the server log. + superuser | log_planner_stats | Writes planner performance statistics to the server log. 
+ superuser | log_statement_stats | Writes cumulative performance statistics to the server log. + superuser | track_activities | Collects information about executing commands. + superuser | track_counts | Collects statistics on database activity. + superuser | track_functions | Collects function-level statistics on database activity. + superuser | track_io_timing | Collects timing statistics for database I/O activity. + superuser | track_sql_count | Collects query info on database activity. + superuser | track_thread_wait_status_interval | Sets the interval for collecting thread status in pgstat thread, in minute + superuser | enable_fast_allocate | enable fallocate to improve file extend performance, make sure filesystem support it, ep:XFS + superuser | lo_compat_privileges | Enables backward compatibility mode for privilege checks on large objects. + superuser | max_keep_log_seg | Sets the threshold for implementing logical replication flow control. + superuser | enable_light_proxy | Turns on light proxy on coordinator. + superuser | enable_pbe_optimization | Turns on pbe optimization: force to reuse generic plan. + superuser | enforce_two_phase_commit | Enforces the use of two-phase commit on transactions thatmade use of temporary objects. + superuser | xc_maintenance_mode | Turns on XC maintenance mode. + user | router | set send node router for sql before unrouter. + user | client_encoding | Sets the client's character set encoding. + user | DateStyle | Sets the display format for date and time values. + user | default_text_search_config | Sets default text search configuration. + user | extra_float_digits | Sets the number of digits displayed for floating-point values. + user | IntervalStyle | Sets the display format for interval values. + user | lc_monetary | Sets the locale for formatting monetary amounts. + user | lc_numeric | Sets the locale for formatting numbers. + user | lc_time | Sets the locale for formatting date and time values. + user | TimeZone | Sets the time zone for displaying and interpreting time stamps. + user | timezone_abbreviations | Selects a file of time zone abbreviations. + user | gin_fuzzy_search_limit | Sets the maximum allowed result for exact search by GIN. + user | tcp_keepalives_count | Maximum number of TCP keepalive retransmits. + user | tcp_keepalives_idle | Time between issuing TCP keepalives. + user | tcp_keepalives_interval | Time between TCP keepalive retransmits. + user | analysis_options | enable/disable sql dfx option. + user | bytea_output | Sets the output format for bytea. + user | check_function_bodies | Checks function bodies during CREATE FUNCTION. + user | client_min_messages | Sets the message levels that are sent to the client. + user | current_schema | Sets the schema search order for names that are not schema-qualified. + user | default_tablespace | Sets the default tablespace to create tables and indexes in. + user | default_transaction_deferrable | Sets the default deferrable status of new transactions. + user | default_transaction_isolation | Sets the transaction isolation level of each new transaction. + user | default_transaction_read_only | Sets the default read-only status of new transactions. + user | enforce_a_behavior | GUC parameter of enforcing adapting to A db. + user | gin_pending_list_limit | Sets the maximum size of the pending list for GIN index. + user | max_query_retry_times | Sets the maximum sql retry times. + user | max_user_defined_exception | GUC parameter of max_user_defined_exception. 
+ user | nls_timestamp_format | defines the default timestamp format to use with the TO_TIMESTAMP functions. + user | omit_encoding_error | Omits encoding convert error. + user | search_path | Sets the schema search order for names that are not schema-qualified. + user | session_timeout | Set the maximum allowed duration of any unused session. + user | statement_timeout | Sets the maximum allowed duration of any statement. + user | transaction_deferrable | Whether to defer a read-only serializable transaction until it can be executed with no possible serialization failures. + user | transaction_isolation | Sets the current transaction's isolation level. + user | temp_tablespaces | Sets the tablespace(s) to use for temporary tables and sort files. + user | transaction_read_only | Sets the current transaction's read-only status. + user | vacuum_freeze_min_age | Minimum age at which VACUUM should freeze a table row. + user | vacuum_freeze_table_age | Age at which VACUUM should scan whole table to freeze tuples. + user | vacuum_gtt_defer_check_age | The defer check age of GTT, used to check expired data after vacuum. + user | xmlbinary | Sets how binary values are to be encoded in XML. + user | xmloption | Sets whether XML data in implicit parsing and serialization operations is to be considered as documents or content fragments. + user | ssl_renegotiation_limit | SSL renegotiation is no longer supported, no matter what value is set. + user | application_type | application distribute type(perfect sharding or not) in gtm free mode. + user | allow_concurrent_tuple_update | Allows concurrent tuple update. + user | track_stmt_details_size | the maximum bytes of statement details to be gathered. + user | track_stmt_stat_level | specify which level statement's statistics to be gathered. + user | comm_debug_mode | Whether use libcomm debug mode for print debug information + user | comm_no_delay | Whether set NO_DELAY option for libcomm socket + user | comm_stat_mode | Whether use libcomm stat mode for print stat data + user | comm_timer_mode | Whether use libcomm timer debug mode for print timer data + user | debug_assertions | Turns on various assertion checks. + user | enable_beta_features | Enable features that ever supported in former version . + user | enable_show_any_tuples | This parameter is just valid when it's a read-only transction, just for analyse.The default_transaction_read_only and transaction_read_only should be true.You'd better keep enable_indexscan and enable_bitmapscan be false to keep seqscan occurs.When enable_show_any_tuples is true, all versions of the tuples are visible, including dirty versions. + user | ha_module_debug | debug ha module. + user | trace_notify | Generates debugging output for LISTEN and NOTIFY. + user | trace_sort | Emits information about resource usage in sorting. + user | minimum_pool_size | Initial pool size. + user | pooler_maximum_idle_time | Maximum idle time of the pooler links. + user | partition_lock_upgrade_timeout | Sets the timeout for partition lock upgrade, in seconds + user | codegen_strategy | Choose whether it is allowed to call C-function in codegen. + user | comm_ackchk_time | Send ack check package to stream sender periodically. + user | query_dop | User-defined degree of parallelism. + user | resource_track_log | Sets resource track log level + user | rewrite_rule | Sets the rewrite rule. + user | sql_beta_feature | Sets the beta feature for SQL engine. + user | geqo | Enables genetic query optimization. 
+ user | geqo_effort | GEQO: effort is used to set the default for other GEQO parameters. + user | geqo_generations | GEQO: number of iterations of the algorithm. + user | geqo_pool_size | GEQO: number of individuals in the population. + user | geqo_seed | GEQO: seed for random path selection. + user | geqo_selection_bias | GEQO: selective pressure within the population. + user | geqo_threshold | Sets the threshold of FROM items beyond which GEQO is used. + user | constraint_exclusion | Enables the planner to use constraints to optimize queries. + user | cost_param | Bitmap controls the use of alternative cost model. + user | cursor_tuple_fraction | Sets the planner's estimate of the fraction of a cursor's rows that will be retrieved. + user | default_statistics_target | Sets the default statistics target. + user | enable_upgrade_merge_lock_mode | If true, use Exclusive Lock mode for deltamerge. + user | from_collapse_limit | Sets the FROM-list size beyond which subqueries are not collapsed. + user | hashagg_table_size | Sets the number of slot in the hash table. + user | join_collapse_limit | Sets the FROM-list size beyond which JOIN constructs are not flattened. + user | max_recursive_times | max recursive times when execute query with recursive-clause. + user | plan_cache_mode | Controls the planner's selection of custom or generic plan. + user | schedule_splits_threshold | The Max count of splits which can be scheduled in memory. + user | td_compatible_truncation | Enable string automatically truncated during insertion. + user | allocate_mem_cost | Sets the planner's estimate of the cost of allocate memory. + user | codegen_cost_threshold | Decided to use LLVM optimization or not. + user | cost_weight_index | Sets the planner's discount when evaluating index cost. + user | cpu_index_tuple_cost | Sets the planner's estimate of the cost of processing each index entry during an index scan. + user | cpu_operator_cost | Sets the planner's estimate of the cost of processing each operator or function call. + user | cpu_tuple_cost | Sets the planner's estimate of the cost of processing each tuple (row). + user | default_limit_rows | Sets the planner's default estimation when limit rows is unknown.Negative value means using percentage of the left tree rows, whereas positive value sets the estimation directly. + user | dngather_min_rows | minimum rows worth do dn gather, 0 meas always, -1 means disable + user | seq_page_cost | Sets the planner's estimate of the cost of a sequentially fetched disk page. + user | acceleration_with_compute_pool | If true, agg/scan may run in compute pool. + user | default_storage_nodegroup | Default storage group for create table. + user | effective_cache_size | Sets the planner's assumption about the size of the disk cache. + user | random_page_cost | Sets the planner's estimate of the cost of a nonsequentially fetched disk page. + user | enable_absolute_tablespace | Enable tablespace using absolute location. + user | enable_beta_opfusion | Enables beta opfusion features. + user | enable_bitmapscan | Enables the planner's use of bitmap-scan plans. + user | enable_bloom_filter | Enable bloom filter check + user | enable_broadcast | Enables the planner's use of broadcast stream plans. + user | enable_codegen | Enable llvm for executor. + user | enable_codegen_print | Enable dump() for llvm function. + user | enable_compress_hll | Enables hll use less memory on datanode. + user | enable_compress_spill | Enables spilling compress. 
+ user | enable_constraint_optimization | Enable optimize query by using informational constraint. + user | enable_hashagg | Enables the planner's use of hashed aggregation plans. + user | enable_hashjoin | Enables the planner's use of hash join plans. + user | enable_dngather | Enables the planner's use of dngather plans. + user | enable_force_vector_engine | Forces to enable the vector engine. + user | enable_hadoop_env | Enable hadoop enviroment. + user | enable_index_nestloop | Enables the planner's use of index-nested join plans. + user | enable_hdfs_predicate_pushdown | Enable hdfs predicate pushdown. + user | enable_hypo_index | Enable hypothetical index for explain. + user | enable_indexonlyscan | Enables the planner's use of index-only-scan plans. + user | enable_indexscan | Enables the planner's use of index-scan plans. + user | enable_material | Enables the planner's use of materialization. + user | enable_mergejoin | Enables the planner's use of merge join plans. + user | enable_nestloop | Enables the planner's use of nested-loop join plans. + user | enable_nodegroup_debug | Enables the planner's node group debug mode. + user | enable_opfusion | Enables opfusion. + user | enable_parallel_ddl | Allow user to implement DDL parallel without dead lock. + user | enable_partition_opfusion | Enables partition opfusion features. + user | enable_partitionwise | Enables the planner's use of partitionwise join plans. + user | enable_seqscan | Enables the planner's use of sequential-scan plans. + user | enable_slot_log | Enables create slot log + user | enable_sonic_hashagg | Enable Sonic hashagg. + user | enable_sonic_hashjoin | Enable Sonic hashjoin. + user | enable_sonic_optspill | Enable Sonic optimized spill. + user | enable_sort | Enables the planner's use of explicit sort steps. + user | enable_tidscan | Enables the planner's use of TID-scan plans. + user | enable_trigger_shipping | Ship a trigger to DN if possible. + user | enable_valuepartition_pruning | Enable optimization for partitioned DFS table to be staticly/dynamically-pruned when possible. + user | enable_vector_engine | Enables the vector engine. + user | expected_computing_nodegroup | Computing node group mode or expected node group for query processing. + user | force_bitmapand | Force the planner's use of bitmap-and plans. + user | opfusion_debug_mode | opfusion debug mode. + user | plan_mode_seed | Specify which plan mode and seed the optimizer generation used. + user | qrw_inlist2join_optmode | Specify inlist2join opimitzation mode. + user | enable_data_replicate | Allows data replicate. + user | RepOriginId | RepOriginId. + user | application_name | Sets the application name to be reported in statistics and logs. + user | connection_info | Sets the connection info to be reported in statistics and logs. + user | debug_pretty_print | Indents parse and plan tree displays. + user | logging_module | enable/disable module logging. + user | gds_debug_mod | Enable GDS-related troubleshoot-logging. + user | plog_merge_age | how long to aggregate profile logs. + user | explain_dna_file | Sets the destination file for explain performance data. + user | backend_flush_after | Number of pages after which previously performed writes are flushed to disk. + user | vacuum_cost_limit | Vacuum cost amount available before napping. + user | vacuum_cost_page_dirty | Vacuum cost for a page dirtied by vacuum. + user | effective_io_concurrency | Number of simultaneous requests that can be handled efficiently by the disk subsystem. 
+ user | vacuum_cost_delay | Vacuum cost delay in milliseconds. + user | vacuum_cost_page_hit | Vacuum cost for a page found in the buffer cache. + user | vacuum_cost_page_miss | Vacuum cost for a page not found in the buffer cache. + user | sql_use_spacelimit | Limit the single sql used space on a single DN. + user | backwrite_quantity | Sets the IO quantity of backwrite buffers used by async dirct IO interface. + user | bulk_read_ring_size | Size of bulk read buffer ring. + user | bulk_write_ring_size | Size of bulk write buffer ring. + user | cstore_backwrite_max_threshold | Cu cache threshold for cstore when do insert by async dirct IO + user | cstore_backwrite_quantity | Each column write threshold for cstore when do insert by async dirct IO + user | cstore_prefetch_quantity | Sets the IO quantity of prefetch CUs used by async dirct IO interface for column store. + user | disable_memory_protect | disable memory protect for query execution. + user | FencedUDFMemoryLimit | Sets the maximum memory to be used for fenced UDF by user. + user | maintenance_work_mem | Sets the maximum memory to be used for maintenance operations. + user | enable_early_free | Using memory early free policy. + user | max_loaded_cudesc | Sets the number of loaded cudesc per column. + user | memory_detail_tracking | Sets the operator name and peak size for triggering the memory logging in that time. + user | memory_tracking_mode | Choose which style to track the memory usage. + user | partition_max_cache_size | The max partition cache size for cstore when do insert + user | partition_mem_batch | Number of partition in-memory batch + user | prefetch_quantity | Sets the IO quantity of prefetch buffers used by async dirct IO interface. + user | psort_work_mem | Sets the maximum memory to be used for partial sort. + user | query_max_mem | Sets the max memory to be reserved for a statement. + user | uncontrolled_memory_context | Sets the white list of MemoryContext allocation. + user | query_mem | Sets the memory to be reserved for a statement. + user | temp_buffers | Sets the maximum number of temporary buffers used by each session. + user | work_mem | Sets the maximum memory to be used for query workspaces. + user | auto_explain_level | auto_explain_level. + user | bbox_dump_count | Sets the maximum number of core dump created by bbox_handler. + user | cgroup_name | Sets the cgroup name to control the queries resource. + user | enable_auto_explain | enable auto explain plans. + user | io_limits | Sets io_limit for each query. + user | io_priority | Sets the IO priority for queries. + user | query_band | Sets query band. + user | resource_track_level | Choose which level info to be collected. + user | session_respool | Sets the session resource pool to control the queries resource. + user | resource_track_cost | Sets the minimum cost to do resource track. + user | resource_track_duration | Sets the minimum duration to record history session info. + user | transaction_pending_time | Sets pend_time for transaction or Stored Procedure. + user | table_skewness_warning_rows | Sets the number of rows returned by DN to enable warning of table skewness. + user | table_skewness_warning_threshold | table skewness threthold + user | ngram_gram_size | N-value for N-gram parser + user | ngram_grapsymbol_ignore | Enables N-gram ignore grapsymbol. 
+ user | check_implicit_conversions | check whether there is an implicit conversion on index column + user | convert_string_to_digit | Convert string to digit when comparing string and digit + user | ngram_punctuation_ignore | Enables N-gram ignore punctuation. + user | acce_min_datasize_per_thread | Used to estimate whether pushdown the plan to the compute pool. + user | cstore_insert_mode | decide destination of data inserted + user | dfs_partition_directory_length | The max length of the value partition directory. + user | enable_save_datachanged_timestamp | If true, save the timestamp when the data of the table changes. + user | explain_perf_mode | Choose which style to print the explain info. + user | hll_default_expthresh | Set parameter expthresh in hll. + user | hll_default_log2m | Set parameter log2m in hll. + user | hll_default_regwidth | Set parameter regwidth in hll. + user | hll_default_sparseon | Set parameter sparseon for hll. + user | hll_max_sparse | Set parameter max_sparse for hll + user | max_active_global_temporary_table | max active global temporary table. + user | show_acce_estimate_detail | If true, show details whether plan is pushed down to the compute pool. + user | skew_option | Choose data skew optimization strategy. + user | behavior_compat_options | compatibility options + user | transform_null_equals | Treats "expr=NULL" as "expr IS NULL". + user | array_nulls | Enables input of NULL elements in arrays. + user | backslash_quote | Sets whether "\'" is allowed in string literals. + user | default_with_oids | Creates new tables with OIDs by default. + user | escape_string_warning | Warn about backslash escapes in ordinary string literals. + user | quote_all_identifiers | When generating SQL fragments, quotes all identifiers. + user | sql_inheritance | Causes subtables to be included by default in various commands. + user | standard_conforming_strings | Causes '...' strings to treat backslashes literally. + user | synchronize_seqscans | Enables synchronized sequential scans. + user | basebackup_timeout | Sets the timeout in seconds for a reponse from gs_basebackup. + user | commit_delay | Sets the delay in microseconds between transaction commit and flushing WAL to disk. + user | commit_siblings | Sets the minimum concurrent open transactions before performing commit_delay. + user | synchronous_commit | Sets the current transaction's synchronization level. + user | retry_ecode_list | Set error code list for CN Retry. + user | enable_twophase_commit | Enable two phase commit when gtm free is on. 
+(601 rows)
+```
+
diff --git "a/content/zh/post/2022/\351\205\215\347\275\256MogDB-openGauss\347\232\204grafana-\347\232\204dashboard.md" "b/content/zh/post/2022/\351\205\215\347\275\256MogDB-openGauss\347\232\204grafana-\347\232\204dashboard.md"
new file mode 100644
index 0000000000000000000000000000000000000000..4b8b2296887ae680c1e7d67936acf6bcbbe4dafa
--- /dev/null
+++ "b/content/zh/post/2022/\351\205\215\347\275\256MogDB-openGauss\347\232\204grafana-\347\232\204dashboard.md"
@@ -0,0 +1,85 @@
++++
+
+title = "配置MogDB/openGauss的grafana的dashboard"
+
+date = "2021-12-27"
+
+tags = [ "配置MogDB/openGauss的grafana的dashboard"]
+
+archives = "2021-12"
+
+author = "高云龙 "
+
+summary = "配置MogDB/openGauss的grafana的dashboard"
+
+img = "/zh/post/2022/title/img8.png"
+
+times = "12:30"
+
++++
+
+# 配置MogDB/openGauss的grafana的dashboard
+
+## **概述**
+
+我们已经介绍了[prometheus + grafana + opengauss\_exporter](https://www.modb.pro/db/173483)完成对MogDB/openGauss 数据库的监控,但这只是第一步,我们还需要通过grafana的dashboard查看各个关注的指标项,本文主要介绍dashboard的配置。
+
+## **监控指标汇总**
+
+数据源选择的是prometheus,主要关注的监控指标分为:基础信息、内存信息、连接信息、复制信息、锁及等待事件、统计信息、query信息以及数据库对象。
+
+![](figures/20211204-cfc47e9a-4272-48e2-9fba-ab5a17c9b323.png)
+
+- **基础信息**
+
+  基础信息是运维人员比较关注、有变化时需要第一时间看到的信息,比如实例IP、数据库版本、数据库运行时间、exporter状态、exporter运行时间等等。
+
+  ![](figures/20211204-183e159b-ef0f-4134-b134-71f99ba6e89a.png)
+
+- **内存信息**
+
+  展示数据库内存总体使用情况,按会话状态分组占用内存情况,内存上下文占用内存情况以及占用内存最多的session及sql文本。
+
+  ![](figures/20211204-ffad91b6-007a-441c-8af8-835a9c0e0597.png)
+
+  ![](figures/20211204-b6e374da-906c-4f47-bc31-96f0ca3037fa.png)
+
+- **连接信息**
+
+  连接数总体使用情况,各状态连接使用情况以及各应用连接数。
+
+  ![](figures/20211204-ec617df5-639c-43a2-a45e-5d84738909c5.png)
+
+- **复制信息**
+
+  复制槽使用占比、复制槽延时、备节点信息及主备之间的延迟。
+
+  ![](figures/20211204-c0cfe4c4-d76b-4a8c-bd04-7a2f81f603a6.png)
+
+- **锁及等待事件**
+
+  锁阻塞源信息,锁阻塞详情,锁类型分布情况,锁冲突及死锁检测,等待事件汇总及等待时间汇总信息。
+
+  ![](figures/20211204-aec67dd0-2b24-4f75-8d74-9ea4b2a22edd.png)
+
+  ![](figures/20211204-cf9d6243-d31c-4e37-aa26-953e2822e0c1.png)
+
+- **统计信息**
+
+  ![](figures/20211204-c8674984-9927-4b9d-bdde-fb9725ea88ee.png)
+
+- **query信息**
+
+  ![](figures/20211204-41c59db9-f61d-4dae-b29d-7036223ba567.png)
+
+- **数据库对象**
+
+  ![](figures/20211204-25c40a97-f135-48be-af18-f1fe9986db5b.png)
+
+
+## **json文件下载地址**
+
+[exporter监控单数据库实例](https://www.modb.pro/download/272899)
+
+[exporter监控多数据库实例](https://www.modb.pro/download/293587)
+
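+补充:导入dashboard后如果面板上没有数据,可以先用一段简单的脚本确认exporter确实在正常输出指标(以下仅为示意草稿,监听端口9187与指标前缀pg_均为假设,请按实际部署调整):
+
+```bash
+#!/bin/bash
+# 检查 opengauss_exporter 是否存活,并粗略统计抓取到的指标条数
+# 端口 9187 为假设值,请按实际部署修改
+EXPORTER_URL="http://127.0.0.1:9187/metrics"
+
+curl -s --max-time 5 "$EXPORTER_URL" | grep -c '^pg_' \
+  && echo "exporter 正常输出指标" \
+  || echo "exporter 无响应或无指标,请检查服务状态"
+```
+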
diff --git "a/content/zh/post/Apricity/2022-09-29-\343\200\220\346\210\221\345\222\214openGauss\347\232\204\346\225\205\344\272\213\343\200\221\345\210\235\350\257\206openGauss" "b/content/zh/post/Apricity/2022-09-29-\343\200\220\346\210\221\345\222\214openGauss\347\232\204\346\225\205\344\272\213\343\200\221\345\210\235\350\257\206openGauss"
new file mode 100644
index 0000000000000000000000000000000000000000..773c6c8ef75988bd20a9fcb2dfd050ad92c3bb99
--- /dev/null
+++ "b/content/zh/post/Apricity/2022-09-29-\343\200\220\346\210\221\345\222\214openGauss\347\232\204\346\225\205\344\272\213\343\200\221\345\210\235\350\257\206openGauss"
@@ -0,0 +1,180 @@
++++
+
+title = "【我和openGauss的故事】初识openGauss"
+
+date = "2022-09-29"
+
+tags = ["【我和openGauss的故事】初识openGauss","SQL"]
+
+archives = "2022-09"
+
+author = "liwt"
+
+summary = "openGauss是一款全面友好开放,携手伙伴共同打造的企业级开源关系型数据库。"
+
++++
+
+一、openGauss的介绍
+
+openGauss是一款全面友好开放,携手伙伴共同打造的企业级开源关系型数据库。openGauss提供面向多核架构的极致性能、全链路的业务、数据安全、基于AI的调优和高效运维的能力。openGauss深度融合华为在数据库领域多年的研发经验,结合企业级场景需求,持续构建竞争力特性。
+
+1、关系型数据库
+
+关系型数据库,是建立在关系模型基础上的数据库,借助于集合代数等数学概念和方法来处理数据库中的数据。现实世界中的各种实体以及实体之间的各种联系均用关系模型来表示。标准数据查询语言SQL就是一种基于关系数据库的语言,这种语言执行对关系数据库中数据的检索和操作。
+
+openGauss的SQL部分代表"结构化查询语言"。这是一种特殊目的的编程语言,是一种数据库查询和程序设计语言,用于存取数据以及查询、更新和管理关系型数据库系统。简单来说,就是更方便地去管理我们系统中的数据。
+
+(1)关系模型的组成
+
+数据结构(表结构)+关系操作(八个操作)+完整性约束(三个完整性)
+
+1、实体完整性 在关系表中,所有元组主码的值都不能为空。
+2、参照完整性 在关系模型中,采用给关系定义外键的形式进行关系间属性的引用,从而实现参照完整性。
+3、自定义完整性 能反映某一具体应用所涉及的数据必须满足的语义要求的约束条件,称为用户自定义的完整性。
+
+(2)关系模型的特点
+
+1、每一列不可再分;
+2、同一关系中属性(字段)不允许重名;
+3、关系中不允许有完全相同的元组;
+4、关系中交换任意两行的位置不影响数据的实际含义;
+5、关系中交换任意两列的位置不影响数据的实际含义。
+
+2、openGauss的特点
+
+1、高性能
+
+提供了面向多核架构的并发控制技术结合鲲鹏硬件优化,在两路鲲鹏下TPCC Benchmark达成性能150万tpmc。
+针对当前硬件多核numa的架构趋势,在内核关键结构上采用了Numa-Aware的数据结构。
+提供SQL-bypass智能快速引擎技术。
+针对频繁更新场景,提供ustore存储引擎。
+
+2、高可用
+
+支持主备同步,异步以及级联备机多种部署模式。
+数据页CRC校验,损坏数据页通过备机自动修复。
+备机并行恢复,10秒内可升主提供服务。
+提供基于paxos分布式一致性协议的日志复制及选主框架。
+
+3、高安全
+
+支持全密态计算,访问控制、加密认证、数据库审计、动态数据脱敏等安全特性,提供全方位端到端的数据安全保护。
+
+4、易运维
+
+基于AI的智能参数调优和索引推荐,提供AI自动参数推荐。
+慢SQL诊断,多维性能自监控视图,实时掌控系统的性能表现。
+提供在线自学习的SQL时间预测。
+
+5、全开放
+
+采用木兰宽松许可证协议,允许对代码自由修改,使用,引用。
+数据库内核能力全开放。
+提供丰富的伙伴认证,培训体系和高校课程。
+
+二、openGauss的使用
+
+1、登录数据库主节点
+
+(1)启动/停止/重启服务
+
+gs_om -t start 启动服务
+gs_om -t stop 停止服务
+gs_om -t restart 重启服务
+
+(2)查询openGauss各实例状态情况
+
+gs_om -t status --detail
+
+(3)检查数据库性能
+
+gs_checkperf
+
+1. 以简要格式在屏幕上显示性能统计结果。
+gs_checkperf -i pmk -U omm
+2. 以详细格式在屏幕上显示性能统计结果。
+gs_checkperf -i pmk -U omm --detail
+
+(4)确认数据库主节点的端口号
+
+到数据库主节点数据路径下的postgresql.conf文件中查看端口号信息。示例如下:
+cat /opt/gaussdb/master1/postgresql.conf |grep port
+
+[omm@openGauss01 ~]$ cat /opt/gaussdb/master1/postgresql.conf |grep port
+port = '36000' # (change requires restart)
+#ssl_renegotiation_limit = 0 # amount of data between renegotiations, no longer supported
+ # supported by the operating system:
+
+
+36000为数据库主节点的端口号。
+端口号在安装数据库时,会在xml文件中配置,查看安装时的xml配置文件也可以找到端口。
+
+(5)列出所有可用的数据库
+
+gsql -d postgres -p 36000 -l
+[omm@openGauss01 ~]$ gsql -d postgres -p 36000 -l
+ List of databases
+ Name | Owner | Encoding | Collate | Ctype | Access privileges
+-----------+-------+-----------+---------+-------+-------------------
+ db1 | song | SQL_ASCII | C | C |
+ db2 | song | SQL_ASCII | C | C |
+ kwdb | kw | SQL_ASCII | C | C |
+ mydb | song | GBK | C | C |
+ postgres | omm | SQL_ASCII | C | C |
+ song_suse | song | SQL_ASCII | C | C |
+ template0 | omm | SQL_ASCII | C | C | =c/omm +
+ | | | | | omm=CTc/omm
+ template1 | omm | SQL_ASCII | C | C | =c/omm +
+ | | | | | omm=CTc/omm
+(8 rows)
+
+
+2、查看数据库对象
+
+1. 登录默认数据库postgres:
+gsql -d postgres -p 36000
+[omm@openGauss01 ~]$ gsql -d postgres -p 36000
+gsql ((GaussDB Kernel V500R002C00 build fab4f5ea) compiled at 2021-10-24 11:58:09 commit 3086 last mr 6592 release)
+Non-SSL connection (SSL connection is recommended when requiring high-security)
+Type "help" for help.
+
+openGauss=#
+2. 登录自建数据库song_suse:
+gsql -d 数据库名 -p 36000 -U 用户名 -W 密码 -r
+[omm@openGauss01 ~]$ gsql -d song_suse -p 36000 -U song -W Info1234 -r
+gsql ((GaussDB Kernel V500R002C00 build fab4f5ea) compiled at 2021-10-24 11:58:09 commit 3086 last mr 6592 release)
+Non-SSL connection (SSL connection is recommended when requiring high-security)
+Type "help" for help.
+
+song_suse=>
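+
+除了交互式登录,gsql也支持用-c参数直接执行单条命令后退出,便于写脚本(以下为示例草稿,端口与用户沿用上文示例,密码的传递方式请按安全要求自行调整):
+
+```bash
+# 用 -c 非交互地执行一条 SQL,适合在 shell 脚本中调用
+gsql -d song_suse -p 36000 -U song -W Info1234 -c "select current_user, current_database();"
+```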
+
+(1)查看帮助信息:
+
+postgres=# \?
+
+(2)切换数据库:
+
+postgres=# \c dbname
+
+(3)列举数据库:
+
+使用\l元命令查看数据库系统的数据库列表。
+postgres=# \l
+使用如下命令通过系统表pg_database查询数据库列表。
+postgres=# select datname from pg_database;
+
+(4)列举表:
+
+postgres=# \dt
+postgres=# \d
+
+(5)查询表空间:
+
+使用gsql程序的\db元命令查询表空间。
+postgres=# \db
+检查pg_tablespace系统表。如下命令可查到系统和用户定义的全部表空间。
+postgres=# select spcname from pg_tablespace;
+
diff --git "a/content/zh/post/ArsuKron/2022-11-21-openGauss\346\225\260\346\215\256\345\272\223\344\275\277\347\224\250\345\210\206\344\272\253-01.md" "b/content/zh/post/ArsuKron/2022-11-21-openGauss\346\225\260\346\215\256\345\272\223\344\275\277\347\224\250\345\210\206\344\272\253-01.md"
new file mode 100644
index 0000000000000000000000000000000000000000..10535884e001a768368443f98939a2a559a3176d
--- /dev/null
+++ "b/content/zh/post/ArsuKron/2022-11-21-openGauss\346\225\260\346\215\256\345\272\223\344\275\277\347\224\250\345\210\206\344\272\253-01.md"
@@ -0,0 +1,387 @@
++++
+
+title = "openGauss数据库使用分享"
+
+date = "2022-11-21"
+
+tags = ["openGauss技术文章征集","SQL"]
+
+archives = "2022-11"
+
+author = "ArsuKron"
+
+summary = "介绍openGauss数据库个人使用经验"
+
++++
+
+
+# 一:openGauss数据库简介
+----------------
+
+![输入图片说明](../../../../data/img/a9eb1fea46a14a049b2fc36116dbe43f.png)
+
+想必大家都了解华为这个企业,华为在手机、通信、汽车研发、操作系统研发等方面都有很大的成就,如果不是受到了制裁,估计华为现在能取得更好的成绩。华为这一品牌大家都知道,但华为的openGauss数据库可能了解的人并不多,我也只是在之前的鲲鹏竞赛中才接触到这款数据库。
+
+openGauss数据库是华为公司在深度融合技术应用于数据库领域多年经验的基础上,结合企业级场景要求,推出的新一代企业级开源数据库。openGauss提供面向多核架构的极致性能、全链路的业务、数据安全、基于AI的调优和高效运维的能力。作为华为参与研发的国产数据库,openGauss具有很高的性能和可用性。openGauss数据库具有如下特点:
+
+* openGauss是一个数据库管理系统。
+* openGauss数据库是关系型的。
+* openGauss软件是开源的。
+* openGauss数据库具有高性能、高可用、高安全、易运维、全开放的特点。
+
+# 二:openGauss安装注意事项
+-----------------
+
+## 1.操作系统选择
+
+openGauss支持以脚本方式进行极简安装,安装起来并不是很费力,唯一觉得不好的就是目前其只支持在Linux上进行安装,并不支持在Windows上安装。假如个人需要在Windows上使用,可以安装一个虚拟机。需要注意的是,对于X86架构的系统,我推荐使用CentOS系统,因为这个系统比较常用,但是必须安装CentOS 7.6版本,安装其他版本可能会导致后面部署失败。
+
+## 2.系统配置
+
+安装好操作系统之后,还需要注意系统的配置信息。假如使用虚拟机进行安装的话,最低配置都需要2核4G,不然到最后使用omm用户进行数据库初始化时候会出问题。
+
+此外,在安装CentOS之后要将网络连接选择为NAT模式(共享主机的IP地址),不然访问不了外网;并且不能使用DHCP配置,不然IP地址一直变的话就没法玩了,后面使用FinalShell时候也会很麻烦。我的系统配置信息见下图。
+
+![输入图片说明](../../../../data/img/image-20221112180727119.png)
+
+## 3.工具选择
+
+关于openGauss数据库的安装过程,可以自行参照官方文档的安装教程或者到百度上进行搜索。为了安装和使用更方便,可以下载一个SSH工具进行操作,无论是安装数据库还是操作数据库都会更方便。关于SSH工具有很多选择,比如SSH Secure Shell Client、Xshell、FinalShell等,我使用的是FinalShell,它是免费的,而且安装操作起来也很方便,具体的安装连接过程可以自行百度一下,成功连接后显示下图信息:
+
+![输入图片说明](../../../../data/img/image-20221112191157151.png)
+
+# 三:openGauss数据库实战
+----------------
+
+## 1.连接数据库
+
+步骤一:以操作系统用户omm登录数据库主节点
+
+```
+su - omm
+```
+
+![输入图片说明](../../../../data/img/image-20221112191849553.png)
+
+步骤二:启动服务
+
+```
+gs_om -t start
+```
+
+![输入图片说明](../../../../data/img/image-20221112192100762.png)
+
+步骤三:连接数据库(注意端口号)
+
+```
+gsql -d postgres -p 26000
+```
+
+![输入图片说明](../../../../data/img/image-20221112192312132.png)
+
+**补充:**
+
+1. 第一次连接数据库时,需要先修改omm用户密码,新密码要求包含大小写字母、数字和特殊字符,假设我这里设置密码为Bigdata@123
+
+   ```
+   postgres=# alter role omm identified by 'Bigdata@123' replace 'openGauss@123';
+   ALTER ROLE
+   ```
+
+2. 创建数据库用户
+
+   默认只有openGauss安装时创建的管理员用户可以访问初始数据库,您还可以创建其他数据库用户帐号。
+
+   ```
+   postgres=# CREATE USER tbug WITH PASSWORD "Bigdata@123";
+   CREATE ROLE
+   ```
+
+   如上创建了一个用户名为tbug,密码为Bigdata@123的用户。
+3. 创建数据库
+
+   ```
+   postgres=# CREATE DATABASE db1 OWNER tbug;
+   CREATE DATABASE
+   ```
+
+   创建完db1数据库后,就可以按如下方法退出postgres数据库,使用新用户连接到此数据库执行接下来的创建表等操作。当然,也可以选择继续在默认的postgres数据库下做后续的体验。
+
+   ```
+   postgres=# \q
+   ```
+
+
+## 2.数据表的创建
+
+步骤一:连接上自己创建的数据库
+
+```
+gsql -d db1 -p 26000 -U tbug -W Bigdata@123 -r
+```
+
+![输入图片说明](../../../../data/img/image-20221112193433708.png)
+
+出现上述页面就表示成功连接上之前创建的db1数据库。
+
+步骤二:创建SCHEMA
+
+```
+CREATE SCHEMA tbug AUTHORIZATION tbug;
+CREATE SCHEMA
+```
+
+步骤三:创建表
+
+1. 创建如下Student表
+
+   ![输入图片说明](../../../../data/img/image-20221112200707734.png)
+
+   ```sql
+   CREATE TABLE STUDENT
+   (
+   Sno VARCHAR2(17) PRIMARY KEY ,
+   Sname VARCHAR2(10) NOT NULL ,
+   Sage INT ,
+   Ssex VARCHAR2(3) ,
+   Sdept VARCHAR2(20)
+   );
+   ```
+
+
+   ![输入图片说明](../../../../data/img/image-20221112195630340.png)
+
+   可以看到提示出现: ```CREATE TABLE / PRIMARY KEY will create implicit index "student\_pkey" for table "student"```
+
+   它表示系统为主键自动创建了一个隐含的索引"student\_pkey"。
+
+2. 查看数据库中的表
+
+   ```sql
+   db1=> \d
+   ```
+
+
+   ![输入图片说明](../../../../data/img/image-20221112201236470.png)
+
+3. 查看student表详细信息
+
+   ```
+   db1=> \d student
+   ```
+
+
+   ![输入图片说明](../../../../data/img/image-20221112201305480.png)
+
+   可以看到自动创建的主键索引信息以及其存储结构(B树)。
+
+## 3.插入数据
+
+**向表中插入三条数据**
+
+1. 插入数据
+
+   ```sql
+   INSERT INTO STUDENT(Sno, Sname, Sage, Ssex, Sdept) VALUES ('1906145218', '张三', '19', '男', '网络安全');
+   INSERT INTO STUDENT(Sno, Sname, Sage, Ssex, Sdept) VALUES ('1904154568', '李四', '20', '女', '网络工程');
+   INSERT INTO STUDENT(Sno, Sname, Sage, Ssex, Sdept) VALUES ('1910245451', '王五', '21', '男', '软件工程');
+   ```
+
+
+   ![输入图片说明](../../../../data/img/image-20221112201605824.png)
+
+2. 查看表格数据
+
+   ```sql
+   select * from student;
+   ```
+
+
+   ![输入图片说明](../../../../data/img/image-20221112201652208.png)
+
+3. 补充
+
+   假设性别字段长度设置为varchar2(2),那么后面插入数据时会出现下面情况
+
+
+   ![输入图片说明](../../../../data/img/image-20221112200549055.png)
+
+   可以看到数据插入失败,原因是字段值太长。这时候可以考虑编码问题:假如用的是GBK编码,一个汉字占2个字节,varchar2(2)是够用的;但假如编码格式为UTF-8,一个汉字要占3个字节。如何查看数据库编码格式呢?执行如下命令:
+
+   ```
+   db1=> \encoding
+   ```
+
+   ![输入图片说明](../../../../data/img/image-20221112202058779.png)
+
+   可以看到数据库编码格式为UTF-8,这时候性别字段就需要至少定义为varchar2(3)。
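+
+   可以在shell里直接验证不同编码下一个汉字占用的字节数(以下命令在UTF-8环境下执行,iconv转换仅作演示,CentOS默认自带iconv):
+
+   ```bash
+   # UTF-8 下一个汉字占 3 字节
+   echo -n "男" | wc -c                            # 输出 3
+
+   # 转成 GBK 后同一个汉字只占 2 字节
+   echo -n "男" | iconv -f UTF-8 -t GBK | wc -c    # 输出 2
+   ```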
+## 4.查询数据
+
+1. 查询所有
+
+   ```sql
+   db1=> select * from student;
+   ```
+
+
+   ![输入图片说明](../../../../data/img/image-20221112202633351.png)
+
+2. 条件查询
+
+   查询姓名为"张三"的所有字段
+
+   ```sql
+   select * from student where sname='张三';
+   ```
+
+
+   ![输入图片说明](../../../../data/img/image-20221112203208888.png)
+
+   查询张三的学号、年龄及性别
+
+   ```sql
+   select sno,sage,ssex from student where sname='张三';
+   ```
+
+   ![输入图片说明](../../../../data/img/image-20221112203348682.png)
+
+3. 模糊查询
+
+   * LIKE运算符
+
+     在SQL中可以使用"\_"和"%"通配符实现LIKE运算,通配符是一种在 WHERE 子句中拥有特殊意义的字符,"%"通配符可以匹配 0 到多个任意字符,"\_"通配符的功能与"%"类似,其仅匹配任意一个字符。如需匹配两个字符,则使用"\_ \_"。
+
+     查询专业名称以"网"开头的数据(注意字符串常量要用单引号):
+
+     ```sql
+     select * from student where sdept like '网%';
+     ```
+
+     ![输入图片说明](../../../../data/img/image-20221112211748281.png)
+
+   * IN运算符
+
+     IN 运算符也称为"成员条件运算符",用于判断一个值是否在一个指定的数据集合之内。
+
+     查询专业为网络工程、软件工程的学生
+
+     ```sql
+     select * from student where sdept in('网络工程','软件工程');
+     ```
+
+     ![输入图片说明](../../../../data/img/image-20221112212000123.png)
+
+4. BETWEEN...AND运算符
+
+   在 WHERE 子句中,可以采用 between...and 运算符选取介于两个值之间的数据,这些值可以是数字和日期类型(取值范围包括边界值)。
+
+   查询年龄在20至22岁之间的学生:
+
+   ```
+   select * from student where sage between 20 and 22;
+   ```
+
+   ![输入图片说明](../../../../data/img/image-20221112212204932.png)
+
+## 5.修改数据
+
+在SQL中,要修改某一字段的值可以使用UPDATE语句加上条件来进行修改。
+
+修改王五的年龄为25
+
+```sql
+update student set sage = 25 where sname='王五';
+select * from student;
+```
+
+![输入图片说明](../../../../data/img/image-20221112212914047.png)
+
+## 6.删除数据
+
+在SQL中,要删除某一记录可以使用DELETE语句加上条件来进行删除。
+
+删除王五的信息
+
+```sql
+delete from student where sname='王五';
+select * from student;
+```
+
+![输入图片说明](../../../../data/img/image-20221112213135433.png)
+
+## 7.修改字段
+
+* 增加字段
+
+  ALTER TABLE <表名> ADD <新字段名> <数据类型> \[约束条件\]
+
+  对语法格式的说明如下:
+
+  1.<表名> 为数据表的名字;
+
+  2.<新字段名> 为所要添加的字段的名字;
+
+  3.<数据类型> 为所要添加的字段能存储数据的数据类型;
+
+  4.\[约束条件\] 是可选的,用来对添加的字段进行约束。
+
+  SQL 默认在表的最后位置添加新字段。如果希望在开头位置(第一列的前面)添加新字段,有的数据库可以使用 FIRST 关键字,语法格式如下(注:FIRST 是 MySQL 风格的语法,openGauss 原生语法不一定支持,请以实际版本文档为准):
+
+  ALTER TABLE <表名> ADD <新字段名> <数据类型> \[约束条件\] FIRST;
+  在student表中添加字段(sphone,varchar2(15))
+
+  ```sql
+  alter table student add sphone varchar2(15);
+  \d student
+  ```
+
+  ![输入图片说明](../../../../data/img/image-20221112213532172.png)
+
+
+* 修改字段
+
+  * 修改字段名
+
+    ALTER TABLE <表名> RENAME COLUMN A to B
+
+    将sphone修改为stel
+
+    ```sql
+    alter table student rename column sphone to stel;
+    \d student
+    ```
+
+    ![输入图片说明](../../../../data/img/image-20221112213820436.png)
+
+  * 修改字段类型
+
+    ALTER TABLE 【表名】 ALTER 【字段名】 type 【字段新类型】;
+
+    将stel字段长度修改为11
+
+    ```sql
+    alter table student alter stel type varchar2(11);
+    \d student
+    ```
+
+    ![输入图片说明](../../../../data/img/image-20221112214622291.png)
+
+  * 删除字段
+
+    ALTER TABLE <表名> DROP COLUMN <字段名>
+
+    删除stel字段
+
+    ```sql
+    alter table student drop column stel;
+    \d student
+    ```
+
+
+    ![输入图片说明](../../../../data/img/image-20221112214718572.png)
+
+# 四:使用感想
+使用完openGauss数据库之后,不得不感慨国产数据库正在兴起。如今我们要警惕西方的技术封锁,做好自己的数据库,这样即使西方在这一领域对我们进行封锁,我们也还能有自己的替代品。希望高校能够逐渐使用我们的国产数据库进行教学,并鼓励学生进行自我创新。
+openGauss数据库是一款很优秀的数据库,不过在安装便利性、生态完整性、使用便利性方面还有很大的进步空间。相信在我们大家的努力下,早晚有一天国产数据库能够媲美Oracle、MySQL等主流数据库,同时也希望openGauss数据库能够做得越来越好。
\ No newline at end of file
diff --git "a/content/zh/post/DarkAthena/MPT\350\207\252\345\212\250\345\214\226\346\265\213\350\257\225\345\267\245\345\205\267\345\256\236\350\267\265.md" "b/content/zh/post/DarkAthena/MPT\350\207\252\345\212\250\345\214\226\346\265\213\350\257\225\345\267\245\345\205\267\345\256\236\350\267\265.md"
new file mode 100644
index 0000000000000000000000000000000000000000..59fe0fca8616773b5e8e9692f8a41fd11d5dfcf1
--- /dev/null
+++ "b/content/zh/post/DarkAthena/MPT\350\207\252\345\212\250\345\214\226\346\265\213\350\257\225\345\267\245\345\205\267\345\256\236\350\267\265.md"
@@ -0,0 +1,144 @@
++++
+
+title = "MPT自动化测试工具实践"
+
+date = "2023-02-07"
+
+tags = ["MogDB"]
+
+archives = "2023-02"
+
+author = "DarkAthena"
+
+summary = "MPT自动化测试工具实践"
+
+img = "/zh/post/DarkAthena/title/img31.png"
+
+times = "10:20"
+
++++
+
+# 创建测试环境目录及用户
+
+```
+mkdir -p /opt/mpt_test
+useradd mpt_test
+chown mpt_test /opt/mpt_test
+```
+
+# 下载mpt并解压
+
+最新版本下载地址见wiki
+http://wiki.enmotech.com:8090/pages/viewpage.action?pageId=29361193
+
+```
+su - mpt_test
+cd /opt/mpt_test
+wget https://cdn-mogdb.enmotech.com/mpt/v1.3.0/mpt_x86_64_v1.3.0.zip
+unzip mpt_x86_64_v1.3.0.zip
+```
+
+# 下载配置文件
+
+http://wiki.enmotech.com:8090/pages/viewpage.action?pageId=29361193
+
+```
+cd mpt_x86_64_v1.3.0
+# 下载测试配置文件至此目录,比如 MPT_TMPL_NORMAL_v1.1_20230109.xlsx
+```
+
+# 申请license,会收到邮件,手动上传到服务器上
+
+```
+./mpt_x86_64 --apply-license
+cat > license.json
+```
+
+# 创建PTK安装配置文件
+
+```
+exit ## 切换回root
+cd /opt/mpt_test
+cat > /opt/mpt_test/config.yaml
+
+global:
+  # # cluster name (required)
+  cluster_name: "mpt_test"
+  # # system user for running db
+  user: "mpt_test"
+  # # system user group, same as username if not given
+  # group: "omm"
+  # # base directory for install MogDB server,
+  # # if any of app_dir, data_dir, log_dir and tool_dir not config,
+  # # PTK will create corresponding directory under base_dir
+  base_dir: "/opt/mpt_test/mogdb"
+  # # default password :Enmo@123
+  db_password: "pTk6NDk0MjZiOGQ8PTxFPT8/QkZiR0dlOFo2bWd3a2pxb3BrQXdKTHpHNXJLUFVUckNHNDRoemg5SE05RDQ="
+
+db_servers:
+  - host: "127.0.0.1"
+    # # database port
+    db_port: 26100
+    db_conf:
+      checkpoint_segments: 64
+      wal_keep_segments: 64
+
+ctrl+d ## 保存文件
+```
+
+# 安装PTK
+
+```
+curl --proto '=https' --tlsv1.2 -sSf https://cdn-mogdb.enmotech.com/ptk/install.sh | sh
+ptk checkos -f /opt/mpt_test/config.yaml
+```
+
+# 安装数据库 (如果重新测试,则从这一步开始,主要是为了释放空间,以及避免测试用例编写遗漏了清理环境)
+
+```
+ptk cluster stop -n mpt_test ## 这里为了删库重测
+ptk uninstall -n mpt_test ## y y n --删除数据库但保留用户
+ptk install -f /opt/mpt_test/config.yaml --skip-create-user -y
+ptk cluster install-plugin -n mpt_test
+```
+
+# 安装数据库兼容性增强组件
+
+```
+su - mpt_test
+gsql -r
+create extension whale;
+create extension orafce;
+\q
+
+cd /opt/mpt_test
+
+wget https://gitee.com/enmotech/compat-tools/repository/archive/master.zip
+unzip master.zip
+cd compat-tools-master
+gsql -f runMe.sql
+```
+
+# 执行测试(对于测试时间长的,建议nohup执行)
+
+```
+cd /opt/mpt_test/mpt_x86_64_v1.3.0
+nohup ./mpt_x86_64 -H localhost -P $PGPORT -U $PGUSER -c MPT_TMPL_NORMAL_v1.1_20230109.xlsx -r MPT_Report-$(date +%Y%m%d-%H%M%S).docx --cmd-spliter=xxxxxxx -T W &
+
+ctrl+c
+```
+
+# 观察日志
+
+```
+tail -f nohup.out
+```
+
+# 注意
+
+## 测试需要预留足够的磁盘空间,其中pg_log可能会占很大的空间,如果空间紧张,需要手动清理
+
+```
+# 查看目录大小
+du -h --max-depth=1 /opt/mpt_test/mogdb
+```
diff --git "a/content/zh/post/DarkAthena/MogDB\357\274\2103.0.3.6\347\211\210\346\234\254\357\274\211\346\265\213\350\257\225oracle\350\257\255\346\263\225\347\232\204DBLINK\345\212\237\350\203\275\346\223\215\344\275\234\346\255\245\351\252\244.md" "b/content/zh/post/DarkAthena/MogDB\357\274\2103.0.3.6\347\211\210\346\234\254\357\274\211\346\265\213\350\257\225oracle\350\257\255\346\263\225\347\232\204DBLINK\345\212\237\350\203\275\346\223\215\344\275\234\346\255\245\351\252\244.md"
new file mode 100644
index 0000000000000000000000000000000000000000..280f47cc7bf658ab6a97862cf91961ce91d47910
--- /dev/null
+++ "b/content/zh/post/DarkAthena/MogDB\357\274\2103.0.3.6\347\211\210\346\234\254\357\274\211\346\265\213\350\257\225oracle\350\257\255\346\263\225\347\232\204DBLINK\345\212\237\350\203\275\346\223\215\344\275\234\346\255\245\351\252\244.md"
@@ -0,0 +1,114 @@
++++
+
+title = "MogDB(3.0.3.6版本)测试oracle语法的DBLINK功能操作步骤"
+
+date = "2023-02-07"
+
+tags = ["MogDB"]
+
+archives = "2023-02"
+
+author = "DarkAthena"
+
+summary = "MogDB(3.0.3.6版本)测试oracle语法的DBLINK功能操作步骤"
+
+img = "/zh/post/DarkAthena/title/img31.png"
+
+times = "10:20"
+
++++
+
+# 安装POC版本MogDB
+
+```
+ptk install -f config.yaml -p https://cdn-mogdb.enmotech.com/mogdb-media/3.0.3.6/MogDB-3.0.3.6-CentOS-x86_64.tar.gz
+```
+
+# 添加插件
+
+```
+ptk cluster install-plugin -n xxx -p https://cdn-mogdb.enmotech.com/mogdb-media/3.0.3.6/Plugins-3.0.3-CentOS-x86_64.tar.gz --skip-check-version
+```
+
+# 切换到数据库用户
+
+```
+su - omm
+```
+
+# 手动下载oracle_fdw
+
+oracle_fdw没有打到插件包里去,需要单独下载
+
+```
+wget https://cdn-mogdb.enmotech.com/mogdb-media/3.0.1.4/oracle_fdw_CentOS_x86.tar.gz
+```
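+
+复制文件前,可以先看一眼压缩包里的内容,确认其中包含下一步需要的 .sql、.control 和 .so 文件(仅为检查示意):
+
+```bash
+# 列出压缩包内容,确认包含 oracle_fdw 的 .sql/.control/.so 文件
+tar -tf oracle_fdw_CentOS_x86.tar.gz
+```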
+# 解压并复制到对应目录
+
+```
+tar -xvf oracle_fdw_CentOS_x86.tar.gz
+cd oracle_fdw_CentOS_x86
+cp *.sql $GAUSSHOME/app/share/postgresql/extension/
+cp *.control $GAUSSHOME/app/share/postgresql/extension/
+cp *.so $GAUSSHOME/lib/postgresql
+```
+
+# 下载oracle客户端并配置环境变量
+
+```
+wget https://download.oracle.com/otn_software/linux/instantclient/218000/instantclient-basic-linux.x64-21.8.0.0.0dbru.zip
+unzip instantclient-basic-linux.x64-21.8.0.0.0dbru.zip
+vi ~/.bash_profile
+export LD_LIBRARY_PATH=/home/omm/instantclient_21_8:$LD_LIBRARY_PATH
+export PATH=/home/omm/instantclient_21_8:$PATH
+source ~/.bash_profile
+```
+
+# 重启数据库
+
+```
+gs_ctl restart
+```
+
+# 连接数据库并创建插件
+
+```
+gsql -r
+create extension oracle_fdw;
+```
+
+# 创建服务器
+
+```
+create server ora_sv foreign data wrapper oracle_fdw options(dbserver 'xxx.xxx.xxx.xxx:1521/pdb1');
+```
+
+# 给服务器设置用户名和密码
+
+```
+create user mapping for mogdb的用户名 server ora_sv options(user 'oracle的用户名',password 'oracle的密码');
+```
+
+可能会报错
+
+> ERROR: No key file usermapping.key.cipher
+> Please create usermapping.key.cipher file with gs_guc and gs_ssh, such as :gs_ssh -c "gs_guc generate -S XXX -D $GAUSSHOME/bin -o usermapping"
+
+先去操作系统输入以下命令,并输入密码
+
+```
+gs_guc generate -D $GAUSSHOME/bin -o usermapping
+```
+
+然后回来创建
+
+```
+create user mapping for mogdb的用户名 server ora_sv options(user 'oracle的用户名',password 'oracle的密码');
+```
+
+# 测试
+
+```
+select * from scott.emp@ora_sv;
+```
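+
+除了上面Oracle风格的@dblink写法,也可以用oracle_fdw的标准方式建立外部表来访问远端数据(以下仅为示意草稿,表名ora_emp为假设,列定义需与Oracle端实际表结构一致):
+
+```bash
+# 通过 gsql 执行 oracle_fdw 标准的外部表用法(列定义仅为示意,请按实际表调整)
+gsql -d postgres <<'SQL'
+CREATE FOREIGN TABLE ora_emp (
+    empno  numeric(4),
+    ename  varchar2(10),
+    sal    numeric(7,2)
+) SERVER ora_sv OPTIONS (schema 'SCOTT', table 'EMP');
+
+SELECT * FROM ora_emp;
+SQL
+```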
diff --git a/content/zh/post/DarkAthena/images/01fa1633e6829a9adafc9eaf4f3a502c.png b/content/zh/post/DarkAthena/images/01fa1633e6829a9adafc9eaf4f3a502c.png
new file mode 100644
index 0000000000000000000000000000000000000000..5b4ba30b87df1419fc61efdf9249464a1e6f765d
Binary files /dev/null and b/content/zh/post/DarkAthena/images/01fa1633e6829a9adafc9eaf4f3a502c.png differ
diff --git a/content/zh/post/DarkAthena/images/035778d7d2f842f0e43b07f06be50dcc.png b/content/zh/post/DarkAthena/images/035778d7d2f842f0e43b07f06be50dcc.png
new file mode 100644
index 0000000000000000000000000000000000000000..700d92fbe1ce554e78b769a226e3a06008e4c33f
Binary files /dev/null and b/content/zh/post/DarkAthena/images/035778d7d2f842f0e43b07f06be50dcc.png differ
diff --git a/content/zh/post/DarkAthena/images/042e5465f559ceac88bdaf9fec73c530.png b/content/zh/post/DarkAthena/images/042e5465f559ceac88bdaf9fec73c530.png
new file mode 100644
index 0000000000000000000000000000000000000000..8a578799bed7d5a2ace3501a70b3542195cc9a32
Binary files /dev/null and b/content/zh/post/DarkAthena/images/042e5465f559ceac88bdaf9fec73c530.png differ
diff --git a/content/zh/post/DarkAthena/images/16bcc5b476ab328016746560d331ea19.png b/content/zh/post/DarkAthena/images/16bcc5b476ab328016746560d331ea19.png
new file mode 100644
index 0000000000000000000000000000000000000000..7a7c45bf7d0db6cf3e58d8ceedab723f77eb6df6
Binary files /dev/null and b/content/zh/post/DarkAthena/images/16bcc5b476ab328016746560d331ea19.png differ
diff --git a/content/zh/post/DarkAthena/images/20230201-49d75e33-2a1e-4eef-b312-aae978af089a.png b/content/zh/post/DarkAthena/images/20230201-49d75e33-2a1e-4eef-b312-aae978af089a.png
new file mode 100644
index 0000000000000000000000000000000000000000..3b4e82c5e249f42869f641bb74fea5f5828e1ff9
Binary files /dev/null and b/content/zh/post/DarkAthena/images/20230201-49d75e33-2a1e-4eef-b312-aae978af089a.png differ
diff --git a/content/zh/post/DarkAthena/images/20230201-858fc5aa-cd0a-4e31-9141-1c87fb24b9c8.png b/content/zh/post/DarkAthena/images/20230201-858fc5aa-cd0a-4e31-9141-1c87fb24b9c8.png
new file mode 100644
index 0000000000000000000000000000000000000000..d62aff052e096962f19d59f6b667fa0c33a6d594
Binary files /dev/null and b/content/zh/post/DarkAthena/images/20230201-858fc5aa-cd0a-4e31-9141-1c87fb24b9c8.png differ
diff --git a/content/zh/post/DarkAthena/images/20744cd1b2784c25b9f1d08e2a08b26b.png b/content/zh/post/DarkAthena/images/20744cd1b2784c25b9f1d08e2a08b26b.png
new file mode 100644
index 0000000000000000000000000000000000000000..93eff530895579c6c04746e045c5fd8dc70b5151
Binary files /dev/null and b/content/zh/post/DarkAthena/images/20744cd1b2784c25b9f1d08e2a08b26b.png differ
diff --git a/content/zh/post/DarkAthena/images/34ed0aa9c0765d744f1ae4b2e23bbcc9.png b/content/zh/post/DarkAthena/images/34ed0aa9c0765d744f1ae4b2e23bbcc9.png
new file mode 100644
index 0000000000000000000000000000000000000000..eee1ecbeff4cdbb77d202e642ea6a544c86ef81a
Binary files /dev/null and b/content/zh/post/DarkAthena/images/34ed0aa9c0765d744f1ae4b2e23bbcc9.png differ
diff --git a/content/zh/post/DarkAthena/images/3c5edcfd66c542453e6284b38773b911.png b/content/zh/post/DarkAthena/images/3c5edcfd66c542453e6284b38773b911.png
new file mode 100644
index 0000000000000000000000000000000000000000..9839062800e067f307c326290f049c8e6277bba5
Binary files /dev/null and b/content/zh/post/DarkAthena/images/3c5edcfd66c542453e6284b38773b911.png differ
diff --git a/content/zh/post/DarkAthena/images/43f831521c831e975a7ef7258e3f9625.png b/content/zh/post/DarkAthena/images/43f831521c831e975a7ef7258e3f9625.png
new file mode 100644
index 0000000000000000000000000000000000000000..b240534e3d5839c046c2ba1c54be95dffa3c3b1d
Binary files /dev/null and b/content/zh/post/DarkAthena/images/43f831521c831e975a7ef7258e3f9625.png differ
diff --git a/content/zh/post/DarkAthena/images/5e59536d4830366906e336284f44007c.png b/content/zh/post/DarkAthena/images/5e59536d4830366906e336284f44007c.png
new file mode 100644
index 0000000000000000000000000000000000000000..b2433f1bcf35fd6e33b8c0761036c75ddc290e4d
Binary files /dev/null and b/content/zh/post/DarkAthena/images/5e59536d4830366906e336284f44007c.png differ
diff --git a/content/zh/post/DarkAthena/images/74a4eafe28730bc309d5685e8b09b93c.png b/content/zh/post/DarkAthena/images/74a4eafe28730bc309d5685e8b09b93c.png
new file mode 100644
index 0000000000000000000000000000000000000000..0464f15c50c09ab3501a6ee96b6b39f9a3dfd735
Binary files /dev/null and b/content/zh/post/DarkAthena/images/74a4eafe28730bc309d5685e8b09b93c.png differ
diff --git a/content/zh/post/DarkAthena/images/7a908b5bc6c6a9fc2aa805f4ef6c8544.png b/content/zh/post/DarkAthena/images/7a908b5bc6c6a9fc2aa805f4ef6c8544.png
new file mode 100644
index 0000000000000000000000000000000000000000..760cf6dde434f6422720cf3bb5ca191912bdbfba
Binary files /dev/null and b/content/zh/post/DarkAthena/images/7a908b5bc6c6a9fc2aa805f4ef6c8544.png differ
diff --git a/content/zh/post/DarkAthena/images/88a31fd96492dcec9b34f627b99d8052.png b/content/zh/post/DarkAthena/images/88a31fd96492dcec9b34f627b99d8052.png
new file mode 100644
index 0000000000000000000000000000000000000000..628674007c7fc39c1a247d3c24d4ad157e2c3f0d
Binary files /dev/null and b/content/zh/post/DarkAthena/images/88a31fd96492dcec9b34f627b99d8052.png differ
diff --git a/content/zh/post/DarkAthena/images/961af150a73bdb6abedefb408a16d581.png b/content/zh/post/DarkAthena/images/961af150a73bdb6abedefb408a16d581.png
new file mode 100644
index 0000000000000000000000000000000000000000..a6668dc636700fa8be2c269da905a55788e9dda0
Binary files /dev/null and
b/content/zh/post/DarkAthena/images/961af150a73bdb6abedefb408a16d581.png differ diff --git a/content/zh/post/DarkAthena/images/9bff3ede038f944c36cf0d1b3c0ff1d9.png b/content/zh/post/DarkAthena/images/9bff3ede038f944c36cf0d1b3c0ff1d9.png new file mode 100644 index 0000000000000000000000000000000000000000..ae18407df3ce65b67c94422c9f2a576a658e0851 Binary files /dev/null and b/content/zh/post/DarkAthena/images/9bff3ede038f944c36cf0d1b3c0ff1d9.png differ diff --git a/content/zh/post/DarkAthena/images/a26b52b8cd3094bdc59d405302f7b8d8.png b/content/zh/post/DarkAthena/images/a26b52b8cd3094bdc59d405302f7b8d8.png new file mode 100644 index 0000000000000000000000000000000000000000..1c47ba7ce75b738024524cc6d33d474871c9a74f Binary files /dev/null and b/content/zh/post/DarkAthena/images/a26b52b8cd3094bdc59d405302f7b8d8.png differ diff --git a/content/zh/post/DarkAthena/images/abcef55aaba11d0adcdcc83fc632da1a.png b/content/zh/post/DarkAthena/images/abcef55aaba11d0adcdcc83fc632da1a.png new file mode 100644 index 0000000000000000000000000000000000000000..12d5c2a7bd4041bc5dfd7b7040d6a6ce430eaf95 Binary files /dev/null and b/content/zh/post/DarkAthena/images/abcef55aaba11d0adcdcc83fc632da1a.png differ diff --git a/content/zh/post/DarkAthena/images/b82d6a1a629cf548e5cbfb024b439757.png b/content/zh/post/DarkAthena/images/b82d6a1a629cf548e5cbfb024b439757.png new file mode 100644 index 0000000000000000000000000000000000000000..28fb38649d53cfc980b9d8895fe5c07dd41304d5 Binary files /dev/null and b/content/zh/post/DarkAthena/images/b82d6a1a629cf548e5cbfb024b439757.png differ diff --git a/content/zh/post/DarkAthena/images/bb6a266ced83afa526b54732382aaccf.png b/content/zh/post/DarkAthena/images/bb6a266ced83afa526b54732382aaccf.png new file mode 100644 index 0000000000000000000000000000000000000000..ba4d7c05129324cadd0b236b50c0581d878f9614 Binary files /dev/null and b/content/zh/post/DarkAthena/images/bb6a266ced83afa526b54732382aaccf.png differ diff --git a/content/zh/post/DarkAthena/images/c5d256000aacb55e4f5989d04614d52d.png b/content/zh/post/DarkAthena/images/c5d256000aacb55e4f5989d04614d52d.png new file mode 100644 index 0000000000000000000000000000000000000000..7b039a797cc344169f83fa5aeb3bd2e6a8c397ea Binary files /dev/null and b/content/zh/post/DarkAthena/images/c5d256000aacb55e4f5989d04614d52d.png differ diff --git a/content/zh/post/DarkAthena/images/ca2a4405d05cce042f494a37b9039c97.png b/content/zh/post/DarkAthena/images/ca2a4405d05cce042f494a37b9039c97.png new file mode 100644 index 0000000000000000000000000000000000000000..b3e5fd7bb22f772f8e9cd60e1b3fd303680006e0 Binary files /dev/null and b/content/zh/post/DarkAthena/images/ca2a4405d05cce042f494a37b9039c97.png differ diff --git a/content/zh/post/DarkAthena/images/cc3f6b4b0fccbb1452cfcf894bef1115.png b/content/zh/post/DarkAthena/images/cc3f6b4b0fccbb1452cfcf894bef1115.png new file mode 100644 index 0000000000000000000000000000000000000000..2066a7ae7608809092260b6d215b6684c934474c Binary files /dev/null and b/content/zh/post/DarkAthena/images/cc3f6b4b0fccbb1452cfcf894bef1115.png differ diff --git a/content/zh/post/DarkAthena/images/e9f0ab7b1e4c1b98f58b42333b74f5db.png b/content/zh/post/DarkAthena/images/e9f0ab7b1e4c1b98f58b42333b74f5db.png new file mode 100644 index 0000000000000000000000000000000000000000..9c88a837f8d94c1f5fdd6e201cf2f93590fcb31e Binary files /dev/null and b/content/zh/post/DarkAthena/images/e9f0ab7b1e4c1b98f58b42333b74f5db.png differ diff --git a/content/zh/post/DarkAthena/images/f06e8c49015bfe49ae1fd3311235c5b6.png 
b/content/zh/post/DarkAthena/images/f06e8c49015bfe49ae1fd3311235c5b6.png new file mode 100644 index 0000000000000000000000000000000000000000..099deb7d835cc2a6b41431fd1d1c84d72a1dbab8 Binary files /dev/null and b/content/zh/post/DarkAthena/images/f06e8c49015bfe49ae1fd3311235c5b6.png differ diff --git a/content/zh/post/DarkAthena/images/f21188242d4b9e58f48df77806eb5cd0.png b/content/zh/post/DarkAthena/images/f21188242d4b9e58f48df77806eb5cd0.png new file mode 100644 index 0000000000000000000000000000000000000000..31c2fe98f087b81dd68ab9d294f88629b3b3b1ea Binary files /dev/null and b/content/zh/post/DarkAthena/images/f21188242d4b9e58f48df77806eb5cd0.png differ diff --git a/content/zh/post/DarkAthena/images/f7c379f7ed4611bfee7273041cea2113.png b/content/zh/post/DarkAthena/images/f7c379f7ed4611bfee7273041cea2113.png new file mode 100644 index 0000000000000000000000000000000000000000..879fccd317a4d8e0d52d8a85286db5575ee5aac3 Binary files /dev/null and b/content/zh/post/DarkAthena/images/f7c379f7ed4611bfee7273041cea2113.png differ diff --git a/content/zh/post/DarkAthena/title/img.png b/content/zh/post/DarkAthena/title/img.png new file mode 100644 index 0000000000000000000000000000000000000000..86a420b92fb8289658d807d49f137b6d13862f6d Binary files /dev/null and b/content/zh/post/DarkAthena/title/img.png differ diff --git a/content/zh/post/DarkAthena/title/img1.png b/content/zh/post/DarkAthena/title/img1.png new file mode 100644 index 0000000000000000000000000000000000000000..65e2d4c4751f069c64357704715e2ba99beb511a Binary files /dev/null and b/content/zh/post/DarkAthena/title/img1.png differ diff --git a/content/zh/post/DarkAthena/title/img21.png b/content/zh/post/DarkAthena/title/img21.png new file mode 100644 index 0000000000000000000000000000000000000000..1da9e55bd25cbc7cfc6fdef1800b4c95b077829b Binary files /dev/null and b/content/zh/post/DarkAthena/title/img21.png differ diff --git a/content/zh/post/DarkAthena/title/img24.png b/content/zh/post/DarkAthena/title/img24.png new file mode 100644 index 0000000000000000000000000000000000000000..2af578504062e5fa7a7aaf7e1c2014531e51e9c2 Binary files /dev/null and b/content/zh/post/DarkAthena/title/img24.png differ diff --git a/content/zh/post/DarkAthena/title/img25.png b/content/zh/post/DarkAthena/title/img25.png new file mode 100644 index 0000000000000000000000000000000000000000..b71bb7d740d0f375bbea6116ffde9175c0dbcacf Binary files /dev/null and b/content/zh/post/DarkAthena/title/img25.png differ diff --git a/content/zh/post/DarkAthena/title/img3.png b/content/zh/post/DarkAthena/title/img3.png new file mode 100644 index 0000000000000000000000000000000000000000..cb1c24b86a69bea7c9b6b2bd0d99b4eecbf10f2c Binary files /dev/null and b/content/zh/post/DarkAthena/title/img3.png differ diff --git a/content/zh/post/DarkAthena/title/img31.png b/content/zh/post/DarkAthena/title/img31.png new file mode 100644 index 0000000000000000000000000000000000000000..24c200404ece0dee46af324f0c84ad832db02276 Binary files /dev/null and b/content/zh/post/DarkAthena/title/img31.png differ diff --git a/content/zh/post/DarkAthena/title/img33.png b/content/zh/post/DarkAthena/title/img33.png new file mode 100644 index 0000000000000000000000000000000000000000..b903c7f8d5a3ba8b66b2d6be883a4bac7230915e Binary files /dev/null and b/content/zh/post/DarkAthena/title/img33.png differ diff --git a/content/zh/post/DarkAthena/title/img37.png b/content/zh/post/DarkAthena/title/img37.png new file mode 100644 index 
0000000000000000000000000000000000000000..7218eccd424015bd9d70bb0c79c33bcd747f2306
Binary files /dev/null and b/content/zh/post/DarkAthena/title/img37.png differ
diff --git a/content/zh/post/DarkAthena/title/img38.png b/content/zh/post/DarkAthena/title/img38.png
new file mode 100644
index 0000000000000000000000000000000000000000..86a420b92fb8289658d807d49f137b6d13862f6d
Binary files /dev/null and b/content/zh/post/DarkAthena/title/img38.png differ
diff --git a/content/zh/post/DarkAthena/title/img39.png b/content/zh/post/DarkAthena/title/img39.png
new file mode 100644
index 0000000000000000000000000000000000000000..8de785c9982c124dd44d2931c2913c3c5b044791
Binary files /dev/null and b/content/zh/post/DarkAthena/title/img39.png differ
diff --git a/content/zh/post/DarkAthena/title/img6.png b/content/zh/post/DarkAthena/title/img6.png
new file mode 100644
index 0000000000000000000000000000000000000000..2ddddfa2858d77999b4cfec8e97e4f29ac0cab79
Binary files /dev/null and b/content/zh/post/DarkAthena/title/img6.png differ
diff --git "a/content/zh/post/DarkAthena/\343\200\220openGauss\343\200\221\345\234\250WPS\350\241\250\346\240\274\351\207\214\345\210\266\344\275\234\350\277\236\346\216\245\345\210\260openGauss\347\232\204\345\256\236\346\227\266\345\210\267\346\226\260\346\212\245\350\241\250.md" "b/content/zh/post/DarkAthena/\343\200\220openGauss\343\200\221\345\234\250WPS\350\241\250\346\240\274\351\207\214\345\210\266\344\275\234\350\277\236\346\216\245\345\210\260openGauss\347\232\204\345\256\236\346\227\266\345\210\267\346\226\260\346\212\245\350\241\250.md"
new file mode 100644
index 0000000000000000000000000000000000000000..8d601b2c08a068abb72cc87f4c5c7f9a1ea97095
--- /dev/null
+++ "b/content/zh/post/DarkAthena/\343\200\220openGauss\343\200\221\345\234\250WPS\350\241\250\346\240\274\351\207\214\345\210\266\344\275\234\350\277\236\346\216\245\345\210\260openGauss\347\232\204\345\256\236\346\227\266\345\210\267\346\226\260\346\212\245\350\241\250.md"
@@ -0,0 +1,107 @@
++++
+
+title = "【openGauss】在WPS表格里制作连接到openGauss的实时刷新报表"
+
+date = "2022-12-06"
+
+tags = ["openGauss"]
+
+archives = "2022-12"
+
+author = "DarkAthena"
+
+summary = "【openGauss】在WPS表格里制作连接到openGauss的实时刷新报表"
+
+img = "/zh/post/DarkAthena/title/img.png"
+
+times = "10:20"
+
++++
+
+## 前言
+
+其实我的数据库启蒙,是在一家甲方公司。
+当时一进这家公司,就见到了通过连接数据库自动刷新的excel表。后来学会了这招就一发不可收拾,制作出各种自动刷新的报表。
+想象一下,有些高管不喜欢打开各种复杂的业务系统或者报表系统,一上班就直接打开桌面上的可刷新表格文件,就能看到昨日的报表数据以及今日的实时数据。
+当年智能手机还未普及,没有移动端报表,每晚的值班经理需要查询当日数据编辑短信发送给高管,也是在电脑上用EXCEL直接刷出数据,而且提前在EXCEL里写好了公式,拼接成了短信文本内容,复制粘贴到飞信就把业绩短信发出去了。多少年来一直都是这么用的,只是后来改成了粘贴到微信发送。
+
+在当时,这也算是极低成本的半自动化了,好不惬意!
+
+当时连接的主要是Oracle数据库,现在突然想起,是不是我们的国产数据库也可以这样连接呢?
+
+## 原理
+
+其实原理很简单,就是在windows的odbc数据源中配置好对应的数据库连接,然后在excel或者wps表格中选择导入数据/odbc数据源,选择需要的表及字段,或者直接写个sql查询也行,就可以把数据返回到表格中。当数据库中的数据发生变化时,只需要在表格中点击刷新或设置刷新频率即可。
+
+## 步骤
+
+### 安装ODBC驱动
+
+[windows端ODBC驱动下载(支持sha256) https://obs.myhuaweicloud.com/dws/download/dws_8.1.x_odbc_driver_for_windows.zip](https://obs.myhuaweicloud.com/dws/download/dws_8.1.x_odbc_driver_for_windows.zip)
+
+下载好后,解压,根据使用的wps或msoffice是64位还是32位版本,执行对应的安装程序(目测大多数人安装的都是32位版本),一路"next"就行了。
+如果不知道自己安装的是32位还是64位,可以把程序打开,到任务管理器里找到对应的进程,会有显示
+![image-1668874050903](./images/3c5edcfd66c542453e6284b38773b911.png)
+
+### 添加ODBC数据源
+
+1. 依次打开
+   控制面板-管理工具-ODBC Data Sources (32-bit)
+   ![image-1668874143580](./images/9bff3ede038f944c36cf0d1b3c0ff1d9.png)
+2. 点击右边的添加按钮,找到 PostgreSQL Unicode,双击
+   ![image-1668874242003](./images/01fa1633e6829a9adafc9eaf4f3a502c.png)
+3. 然后填写连接信息,并点击Test
+   ![image-1668874405597](./images/f06e8c49015bfe49ae1fd3311235c5b6.png)
+4. 提示连接成功,确定,点击SAVE保存,会弹出安全提示,意思就是连接信息会保存在系统注册表里,不安全,问是否继续,点"是"(本篇不讨论安全问题,请自行斟酌)
+   ![image-1668874472622](./images/a26b52b8cd3094bdc59d405302f7b8d8.png)
+5. 然后就可以看到数据源里多了一个刚刚新建的数据源
+   ![image-1668874765760](./images/43f831521c831e975a7ef7258e3f9625.png)
+
+### 在WPS中引用数据源
+
+1. 新建一个空白表格
+2. 点击 数据 - 导入数据,选择ODBC DSN
+   ![image-1668874917682](./images/cc3f6b4b0fccbb1452cfcf894bef1115.png)
+3. 选择刚刚新建的数据源,确定,下一步
+   ![image-1668874959657](./images/035778d7d2f842f0e43b07f06be50dcc.png)
+4. 选择需要的表,并将需要的字段移到右边,点击下一步
+   ![image-1668875300934](./images/34ed0aa9c0765d744f1ae4b2e23bbcc9.png)
+5. 选择手工输入连接语句,点击下一步
+   ![image-1668875424131](./images/f7c379f7ed4611bfee7273041cea2113.png)
+6. 预览没有问题,点击完成
+   ![image-1668875476891](./images/7a908b5bc6c6a9fc2aa805f4ef6c8544.png)
+7. 选择数据需要存放的开始单元格,点击确定
+   ![image-1668875523076](./images/88a31fd96492dcec9b34f627b99d8052.png)
+8. 数据就返回到表格中了
+   ![image-1668875560923](./images/5e59536d4830366906e336284f44007c.png)
+
+### 验证数据刷新
+
+1. 在数据库中执行sql修改数据,比如让所有人的工资翻倍
+
+```
+update scott.emp set sal=sal*2 where 1=1;
+```
+
+![image-1668875774103](./images/b82d6a1a629cf548e5cbfb024b439757.png)
+[windows端gsql下载(支持sha256) https://obs.myhuaweicloud.com/dws/download/dws_8.1.x_gsql_for_windows.zip](https://obs.myhuaweicloud.com/dws/download/dws_8.1.x_gsql_for_windows.zip)
+2. 在表格数据区域点击鼠标右键,刷新数据
+![image-1668875838278](./images/74a4eafe28730bc309d5685e8b09b93c.png)
+3. 可以看到数据都变更了
+![image-1668875866935](./images/abcef55aaba11d0adcdcc83fc632da1a.png)
+
+### 配置数据自动刷新
+
+1. 在表格数据区域点击鼠标右键,点击数据区域属性
+   ![image-1668875958517](./images/ca2a4405d05cce042f494a37b9039c97.png)
+2. 设置刷新频率,并勾选打开文件时刷新数据
+   ![image-1668876065494](./images/20744cd1b2784c25b9f1d08e2a08b26b.png)
+
+## 总结
+
+本篇只举了个最简单的例子。实际上,新建多个查询,结合表格的公式,并调整表格的样式,是可以制作出相当精美的报表的,而且只用做一次,以后一直都能用,数据都是可以实时从数据库中获取的。
+另外,以上操作在MSOFFICE中的EXCEL也是类似的,只是EXCEL还有个MSQUERY的程序,可以可视化编辑多表的关联关系,有兴趣的可以自己尝试一下。
+
+> - **本文作者:** [DarkAthena](https://www.darkathena.top/)
+> - **本文链接:** [https://www.darkathena.top/archives/opengauss-wps-excel-realtime-report-sha256](https://www.darkathena.top/archives/opengauss-wps-excel-realtime-report-sha256)
+> - **版权声明:** 本博客所有文章除特别声明外,均采用[CC BY-NC-SA 3.0](https://creativecommons.org/licenses/by-nc-sa/3.0/) 许可协议。转载请注明出处!
diff --git "a/content/zh/post/DarkAthena/\343\200\220openGauss\343\200\221\346\212\212\345\272\224\347\224\250\345\274\200\345\217\221\344\270\255\347\232\204\350\256\276\347\275\256\345\256\242\346\210\267\347\253\257\345\255\227\347\254\246\347\274\226\347\240\201\345\276\200\347\273\206\344\272\206\350\257\264.md" "b/content/zh/post/DarkAthena/\343\200\220openGauss\343\200\221\346\212\212\345\272\224\347\224\250\345\274\200\345\217\221\344\270\255\347\232\204\350\256\276\347\275\256\345\256\242\346\210\267\347\253\257\345\255\227\347\254\246\347\274\226\347\240\201\345\276\200\347\273\206\344\272\206\350\257\264.md"
new file mode 100644
index 0000000000000000000000000000000000000000..a064cfb6a8364f55805d25a264c64e2db457e3bf
--- /dev/null
+++ "b/content/zh/post/DarkAthena/\343\200\220openGauss\343\200\221\346\212\212\345\272\224\347\224\250\345\274\200\345\217\221\344\270\255\347\232\204\350\256\276\347\275\256\345\256\242\346\210\267\347\253\257\345\255\227\347\254\246\347\274\226\347\240\201\345\276\200\347\273\206\344\272\206\350\257\264.md"
@@ -0,0 +1,141 @@
++++
+
+title = "【openGauss】把应用开发中的设置客户端字符编码往细了说"
+
+date = "2023-02-07"
+
+tags = ["openGauss"]
+
+archives = "2023-02"
+
+author = "DarkAthena"
+
+summary = "【openGauss】把应用开发中的设置客户端字符编码往细了说"
+
+img = "/zh/post/DarkAthena/title/img31.png"
+
+times = "10:20"
+
++++
+
+## 前言
+
+早前写过两篇有关Oracle字符集的文章
+[【ORACLE】谈一谈Oracle数据库使用的字符集,不仅仅是乱码](https://www.darkathena.top/archives/about-oracle-charset)
+[【ORACLE】谈一谈NVARCHAR2、NCHAR、NCLOB等数据类型和国家字符集](https://www.darkathena.top/archives/about-nvarchar2-and-national-charset)
+基本说明了"**数据字符编码**"、"**客户端字符编码**"、"**数据库字符编码**"三者的关系。这些关系,对于openGauss/MogDB/postgresql其实是一样的,即"**数据字符编码**"和"**客户端字符编码**"应保持一致,且对应的字符集为"**数据库字符集**"的"**子集**"。但是实际应用开发中,"**客户端字符编码**"在没有进行主动设定时,往往会受各种因素干扰,比如各种环境变量和数据库参数。本文就针对各种客户端,通过文档加实验来对客户端字符编码应该如何设置做个分析说明(注:本文的测试环境操作系统为CentOS 7.9)。
+
+## 设置客户端字符编码的各种方式
+
+在openGauss中,可以通过很多方式来设置客户端字符集,但实际上最终影响的都是在数据库连接中的 **client_encoding**,该值可以通过执行 **show client_encoding;** 来进行查看。
+
+### 操作系统环境变量
+
+1. LANG
+
+```
+export LANG=zh_CN.UTF-8
+```
+
+2. LC_CTYPE
+
+```
+export LC_CTYPE=zh_CN.UTF-8
+```
+
+3. PGCLIENTENCODING
+
+```
+export PGCLIENTENCODING=UTF-8
+```
+
+### 数据库级参数
+
+1. alter database dbname set client_encoding='UTF-8';
+   (注意:alter system set client_encoding='UTF-8'; 无法执行)
+
+### 数据库会话中的参数(会话级)
+
+1. set client_encoding='UTF-8';
+2. set client_encoding to 'UTF-8';
+3. set names 'UTF-8';
+4. alter session set client_encoding = 'UTF8';
+
+### gsql元命令(会话级)
+
+1. \encoding UTF-8
+
+### 驱动连接参数
+
+1. libpq
+
+```
+conninfo = "dbname=postgres port=26100 host='192.168.56.117' client_encoding='UTF-8' application_name=test connect_timeout=5 sslmode=disable user='xxx' password='xxx' ";
+```
+
+2. jdbc
+
+```
+urlString = "jdbc:opengauss://192.168.56.117:26100/postgres?batchMode=off&allowEncodingChanges=true&characterEncoding=GBK";
+```
+
+3. psycopg2
+
+```
+conn = psycopg2.connect(database="postgres", user="xxx", password="xxx", host="192.168.56.117", port="26100",client_encoding="GBK")
+```
+
+### postgresql.conf
+
+```
+client_encoding= 'UTF8'
+```
+
+(注:由于设置postgresql.conf的方式在我测试的各种情况下均未产生影响,为节省篇幅,下文不再列出这种方式)
+
+## 各客户端中取值的优先级和默认值
+
+由于可以设置的方式太多,那么必然会出现各个设置互相冲突时,以哪个为准的问题。
+首先明确一点,当前会话实际生效的客户端字符编码,一定是可以通过
+**show client_encoding;** 这个命令查看的,也就是说,无论怎么设置,最终都是为了影响会话里的 **client_encoding** 参数。接着,我们可以进行各种组合尝试,来确认这个参数的默认值及获取来源
+![image](./images/961af150a73bdb6abedefb408a16d581.png)
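+
+上面的组合尝试可以用类似下面的脚本草稿来半自动地完成(仅为示意,端口26100沿用上文连接串中的值,数据库名请按实际环境调整):
+
+```bash
+#!/bin/bash
+# 组合测试:改变 PGCLIENTENCODING 后观察会话中 client_encoding 的实际取值
+# 端口 26100 沿用上文示例,请按实际环境修改
+for enc in "" "GBK" "UTF-8"; do
+    if [ -z "$enc" ]; then
+        unset PGCLIENTENCODING
+    else
+        export PGCLIENTENCODING="$enc"
+    fi
+    echo "PGCLIENTENCODING=${enc:-<未设置>} =>"
+    gsql -d postgres -p 26100 -c "show client_encoding;"
+done
+```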
+
+根据以上测试结果,可以得到如下几个结论。
+
+对于gsql:
+
+1. 不论数据库字符编码为何值,其client_encoding会从PGCLIENTENCODING获取
+2. 当PGCLIENTENCODING没有设置时,会从环境变量LC_CTYPE获取
+3. 当LC_CTYPE没有设置时,会从环境变量LC_ALL获取
+4. 当LC_ALL没有设置时,会从环境变量LANG获取
+5. 当LANG没有设置时(unset LANG),默认为sql_ascii
+
+![image.png](./images/20230201-49d75e33-2a1e-4eef-b312-aae978af089a.png)
+
+对于libpq:
+
+1. 不论LANG为何值,其client_encoding会从PGCLIENTENCODING获取
+2. 当PGCLIENTENCODING没有设置时,会从database的client_encoding获取(select * from pg_catalog.pg_db_role_setting)
+3. 当database的client_encoding没有设置时,默认为数据库建库时的字符编码(select getdatabaseencoding();)
+
+![image.png](./images/20230201-858fc5aa-cd0a-4e31-9141-1c87fb24b9c8.png)
+
+简单来说,就是**libpq**不认LANG、LC_CTYPE,**gsql**不认alter database和数据库字符编码。在不修改应用程序代码时,想指定客户端字符编码,最佳方式为设置PGCLIENTENCODING,因为这样才能保证两者的表现一致。
+
+以上同样的测试,我用**psycopg2**和**jdbc**也测了一遍,和**libpq**的表现是完全一致的。也就是说,设定PGCLIENTENCODING是对各种程序开发都最通用的客户端字符编码设置方案。(单一程序处理多种字符编码的情况本文暂不考虑)
+
+## 其他
+
+附上我用libpq测试[之前文章中的用例](https://www.darkathena.top/archives/about-oracle-charset)的结果,可以发现能插入的结果和ORACLE表现是完全一致的。有区别的是,ORACLE中字符编码错了可能也能插入,而MogDB/openGauss/postgresql中则会报错,这个点是好是坏,也只能见仁见智,区分场景了。但我个人偏向于应尽量避免让错误的数据进入数据库。
+![image-1675252053480](./images/042e5465f559ceac88bdaf9fec73c530.png)
+
+本文结论参考了各种官方文档,并结合自己的测试得出,如有不对,请联系我指出。
+
+## 参考链接
+
+https://docs.mogdb.io/zh/mogdb/v3.0/character-set-support
+https://docs.mogdb.io/zh/mogdb/v3.0/1-gsql
+http://postgres.cn/docs/13/libpq-connect.html
+
+- **本文作者:** [DarkAthena](https://www.darkathena.top/)
+- **本文链接:** https://www.darkathena.top/archives/about-opengauss-client-encoding
+- **版权声明:** 本博客所有文章除特别声明外,均采用[CC BY-NC-SA 3.0](https://creativecommons.org/licenses/by-nc-sa/3.0/) 许可协议。转载请注明出处!
diff --git "a/content/zh/post/DarkAthena/\344\270\200\347\247\215\345\217\257\350\203\275\346\230\257\347\233\256\345\211\215\346\234\200\345\277\253\347\232\204\344\273\216ORACLE\345\220\214\346\255\245\346\225\260\346\215\256\345\210\260MogDB(openGauss)\347\232\204\346\226\271\345\274\217.md" "b/content/zh/post/DarkAthena/\344\270\200\347\247\215\345\217\257\350\203\275\346\230\257\347\233\256\345\211\215\346\234\200\345\277\253\347\232\204\344\273\216ORACLE\345\220\214\346\255\245\346\225\260\346\215\256\345\210\260MogDB(openGauss)\347\232\204\346\226\271\345\274\217.md"
new file mode 100644
index 0000000000000000000000000000000000000000..a91d8a72cd50afe67ad2d826a6fd00179ef1be7a
--- /dev/null
+++ "b/content/zh/post/DarkAthena/\344\270\200\347\247\215\345\217\257\350\203\275\346\230\257\347\233\256\345\211\215\346\234\200\345\277\253\347\232\204\344\273\216ORACLE\345\220\214\346\255\245\346\225\260\346\215\256\345\210\260MogDB(openGauss)\347\232\204\346\226\271\345\274\217.md"
@@ -0,0 +1,175 @@
++++
+
+title = "一种可能是目前最快的从ORACLE同步数据到MogDB(openGauss)的方式"
+
+date = "2022-09-15"
+
+tags = ["openGauss"]
+
+archives = "2022-09"
+
+author = "DarkAthena"
+
+summary = "一种可能是目前最快的从ORACLE同步数据到MogDB(openGauss)的方式"
+
+img = "/zh/post/DarkAthena/title/img.png"
+
+times = "18:41"
+
++++
+
+# 一种可能是目前最快的从ORACLE同步数据到MogDB(openGauss)的方式
+
+## 一、前言
+目前openGauss支持oracle中的绝大部分数据类型,基本上不用考虑类型转换的问题。所以从oracle到MogDB(openGauss)的数据同步无非就是从oracle里查出来再插入到MogDB(openGauss)中,只是查出来的结果是存成文件还是放内存而已。
+用文件的形式,oracle端有sqluldr2可以快速导出,MogDB(openGauss)端可以用copy命令快速载入;
+如果是放内存,则需要一定量的代码开发,而且目前通用的数据同步工具在导入时,大多用的是逐行insert命令,效率很低。
+所以,是否存在一种可能,只利用现有的工具,就能实现数据的高速同步且不需要存文件?
+
+在说这个方案前,先简单说明一下这个方案的几个知识点。
+## 二、知识点
+### 1.sqluldr2
+sqluldr2是楼方鑫针对Oracle数据库开发的数据快速导出工具,应该绝大多数oracle用户都用过,因为它依旧是目前从oracle中导出文本数据最快的方式了,速度远超oracle官方的sqlplus spool导出。但是大多数人基本上都只使用其导出文件的功能,而不知道这个工具还可以导出到标准输出(所谓标准输出即不生成文件,直接打印在屏幕上)。
+sqluldr2完整文档:[https://www.doc88.com/p-6522591706862.html](https://www.doc88.com/p-6522591706862.html)
+
+### 2.gsql/psql(\copy 元命令)
+gsql/psql的"\copy"元命令
+(注意区别于sql命令中的"copy","copy"是服务器端的,"\copy"是客户端的),
+常用于表和文件之间的数据导入导出,效率很快,因为已经指定了表,不需要再进行字段类型的识别,绕开了sql解析引擎,直接写入表中。
+但大多数人很少用到 stdin和stdout(除非是基于其他高级语言进行数据导入导出的开发),这里的stdin即为标准输入,如果执行"\copy 表名 from stdin",则会让你继续输入数据,然后客户端会把数据保存到对应的表中。
+
+### 3.gsql/psql (-c参数)
+gsql/psql 的 "-c"参数,可以在连接数据库后马上执行sql命令然后断开登录
+
+### 4.linux ( | 管道符)
+linux的管道符"|",可以用于重定向,即把前面一条命令的标准输出,作为后面一条命令的标准输入
+
+## 三、方案说明
+通过以上内容,很自然地可以联想到一种方式,即使用sqluldr2的标准输出,直接作为gsql \copy的标准输入。所谓的两点之间直线最短,用最快的导出加上最快的导入,且省去中间存储文件的阶段。理想状态下,这个速度仅受限于源端或目标端最慢的那一端,比如导出7分钟,导入8分钟,一般总计传输时间就是15分钟,但是用本文的方案,这个传输时间可能就只有8分钟了,因为它是导入导出同时进行的!
+
+## 四、操作步骤
+1. 需要有个服务器能同时连接oracle及MogDB(openGauss),当然直接用这两个服务器之一也可以,只是注意要安装另一个数据库的客户端,本文测试是在mogdb数据库的服务器上执行的
+- oracle客户端下载链接
+https://www.oracle.com/cn/database/technologies/instant-client/linux-x86-64-downloads.html
+- MogDB(openGauss)没有提供单独的客户端压缩包,客户端在数据库的安装包中对应的tools压缩包里,比如
+MogDB-3.0.1-CentOS-64bit-tools.tar.gz
+https://mogdb.io/downloads/mogdb/
+
+2. 安装客户端,这两个客户端的安装方式差不多,就是解压,然后配置环境变量LD_LIBRARY_PATH,比如配置连接Oracle的环境变量如下(如果需要永久配置则要修改对应的配置文件)
+```bash
+export LD_LIBRARY_PATH=/opt/mogdb/instantclient_21_7:$LD_LIBRARY_PATH
+```
+3. 下载sqluldr2程序
+这个就自己在网上搜吧,基本下载下来是一个压缩包,里面有两个windows版的和两个linux版的,我们需要的是"sqluldr2_linux64_10204.bin"这个文件,下完后可以把文件名改短点,比如"sqluldr2"
+
+4. 先找个小表测试下sqluldr2能否导出文件,文件正常生成,说明oracle客户端配置正确
+```bash
+./sqluldr2 scott/tiger@192.168.163.108/orcl query=emp quote=0x22 field="," degree=8 file=123.csv
+```
+```
+[omm@MiWiFi-R3G-srv mogdb]$ cat 123.csv
+7369,"SMITH","CLERK",7902,"1980-12-17 00:00:00",800,,20
+7499,"ALLEN","SALESMAN",7698,"1981-02-20 00:00:00",1600,300,30
+7521,"WARD","SALESMAN",7698,"1981-02-22 00:00:00",1250,500,30
+7566,"JONES","MANAGER",7839,"1981-04-02 00:00:00",2975,,20
+7654,"MARTIN","SALESMAN",7698,"1981-09-28 00:00:00",1250,1400,30
+7698,"BLAKE","MANAGER",7839,"1981-05-01 00:00:00",2850,,30
+7782,"CLARK","MANAGER",7839,"1981-06-09 00:00:00",2450,,10
+7788,"SCOTT","ANALYST",7566,"1987-04-19 00:00:00",3000,,20
+7839,"KING","PRESIDENT",,"1981-11-17 00:00:00",5000,,10
+7844,"TURNER","SALESMAN",7698,"1981-09-08 00:00:00",1500,0,30
+7876,"ADAMS","CLERK",7788,"1987-05-23 00:00:00",1100,,20
+7900,"JAMES","CLERK",7698,"1981-12-03 00:00:00",950,,30
+7902,"FORD","ANALYST",7566,"1981-12-03 00:00:00",3000,,20
+7934,"MILLER","CLERK",7782,"1982-01-23 00:00:00",1300,,10
+```
+
+5. 在目标端建立一个同样的表
+```bash
+gsql -d postgres -r -p 26000
+```
+```sql
+create schema scott;
+create table SCOTT.EMP
+(
+  empno    NUMBER(4) not null,
+  ename    VARCHAR2(10),
+  job      VARCHAR2(9),
+  mgr      NUMBER(4),
+  hiredate DATE,
+  sal      NUMBER(7,2),
+  comm     NUMBER(7,2),
+  deptno   NUMBER(2)
+);
+```
+
+6. ★测试通过管道传输数据,没有报错
+```bash
+./sqluldr2 scott/tiger@192.168.163.108/orcl query=emp quote=0x22 field="," degree=8 file=- |gsql -d postgres -Umogdb -WEnmo@123 -hlocalhost -p26000 -c "\copy scott.emp from stdin DELIMITER ',' quote '\"' csv"
+```
+
+7. 在目标端查询scott.emp表,数据和源端一致
+
+```
+MogDB=# select * from scott.emp;
+ empno | ename  |    job    | mgr  |      hiredate       |   sal   |  comm   | deptno
+-------+--------+-----------+------+---------------------+---------+---------+--------
+  7369 | SMITH  | CLERK     | 7902 | 1980-12-17 00:00:00 |  800.00 |         |     20
+  7499 | ALLEN  | SALESMAN  | 7698 | 1981-02-20 00:00:00 | 1600.00 |  300.00 |     30
+  7521 | WARD   | SALESMAN  | 7698 | 1981-02-22 00:00:00 | 1250.00 |  500.00 |     30
+  7566 | JONES  | MANAGER   | 7839 | 1981-04-02 00:00:00 | 2975.00 |         |     20
+  7654 | MARTIN | SALESMAN  | 7698 | 1981-09-28 00:00:00 | 1250.00 | 1400.00 |     30
+  7698 | BLAKE  | MANAGER   | 7839 | 1981-05-01 00:00:00 | 2850.00 |         |     30
+  7782 | CLARK  | MANAGER   | 7839 | 1981-06-09 00:00:00 | 2450.00 |         |     10
+  7788 | SCOTT  | ANALYST   | 7566 | 1987-04-19 00:00:00 | 3000.00 |         |     20
+  7839 | KING   | PRESIDENT |      | 1981-11-17 00:00:00 | 5000.00 |         |     10
+  7844 | TURNER | SALESMAN  | 7698 | 1981-09-08 00:00:00 | 1500.00 |    0.00 |     30
+  7876 | ADAMS  | CLERK     | 7788 | 1987-05-23 00:00:00 | 1100.00 |         |     20
+  7900 | JAMES  | CLERK     | 7698 | 1981-12-03 00:00:00 |  950.00 |         |     30
+  7902 | FORD   | ANALYST   | 7566 | 1981-12-03 00:00:00 | 3000.00 |         |     20
+  7934 | MILLER | CLERK     | 7782 | 1982-01-23 00:00:00 | 1300.00 |         |     10
+(14 rows)
+```
+
+## 五、简单性能测试
+由于硬盘空间不够,测试数据库也是虚拟机和docker中的,就不做详细的测试了,只做个简单的100万数据测试
+```sql
+SQL> select count(1) from scott.T_TEST1;
+
+  COUNT(1)
+----------
+   1000000
+```
+直接管道传输,用时7s
+```bash
+[omm@MiWiFi-R3G-srv mogdb]$ echo $(date)
+Thu Sep 15 04:24:23 EDT 2022
+[omm@MiWiFi-R3G-srv mogdb]$ ./sqluldr2 scott/tiger@192.168.163.108/orcl query=SCOTT.T_TEST1 quote=0x22 field="," degree=8 file=- |gsql -d postgres -Umogdb -WEnmo@123 -hlocalhost -p26000 -c "\copy SCOTT.T_TEST1 from stdin DELIMITER ',' quote '\"' csv"
+[omm@MiWiFi-R3G-srv mogdb]$ echo $(date)
+Thu Sep 15 04:24:30 EDT 2022
+```
+在目标端删除测试表,重建,然后用导出文件再导入的方式,导出5s,导入7s,一共12s
+```bash
+[omm@MiWiFi-R3G-srv mogdb]$ echo $(date)
+Thu Sep 15 04:20:00 EDT 2022
+[omm@MiWiFi-R3G-srv mogdb]$ ./sqluldr2 scott/tiger@192.168.163.108/orcl query=SCOTT.T_TEST1 quote=0x22 field="," degree=8 file=1234.csv
+         0 rows exported at 2022-09-15 04:20:00, size 0 MB.
+   1000000 rows exported at 2022-09-15 04:20:05, size 48 MB.
+         output file 1234.csv closed at 1000000 rows, size 48 MB.
+[omm@MiWiFi-R3G-srv mogdb]$ echo $(date)
+Thu Sep 15 04:20:05 EDT 2022
+```
+```bash
+[omm@MiWiFi-R3G-srv mogdb]$ echo $(date)
+Thu Sep 15 04:23:03 EDT 2022
+[omm@MiWiFi-R3G-srv mogdb]$ gsql -d postgres -Umogdb -WEnmo@123 -hlocalhost -p26000 -c "\copy SCOTT.T_TEST1 from '/opt/mogdb/1234.csv' DELIMITER ',' quote '\"' csv"
+[omm@MiWiFi-R3G-srv mogdb]$ echo $(date)
+Thu Sep 15 04:23:10 EDT 2022
+```
+大致判断,这次测试的速度上限受导入影响,如果再优化一下数据库相关参数,耗时可以更低,但这里主要是提供一个可行的快速传输数据的思路,就不去做参数调整了。
+
+## 六、总结
+了解了这个原理,完全可以写一个程序或者shell脚本,通过把配置进行参数化处理,来制作一个Oracle到openGauss的通用高速数据传输工具。
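+比如,可以把第六步的命令包装成一个带参数的shell脚本草稿(仅为示意,未做异常处理,连接信息请按实际环境修改):
+```bash
+#!/bin/bash
+# 用法: ./ora2og.sh <oracle连接串> <表名>
+# 将 Oracle 表通过管道直接灌入 MogDB/openGauss 的同名表(示意脚本,未做异常处理)
+ORA_CONN=$1   # 例如 scott/tiger@192.168.163.108/orcl
+TABLE=$2      # 例如 scott.emp,要求两端表名一致
+
+./sqluldr2 "$ORA_CONN" query="$TABLE" quote=0x22 field="," degree=8 file=- \
+  | gsql -d postgres -Umogdb -WEnmo@123 -hlocalhost -p26000 \
+      -c "\copy $TABLE from stdin DELIMITER ',' quote '\"' csv"
+```
+调用方式形如 `./ora2og.sh scott/tiger@192.168.163.108/orcl scott.emp`。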
+> - **本文作者:** [DarkAthena](https://www.darkathena.top)
+> - **本文链接:** [https://www.darkathena.top/archives/transport-data-from-oracle-to-opengauss-faster](https://www.darkathena.top/archives/transport-data-from-oracle-to-opengauss-faster)
+> - **版权声明:** 本博客所有文章除特别声明外,均采用[CC BY-NC-SA 3.0](https://creativecommons.org/licenses/by-nc-sa/3.0/) 许可协议。转载请注明出处!
\ No newline at end of file
diff --git "a/content/zh/post/DarkAthena/\344\270\244\347\247\215\345\234\250openGauss\344\270\255\344\275\277\347\224\250\345\255\230\345\202\250\350\277\207\347\250\213\347\224\237\346\210\220\346\226\207\346\234\254\346\226\207\344\273\266\347\232\204\346\226\271\345\274\217.md" "b/content/zh/post/DarkAthena/\344\270\244\347\247\215\345\234\250openGauss\344\270\255\344\275\277\347\224\250\345\255\230\345\202\250\350\277\207\347\250\213\347\224\237\346\210\220\346\226\207\346\234\254\346\226\207\344\273\266\347\232\204\346\226\271\345\274\217.md"
new file mode 100644
index 0000000000000000000000000000000000000000..9b4253302a3803be6565e6b6bb6194940f70998f
--- /dev/null
+++ "b/content/zh/post/DarkAthena/\344\270\244\347\247\215\345\234\250openGauss\344\270\255\344\275\277\347\224\250\345\255\230\345\202\250\350\277\207\347\250\213\347\224\237\346\210\220\346\226\207\346\234\254\346\226\207\344\273\266\347\232\204\346\226\271\345\274\217.md"
@@ -0,0 +1,247 @@
++++
+
+title = "两种在openGauss中使用存储过程生成文本文件的方式"
+
+date = "2022-11-07"
+
+tags = ["openGauss"]
+
+archives = "2022-11"
+
+author = "DarkAthena"
+
+summary = "两种在openGauss中使用存储过程生成文本文件的方式"
+
+img = "/zh/post/DarkAthena/title/img.png"
+
+times = "10:20"
+
++++
+
+# 两种在openGauss中使用存储过程生成文本文件的方式
+
+本文出处:[https://www.modb.pro/db/545619](https://www.modb.pro/db/545619)
+
+## 前言
+
+在很多使用Oracle数据库的业务应用系统中,尤其是涉及到多个系统需要进行大量数据交互的,如果使用HTTP API直接传递表格数据,效率会极其慢,且很耗应用的计算性能,所以往往会采用文件的方式来传输大量数据。
+Oracle中有一个utl_file包,就适用于此场景,在数据库中直接查询出数据,然后写入文件,省去了应用的内存占用,也节省了应用到数据库之间的网络开销。
+
+可是,在openGauss中并无utl_file包,那么在openGauss中该如何实现utl_file包的功能呢?
+
+## 第三方解决方案
+
+由于openGauss源自开源的postgresql,而前面提到的这个场景,肯定有人考虑过。那么,秉着不重复造轮子的原则,先找一下有没有人在postgresql中做过类似的实现吧。
+
+这一搜,就搜到了有名的oracle兼容插件orafce。
+
+[https://github.com/orafce/orafce](https://github.com/orafce/orafce)
+
+在orafce中,提供了很多oracle兼容函数、视图,还有dual表,以及几个oracle内置包,比如dbms_output、dbms_sql、utl_file、dbms_pipe、dbms_alert、DBMS_utility、DBMS_random。
+
+在很多基于postgresql开发的数据库中,都可以见到这些包,也就是说这些数据库很可能都是用了orafce这个插件,或者是进行了移植。这些包里的函数,和oracle并非完全一致,所以可以根据那些不一致的函数,来识别是否为orafce的代码,关于这个就不再展开了。
+
+同样,在openGauss中,虽然默认安装时是没有orafce插件的,但是社区提供了适配过的orafce源码: [https://gitee.com/opengauss/Plugin/tree/master/contrib/orafce](https://gitee.com/opengauss/Plugin/tree/master/contrib/orafce)
+
+安装这个插件后,数据库中就有utl_file了,使用方式和oracle数据库差不多,但有几点区别需要注意
+
+1. 原生pg没有create directory这个语法,所以也不存在像oracle当中的DBA_DIRECTORY这样的视图,所以orafce创建了一个表utl_file_dir,当需要创建目录名和实际目录的对应关系时,插一行数据进去就行了;如果这个表中没有对应目录的记录,则不允许访问操作系统上的对应目录。
+2. 不支持nchar/nvarchar文本的读写,不过由于PG本身就没国家字符集这个东西,所以也没必要了
+3. 不支持二进制(raw)的读写。虽然大多数时候一般是读写文本,但是在oracle中,经常会需要采用二进制的读写来确保数据的一致性,因为文本中的特殊字符往往容易出现由于没有转义而产生的差异,而且还有字符集的问题。更别提有些oracle数据库中的确管理着二进制文件,比如图片、excel表格、pdf文档等。
+
+#### Improvements for openGauss
+
+Regarding point 1: PostgreSQL has no directory management, but openGauss does. openGauss supports the CREATE DIRECTORY syntax, backed by the pg_directory table
+
+[https://docs.opengauss.org/zh/docs/3.0.0/docs/Developerguide/CREATE-DIRECTORY.html](https://docs.opengauss.org/zh/docs/3.0.0/docs/Developerguide/CREATE-DIRECTORY.html)
+
+[https://docs.opengauss.org/zh/docs/3.0.0/docs/Developerguide/PG_DIRECTORY.html](https://docs.opengauss.org/zh/docs/3.0.0/docs/Developerguide/PG_DIRECTORY.html)
+
+So we can borrow the pg_directory table to create a dba_directories compatibility view
+
+```
+create view public.dba_directories as
+select 'SYS' OWNER,
+dirname DIRECTORY_NAME,
+dirpath DIRECTORY_PATH
+FROM pg_catalog.pg_directory;
+```
+
+Then, before running create extension orafce, edit orafce's SQL script file
+
+Comment out the SQL that creates the utl_file_dir table and create a utl_file_dir view over the pg_directory table instead, like this
+
+```
+/*CREATE TABLE utl_file.utl_file_dir(dir text, dirname text unique);*/
+/*for openGauss*/
+create view utl_file.utl_file_dir as
+select dirpath dir,dirname from pg_catalog.pg_directory;
+```
+
+Save, then run create extension orafce in the database. You can now create directories with CREATE DIRECTORY exactly as in an Oracle database and use utl_file for text reads and writes.
+
+Note that creating a directory requires granting the user the corresponding role, and access to operating-system directories must be enabled on the server
+
+```
+grant gs_role_directory_create to username;
+ALTER SYSTEM SET enable_access_server_directory TO on;
+```
+
+In openGauss 3.0, CREATE DIRECTORY is otherwise meaningless: it exists for the DBE_FILE package of GaussDB (for openGauss), and the open-source openGauss does not include that package. The approach in this article gives CREATE DIRECTORY new value.
+
+## Without third-party code
+
+Requirements are often led astray: instead of examining the user's original scenario, one assumes a fixed path of analysis and overcomplicates the problem.
+
+For example, in customers' Oracle databases we frequently find a stored procedure like this one (the name may vary): given a directory, file name, delimiter, and query SQL, it calls dbms_sql and utl_file to generate a file, much like the code in this article from 2006
+
+https://blog.csdn.net/mxfhhh/article/details/606168
+
+```
+create or replace procedure UP_DATA_TO_TXT
+(
+p_query in varchar2,
+--the SELECT statement to run, strictly in the form 'select id from tablename where pp=ww'
+p_dir in varchar2, --a path allowed by utl_file (see the relevant docs)
+p_filename in varchar2 --name of the file to generate (e.g. aa, not aa.txt; the bare name is enough)
+) is
+l_cursor number;
+l_row number; --rows executed
+l_id varchar2(40);
+l_name varchar2(80);
+l_column_value varchar2(1000);
+--
+l_output utl_file.file_type;
+l_line varchar2(4000):=null;
+--
+l_colcnt integer;
+l_desctbl DBMS_SQL.DESC_TAB;
+l_cnt number default 0;
+first_column_value varchar2(1000);
+begin
+l_cursor:=dbms_sql.open_cursor;
+dbms_sql.parse(l_cursor,p_query,dbms_sql.native); --parse the statement
+dbms_sql.describe_columns(l_cursor,l_colcnt,l_desctbl);--describe the columns to obtain the column count
+----
+for i in 1..l_colcnt loop--define the columns
+DBMS_SQL.DEFINE_COLUMN(l_cursor,i,l_column_value,1000);
+end loop;
+-----
+l_row:=dbms_sql.execute(l_cursor); --execute the statement
+--
+l_output:=utl_file.fopen(p_dir,p_filename||'.txt','w',32760);
+LOOP
+ IF DBMS_SQL.FETCH_ROWS(l_cursor)>0 THEN
+ -------------------
+ l_line:=null;
+ for i in 2..l_colcnt loop--collect this row's column values
+ DBMS_SQL.COLUMN_VALUE(l_cursor,1,first_column_value);
+ DBMS_SQL.COLUMN_VALUE(l_cursor,i,l_column_value);
+ l_line:=l_line||'|'||l_column_value;
+end loop;
+ ------------------
+ l_line:=first_column_value||l_line;
+ utl_file.put_line(l_output,l_line);
+ else
+ exit;
+ end if;
+end loop ;
+--
+utl_file.fclose(l_output);
+dbms_sql.close_cursor(l_cursor); --close the cursor
+EXCEPTION
+when no_data_found then
+utl_file.fclose(l_output);
+WHEN OTHERS THEN
+IF DBMS_SQL.IS_OPEN(l_cursor) THEN
+DBMS_SQL.CLOSE_CURSOR(l_cursor);
+END IF;
+RAISE;
+end UP_DATA_TO_TXT;
+```
+
+Because Oracle provides no function or syntax to dump a SQL query's result set to a file directly from SQL or PL/SQL, one has to use dbms_sql to loop over every row and column of the result, concatenate each row into a string, and call utl_file in a loop to write the strings to the file.
+
+But MySQL, SQL Server, and PostgreSQL all have syntax for writing a query result to a file; in PostgreSQL it looks like this
+
+```
+copy (select * from table) to '/tmp/file_name.csv' with (delimiter E',');
+```
+
+COPY TO is SQL syntax, just like SELECT or CREATE, so it can also be used inside a stored procedure.
+
+However, openGauss hardens security: direct access to files on the server's operating system is considered too risky, so non-initial users have no permission to execute COPY TO.
+
+Yet openGauss, following Oracle, provides a way to run a stored procedure with its definer's privileges (AUTHID definer), so we can use a mechanism openGauss itself provides to work around openGauss's restriction.
+
+#### Steps
+
+1. On the database server, log in to the database as the initial user
+2. Enable access to server directories, and grant the target user the right to create directories
+
+```
+ALTER SYSTEM SET enable_access_server_directory TO on;
+grant gs_role_directory_create to username;
+```
+
+3. Create the stored procedure
+
+```
+--create the procedure as the initial user (note: specify the schema to use)
+create or replace procedure username.UP_DATA_TO_TXT(P_DIR IN VARCHAR2, --directory, as in Oracle
+ P_FILENAME IN VARCHAR2, --output file name
+ P_DELIMITER IN VARCHAR2, --delimiter
+ P_QUERY IN VARCHAR2 --the SQL whose result is exported
+ )
+ AUTHID definer
+is
+l_realpath text;
+begin
+ select dirpath into l_realpath
+ from pg_catalog.pg_directory
+ where dirname=P_DIR;
+ execute 'COPY ('||P_QUERY||') TO '''||l_realpath||'/'||P_FILENAME||''' with (delimiter E'''||P_DELIMITER||''')';
+end;
+```
+
+4. Create the directory
+
+```
+create directory tmpdir as '/tmp';
+```
+
+5. Switch users and test the stored procedure
+
+```
+begin
+ UP_DATA_TO_TXT('tmpdir','test123.dat',',','select proname,1 b from pg_proc where rownum<=10');
+end;
+```
+
+6. Check whether the file exists in the corresponding operating-system directory
+
+```
+cat /tmp/test123.dat
+
+abbrev,1
+abbrev,1
+abort,1
+abs,1
+abs,1
+abs,1
+abs,1
+abs,1
+abs,1
+abstime,1
+```
+
+As you can see, the call to this final stored procedure fully satisfies the customer's original requirement, and it is even more efficient, because it really executes only one SQL command instead of the nested loops and string concatenation needed in Oracle.
+
+## Summary
+
+In many projects adapting to domestic databases, people often skip the original requirement scenario and directly demand support for the equivalent feature, which puts the cart before the horse. In this article's example, implementing the dbms_sql and utl_file packages just to dump a SQL result to a text file, while keeping the convoluted Oracle-style logic, looks highly compatible and migrates smoothly, but its execution performance is nowhere near the database's own single built-in command. Once you have decided to change databases, you should use the approach that suits the target database; code that should change must change, otherwise you will never experience the stronger features and better performance of the domestic database.
+
+> - **Author:** [DarkAthena](https://www.darkathena.top/)
+> - **Link:** [https://www.darkathena.top/archives/2way-write-file-with-procedure-in-opengauss](https://www.darkathena.top/archives/2way-write-file-with-procedure-in-opengauss)
+> - **Copyright:** Unless otherwise stated, all articles on this blog are licensed under [CC BY-NC-SA 3.0](https://creativecommons.org/licenses/by-nc-sa/3.0/). Please credit the source when reposting!
diff --git "a/content/zh/post/DarkAthena/\345\234\250windows\344\270\255\344\275\277\347\224\250\345\256\271\345\231\250\345\214\226\347\232\204mogeaver.md" "b/content/zh/post/DarkAthena/\345\234\250windows\344\270\255\344\275\277\347\224\250\345\256\271\345\231\250\345\214\226\347\232\204mogeaver.md"
new file mode 100644
index 0000000000000000000000000000000000000000..626d0dccd25b16ca6fe37b8d19aa11c5f4a48985
--- /dev/null
+++ "b/content/zh/post/DarkAthena/\345\234\250windows\344\270\255\344\275\277\347\224\250\345\256\271\345\231\250\345\214\226\347\232\204mogeaver.md"
@@ -0,0 +1,113 @@
++++
+
+title = "Using containerized mogeaver on Windows"
+
+date = "2022-09-09"
+
+tags = ["openGauss"]
+
+archives = "2022-09"
+
+author = "DarkAthena"
+
+summary = "Using containerized mogeaver on Windows"
+
+img = "/zh/post/DarkAthena/title/img.png"
+
+times = "10:20"
+
++++
+
+# [openGauss] Using containerized mogeaver on Windows
+
+## Preface
+
+This is an exploration of bringing some currently Linux-only features to Windows; it is not recommended for production use.
+
+## Idea
+
+1. Windows 10/11 can now run Docker natively, without a Linux virtual machine
+2. Docker on Windows can use an X11 tool such as VcXsrv or Xming to display a container's graphical interface on Windows
+3. The Linux build of mogeaver uses GTK for its GUI
+
+So, in theory, we can take an operating-system Docker image, install a GTK and JDK environment, drop the Linux build of mogeaver into it, set up a display on Windows with VcXsrv, start the container with its DISPLAY pointed at Windows, and run the Linux build of mogeaver on Windows.
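+To make the moving parts concrete, here is a guessed shell-style equivalent of what a launcher such as run_mogeaver.bat has to do; the image tag, the container-side paths, and the use of host.docker.internal are assumptions for illustration, not the project's actual script:
+
+```bash
+#!/bin/sh
+# Hypothetical launcher sketch: run the Linux GTK build of mogeaver in a
+# container and send its display to the VcXsrv server on the Windows host.
+# Image name and container-side paths are illustrative placeholders.
+HOST="host.docker.internal"   # resolves to the host machine under Docker Desktop
+
+docker run --rm \
+    -e DISPLAY="${HOST}:0.0" \
+    -v /d/MogeaverData:/root/.mogeaver \
+    -v /d/dump_data:/dump_data \
+    mogeaver-docker:latest
+```
+
+Here --rm mirrors the "closing the program removes the container" behaviour described below, and the two -v mounts are what would persist the user configuration and the gs_dump output on drive D:.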
+
+## Problems to solve
+
+1. Ideally, find a Docker image that already ships a GTK environment, so we don't have to build one from scratch (I hadn't done GTK development before; this saves time...)
+2. Slim Docker images usually have no Chinese font support, so mogeaver could not display Chinese text, while Windows users in China mostly prefer a Chinese interface
+3. Preconfigure the directories of client tools such as gs_dump/gsql in mogeaver, to reduce the user's environment setup
+4. The user's configuration files must be persisted on the host, so the configuration is not lost the next time the container starts
+5. Ideally, create the container each time mogeaver is used and delete it automatically on close, to save resources
+6. The default yum repositories are sometimes unreachable, so a fallback source is needed, otherwise the image build may fail
+
+## A semi-finished product
+
+Project address:
+https://gitee.com/darkathena/mogeaver-docker
+
+------
+
+#### Usage
+
+1. First make sure docker-desktop is installed on the local Windows machine and the service is running
+
+> https://www.docker.com/get-started/
+
+2. Download VcXsrv
+
+> https://sourceforge.net/projects/vcxsrv/
+
+3. Install VcXsrv, clicking Next all the way; then open XLaunch and click Next all the way
+4. Download the mogdb and mogeaver archives into this folder
+
+```
+wget https://cdn-mogdb.enmotech.com/mogeaver/22.1.5/mogeaver-ce-22.1.5-linux.gtk.x86_64.tar.gz
+wget https://cdn-mogdb.enmotech.com/mogdb-media/3.0.1/MogDB-3.0.1-CentOS-x86_64.tar.gz
+```
+
+5. Enter this directory and run the image build command
+
+```
+docker build -t mogeaver-docker:latest .
+```
+
+6. When the build finishes, run run_mogeaver.bat to start mogeaver
+
+## Customization notes
+
+1. The user configuration path defaults to d:\MogeaverData and can be changed by editing run_mogeaver.bat
+2. The gs_dump output path defaults to d:\dump_data and can be changed by editing run_mogeaver.bat
+3. For Chinese font support, uncomment the "Chinese font support" section of the dockerfile
+4. To switch to a domestic yum source, uncomment the "change yum repo" section of the dockerfile
+
+#### Highlights
+
+1. The openGauss command-line client tools, such as gsql/gs_dump, are bundled and can be invoked through the corresponding mogeaver features
+2. Closing the program deletes the container, saving memory
+
+## Screenshots
+
+1. Build
+
+ ![image-1662550269020](./images/16bcc5b476ab328016746560d331ea19.png)
+
+2. The application's opening screen
+ ![image-1662550350017](./images/e9f0ab7b1e4c1b98f58b42333b74f5db.png)
+3. Running a backup
+ ![image-1662550519565](./images/f21188242d4b9e58f48df77806eb5cd0.png)
+ ![image-1662550700045](./images/c5d256000aacb55e4f5989d04614d52d.png)
+4. The generated file
+ ![image-1662550744841](./images/bb6a266ced83afa526b54732382aaccf.png)
+
+## Known issue
+
+For reasons not yet understood, the containerized program crashes after being left idle for a while...
+
+## Afterword
+
+While building this I learned that a great many things can be containerized nowadays. It also gave me the idea of wrapping a gs_dump.exe that calls gs_dump inside a Linux container, so that the Windows build of mogeaver could invoke gs_dump. I'll look into it when I have time.
+
+> - **Author:** [DarkAthena](https://www.darkathena.top/)
+> - **Link:** https://www.darkathena.top/archives/mogeaver-on-docker
+> - **Copyright:** Unless otherwise stated, all articles on this blog are licensed under [CC BY-NC-SA 3.0](https://creativecommons.org/licenses/by-nc-sa/3.0/). Please credit the source when reposting!
diff --git a/content/zh/post/Frank/images/-KpBhFIZvSffso4TokTHA8LomQcLevuzX9SBJD4RIgk.png b/content/zh/post/Frank/images/-KpBhFIZvSffso4TokTHA8LomQcLevuzX9SBJD4RIgk.png new file mode 100644 index 0000000000000000000000000000000000000000..347caa6b7eeef4062ccb7c79495f3882cc487d2f Binary files /dev/null and b/content/zh/post/Frank/images/-KpBhFIZvSffso4TokTHA8LomQcLevuzX9SBJD4RIgk.png differ diff --git a/content/zh/post/Frank/images/0VHoHHjvJv3SbUbRClBO96vF9era1KSfM4JABrhNqCI.png b/content/zh/post/Frank/images/0VHoHHjvJv3SbUbRClBO96vF9era1KSfM4JABrhNqCI.png new file mode 100644 index 0000000000000000000000000000000000000000..ed46b788af8e22278fc7b96f996a062eb172f495 Binary files /dev/null and b/content/zh/post/Frank/images/0VHoHHjvJv3SbUbRClBO96vF9era1KSfM4JABrhNqCI.png differ diff --git a/content/zh/post/Frank/images/2lkwtu3hysA5t9rJv5BGDx5lLDeavfwJwcdfqA_crgc.png b/content/zh/post/Frank/images/2lkwtu3hysA5t9rJv5BGDx5lLDeavfwJwcdfqA_crgc.png new file mode 100644 index 0000000000000000000000000000000000000000..0c42bf228f3e6135c548b38745a35fdd801b942f Binary files /dev/null and b/content/zh/post/Frank/images/2lkwtu3hysA5t9rJv5BGDx5lLDeavfwJwcdfqA_crgc.png differ diff --git a/content/zh/post/Frank/images/58eccf60-364f-424b-9785-ecad541fc26f.png b/content/zh/post/Frank/images/58eccf60-364f-424b-9785-ecad541fc26f.png new file mode 100644 index 0000000000000000000000000000000000000000..66f29be495c732515ba0312c0bdaa1d47b43d518 Binary files /dev/null and b/content/zh/post/Frank/images/58eccf60-364f-424b-9785-ecad541fc26f.png differ diff --git a/content/zh/post/Frank/images/5hYUZcL0vSpgxKcA5tt4lyykneLxZlI0-XflBRWOfkc.png b/content/zh/post/Frank/images/5hYUZcL0vSpgxKcA5tt4lyykneLxZlI0-XflBRWOfkc.png new file mode 100644 index 0000000000000000000000000000000000000000..4f5cc99bb5a8d0c649da1fa54f272bfc20a508ac Binary files /dev/null and b/content/zh/post/Frank/images/5hYUZcL0vSpgxKcA5tt4lyykneLxZlI0-XflBRWOfkc.png differ diff --git a/content/zh/post/Frank/images/8f11c785-f027-47b5-a1ba-726edaacb2f2.png b/content/zh/post/Frank/images/8f11c785-f027-47b5-a1ba-726edaacb2f2.png new file mode 100644 index 0000000000000000000000000000000000000000..9ea79b2c45f42df98fc615b98044dbb8e977c580 Binary files /dev/null and b/content/zh/post/Frank/images/8f11c785-f027-47b5-a1ba-726edaacb2f2.png differ diff --git a/content/zh/post/Frank/images/CIqNKVJAIA0bRKmbhN87vkvw3u9GVIU1dqvtk3C6oAU.png b/content/zh/post/Frank/images/CIqNKVJAIA0bRKmbhN87vkvw3u9GVIU1dqvtk3C6oAU.png new file mode 100644 index 0000000000000000000000000000000000000000..1a0b2964194977bd2b1e62bdb0095577c101d2f9 Binary files /dev/null and b/content/zh/post/Frank/images/CIqNKVJAIA0bRKmbhN87vkvw3u9GVIU1dqvtk3C6oAU.png differ diff --git a/content/zh/post/Frank/images/CXqGX8iKzhqFGoTa4oMk8kyoXIdktRzQek2pVxe4XzI.png b/content/zh/post/Frank/images/CXqGX8iKzhqFGoTa4oMk8kyoXIdktRzQek2pVxe4XzI.png new file mode 100644 index 0000000000000000000000000000000000000000..d7d68702bd1a29c40a87ac18985fc00b65726375 Binary files /dev/null and b/content/zh/post/Frank/images/CXqGX8iKzhqFGoTa4oMk8kyoXIdktRzQek2pVxe4XzI.png differ diff --git a/content/zh/post/Frank/images/F1ACrSt9oWqMrfGvlRRLsQQcC-qNBV9jiZH3Y4RZllk.png b/content/zh/post/Frank/images/F1ACrSt9oWqMrfGvlRRLsQQcC-qNBV9jiZH3Y4RZllk.png new file mode 100644 index 0000000000000000000000000000000000000000..52367345e6caa0d61617814f649b1d2877e140c5 Binary files /dev/null and b/content/zh/post/Frank/images/F1ACrSt9oWqMrfGvlRRLsQQcC-qNBV9jiZH3Y4RZllk.png differ diff --git 
a/content/zh/post/Frank/images/HIS6hdLiNFe6czt6hwP2pi_NnvwiFnaRrKxs1DhPu5s.png b/content/zh/post/Frank/images/HIS6hdLiNFe6czt6hwP2pi_NnvwiFnaRrKxs1DhPu5s.png new file mode 100644 index 0000000000000000000000000000000000000000..e09d3e87ce50c71bedfdd60d265cde33d9cf33c1 Binary files /dev/null and b/content/zh/post/Frank/images/HIS6hdLiNFe6czt6hwP2pi_NnvwiFnaRrKxs1DhPu5s.png differ diff --git a/content/zh/post/Frank/images/IXg70KyBpL6kZBZfiSaTvcKYBQ7q93o-UlBFGP6FU3w.png b/content/zh/post/Frank/images/IXg70KyBpL6kZBZfiSaTvcKYBQ7q93o-UlBFGP6FU3w.png new file mode 100644 index 0000000000000000000000000000000000000000..56b867f77b132bb3c12ba6370d3d4e51569d33f9 Binary files /dev/null and b/content/zh/post/Frank/images/IXg70KyBpL6kZBZfiSaTvcKYBQ7q93o-UlBFGP6FU3w.png differ diff --git a/content/zh/post/Frank/images/JDztFoVEkBL515qAiQljPxPByxTa4pjIe15g50Ciyl4.png b/content/zh/post/Frank/images/JDztFoVEkBL515qAiQljPxPByxTa4pjIe15g50Ciyl4.png new file mode 100644 index 0000000000000000000000000000000000000000..6809266dc95ea47b98580bfe93b40f6695a24747 Binary files /dev/null and b/content/zh/post/Frank/images/JDztFoVEkBL515qAiQljPxPByxTa4pjIe15g50Ciyl4.png differ diff --git a/content/zh/post/Frank/images/K6wZLF-stZaD6c4LzgP5UMEjo0qxjuOw-Irj42Q33fw.png b/content/zh/post/Frank/images/K6wZLF-stZaD6c4LzgP5UMEjo0qxjuOw-Irj42Q33fw.png new file mode 100644 index 0000000000000000000000000000000000000000..21d0edd387699dd71a4a894a7cf00bc4b2b41577 Binary files /dev/null and b/content/zh/post/Frank/images/K6wZLF-stZaD6c4LzgP5UMEjo0qxjuOw-Irj42Q33fw.png differ diff --git a/content/zh/post/Frank/images/KUsItz-toPUDk-Buu-IDePka9z6bFqkA6gwPYsHD6OY.png b/content/zh/post/Frank/images/KUsItz-toPUDk-Buu-IDePka9z6bFqkA6gwPYsHD6OY.png new file mode 100644 index 0000000000000000000000000000000000000000..b9b0e4fd99cce2d55a044187cd06513731197d26 Binary files /dev/null and b/content/zh/post/Frank/images/KUsItz-toPUDk-Buu-IDePka9z6bFqkA6gwPYsHD6OY.png differ diff --git a/content/zh/post/Frank/images/KrMpACrr5KZlCZEmR_IoUv8oujgifomkHG7sGM_2Yko.png b/content/zh/post/Frank/images/KrMpACrr5KZlCZEmR_IoUv8oujgifomkHG7sGM_2Yko.png new file mode 100644 index 0000000000000000000000000000000000000000..47d33c0c36229c97fd3b36ec5608e1cf5b35104b Binary files /dev/null and b/content/zh/post/Frank/images/KrMpACrr5KZlCZEmR_IoUv8oujgifomkHG7sGM_2Yko.png differ diff --git a/content/zh/post/Frank/images/Mf6D8OGOudOnpvwXJ_oqmPAexpfr_EYSWazakX4FVT0.png b/content/zh/post/Frank/images/Mf6D8OGOudOnpvwXJ_oqmPAexpfr_EYSWazakX4FVT0.png new file mode 100644 index 0000000000000000000000000000000000000000..8c38f58d8923c3d9a74c3852b4ce786b65416499 Binary files /dev/null and b/content/zh/post/Frank/images/Mf6D8OGOudOnpvwXJ_oqmPAexpfr_EYSWazakX4FVT0.png differ diff --git a/content/zh/post/Frank/images/OAlETvVs-VRdXpoBOenwSil6WkhILHKdzb1OEkU1lYY.png b/content/zh/post/Frank/images/OAlETvVs-VRdXpoBOenwSil6WkhILHKdzb1OEkU1lYY.png new file mode 100644 index 0000000000000000000000000000000000000000..11573df1b8cc5d81b526d984d043053c3469a2cd Binary files /dev/null and b/content/zh/post/Frank/images/OAlETvVs-VRdXpoBOenwSil6WkhILHKdzb1OEkU1lYY.png differ diff --git a/content/zh/post/Frank/images/OqBg0ocJYlX1ahwRXdt65SGstd9yCDU8dRTKaxrwO_k.png b/content/zh/post/Frank/images/OqBg0ocJYlX1ahwRXdt65SGstd9yCDU8dRTKaxrwO_k.png new file mode 100644 index 0000000000000000000000000000000000000000..cfdb374d91ace59ff945447be848d7c7621ac018 Binary files /dev/null and b/content/zh/post/Frank/images/OqBg0ocJYlX1ahwRXdt65SGstd9yCDU8dRTKaxrwO_k.png differ diff --git 
a/content/zh/post/Frank/images/PNcP_TtELNoc1JnjdxdJFZJAd5FlK94zcMgv63wDs-w.webp b/content/zh/post/Frank/images/PNcP_TtELNoc1JnjdxdJFZJAd5FlK94zcMgv63wDs-w.webp new file mode 100644 index 0000000000000000000000000000000000000000..6641239a402a2365254aaaf4632c7fb548fa6587 Binary files /dev/null and b/content/zh/post/Frank/images/PNcP_TtELNoc1JnjdxdJFZJAd5FlK94zcMgv63wDs-w.webp differ diff --git a/content/zh/post/Frank/images/Rr8WMD2FqdxOic2kFbxXJic35fdB_YAqnmerRQwqQG4.png b/content/zh/post/Frank/images/Rr8WMD2FqdxOic2kFbxXJic35fdB_YAqnmerRQwqQG4.png new file mode 100644 index 0000000000000000000000000000000000000000..d48c78c5a9408a80d4c50d8148e65c01b67d271e Binary files /dev/null and b/content/zh/post/Frank/images/Rr8WMD2FqdxOic2kFbxXJic35fdB_YAqnmerRQwqQG4.png differ diff --git a/content/zh/post/Frank/images/SkGqKxzpBFRYbQ3rSQpTWQnIQr8DenGIOcf61GaVkfY.png b/content/zh/post/Frank/images/SkGqKxzpBFRYbQ3rSQpTWQnIQr8DenGIOcf61GaVkfY.png new file mode 100644 index 0000000000000000000000000000000000000000..429bc38434a64b0118b2a4c46c519d986a23ec7f Binary files /dev/null and b/content/zh/post/Frank/images/SkGqKxzpBFRYbQ3rSQpTWQnIQr8DenGIOcf61GaVkfY.png differ diff --git a/content/zh/post/Frank/images/T311n-kMrG-vp6DcsMlDiHUBfuC6BfNcHDMMYjzs1No.png b/content/zh/post/Frank/images/T311n-kMrG-vp6DcsMlDiHUBfuC6BfNcHDMMYjzs1No.png new file mode 100644 index 0000000000000000000000000000000000000000..b25b528cf83df21498005703e1794991d72aa1c0 Binary files /dev/null and b/content/zh/post/Frank/images/T311n-kMrG-vp6DcsMlDiHUBfuC6BfNcHDMMYjzs1No.png differ diff --git a/content/zh/post/Frank/images/URoeE6RPdl6HicMOaUmsEYPGtua3rFF-H434jfHwnnA.png b/content/zh/post/Frank/images/URoeE6RPdl6HicMOaUmsEYPGtua3rFF-H434jfHwnnA.png new file mode 100644 index 0000000000000000000000000000000000000000..0f2bd76f76f75084e717cb4c8cd4b3917461cd67 Binary files /dev/null and b/content/zh/post/Frank/images/URoeE6RPdl6HicMOaUmsEYPGtua3rFF-H434jfHwnnA.png differ diff --git a/content/zh/post/Frank/images/WH2_Mzd71Rk3VVjf1TECjxhpzKx-tAku9kfp3MX1aS0.png b/content/zh/post/Frank/images/WH2_Mzd71Rk3VVjf1TECjxhpzKx-tAku9kfp3MX1aS0.png new file mode 100644 index 0000000000000000000000000000000000000000..0ebe9a1e6b1ebc0ff6ffa5b00a381349d366e1bd Binary files /dev/null and b/content/zh/post/Frank/images/WH2_Mzd71Rk3VVjf1TECjxhpzKx-tAku9kfp3MX1aS0.png differ diff --git a/content/zh/post/Frank/images/ZvFmq2I9sxqe8M4iAg23tiFMEXKRCEMGAtOy8DUsLaQ.png b/content/zh/post/Frank/images/ZvFmq2I9sxqe8M4iAg23tiFMEXKRCEMGAtOy8DUsLaQ.png new file mode 100644 index 0000000000000000000000000000000000000000..93c57effdc39f78ef9870eb7afe6b0a4d3daa633 Binary files /dev/null and b/content/zh/post/Frank/images/ZvFmq2I9sxqe8M4iAg23tiFMEXKRCEMGAtOy8DUsLaQ.png differ diff --git a/content/zh/post/Frank/images/_ULgIk65_zQk1RhUAv8RfsGEDeXXasqkdwUGJ8RBsn0.png b/content/zh/post/Frank/images/_ULgIk65_zQk1RhUAv8RfsGEDeXXasqkdwUGJ8RBsn0.png new file mode 100644 index 0000000000000000000000000000000000000000..1cfb649402a8c51403a88726f956b8de497103d9 Binary files /dev/null and b/content/zh/post/Frank/images/_ULgIk65_zQk1RhUAv8RfsGEDeXXasqkdwUGJ8RBsn0.png differ diff --git a/content/zh/post/Frank/images/aNMGZYnNnS_AQJ0HBUBP8frpYKy9Uy3ZMXDDr1k6X8M.png b/content/zh/post/Frank/images/aNMGZYnNnS_AQJ0HBUBP8frpYKy9Uy3ZMXDDr1k6X8M.png new file mode 100644 index 0000000000000000000000000000000000000000..04d4281cce2d3daf45fcf855706dbd0227895166 Binary files /dev/null and b/content/zh/post/Frank/images/aNMGZYnNnS_AQJ0HBUBP8frpYKy9Uy3ZMXDDr1k6X8M.png differ diff --git 
a/content/zh/post/Frank/images/aNZdKt9BgaGiInzMuH3jNar-18Ca0fCHGN0vtksiH3w.png b/content/zh/post/Frank/images/aNZdKt9BgaGiInzMuH3jNar-18Ca0fCHGN0vtksiH3w.png new file mode 100644 index 0000000000000000000000000000000000000000..1bf61b7fb8142e7397f1b01a405ea6a1dd33668d Binary files /dev/null and b/content/zh/post/Frank/images/aNZdKt9BgaGiInzMuH3jNar-18Ca0fCHGN0vtksiH3w.png differ diff --git a/content/zh/post/Frank/images/aebKG7cndlMYcoRcq7qs5m-GYbhGj74zOR2NeJKh4GI.png b/content/zh/post/Frank/images/aebKG7cndlMYcoRcq7qs5m-GYbhGj74zOR2NeJKh4GI.png new file mode 100644 index 0000000000000000000000000000000000000000..d238d7109e309e9b70efa0802faf747899560c57 Binary files /dev/null and b/content/zh/post/Frank/images/aebKG7cndlMYcoRcq7qs5m-GYbhGj74zOR2NeJKh4GI.png differ diff --git a/content/zh/post/Frank/images/atOSnaLp81o_VSks5RwCxp3ERunFF8zXfwXrZmc4xrg.png b/content/zh/post/Frank/images/atOSnaLp81o_VSks5RwCxp3ERunFF8zXfwXrZmc4xrg.png new file mode 100644 index 0000000000000000000000000000000000000000..4c529cea76104a8406923a303fd05027843b2102 Binary files /dev/null and b/content/zh/post/Frank/images/atOSnaLp81o_VSks5RwCxp3ERunFF8zXfwXrZmc4xrg.png differ diff --git a/content/zh/post/Frank/images/bC0K0WfvrI-N1f4UtiZg8DHwafzyD9YxsLRgtmm2zyY.png b/content/zh/post/Frank/images/bC0K0WfvrI-N1f4UtiZg8DHwafzyD9YxsLRgtmm2zyY.png new file mode 100644 index 0000000000000000000000000000000000000000..b7b2542e238d99e600dd44dc025d11debe3f559a Binary files /dev/null and b/content/zh/post/Frank/images/bC0K0WfvrI-N1f4UtiZg8DHwafzyD9YxsLRgtmm2zyY.png differ diff --git a/content/zh/post/Frank/images/c73d9245-4405-40e8-89ea-3db746426cc9.png b/content/zh/post/Frank/images/c73d9245-4405-40e8-89ea-3db746426cc9.png new file mode 100644 index 0000000000000000000000000000000000000000..29631d7a1e5531a7a7b8a55d895dfa1abe6c89f1 Binary files /dev/null and b/content/zh/post/Frank/images/c73d9245-4405-40e8-89ea-3db746426cc9.png differ diff --git a/content/zh/post/Frank/images/edUqM-zFoCgSLuhswp5lMJc9cd2xffXoZYJ4sAHx8bQ.png b/content/zh/post/Frank/images/edUqM-zFoCgSLuhswp5lMJc9cd2xffXoZYJ4sAHx8bQ.png new file mode 100644 index 0000000000000000000000000000000000000000..4525802efb649a54240f2b33de3a2606539b8b95 Binary files /dev/null and b/content/zh/post/Frank/images/edUqM-zFoCgSLuhswp5lMJc9cd2xffXoZYJ4sAHx8bQ.png differ diff --git a/content/zh/post/Frank/images/eqpxS-ZiISSZcQTwUIsjH-xwENZifmj2PPNba8BKV0I.png b/content/zh/post/Frank/images/eqpxS-ZiISSZcQTwUIsjH-xwENZifmj2PPNba8BKV0I.png new file mode 100644 index 0000000000000000000000000000000000000000..76a48ed03f0ca1c9488d641a265d950122fb48a8 Binary files /dev/null and b/content/zh/post/Frank/images/eqpxS-ZiISSZcQTwUIsjH-xwENZifmj2PPNba8BKV0I.png differ diff --git a/content/zh/post/Frank/images/hnflVLcPNAr3tYz6tImWv7ATq03s1gUimTuoXpFbkDk.png b/content/zh/post/Frank/images/hnflVLcPNAr3tYz6tImWv7ATq03s1gUimTuoXpFbkDk.png new file mode 100644 index 0000000000000000000000000000000000000000..a308bd66a20795c5aa050525e4f690e218a98e4b Binary files /dev/null and b/content/zh/post/Frank/images/hnflVLcPNAr3tYz6tImWv7ATq03s1gUimTuoXpFbkDk.png differ diff --git a/content/zh/post/Frank/images/hziku7BLZT1zXmOXpv6EZ-z0RI7XipA8tLL6xWF58Mc.png b/content/zh/post/Frank/images/hziku7BLZT1zXmOXpv6EZ-z0RI7XipA8tLL6xWF58Mc.png new file mode 100644 index 0000000000000000000000000000000000000000..1151af6df857928aaf71aed3dec29a4527625c54 Binary files /dev/null and b/content/zh/post/Frank/images/hziku7BLZT1zXmOXpv6EZ-z0RI7XipA8tLL6xWF58Mc.png differ diff --git 
a/content/zh/post/Frank/images/lWFLxa866L34SpmKIs20gW2Cu1WNuPl9Vmn1ImOIpmw.jpeg b/content/zh/post/Frank/images/lWFLxa866L34SpmKIs20gW2Cu1WNuPl9Vmn1ImOIpmw.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..d3798e446322cdbd47289e262bcb12e5900c2b71 Binary files /dev/null and b/content/zh/post/Frank/images/lWFLxa866L34SpmKIs20gW2Cu1WNuPl9Vmn1ImOIpmw.jpeg differ diff --git a/content/zh/post/Frank/images/lWgH2foPpaW6bs4tZ71JzhLuKib1-kFxJhaWd3RwyQk.png b/content/zh/post/Frank/images/lWgH2foPpaW6bs4tZ71JzhLuKib1-kFxJhaWd3RwyQk.png new file mode 100644 index 0000000000000000000000000000000000000000..915ec38ba9bae0f1d12ccbedc84ee80642fa4253 Binary files /dev/null and b/content/zh/post/Frank/images/lWgH2foPpaW6bs4tZ71JzhLuKib1-kFxJhaWd3RwyQk.png differ diff --git a/content/zh/post/Frank/images/m3DFL8P0xiCzoH26bghnGHVfd4dvVAdlsRhsZDLZ4q8.png b/content/zh/post/Frank/images/m3DFL8P0xiCzoH26bghnGHVfd4dvVAdlsRhsZDLZ4q8.png new file mode 100644 index 0000000000000000000000000000000000000000..b4821b0356f02892987e03bfec6093ca0acc5bab Binary files /dev/null and b/content/zh/post/Frank/images/m3DFL8P0xiCzoH26bghnGHVfd4dvVAdlsRhsZDLZ4q8.png differ diff --git a/content/zh/post/Frank/images/ofpefUbUWTyuI5WDrTWmcWbLLsqGXYJM6MPgODEKgOI.png b/content/zh/post/Frank/images/ofpefUbUWTyuI5WDrTWmcWbLLsqGXYJM6MPgODEKgOI.png new file mode 100644 index 0000000000000000000000000000000000000000..378bdc4ff8de8155419a9d199cfddfa1a702a7f2 Binary files /dev/null and b/content/zh/post/Frank/images/ofpefUbUWTyuI5WDrTWmcWbLLsqGXYJM6MPgODEKgOI.png differ diff --git a/content/zh/post/Frank/images/pkFcRKjQtiMkENwnjalIv9aeJrr4wQV1bj7rRz9oUkU.png b/content/zh/post/Frank/images/pkFcRKjQtiMkENwnjalIv9aeJrr4wQV1bj7rRz9oUkU.png new file mode 100644 index 0000000000000000000000000000000000000000..869cb831c607ca1f26295655da14fabedd6a027a Binary files /dev/null and b/content/zh/post/Frank/images/pkFcRKjQtiMkENwnjalIv9aeJrr4wQV1bj7rRz9oUkU.png differ diff --git a/content/zh/post/Frank/images/qOrAkFDRBKSLD9HUwqbhpDj7gLXJxqdt1MWtava5aHY.png b/content/zh/post/Frank/images/qOrAkFDRBKSLD9HUwqbhpDj7gLXJxqdt1MWtava5aHY.png new file mode 100644 index 0000000000000000000000000000000000000000..9bed683ccfac87c53bfc6fb12851a166fd96b2da Binary files /dev/null and b/content/zh/post/Frank/images/qOrAkFDRBKSLD9HUwqbhpDj7gLXJxqdt1MWtava5aHY.png differ diff --git a/content/zh/post/Frank/images/sUnNlL8xawC9mMzN51RMDeSMOvyi0ej4dIkCyvhLfKc.png b/content/zh/post/Frank/images/sUnNlL8xawC9mMzN51RMDeSMOvyi0ej4dIkCyvhLfKc.png new file mode 100644 index 0000000000000000000000000000000000000000..b79e48e10a3bae5bbf4337b6f287f662c99724a5 Binary files /dev/null and b/content/zh/post/Frank/images/sUnNlL8xawC9mMzN51RMDeSMOvyi0ej4dIkCyvhLfKc.png differ diff --git a/content/zh/post/Frank/images/s_HvosSmYiVHF_o0vzupAk1FjsfqLEdC1_ve7JOW2MI.png b/content/zh/post/Frank/images/s_HvosSmYiVHF_o0vzupAk1FjsfqLEdC1_ve7JOW2MI.png new file mode 100644 index 0000000000000000000000000000000000000000..4e30958eb748eaaa5393f1c1bd65ef6ecdb08f03 Binary files /dev/null and b/content/zh/post/Frank/images/s_HvosSmYiVHF_o0vzupAk1FjsfqLEdC1_ve7JOW2MI.png differ diff --git a/content/zh/post/Frank/images/yepqk11REWqjDl8gMwmh-xN_QgZVjSpS6QSCdx7yr5Q.png b/content/zh/post/Frank/images/yepqk11REWqjDl8gMwmh-xN_QgZVjSpS6QSCdx7yr5Q.png new file mode 100644 index 0000000000000000000000000000000000000000..d84129cc7be42e4c8e684966121e90c754f32a8a Binary files /dev/null and b/content/zh/post/Frank/images/yepqk11REWqjDl8gMwmh-xN_QgZVjSpS6QSCdx7yr5Q.png differ diff --git 
a/content/zh/post/Frank/images/zHvEfkd3q_7XwpdH2aqUWOFH_XsNM8OdYE_WyuOdHCI.png b/content/zh/post/Frank/images/zHvEfkd3q_7XwpdH2aqUWOFH_XsNM8OdYE_WyuOdHCI.png
new file mode 100644
index 0000000000000000000000000000000000000000..ff542946689f3da40bef67b93c0fa64c9e9b29ef
Binary files /dev/null and b/content/zh/post/Frank/images/zHvEfkd3q_7XwpdH2aqUWOFH_XsNM8OdYE_WyuOdHCI.png differ
diff --git "a/content/zh/post/Frank/openGauss 3.0.0 \350\275\273\351\207\217\347\211\210\351\203\250\347\275\262.md" "b/content/zh/post/Frank/openGauss 3.0.0 \350\275\273\351\207\217\347\211\210\351\203\250\347\275\262.md"
new file mode 100644
index 0000000000000000000000000000000000000000..ed5d68195f27dc892e86d6859391610d91e446de
--- /dev/null
+++ "b/content/zh/post/Frank/openGauss 3.0.0 \350\275\273\351\207\217\347\211\210\351\203\250\347\275\262.md"
@@ -0,0 +1,166 @@
++++
+
+title = "OpenGauss 3.0.0 Lite Deployment"
+
+date = "2022-05-16"
+
+tags = ["OpenGauss3.0.0"]
+
+archives = "2022-05"
+
+author = "xingchen"
+
+summary = "OpenGauss3.0.0"
+
+img = "/zh/post/xingchen/title/58eccf60-364f-424b-9785-ecad541fc26f.png"
+
+times = "18:40"
+
++++
+# OpenGauss 3.0.0 Lite Deployment
+# Background
+openGauss 3.0.0 is the next Release version published by the openGauss community after 2.0.0, with a maintenance lifecycle of 3.5 years. It brings continued innovation and breakthroughs in high performance, high availability, high security, intelligence, and tooling, and besides the enterprise edition it also ships the community's first lightweight (Lite) edition.
+
+Today is the first day openGauss 3.0.0 is out, and I can't resist giving it a try~~
+
+# Test environment
+```bash
+Architecture: aarch64
+CPU op-mode(s): 64-bit
+Byte Order: Little Endian
+CPU(s): 8
+On-line CPU(s) list: 0-7
+Thread(s) per core: 1
+Core(s) per socket: 8
+Socket(s): 1
+NUMA node(s): 1
+Vendor ID: HiSilicon
+Model: 0
+Model name: Kunpeng-920
+Stepping: 0x1
+CPU max MHz: 2600.0000
+CPU min MHz: 2600.0000
+BogoMIPS: 200.00
+L1d cache: 512 KiB
+L1i cache: 512 KiB
+L2 cache: 4 MiB
+L3 cache: 32 MiB
+NUMA node0 CPU(s): 0-7
+Vulnerability Itlb multihit: Not affected
+Vulnerability L1tf: Not affected
+Vulnerability Mds: Not affected
+Vulnerability Meltdown: Not affected
+Vulnerability Spec store bypass: Not affected
+Vulnerability Spectre v1: Mitigation; __user pointer sanitization
+Vulnerability Spectre v2: Not affected
+Vulnerability Tsx async abort: Not affected
+Flags: fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma dcpop asimddp asimdfhm
+```
+
+```bash
+[frank@ecs-fc4a ~]$ cat /etc/os-release
+NAME="Kylin Linux Advanced Server"
+VERSION="V10 (Tercel)"
+ID="kylin"
+VERSION_ID="V10"
+PRETTY_NAME="Kylin Linux Advanced Server V10 (Tercel)"
+ANSI_COLOR="0;31"
+```
+
+# Download
+
+![](images/58eccf60-364f-424b-9785-ecad541fc26f.png)
+```bash
+[frank@ecs-fc4a ~]$ mkdir opengauss
+[frank@ecs-fc4a ~]$ wget -c https://opengauss.obs.cn-south-1.myhuaweicloud.com/3.0.0/arm/openGauss-Lite-3.0.0-openEuler-aarch64.tar.gz
+--2022-04-02 11:37:41-- https://opengauss.obs.cn-south-1.myhuaweicloud.com/3.0.0/arm/openGauss-Lite-3.0.0-openEuler-aarch64.tar.gz
+Resolving opengauss.obs.cn-south-1.myhuaweicloud.com (opengauss.obs.cn-south-1.myhuaweicloud.com)... 139.159.208.230, 121.37.63.38, 139.159.208.67, ...
+Connecting to opengauss.obs.cn-south-1.myhuaweicloud.com (opengauss.obs.cn-south-1.myhuaweicloud.com)|139.159.208.230|:443... connected.
+HTTP request sent, awaiting response... 200 OK
+Length: 21142255 (20M) [application/gzip]
+Saving to: ‘openGauss-Lite-3.0.0-openEuler-aarch64.tar.gz’
+
+openGauss-Lite-3.0.0-openEuler-aarch64.ta 100%[==================================================================================>] 20.16M 22.4MB/s in 0.9s
+
+2022-04-02 11:37:42 (22.4 MB/s) - ‘openGauss-Lite-3.0.0-openEuler-aarch64.tar.gz’ saved [21142255/21142255]
+```
+
+# Installation
+- Unpack
+
+```bash
+[frank@ecs-fc4a ~]$ mkdir opengauss
+[frank@ecs-fc4a ~]$ cd opengauss/
+[frank@ecs-fc4a opengauss]$ tar -zxf openGauss-Lite-3.0.0-openEuler-aarch64.tar.gz
+[frank@ecs-fc4a opengauss]$ ll
+total 41684
+drwx------ 2 frank frank 4096 Apr 1 18:33 dependency
+-rw------- 1 frank frank 38398 Apr 1 18:33 install.sh
+-rw------- 1 frank frank 21032901 Apr 1 18:33 openGauss-Lite-3.0.0-openEuler-aarch64.bin
+-rw------- 1 frank frank 65 Apr 1 18:33 openGauss-Lite-3.0.0-openEuler-aarch64.sha256
+-rw------- 1 frank frank 21142255 Apr 1 18:39 openGauss-Lite-3.0.0-openEuler-aarch64.tar.gz
+-rw------- 1 frank frank 742 Apr 1 18:33 opengauss_lite.conf
+-rw------- 1 frank frank 2852 Apr 1 18:33 uninstall.sh
+-rw------- 1 frank frank 38674 Apr 1 18:33 upgrade_common.sh
+-rw------- 1 frank frank 634 Apr 1 18:33 upgrade_config.sh
+-rw------- 1 frank frank 392 Apr 1 18:33 upgrade_errorcode.sh
+-rw------- 1 frank frank 1100 Apr 1 18:33 upgrade_GAUSSV5.sh
+-rw------- 1 frank frank 65 Apr 1 18:33 upgrade_sql.sha256
+-rw------- 1 frank frank 385518 Apr 1 18:33 upgrade_sql.tar.gz
+-rw------- 1 frank frank 37 Apr 1 18:33 version.cfg
+```
+
+- Modify install.sh
+Since the current version does not support Kylin V10, the script needs a small edit here to masquerade as `openEuler`.
+![](images/c73d9245-4405-40e8-89ea-3db746426cc9.png)
+
+- Install
+
+```bash
+[frank@ecs-fc4a opengauss]$ echo OpenGauss@123 | sh ./install.sh --mode single -D ~/opengauss/data -R ~/opengauss/install --start
+[frank@ecs-fc4a opengauss]$ source /home/frank/.bashrc
+```
+
+# Verification
+```bash
+[frank@ecs-fc4a opengauss]$ ps ux | grep gaussdb
+frank 10446 0.0 1.0 2451136 167808 ? Ssl 11:56 0:00 /home/frank/opengauss/install/bin/gaussdb -D /home/frank/opengauss/data
+frank 10635 0.0 0.0 214016 1536 pts/0 S+ 12:36 0:00 grep gaussdb
+```
+
+```bash
+[frank@ecs-fc4a opengauss]$ gs_ctl query -D /home/frank/opengauss/data
+[2022-04-02 12:37:26.767][10661][][gs_ctl]: gs_ctl query ,datadir is /home/frank/opengauss/data
+ HA state:
+ local_role : Normal
+ static_connections : 0
+ db_state : Normal
+ detail_information : Normal
+
+ Senders info:
+No information
+ Receiver info:
+No information
+```
+
+# Connecting to openGauss with gsql
+```SQL
+[frank@ecs-fc4a opengauss]$ gsql -d postgres
+gsql ((openGauss 3.0.0 build 02c14696) compiled at 2022-04-01 18:28:23 commit 0 last mr release)
+Non-SSL connection (SSL connection is recommended when requiring high-security)
+Type "help" for help.
+
+openGauss=# \l
+ List of databases
+ Name | Owner | Encoding | Collate | Ctype | Access privileges
+-----------+-------+----------+-------------+-------------+-------------------
+ postgres | frank | UTF8 | en_US.UTF-8 | en_US.UTF-8 |
+ template0 | frank | UTF8 | en_US.UTF-8 | en_US.UTF-8 | =c/frank +
+ | | | | | frank=CTc/frank
+ template1 | frank | UTF8 | en_US.UTF-8 | en_US.UTF-8 | =c/frank +
+ | | | | | frank=CTc/frank
+(3 rows)
+
+openGauss=#
+
+```
diff --git "a/content/zh/post/Frank/openGauss MogDB WDR\346\212\245\345\221\212\350\257\246\350\247\243.md" "b/content/zh/post/Frank/openGauss MogDB WDR\346\212\245\345\221\212\350\257\246\350\247\243.md"
new file mode 100644
index 0000000000000000000000000000000000000000..a14a2e116b5f61e4380f243ff7eef568331fbc37
--- /dev/null
+++ "b/content/zh/post/Frank/openGauss MogDB WDR\346\212\245\345\221\212\350\257\246\350\247\243.md"
@@ -0,0 +1,279 @@
++++
+
+title = "openGauss/MogDB WDR Report Explained"
+
+date = "2022-05-16"
+
+tags = ["openGauss/MogDB WDR report explained"]
+
+archives = "2022-05"
+
+author = "xingchen"
+
+summary = "openGauss/MogDB WDR Report Explained"
+
+img = "/zh/post/xingchen/title/qOrAkFDRBKSLD9HUwqbhpDj7gLXJxqdt1MWtava5aHY.png"
+
+times = "18:40"
+
++++
+# openGauss/MogDB WDR Report Explained
+# Abstract
+> WDR (Workload Diagnosis Report) is openGauss's **workload diagnosis report**, commonly used to diagnose long-term performance problems in openGauss. A WDR report is built from two system performance snapshots taken at different points in time and reports on the system's performance between those two points.
+
+# Enabling WDR snapshots
+## Parameter overview
+### enable\_wdr\_snapshot
+**Description**: whether to enable the database monitoring snapshot feature.
+
+This is a SIGHUP-type parameter; see the [GUC parameter categories](https://docs.mogdb.io/zh/mogdb/v2.1/30-appendix) table for the corresponding way to set it.
+
+**Range**: Boolean
+
+* on: enable database monitoring snapshots.
+* off: disable database monitoring snapshots.
+
+**Default**: off
+
+
+
+### wdr\_snapshot\_retention\_days
+**Description**: number of days the database monitoring snapshot data is retained. Once this is exceeded, the system purges the snapshot data with the smallest snapshot\_id at every wdr\_snapshot\_interval.
+
+This is a SIGHUP-type parameter; see the [GUC parameter categories](https://docs.mogdb.io/zh/mogdb/v2.1/30-appendix) table for the corresponding way to set it.
+
+**Range**: integer, 1~8.
+
+**Default**: 8
+
+
+
+### wdr\_snapshot\_query\_timeout
+**Description**: execution timeout for the SQL statements involved in a database monitoring snapshot operation. If a statement does not finish and return within this time, the snapshot operation fails.
+
+This is a SIGHUP-type parameter; see the [GUC parameter categories](https://docs.mogdb.io/zh/mogdb/v2.1/30-appendix) table for the corresponding way to set it.
+
+**Range**: integer, 100~INT\_MAX (seconds).
+
+**Default**: 100s
+
+
+
+### wdr\_snapshot\_interval
+**Description**: interval at which the background Snapshot thread automatically snapshots the database monitoring data.
+
+This is a SIGHUP-type parameter; see the [GUC parameter categories](https://docs.mogdb.io/zh/mogdb/v2.1/30-appendix) table for the corresponding way to set it.
+
+**Range**: integer, 10~60 (minutes).
+
+**Default**: 1h
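+Since all four parameters are SIGHUP-type, they can be applied with a reload rather than a restart; a short sketch using the same gs_guc pattern as below (the values are just the defaults listed above, shown as examples):
+
+```bash
+# Apply WDR settings without restarting; values here are examples only
+gs_guc reload -D $PGDATA -c "enable_wdr_snapshot=on"
+gs_guc reload -D $PGDATA -c "wdr_snapshot_retention_days=8"
+gs_guc reload -D $PGDATA -c "wdr_snapshot_query_timeout=100"
+gs_guc reload -D $PGDATA -c "wdr_snapshot_interval=60"
+```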
+
+
+## Check the current WDR-related settings
+```sql
+postgres@omm:local=#select name, setting from pg_settings where name like '%wdr%';
+ name | setting
+-----------------------------+---------
+ enable_wdr_snapshot | off
+ wdr_snapshot_interval | 60
+ wdr_snapshot_query_timeout | 100
+ wdr_snapshot_retention_days | 8
+(4 rows)
+```
+## Enable WDR snapshots
+```bash
+omm@107707f966f0:/var/lib/mogdb/data$ gs_guc reload -D $PGDATA -c "enable_wdr_snapshot=on"
+expected instance path: [/var/lib/mogdb/data/postgresql.conf]
+gs_guc reload: enable_wdr_snapshot=on: [/var/lib/mogdb/data/postgresql.conf]
+server signaled
+
+Total instances: 1. Failed instances: 0.
+Success to perform gs_guc!
+
+omm@107707f966f0:/var/lib/mogdb/data$ gsql -d postgres -r
+gsql ((MogDB 2.1.1 build b5f25b20) compiled at 2022-03-21 14:42:30 commit 0 last mr )
+Non-SSL connection (SSL connection is recommended when requiring high-security)
+Type "help" for help.
+
+postgres@omm:local=#select name, setting from pg_settings where name like '%wdr%';
+ name | setting
+-----------------------------+---------
+ enable_wdr_snapshot | on
+ wdr_snapshot_interval | 60
+ wdr_snapshot_query_timeout | 100
+ wdr_snapshot_retention_days | 8
+(4 rows)
+```
+## View the snapshot statistics tables
+```sql
+postgres@omm:local=#show search_path;
+ search_path
+----------------
+ "$user",public
+(1 row)
+
+postgres@omm:local=#alter session set search_path=snapshot;
+SET
+postgres@omm:local=#show search_path;
+ search_path
+-------------
+ snapshot
+(1 row)
+
+postgres@omm:local=#\d
+ List of relations
+ Schema | Name | Type | Owner | Storage
+----------+------------------------------------------+----------+-------+----------------------------------
+ snapshot | snap_class_vital_info | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_bgwriter_stat | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_ckpt_status | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_config_settings | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_double_write_status | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_file_iostat | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_file_redo_iostat | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_instance_time | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_memory_node_detail | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_os_runtime | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_os_threads | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_pagewriter_status | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_record_reset_time | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_recovery_status | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_redo_status | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_rel_iostat | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_replication_slots | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_replication_stat | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_rto_status | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_shared_memory_detail | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_stat_all_indexes | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_stat_all_tables | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_stat_bad_block | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_stat_database | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_stat_database_conflicts | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_stat_db_cu | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_stat_user_functions | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_statement_count | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_statio_all_indexes | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_statio_all_sequences | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_statio_all_tables | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_thread_wait_status | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_threadpool_status | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_transactions_prepared_xacts | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_transactions_running_xacts | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_wait_events | table | omm | {orientation=row,compression=no}
+ snapshot | snap_global_workload_transaction | table | omm | {orientation=row,compression=no}
+ snapshot | snap_seq | sequence | omm |
+ snapshot | snap_statement_responsetime_percentile | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_file_iostat | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_file_redo_iostat | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_rel_iostat | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_stat_all_indexes | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_stat_all_tables | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_stat_bad_block | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_stat_database | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_stat_database_conflicts | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_stat_user_functions | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_statement | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_statement_count | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_statio_all_indexes | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_statio_all_sequences | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_statio_all_tables | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_transactions_prepared_xacts | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_transactions_running_xacts | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_user_login | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_workload_sql_count | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_workload_sql_elapse_time | table | omm | {orientation=row,compression=no}
+ snapshot | snap_summary_workload_transaction | table | omm | {orientation=row,compression=no}
+ snapshot | snapshot | table | omm | {orientation=row,compression=no}
+ snapshot | tables_snap_timestamp | table | omm | {orientation=row,compression=no}
+(61 rows)
+```
+
+
+# Taking a snapshot manually
+### SNAPSHOT.SNAPSHOT
+The SNAPSHOT table records the index information and the start and end times of the WDR snapshot data currently stored in the system. It can only be queried in the system database; it cannot be queried in user databases.
+
+**Table 1** SNAPSHOT table attributes
+
+|Name|Type|Description|Example|
+| ----- | ----- | ----- | ----- |
+|snapshot\_id|bigint|WDR snapshot sequence number.|1|
+|start\_ts|timestamp|Start time of the WDR snapshot.|2019-12-28 17:11:27.423742+08|
+|end\_ts|timestamp|End time of the WDR snapshot.|2019-12-28 17:11:43.67726+08|
+
+
+
+```sql
+postgres@omm:local=#select * from snapshot.snapshot;
+ snapshot_id | start_ts | end_ts
+-------------+-------------------------------+-------------------------------
+ 1 | 2022-05-02 11:19:37.239977+00 | 2022-05-02 11:19:37.865708+00
+(1 row)
+
+postgres@omm:local=#select create_wdr_snapshot();
+ create_wdr_snapshot
+-----------------------------------------
+ WDR snapshot request has been submitted
+(1 row)
+
+postgres@omm:local=#select * from snapshot.snapshot;
+ snapshot_id | start_ts | end_ts
+-------------+-------------------------------+-------------------------------
+ 1 | 2022-05-02 11:19:37.239977+00 | 2022-05-02 11:19:37.865708+00
+ 2 | 2022-05-02 11:42:28.047396+00 | 2022-05-02 11:42:28.617173+00
+(2 rows)
+```
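+In practice you usually bracket a workload with two manual snapshots so the report covers exactly the interval of interest; a short sketch (run_workload.sh is a placeholder for whatever you want to measure):
+
+```bash
+# Snapshot before the workload, run the workload, snapshot after;
+# the two new snapshot_ids then bound the WDR report period.
+gsql -d postgres -c "select create_wdr_snapshot();"
+./run_workload.sh   # placeholder: the workload to diagnose
+gsql -d postgres -c "select create_wdr_snapshot();"
+gsql -d postgres -c "select * from snapshot.snapshot order by snapshot_id;"
+```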
+# Generating the performance report
+## a. Run the following command to produce a formatted performance report file.
+```Plain Text
+\a \t \o server_file_path
+```
+The parameters in this command are:
+
+* \\a: toggle unaligned output mode.
+* \\t: toggle printing of the column names and the row-count footer.
+* \\o: send all query results to the server file.
+* server\_file\_path: path where the generated performance report file is stored. The user needs read and write permission on this path.
+
+## b. Run the following command to write the queried information into the performance report.
+```Plain Text
+select generate_wdr_report(begin_snap_id bigint, end_snap_id bigint, report_type cstring, report_scope cstring, node_name cstring);
+```
+The parameters involved in the command are described below.
+
+**Table 3** generate\_wdr\_report function parameters
+
+|Parameter|Description|Range|
+| ----- | ----- | ----- |
+|begin\_snap\_id|id of the snapshot at the start of the report period (snapshot\_id in table snapshot.snapshot).|\-|
+|end\_snap\_id|id of the snapshot at the end of the report period; end\_snap\_id must be greater than begin\_snap\_id (snapshot\_id in table snapshot.snapshot).|\-|
+|report\_type|type of report to generate, e.g. summary/detail/all.|summary: aggregated data.<br>detail: detailed data.<br>all: both summary and detail.|
+|report\_scope|scope of the report, cluster or node.|cluster: database-level information.<br>node: node-level information.|
+|node\_name|when report\_scope is node, set this parameter to the corresponding node's name (the node name can be queried with select \* from pg\_node\_env;). When report\_scope is cluster, this value can be omitted, empty, or NULL.| |
+
+
+
+Run it
+
+```sql
+postgres@omm:local=#select * from pg_node_env;
+ node_name | host | process | port | installpath | datapath | log_directory
+-----------+-----------+---------+------+------------------+---------------------+---------------
+ mogdb | localhost | 1 | 5432 | /usr/local/mogdb | /var/lib/mogdb/data | pg_log
+(1 row)
+postgres@omm:local=#
+postgres@omm:local=#\a \t \o wdr_20220502.html
+postgres@omm:local=#select generate_wdr_report(1,2,'all','node','mogdb');
+```
+## c. Run the following command to close the output option and the formatting commands.
+```Plain Text
+\o \a \t
+```
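+The same \a/\t/\o setup can also be expressed with command-line switches; a hedged one-shot variant (assuming gsql's psql-style -A/-t/-o switches; snapshot ids 1 and 2 as above):
+
+```bash
+# Unaligned, tuples-only output written straight to the HTML report file
+gsql -d postgres -A -t -o wdr_20220502.html \
+     -c "select generate_wdr_report(1,2,'all','node','mogdb');"
+```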
+
+
+# Viewing the report
+![image](images/qOrAkFDRBKSLD9HUwqbhpDj7gLXJxqdt1MWtava5aHY.png)
+
+![image](images/SkGqKxzpBFRYbQ3rSQpTWQnIQr8DenGIOcf61GaVkfY.png)
+
diff --git "a/content/zh/post/Frank/openGauss MogDB\350\204\232\346\234\254\346\272\220\347\240\201\346\265\205\346\236\220\357\274\2102\357\274\211\342\200\224\342\200\224 gs_install_plugin gs_install_plugin_local.md" "b/content/zh/post/Frank/openGauss MogDB\350\204\232\346\234\254\346\272\220\347\240\201\346\265\205\346\236\220\357\274\2102\357\274\211\342\200\224\342\200\224 gs_install_plugin gs_install_plugin_local.md"
new file mode 100644
index 0000000000000000000000000000000000000000..9c56eacf2303f97ebe314f0a270678e66e492f29
--- /dev/null
+++ "b/content/zh/post/Frank/openGauss MogDB\350\204\232\346\234\254\346\272\220\347\240\201\346\265\205\346\236\220\357\274\2102\357\274\211\342\200\224\342\200\224 gs_install_plugin gs_install_plugin_local.md"
@@ -0,0 +1,336 @@
+# openGauss/MogDB Script Source Walkthrough (2): gs\_install\_plugin/gs\_install\_plugin\_local
+# Abstract
+> openGauss provides the gs\_install\_plugin/gs\_install\_plugin\_local tools for installing plugins; currently supported plugins include pg\_repack, pg\_trgm, dblink, wal2json, orafce, pg\_bulkload, pg\_prewarm, and more. gs\_install\_plugin is for MogDB deployed as one primary with multiple standbys; gs\_install\_plugin\_local is for single-node MogDB.
+
+Both tools are scripts written in Python, so their implementation can be read directly. This article analyzes the source of the two tools, aiming to understand from the bottom up how plugin installation works; with the principle in hand, the whole installation flow is easier to deal with when an install fails or has to be done by hand.
+
+
+
+# Overall flow
+The main steps are as follows:
+
+## 1 Parse the arguments
+The arguments passed on the command line are parsed and the corresponding variables set, e.g. which plugins to install, whether to force-overwrite, a custom package path, and whether to enable debug output.
+
+![image](images/edUqM-zFoCgSLuhswp5lMJc9cd2xffXoZYJ4sAHx8bQ.png)
+
+* \-h, --help
+
+Show this help
+
+* \-p
+
+Location of the plugins package; by default the package is looked for under \$GPHOME/script/static
+
+* \--all
+
+Install all plugins
+
+* \--force
+
+Overwrite old plugins
+
+* \--plugins
+
+Install several plugins; the plugin names follow --plugins
+
+* \--PLUGIN\_NAME
+
+Install one specific plugin, e.g. --dblink
+
+* \--debug
+
+Print debug output
+
+**For details, see the official documentation**
+
+## 2 Initialize the Deploy class
+The parsed arguments are assigned to the Deploy class's member variables.
+
+![image](images/pkFcRKjQtiMkENwnjalIv9aeJrr4wQV1bj7rRz9oUkU.png)
+
+A few key pieces of code:
+
+* Environment variables and key directories
+
+```python
+GAUSSHOME = os.getenv("GAUSSHOME")
+GPHOME = os.getenv('GPHOME')
+LIB_POSTGRESQL_DIR = f'{GAUSSHOME}/lib/postgresql'
+EXTENSION_DIR = f'{GAUSSHOME}/share/postgresql/extension'
+CONTRIB_DIR = f'{GAUSSHOME}/share/postgresql/contrib'
+```
+This mainly obtains the directory of the downloaded plugin package and the system directories where openGauss plugins are installed.
+
+* Plugin package directory
+
+If the -p option is not set, the default directory is used
+
+```python
+self.plugin_pkg = f"{GPHOME}/script/static/Plugins-*.tar.gz"
+```
+* Directory where the plugin package is unpacked
+
+```python
+self._plugins_temp_dir = f"{GPHOME}/script/static/plugins/"
+self.executor(f'mkdir -p {self._plugins_temp_dir}')
+logger.debug(f"deconpress plugin package to {self._plugins_temp_dir}")
+self.local_execute(f"tar -xf {self.plugin_pkg} -C {self._plugins_temp_dir}")
+```
+* Cluster-related
+
+```bash
+-X Specify the XML file path
+```
+For a cluster install (using the gs\_install\_plugin tool), the location of cluster\_config.xml must be specified; the IP addresses of the concrete nodes are obtained, and connectivity checked, via:
+
+```python
+self.nodenames = self.read_cluster_hosts(self.xml)
+self.ssh_tool = SshTool(self.nodenames)
+```
+* Read the desc.json file
+
+After the MogDB plugin package is unpacked, the folder contains a desc.json file holding the version information, the plugin names, and the target installation paths. Each plugin name matches the name of a plugin directory at the same level, and the files node lists the files inside that plugin directory.
+
+![image](images/zHvEfkd3q_7XwpdH2aqUWOFH_XsNM8OdYE_WyuOdHCI.png)
+
+```json
+{
+ "version": "3.0.1",
+ "plugins": [
+ {
+ "name": "pg_trgm",
+ "files": {
+ "pg_trgm.so": "lib/postgresql",
+ "pg_trgm.control": "share/postgresql/extension",
+ "pg_trgm--1.0.sql": "share/postgresql/extension",
+ "pg_trgm--unpackaged--1.0.sql": "share/postgresql/extension"
+ }
+ },
+ {
+ "name": "dblink",
+ "files": {
+ "dblink.so": "lib/postgresql",
+ "dblink.control": "share/postgresql/extension",
+ "dblink--1.0.sql": "share/postgresql/extension",
+ "dblink--unpackaged--1.0.sql": "share/postgresql/extension"
+ }
+ },
+ {
+ "name": "orafce",
+ "files": {
+ "orafce.so": "lib/postgresql",
+ "orafce.control": "share/postgresql/extension",
+ "orafce--3.17.sql": "share/postgresql/extension"
+ }
+ },
+ {
+ "name": "wal2json",
+ "files": {
+ "wal2json.so": "lib/postgresql"
+ }
+ },
+ {
+ "name": "pg_repack",
+ "files": {
+ "pg_repack": "bin",
+ "pg_repack.so": "lib/postgresql",
+ "pg_repack.control": "share/postgresql/extension",
+ "pg_repack--1.4.6.sql": "share/postgresql/extension"
+ }
+ },
+ {
+ "name": "pg_bulkload",
+ "files": {
+ "pg_bulkload": "bin",
+ "pg_bulkload.so": "lib/postgresql",
+ "pg_bulkload.control": "share/postgresql/extension",
+ "pg_bulkload--1.0.sql": "share/postgresql/extension",
+ "pg_bulkload.sql": "share/postgresql/extension",
+ "pg_bulkload--unpackaged--1.0.sql": "share/postgresql/extension",
+ "uninstall_pg_bulkload.sql": "share/postgresql/extension",
+ "pg_timestamp.so": "lib/postgresql",
+ "pg_timestamp.sql": "share/postgresql/contrib",
+ "postgresql": "bin",
+ "uninstall_pg_timestamp.sql": "share/postgresql/contrib"
+ }
+ },
+ {
+ "name": "pg_prewarm",
+ "files": {
+ "pg_prewarm.so": "lib/postgresql",
+ "pg_prewarm.control": "share/postgresql/extension",
+ "pg_prewarm--1.1.sql": "share/postgresql/extension"
+ }
+ },
+ {
+ "name": "dolphin",
+ "files": {
+ "dolphin.so": "lib/postgresql",
+ "dolphin.control": "share/postgresql/extension",
+ "dolphin--1.0.sql": "share/postgresql/extension"
+ }
+ },
+ {
+ "name": "whale",
+ "files": {
+ "whale.so": "lib/postgresql",
+ "whale.control": "share/postgresql/extension",
+ "whale--1.0.sql": "share/postgresql/extension"
+ }
+ },
+ {
+ "name": "postgis",
+ "files": {
+ "libjson-c.so.2": "lib",
+ "libgeos_c.so.1": "lib",
+ "libproj.so.9": "lib",
+ "libgeos-3.6.2.so": "lib",
+ "libgdal.so.1": "lib",
+ "liblwgeom-2.4.so.0": "lib",
+ "postgis-2.4.so": "lib/postgresql",
+ "rtpostgis-2.4.so": "lib/postgresql",
+ "postgis_topology-2.4.so": "lib/postgresql",
+ "postgis.control": "share/postgresql/extension",
+ "postgis--2.4.2.sql": "share/postgresql/extension",
+ "postgis_raster--2.4.2.sql": "share/postgresql/extension",
+ "postgis_raster.control": "share/postgresql/extension",
+ "postgis_topology--2.4.2.sql": "share/postgresql/extension",
+ "postgis_topology.control": "share/postgresql/extension"
+ }
+ }
+ ]
+}
+```
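+A hedged aside: when desc.json is edited by hand, as is done later in this article, a quick syntax check before repackaging avoids shipping a broken file:
+
+```bash
+# json.tool only parses; a non-zero exit means the edit broke the JSON
+python3 -m json.tool desc.json > /dev/null && echo "desc.json is valid JSON"
+```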
+## 3 Perform the deployment
+![image](images/hnflVLcPNAr3tYz6tImWv7ATq03s1gUimTuoXpFbkDk.png)
+
+* Parse the desc.json file and store the parsed result in datas\[\]
+
+```python
+ datas = []
+ # json file to obtain plugin information, add to datas
+ with open(desc_path, 'r', encoding='utf-8') as f:
+ result = json.load(f)
+ if "all" in para:
+ datas = result.get("plugins")
+ else:
+ for plugin in result.get("plugins", {}):
+ if plugin.get("name") in para:
+ datas.append(plugin)
+ if not datas:
+ raise Exception("Invalid plugins: %s" % para)
+```
+* Iterate over the plugin entries in datas\[\], copy the files, and complete the installation
+
+```python
+# plugin file copy
+for data in datas:
+ name = data.get("name")
+ if not name:
+ continue
+ for file, path in data.get("files").items():
+ source_path = '/'.join([self.plugins_dir, name, file])
+ target_dir = '/'.join([GAUSSHOME, path])
+ self.copy_file(source_path, target_dir)
+ print("SUCCESS: %s" % name)
+```
+* The file-copy procedure
+
+```python
+def copy_file(self, source, target_dir):
+ file_name = os.path.basename(source)
+
+ if self.local_mode:
+ if os.path.exists(f"{target_dir}/{file_name}") and not self.force:
+ print("Warning: file %s already exists, skip copy" % (file_name))
+ return
+ return shutil.copy2(source, target_dir)
+ for host in self.nodenames:
+ if not self.force:
+ _, output = self.ssh_tool.getSshStatusOutput(f"test -f {target_dir}/{file_name} && echo 1 || echo 0", hostList=[host])
+ output = output.split(':')[-1]
+ if int(output) == 1:
+ print("Warning: [%s]: file %s already exists, skip copy" % (host, file_name))
+ continue
+ self.ssh_tool.scpFiles(source, target_dir, hostList=[host])
+```
+For a local install it copies directly: `shutil.copy2(source, target_dir)`
+
+For a cluster install it copies with scp: `self.ssh_tool.scpFiles(source, target_dir, hostList=[host])`
+
+
+
+# Porting an openGauss plugin with gs\_install\_plugin\_local
+## Prepare the environment
+* Download and install the plugin package
+
+```bash
+[omm@host-10-208-76-194 script]$ cd /opt/mogdb/tool/script/
+[omm@host-10-208-76-194 script]$ mkdir static
+[omm@host-10-208-76-194 script]$ cd static
+[omm@host-10-208-76-194 static]$ wget https://cdn-mogdb.enmotech.com/mogdb-media/3.0.1/Plugins-3.0.1-openEuler-arm64.tar.gz
+[omm@host-10-208-76-194 script]$ ./gs_install_plugin_local
+SUCCESS: pg_trgm
+SUCCESS: dblink
+SUCCESS: orafce
+SUCCESS: wal2json
+SUCCESS: pg_repack
+SUCCESS: pg_bulkload
+SUCCESS: pg_prewarm
+SUCCESS: dolphin
+SUCCESS: whale
+SUCCESS: postgis
+
+```
+* Install the plugins (all by default)
+
+![image](images/0VHoHHjvJv3SbUbRClBO96vF9era1KSfM4JABrhNqCI.png)
+
+* Create a plugin
+
+```sql
+MogDB=# create extension orafce;
+CREATE EXTENSION
+MogDB=#
+
+```
+* Download and build an openGauss plugin
+
+See the chapter on building and installing openGauss plugins in "MogDB秘籍 之 乾坤大挪移".
+
+## Porting the plugin
+> Using the lo plugin as the example
+
+* Create the lo directory
+
+```bash
+[omm@host-10-208-76-194 plugins]$ cd /opt/mogdb/tool/script/static/plugins/plugins
+[omm@host-10-208-76-194 plugins]$ ls
+dblink desc.json dolphin orafce pg_bulkload pg_prewarm pg_repack pg_trgm postgis wal2json whale
+[omm@host-10-208-76-194 plugins]$ mkdir lo
+[omm@host-10-208-76-194 plugins]$ ls
+dblink desc.json dolphin lo orafce pg_bulkload pg_prewarm pg_repack pg_trgm postgis wal2json whale
+[omm@host-10-208-76-194 plugins]$
+```
+* Copy the openGauss plugin files into the MogDB plugin directory and fix the ownership
+
+```bash
+[root@host-10-208-76-194 lo]# cp lo.so lo.control lo--1.0.sql /opt/mogdb/tool/script/static/plugins/plugins/lo
+[root@host-10-208-76-194 lo]# chown -R omm:omm /opt/mogdb/tool/script/static/plugins/plugins/lo
+```
+* Edit desc.json
+
+![image](images/CIqNKVJAIA0bRKmbhN87vkvw3u9GVIU1dqvtk3C6oAU.png)
+
+* Repackage
+
+```bash
+tar -zcvf Plugins-3.0.1-openEuler-arm64.tar.gz plugins/
+```
+Note: repackaging is a bit tedious. You could instead modify the code to comment out the unpacking step, or, more elegantly, add a flag controlling whether to unpack again; if you don't repackage, unpacking will overwrite desc.json with the old version, and installing the new plugin will fail.
+
+
+
+# Summary
+All in all these two tools are quite usable, and their implementation is fairly simple. Knowing how they work, you can install by hand along the few key paths, or keep troubleshooting with this article's analysis when an installation goes wrong.
\ No newline at end of file
diff --git "a/content/zh/post/Frank/openGauss MogDB\350\260\203\347\224\250C FUNCTION.md" "b/content/zh/post/Frank/openGauss MogDB\350\260\203\347\224\250C FUNCTION.md"
new file mode 100644
index 0000000000000000000000000000000000000000..0c50e5cffbabcbb8c1c4cf149af2ef1a13ba4600
--- /dev/null
+++ "b/content/zh/post/Frank/openGauss MogDB\350\260\203\347\224\250C FUNCTION.md"
@@ -0,0 +1,182 @@
++++
+
+title = "Calling a C FUNCTION from openGauss/MogDB"
+
+date = "2022-05-16"
+
+tags = ["openGauss/MogDB C FUNCTION"]
+
+archives = "2022-05"
+
+author = "xingchen"
+
+summary = "Calling a C FUNCTION from openGauss/MogDB"
+
+img = "/zh/post/xingchen/title/8f11c785-f027-47b5-a1ba-726edaacb2f2.png"
+
+times = "18:40"
+
++++
+# Calling a C FUNCTION from openGauss/MogDB
+# Abstract
+> I previously wrote a post on [implementing custom postgresql functions as a contrib extension](https://blog.csdn.net/xk_xx/article/details/123011397 "postgresql自定义函数实现,通过contrib模块进行扩展"). After some discussion with an Enmotech engineer today, it turns out the same thing can be done in MogDB. I had assumed the complete openGauss source tree would be needed, but with the engineer's pointers I learned that plugin development is possible with just the officially released package. What's more, using postgres-style C FUNCTIONs is even easier than developing an extension. Thanks to the Enmotech experts for the leads and ideas :+1: :+1:
+
+# Preparing the environment
+* Install MogDB
+Follow the official documentation; it is already very detailed.
+* Server environment
+A local CentOS 7.9 virtual machine
+
+***Note: build under the omm user where possible, to avoid unnecessary environment problems***
+
+# Code
+* C code
+Basically the same as postgres extension development; the key parts are lines 4, 5, and 6.
+
+```cpp
+#include "postgres.h"
+#include "fmgr.h"
+
+PG_MODULE_MAGIC;
+extern "C" Datum add_ab(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(add_ab);
+
+Datum
+add_ab(PG_FUNCTION_ARGS)
+{
+ int32 arg_a = PG_GETARG_INT32(0);
+ int32 arg_b = PG_GETARG_INT32(1);
+
+ PG_RETURN_INT32(arg_a + arg_b);
+}
+```
+* CMakeLists.txt
+
+```makefile
+cmake_minimum_required (VERSION 2.8)
+
+project (gs_plug)
+set(CMAKE_CXX_FLAGS "-Wall -std=c++11 -Wall")
+set(CMAKE_CXX_FLAGS_DEBUG "-g3")
+set(CMAKE_CXX_FLAGS_RELEASE "-O2")
+set(CMAKE_BUILD_TYPE Debug)
+
+set(MOG_INCLUDE /opt/mogdb/app/include/postgresql/server)
+set(MOG_LIBPATH /opt/mogdb/app/lib/postgresql/proc_srclib)
+include_directories(${MOG_INCLUDE})
+
+aux_source_directory(. DIR_SRCS)
+add_library (${PROJECT_NAME} SHARED ${DIR_SRCS})
+
+install(TARGETS ${PROJECT_NAME} DESTINATION ${MOG_LIBPATH})
+```
+***Key point 1: get the directory containing the header files***
+
+```Plain Text
+[omm@vmpc funcs]$ pg_config --includedir
+/opt/mogdb/app/include
+```
+Required header path: \`pg\_config --includedir\`/postgresql/server
+
+***Key point 2: the C function installation path***
+
+```Plain Text
+[omm@vmpc funcs]$ pg_config --pkglibdir
+/opt/mogdb/app/lib/postgresql
+```
+Installation path: \`pg\_config --pkglibdir\`/proc\_srclib/
+
+
+
+# Build & install
+```Plain Text
+[omm@vmpc funcs]$ mkdir build
+[omm@vmpc funcs]$ cd build/
+[omm@vmpc build]$ cmake ../
+CMake Deprecation Warning at CMakeLists.txt:1 (cmake_minimum_required):
+ Compatibility with CMake < 2.8.12 will be removed from a future version of
+ CMake.
+
+ Update the VERSION argument value or use a ... suffix to tell
+ CMake that the project does not need compatibility with older versions.
+
+
+-- The C compiler identification is GNU 4.8.5
+-- The CXX compiler identification is GNU 4.8.5
+-- Detecting C compiler ABI info
+-- Detecting C compiler ABI info - done
+-- Check for working C compiler: /bin/cc - skipped
+-- Detecting C compile features
+-- Detecting C compile features - done
+-- Detecting CXX compiler ABI info
+-- Detecting CXX compiler ABI info - done
+-- Check for working CXX compiler: /bin/c++ - skipped
+-- Detecting CXX compile features
+-- Detecting CXX compile features - done
+-- Configuring done
+-- Generating done
+-- Build files have been written to: /opt/mogdb/funcs/build
+[omm@vmpc build]$ make
+[ 50%] Building CXX object CMakeFiles/gs_plug.dir/testfunc.cpp.o
+[100%] Linking CXX shared library libgs_plug.so
+[100%] Built target gs_plug
+[omm@vmpc build]$ make install
+Consolidate compiler generated dependencies of target gs_plug
+[100%] Built target gs_plug
+Install the project...
+# Build & install
+```Plain Text
+[omm@vmpc funcs]$ mkdir build
+[omm@vmpc funcs]$ cd build/
+[omm@vmpc build]$ cmake ../
+CMake Deprecation Warning at CMakeLists.txt:1 (cmake_minimum_required):
+  Compatibility with CMake < 2.8.12 will be removed from a future version of
+  CMake.
+
+  Update the VERSION argument value or use a ... suffix to tell
+  CMake that the project does not need compatibility with older versions.
+
+
+-- The C compiler identification is GNU 4.8.5
+-- The CXX compiler identification is GNU 4.8.5
+-- Detecting C compiler ABI info
+-- Detecting C compiler ABI info - done
+-- Check for working C compiler: /bin/cc - skipped
+-- Detecting C compile features
+-- Detecting C compile features - done
+-- Detecting CXX compiler ABI info
+-- Detecting CXX compiler ABI info - done
+-- Check for working CXX compiler: /bin/c++ - skipped
+-- Detecting CXX compile features
+-- Detecting CXX compile features - done
+-- Configuring done
+-- Generating done
+-- Build files have been written to: /opt/mogdb/funcs/build
+[omm@vmpc build]$ make
+[ 50%] Building CXX object CMakeFiles/gs_plug.dir/testfunc.cpp.o
+[100%] Linking CXX shared library libgs_plug.so
+[100%] Built target gs_plug
+[omm@vmpc build]$ make install
+Consolidate compiler generated dependencies of target gs_plug
+[100%] Built target gs_plug
+Install the project...
+-- Install configuration: "Debug"
+-- Installing: /opt/mogdb/app/lib/proc_srclib/libgs_plug.so
+```
+**The commands, run in order:**
+
+```Plain Text
+mkdir build
+cd build
+cmake ../
+make
+make install
+```
+**Confirm the installation**
+
+```Plain Text
+[omm@vmpc build]$ ll /opt/mogdb/app/lib/proc_srclib/libgs_plug.so
+-rwxr-xr-x. 1 omm dbgrp 215696 Apr 2 00:17 /opt/mogdb/app/lib/proc_srclib/libgs_plug.so
+
+```
+# Verification
+* Connect to MogDB
+
+```Plain Text
+[omm@vmpc ~]$ pgcli -p 26000 -d postgres
+Server: PostgreSQL 9.2.4
+Version: 3.4.1
+Home: http://pgcli.com
+postgres>
+```
+* Create the C FUNCTION
+
+```sql
+postgres> CREATE FUNCTION add_ab(a int ,b int ) RETURNS integer
+     AS 'testfunc.so', 'add_ab'
+     LANGUAGE C STRICT;
+CREATE FUNCTION
+Time: 0.039s
+```
+Note that the file named in the AS clause must resolve to the shared object actually installed under proc\_srclib; with the CMake project above that is libgs\_plug.so, so adjust the name to match your build output.
+* Inspect the function
+
+![image](images/8f11c785-f027-47b5-a1ba-726edaacb2f2.png)
+
+* Call the function
+
+```sql
+postgres> select add_ab(a := 4, b := 2);
++--------+
+| add_ab |
+|--------|
+| 6      |
++--------+
+SELECT 1
+Time: 0.033s
+postgres>
+
+```
\ No newline at end of file
diff --git "a/content/zh/post/Frank/openGauss\350\204\232\346\234\254\346\272\220\347\240\201\346\265\205\346\236\220\357\274\2101\357\274\211\342\200\224\342\200\224 simpleInstall.md" "b/content/zh/post/Frank/openGauss\350\204\232\346\234\254\346\272\220\347\240\201\346\265\205\346\236\220\357\274\2101\357\274\211\342\200\224\342\200\224 simpleInstall.md"
new file mode 100644
index 0000000000000000000000000000000000000000..320dc7669bc09f4079105aa1b9b218e30bf7856e
--- /dev/null
+++ "b/content/zh/post/Frank/openGauss\350\204\232\346\234\254\346\272\220\347\240\201\346\265\205\346\236\220\357\274\2101\357\274\211\342\200\224\342\200\224 simpleInstall.md"
@@ -0,0 +1,319 @@
+# openGauss script internals (1): simpleInstall
+# Abstract
+> After building openGauss, or downloading its binaries, the official way to do a simple installation is the simpleInstall script. This article walks through what the script does, to show how the simple installation works under the hood.
+
+
+
+# openGauss simple installation
+## How PostgreSQL initializes a database cluster
+For contrast, PostgreSQL ships no installation script at all; initializing a database cluster takes a single command:
+
+```bash
+initdb -D $PGDATA
+```
+This creates the cluster under the given PGDATA directory; see the earlier article [postgresql 15源码浅析(1)—— postgres中的1号数据库](https://www.modb.pro/db/405982).
+
+## Installing openGauss with the script
+![image](images/eqpxS-ZiISSZcQTwUIsjH-xwENZifmj2PPNba8BKV0I.png)
+
+The openGauss simple installation runs the install.sh script under simpleInstall.
+
+# simpleInstall source walkthrough
+## step 1: read the command-line arguments (get\_param)
+First, the arguments the script accepts:
+
+```bash
+function usage()
+{
+    echo "
+Usage: sh $0 -w password
+Arguments:
+   -w    login password
+   -p    datanode port, default 5432
+   --multinode    if specify, will install master_slave cluster. default install single node.
+   -h, --help     Show this help, then exit
+   "
+}
+```
+There are three of them:
+
+\-w sets the superuser password. It is mandatory: without it the later initialization cannot finish. This is one difference from PostgreSQL, which does not force a superuser password and uses -W (uppercase) to set one interactively; -w (lowercase) is a new argument introduced by openGauss;
+
+\-p sets the server port; optional, defaulting to 5432;
+
+\--multinode selects one of the two supported layouts, i.e. single node or primary/standby;
+
+The difference between -w and -W
+
+![image](images/bC0K0WfvrI-N1f4UtiZg8DHwafzyD9YxsLRgtmm2zyY.png)
+
+Both do the same job, but -W is the behavior kept from PostgreSQL: interactive, prompting for the password by hand. -w passes the password in on the command line. My guess is that openGauss added -w to stay compatible with the existing usage, while making the password mandatory for safety's sake.
+
+
+
+## step 2: validate the arguments (check\_param)
+The main checks, restated in the sketch after this list, are:
+
+1. \-w must be supplied and must not be empty
+2. the password must be longer than 8 characters
+3. the installation must not run as root
+4. if -p is not given, the default port 5432 is used, otherwise the -p value
+5. if --multinode is set, the standby port is the primary port + 200
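+
+A rough restatement of those rules in code (a sketch only, not the script itself; `args` and `fail` are hypothetical stand-ins):
+
+```python
+import os
+
+def check_param(args, fail):
+    # Rules 1 and 2: mandatory password, longer than 8 characters.
+    if not args.password:
+        fail("-w is required and must not be empty")
+    if len(args.password) <= 8:
+        fail("password must be longer than 8 characters")
+    # Rule 3: refuse to run as root.
+    if os.geteuid() == 0:
+        fail("do not run the installation as root")
+    # Rule 4: default port.
+    port = args.port or 5432
+    # Rule 5: standby port offset in primary/standby mode.
+    slave_port = port + 200 if args.multinode else None
+    return port, slave_port
+```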
+
+
+## step 3: check the installation environment (check\_install\_env)
+1. Check the directories: in single-node mode, whether the single\_node directory exists under data and whether it is empty; in primary/standby mode, the same for master and slave;
+2. Check whether the port is already in use;
+3. Check that the unix socket is accessible and not occupied
+
+
+
+## step 4: check the operating system settings (check\_os)
+1. Check the system shared memory and page size
+
+![image](images/OqBg0ocJYlX1ahwRXdt65SGstd9yCDU8dRTKaxrwO_k.png)
+
+2. On x86 it also checks rdtscp, presumably related to instruction ordering
+
+> I'll leave this one as an open question; I don't know it well yet and will fill it in later.
+
+
+
+## step 5: change the owner and mode of the application directory (change\_gausshome\_owner)
+1. Change the owner: chown omm:dbgrp \$app
+2. Change the mode: chmod 700 \$app
+
+
+
+## step 6: set the omm user's environment variables
+The variables set are PATH, GAUSSHOME, LD\_LIBRARY\_PATH, GS\_CLUSTER\_NAME, plus ulimit -n 1000000.
+
+![image](images/KrMpACrr5KZlCZEmR_IoUv8oujgifomkHG7sGM_2Yko.png)
+
+The image above shows the resulting .bashrc of the omm user.
+
+
+
+## step 7-1: initialize the database (single\_install)
+1. Initialize the database cluster; the basic flow differs little from [postgresql 15源码浅析(1)—— postgres中的1号数据库](https://www.modb.pro/db/405982);
+
+> Open question number two: a follow-up will compare how the initdb flow differs between PostgreSQL and openGauss.
+
+```bash
+gs_initdb -w $password -D $app/data/single_node --nodename "sgnode" --locale="en_US.UTF-8"
+```
+2. Patch the port in postgresql.conf
+
+```bash
+sed -i "/^#port =/c\port = $port" $app/data/single_node/postgresql.conf
+```
+3. Start the database
+
+```bash
+gs_ctl start -D $app/data/single_node -Z single_node
+```
+## step 7-2: initialize the database (master\_standby\_install)
+### init\_db
+```bash
+function init_db() {
+    info "[init primary datanode.]"
+    gs_initdb -D $app/data/master --nodename=datanode1 -E UTF-8 --locale=en_US.UTF-8 -U $user -w $password
+    info "[init slave datanode.]"
+    gs_initdb -D $app/data/slave --nodename=datanode2 -E UTF-8 --locale=en_US.UTF-8 -U $user -w $password
+}
+```
+The primary and the standby are initialized separately.
+
+### config\_db
+```bash
+function config_db() {
+    info "[config datanode.]"
+    local -a ip_arr
+    local -i index=0
+    for line in $(/sbin/ifconfig -a|grep inet|grep -v 127.0.0.1|grep -v inet6|awk '{print $2}'|tr -d "addr:")
+    do
+        ip_arr[index]=$line
+        let index=$index+1
+    done
+    sed -i "/^#listen_addresses/c\listen_addresses = 'localhost,${ip_arr[0]}'" $app/data/master/postgresql.conf
+    sed -i "/^#listen_addresses/c\listen_addresses = 'localhost,${ip_arr[0]}'" $app/data/slave/postgresql.conf
+    sed -i "/^#port/c\port = $port" $app/data/master/postgresql.conf
+    sed -i "/^#port/c\port = $slave_port" $app/data/slave/postgresql.conf
+    sed -i "/^#replconninfo1/c\replconninfo1 = 'localhost=${ip_arr[0]} localport=$(($port+1)) localheartbeatport=$(($port+5)) localservice=$(($port+4)) remotehost=${ip_arr[0]} remoteport=$(($slave_port+1)) remoteheartbeatport=$(($slave_port+5)) remoteservice=$(($slave_port+4))'" $app/data/master/postgresql.conf
+    sed -i "/^#replconninfo1/c\replconninfo1 = 'localhost=${ip_arr[0]} localport=$(($slave_port+1)) localheartbeatport=$(($slave_port+5)) localservice=$(($slave_port+4)) remotehost=${ip_arr[0]} remoteport=$(($port+1)) remoteheartbeatport=$(($port+5)) remoteservice=$(($port+4))'" $app/data/slave/postgresql.conf
+    echo "remote_read_mode = non_authentication" | tee -a $app/data/master/postgresql.conf $app/data/slave/postgresql.conf
+    echo "host all all ${ip_arr[0]}/32 trust" | tee -a $app/data/master/pg_hba.conf $app/data/slave/pg_hba.conf
+}
+```
+This mainly adjusts the service ports of primary and standby, the replication ports between them, and the client access rules, touching postgresql.conf and pg\_hba.conf.
+
+### start\_db
+![image](images/KUsItz-toPUDk-Buu-IDePka9z6bFqkA6gwPYsHD6OY.png)
+
+```bash
+-b, --mode=MODE        the mode of building the datanode or coordinator.MODE can be "full", "incremental", "auto", "standby_full", "copy_secure_files", "copy_upgrade_file", "cross_cluster_full", "cross_cluster_incremental", "cross_cluster_standby_full"
+-M                     the database start as the appointed mode
+-D, --pgdata=DATADIR   location of the database storage area
+```
+## step 8: import the sample SQL (import\_sql)
+```bash
+read -p "Would you like to create a demo database (yes/no)?
" input +``` +接收输入,是否要建立范例数据库 + + + +导入两个sql文件中的内容 + +```bash +function fn_load_demoDB() +{ + cd $shell_path + gsql -d postgres -p $port -f school.sql + gsql -d postgres -p $port -f finance.sql +} +``` + + +# 结束 +以上步骤执行成功后,即完成了数据库集簇的初始化和启动过程。如果大家在启动本地安装过程中遇到问题,可以对照以上步骤进行排查。 + +# 启动日志 +```bash +[omm@host-10-208-88-234 simpleInstall]$ sh install.sh -w xk.xmx190035 +[step 1]: check parameter +[step 2]: check install env and os setting +[step 3]: change_gausshome_owner +[step 4]: set environment variables + +/home/omm/.bashrc: line 13: ulimit: open files: cannot modify limit: Operation not permitted +[step 6]: init datanode +The files belonging to this database system will be owned by user "omm". +This user must also own the server process. + +The database cluster will be initialized with locale "en_US.UTF-8". +The default database encoding has accordingly been set to "UTF8". +The default text search configuration will be set to "english". + +creating directory /home/omm/git/openGauss-server/data/single_node ... ok +creating subdirectories ... ok +selecting default max_connections ... 100 +selecting default shared_buffers ... 32MB +creating configuration files ... ok +Begin init undo subsystem meta. +[INIT UNDO] Init undo subsystem meta successfully. +creating template1 database in /home/omm/git/openGauss-server/data/single_node/base/1 ... The core dump path is an invalid directory +2022-05-23 12:30:32.064 [unknown] [unknown] localhost 281459036192784 0[0:0#0] [BACKEND] WARNING: macAddr is 64022/1056020634, sysidentifier is 4195761905/2560264540, randomNum is 246122844 +ok +initializing pg_authid ... ok +setting password ... ok +initializing dependencies ... ok +loading PL/pgSQL server-side language ... ok +creating system views ... ok +creating performance views ... ok +loading system objects' descriptions ... ok +creating collations ... ok +creating conversions ... ok +creating dictionaries ... ok +setting privileges on built-in objects ... ok +initialize global configure for bucketmap length ... ok +creating information schema ... ok +loading foreign-data wrapper for distfs access ... ok +loading foreign-data wrapper for hdfs access ... ok +loading foreign-data wrapper for log access ... ok +loading hstore extension ... ok +loading foreign-data wrapper for MOT access ... ok +loading security plugin ... ok +update system tables ... ok +creating snapshots catalog ... ok +vacuuming database template1 ... ok +copying template1 to template0 ... ok +copying template1 to postgres ... ok +freezing database template0 ... ok +freezing database template1 ... ok +freezing database postgres ... ok + +WARNING: enabling "trust" authentication for local connections +You can change this by editing pg_hba.conf or using the option -A, or +--auth-local and --auth-host, the next time you run gs_initdb. + +Success. You can now start the database server of single node using: + + gaussdb -D /home/omm/git/openGauss-server/data/single_node --single_node +or + gs_ctl start -D /home/omm/git/openGauss-server/data/single_node -Z single_node -l logfile + +[step 7]: start datanode +[2022-05-23 12:30:51.410][991625][][gs_ctl]: gs_ctl started,datadir is /home/omm/git/openGauss-server/data/single_node +[2022-05-23 12:30:51.446][991625][][gs_ctl]: waiting for server to start... +.0 LOG: [Alarm Module]can not read GAUSS_WARNING_TYPE env. 
+ +0 LOG: [Alarm Module]Host Name: host-10-208-88-234 + +0 LOG: [Alarm Module]Host IP: 10.208.88.234 + +0 LOG: [Alarm Module]Cluster Name: dbCluster + +0 LOG: [Alarm Module]Get real path of alarmItem.conf failed! + +0 WARNING: failed to open feature control file, please check whether it exists: FileName=gaussdb.version, Errno=2, Errmessage=No such file or directory. +0 WARNING: failed to parse feature control file: gaussdb.version. +0 WARNING: Failed to load the product control file, so gaussdb cannot distinguish product version. +The core dump path is an invalid directory +2022-05-23 12:30:51.581 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: when starting as multi_standby mode, we couldn't support data replicaton. +gaussdb.state does not exist, and skipt setting since it is optional.2022-05-23 12:30:51.587 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: [Alarm Module]can not read GAUSS_WARNING_TYPE env. + +2022-05-23 12:30:51.587 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: [Alarm Module]Host Name: host-10-208-88-234 + +2022-05-23 12:30:51.636 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: [Alarm Module]Host IP: 10.208.88.234 + +2022-05-23 12:30:51.636 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: [Alarm Module]Cluster Name: dbCluster + +2022-05-23 12:30:51.636 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: [Alarm Module]Get real path of alarmItem.conf failed! + +2022-05-23 12:30:51.641 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: loaded library "security_plugin" +2022-05-23 12:30:51.642 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] WARNING: could not create any HA TCP/IP sockets +2022-05-23 12:30:51.642 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] WARNING: could not create any HA TCP/IP sockets +2022-05-23 12:30:51.645 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] WARNING: No explicit IP is configured for listen_addresses GUC. +2022-05-23 12:30:51.645 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: InitNuma numaNodeNum: 1 numa_distribute_mode: none inheritThreadPool: 0. 
+2022-05-23 12:30:51.645 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: reserved memory for backend threads is: 220 MB +2022-05-23 12:30:51.645 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: reserved memory for WAL buffers is: 128 MB +2022-05-23 12:30:51.645 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: Set max backend reserve memory is: 348 MB, max dynamic memory is: 11027 MB +2022-05-23 12:30:51.645 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: shared memory 400 Mbytes, memory context 11375 Mbytes, max process memory 12288 Mbytes +2022-05-23 12:30:51.681 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [CACHE] LOG: set data cache size(402653184) +2022-05-23 12:30:51.726 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [CACHE] LOG: set metadata cache size(134217728) +2022-05-23 12:30:51.741 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [SEGMENT_PAGE] LOG: Segment-page constants: DF_MAP_SIZE: 8156, DF_MAP_BIT_CNT: 65248, DF_MAP_GROUP_EXTENTS: 4175872, IPBLOCK_SIZE: 8168, EXTENTS_PER_IPBLOCK: 1021, IPBLOCK_GROUP_SIZE: 4090, BMT_HEADER_LEVEL0_TOTAL_PAGES: 8323072, BktMapEntryNumberPerBlock: 2038, BktMapBlockNumber: 25, BktBitMaxMapCnt: 512 +2022-05-23 12:30:51.755 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: gaussdb: fsync file "/home/omm/git/openGauss-server/data/single_node/gaussdb.state.temp" success +2022-05-23 12:30:51.755 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: create gaussdb state file success: db state(STARTING_STATE), server mode(Normal), connection index(1) +2022-05-23 12:30:51.834 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: max_safe_fds = 976, usable_fds = 1000, already_open = 14 +The core dump path is an invalid directory +2022-05-23 12:30:51.839 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: user configure file is not found, it will be created. +2022-05-23 12:30:51.842 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: the configure file /home/omm/git/openGauss-server/etc/gscgroup_omm.cfg doesn't exist or the size of configure file has changed. Please create it by root user! +2022-05-23 12:30:51.842 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [BACKEND] LOG: Failed to parse cgroup config file. +2022-05-23 12:30:51.862 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [EXECUTOR] WARNING: Failed to obtain environment value $GAUSSLOG! +2022-05-23 12:30:51.862 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [EXECUTOR] DETAIL: N/A +2022-05-23 12:30:51.862 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [EXECUTOR] CAUSE: Incorrect environment value. +2022-05-23 12:30:51.862 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [EXECUTOR] ACTION: Please refer to backend log for more details. +2022-05-23 12:30:51.863 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [EXECUTOR] WARNING: Failed to obtain environment value $GAUSSLOG! +2022-05-23 12:30:51.863 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [EXECUTOR] DETAIL: N/A +2022-05-23 12:30:51.863 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [EXECUTOR] CAUSE: Incorrect environment value. +2022-05-23 12:30:51.863 [unknown] [unknown] localhost 281460165246992 0[0:0#0] 0 [EXECUTOR] ACTION: Please refer to backend log for more details. 
+ +[2022-05-23 12:30:52.460][991625][][gs_ctl]: done +[2022-05-23 12:30:52.460][991625][][gs_ctl]: server started (/home/omm/git/openGauss-server/data/single_node) +import sql file +Would you like to create a demo database (yes/no)? yes +Load demoDB [school,finance] success. +[complete successfully]: You can start or stop the database server using: + gs_ctl start|stop|restart -D $GAUSSHOME/data/single_node -Z single_node + +``` +# 备注 +上面留下两个坑,待后续填上。 + +|没填上的坑|挖坑的时间| +| ----- | ----- | +|x86下的rdtscp|2022-05-23| +|initdb过程postgres和openGauss的有哪些差别|2022-05-23| + diff --git "a/content/zh/post/Frank/openGauss\350\264\246\346\234\254\346\225\260\346\215\256\345\272\223\357\274\214\344\275\240\344\270\215\347\237\245\351\201\223\347\232\204\351\202\243\344\272\233\344\272\213\345\204\277openGauss\350\264\246\346\234\254\346\225\260\346\215\256\345\272\223\357\274\214\344\275\240\344\270\215\347\237\245\351\201\223\347\232\204\351\202\243\344\272\233\344\272\213\345\204\277.md" "b/content/zh/post/Frank/openGauss\350\264\246\346\234\254\346\225\260\346\215\256\345\272\223\357\274\214\344\275\240\344\270\215\347\237\245\351\201\223\347\232\204\351\202\243\344\272\233\344\272\213\345\204\277openGauss\350\264\246\346\234\254\346\225\260\346\215\256\345\272\223\357\274\214\344\275\240\344\270\215\347\237\245\351\201\223\347\232\204\351\202\243\344\272\233\344\272\213\345\204\277.md" new file mode 100644 index 0000000000000000000000000000000000000000..ca236ab4104c558a67fdc5986f32b39a94fc0b0a --- /dev/null +++ "b/content/zh/post/Frank/openGauss\350\264\246\346\234\254\346\225\260\346\215\256\345\272\223\357\274\214\344\275\240\344\270\215\347\237\245\351\201\223\347\232\204\351\202\243\344\272\233\344\272\213\345\204\277openGauss\350\264\246\346\234\254\346\225\260\346\215\256\345\272\223\357\274\214\344\275\240\344\270\215\347\237\245\351\201\223\347\232\204\351\202\243\344\272\233\344\272\213\345\204\277.md" @@ -0,0 +1,620 @@ ++++ +title = "openGauss账本数据库,你不知道的那些事儿" +date = "2022-09-19" +tags = ["openGauss技术文章征集", "MogDB"] +archives = "2022-09" +author = "夏克" +summary = "本文将通过对比官方文档关于**“设置账本数据库”**中的几个章节,结合源码来说说文档中操作步骤背后的原理。" ++++ +# openGauss账本数据库,你不知道的那些事儿 +# 摘要 +> 本文将通过对比官方文档关于**“设置账本数据库”**中的几个章节,结合源码来说说文档中操作步骤背后的原理。 + + + +# 账本数据库概述 +## 你知道的那些事儿 +### [官方文档](https://opengauss.org/zh/docs/3.0.0/docs/Developerguide/%E8%B4%A6%E6%9C%AC%E6%95%B0%E6%8D%AE%E5%BA%93%E6%A6%82%E8%BF%B0.html) +> 账本数据库融合了区块链思想,将用户操作记录至两种历史表中:用户历史表和全局区块表。当用户创建防篡改用户表时,系统将自动为该表添加一个hash列来保存每行数据的hash摘要信息,同时在blockchain模式下会创建一张用户历史表来记录对应用户表中每条数据的变更行为;而用户对防篡改用户表的一次修改行为将记录至全局区块表中。由于历史表具有只可追加不可修改的特点,因此历史表记录串联起来便形成了用户对防篡改用户表的修改历史。 + + + +## 你不知道的那些事儿 +### 操作步骤 +#### 1.创建防篡改模式。 +```sql +openGauss=# CREATE SCHEMA ledgernsp WITH BLOCKCHAIN; +``` +首先在这个SQL中我们可以看到`WITH BLOCKCHAIN` ,这里说明创建出来的SCHEMA与普通的SCHEMA不同,但就行不同在哪里我们后面会提到。 + +* 从语法解析看,增加了对BLOCKCHAIN的处理,标记了是否为账本模式。 + +```sql + CreateSchema ::= CREATE SCHEMA schema_name + [ AUTHORIZATION user_name ] [WITH BLOCKCHAIN] [ schema_element [ ... 
] ]; +``` +![image](images/URoeE6RPdl6HicMOaUmsEYPGtua3rFF-H434jfHwnnA.png) + +* CreateSchemaStmt 结构中增加了bool类型字段hasBlockChain + +```cpp +typedef struct CreateSchemaStmt { + NodeTag type; + char *schemaname; /* the name of the schema to create */ + char *authid; /* the owner of the created schema */ + bool hasBlockChain; /* whether this schema has blockchain */ + List *schemaElts; /* schema components (list of parsenodes) */ + TempType temptype; /* if the schema is temp table's schema */ + List *uuids; /* the list of uuid(only create sequence or table with serial type need) */ +} CreateSchemaStmt; +``` +##### 你不知道的限制 +账本数据库对于ALTER SCHEMA的几个限制 +* 1、dbe\_perf和snapshot两个模式不能ALTER为blockchain模式。 + +```cpp + if (withBlockchain && ((strncmp(nspName, "dbe_perf", STR_SCHEMA_NAME_LENGTH) == 0) || + (strncmp(nspName, "snapshot", STR_SNAPSHOT_LENGTH) == 0))) { + ereport(ERROR, (errcode(ERRCODE_OPERATE_FAILED), + errmsg("The schema '%s' doesn't allow to alter to blockchain schema", nspName))); + } +``` +![image](images/_ULgIk65_zQk1RhUAv8RfsGEDeXXasqkdwUGJ8RBsn0.png) + +* 2、系统模式不能 ALTER 为blockchain模式。 + +```cpp + if (withBlockchain && !g_instance.attr.attr_common.allowSystemTableMods && + !u_sess->attr.attr_common.IsInplaceUpgrade && IsReservedName(nspName)) + ereport(ERROR, + (errcode(ERRCODE_RESERVED_NAME), + errmsg("The system schema \"%s\" doesn't allow to alter to blockchain schema", nspName))); +``` +![image](images/CXqGX8iKzhqFGoTa4oMk8kyoXIdktRzQek2pVxe4XzI.png) + +* 3、包含了表的SCHEMA不能ALTER为blockchain模式。 + +```cpp + /* + * If the any table exists in the schema, do not change to ledger schema. + */ + StringInfo existTbl = TableExistInSchema(HeapTupleGetOid(tup), TABLE_TYPE_ANY); + if (existTbl->len != 0) { + if (withBlockchain) { + ereport(ERROR, + (errcode(ERRCODE_RESERVED_NAME), + errmsg("It is not supported to change \"%s\" to blockchain schema which includes tables.", + nspName))); + } else { + ereport(ERROR, + (errcode(ERRCODE_RESERVED_NAME), + errmsg("It is not supported to change \"%s\" to normal schema which includes tables.", + nspName))); + } + + } +``` +![image](images/IXg70KyBpL6kZBZfiSaTvcKYBQ7q93o-UlBFGP6FU3w.png) + +##### 查看模式 +![image](images/5hYUZcL0vSpgxKcA5tt4lyykneLxZlI0-XflBRWOfkc.png) + +#### 2.在防篡改模式下创建防篡改用户表。 +```sql +openGauss=# CREATE TABLE ledgernsp.usertable(id int, name text); +``` +![image](images/Mf6D8OGOudOnpvwXJ_oqmPAexpfr_EYSWazakX4FVT0.png) + +##### 你不知道的限制 +* 创建账本表的同时会自动创建一个“历史表”和“历史表的索引”。 + +在建表时`CreateCommand`会调用`AlterCreateChainTables`,如果是账本表再去调用`create_hist_relation`来创建历史表 + +`CreateCommand` -> `AlterCreateChainTables` -> `create_hist_relation` + +```cpp +/* + * AlterCreateChainTables + * If it is a ledger usertable, that should invoking this function. + * then create a history table. + */ +void AlterCreateChainTables(Oid relOid, Datum reloptions, CreateStmt *mainTblStmt) +{ + Relation rel = NULL; + + rel = heap_open(relOid, AccessExclusiveLock); + + /* Ledger user table only support for the regular relation. */ + if (!rel->rd_isblockchain) { + heap_close(rel, NoLock); + return; + } + + create_hist_relation(rel, reloptions, mainTblStmt); + heap_close(rel, NoLock); +} +``` +* 历史表命名规则,参见函数get\_hist\_name + +```cpp +bool get_hist_name(Oid relid, const char *rel_name, char *hist_name, Oid nsp_oid, const char *nsp_name) +{ + errno_t rc; + if (!OidIsValid(relid) || rel_name == NULL) { + return false; + } + nsp_oid = OidIsValid(nsp_oid) ? nsp_oid : get_rel_namespace(relid); + nsp_name = (nsp_name == NULL) ? 
get_namespace_name(nsp_oid) : nsp_name;
+    int part_hist_name_len = strlen(rel_name) + strlen(nsp_name) + 1;
+    if (part_hist_name_len + strlen("_hist") >= NAMEDATALEN) {
+        rc = snprintf_s(hist_name, NAMEDATALEN, NAMEDATALEN - 1, "%d_%d_hist", nsp_oid, relid);
+        securec_check_ss(rc, "", "");
+    } else {
+        rc = snprintf_s(hist_name, NAMEDATALEN, NAMEDATALEN - 1, "%s_%s_hist", nsp_name, rel_name);
+        securec_check_ss(rc, "", "");
+    }
+    return true;
+}
+```
+    * Maximum table name length: `#define NAMEDATALEN 64`
+    * If the limit is not exceeded, the name is: schema\_table\_hist
+    * If it is exceeded, the name becomes: schema(oid)\_table(oid)\_hist. An oid is an unsigned int whose maximum, 4294967295, has 10 digits, so this form needs at most 10+1+10+1+4 characters plus the trailing \0, i.e. 27 bytes, and can never exceed the 64-byte limit.
+
+```cpp
+omm=# create schema aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa with blockchain;
+CREATE SCHEMA
+omm=# create table aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.bbbbbbbbbbbbbbbbbbbbbbbbbbbbbb(id int);
+CREATE TABLE
+```
+![image](images/hziku7BLZT1zXmOXpv6EZ-z0RI7XipA8tLL6xWF58Mc.png)
+
+* The history table's index naming rule, from the snippet below on the history-table creation path
+
+```cpp
+    /* now create index for this new history table */
+    char hist_index_name[NAMEDATALEN];
+    rc = snprintf_s(hist_index_name, NAMEDATALEN, NAMEDATALEN - 1, "gs_hist_%u_index", relid);
+```
+    * The rule: gs\_hist\_\$(ledger table oid)\_index.
+
+![image](images/-KpBhFIZvSffso4TokTHA8LomQcLevuzX9SBJD4RIgk.png)
+
+#### 3. Modifying data in the tamper-proof user table
+Run INSERT/UPDATE/DELETE against the tamper-proof user table.
+
+```sql
+openGauss=# INSERT INTO ledgernsp.usertable VALUES(1, 'alex'), (2, 'bob'), (3, 'peter');
+INSERT 0 3
+openGauss=# SELECT *, hash FROM ledgernsp.usertable ORDER BY id;
+ id | name  |       hash
+----+-------+------------------
+  1 | alex  | 1f2e543c580cb8c5
+  2 | bob   | 8fcd74a8a6a4b484
+  3 | peter | f51b4b1b12d0354b
+(3 rows)
+
+openGauss=# UPDATE ledgernsp.usertable SET name = 'bob2' WHERE id = 2;
+UPDATE 1
+openGauss=# SELECT *, hash FROM ledgernsp.usertable ORDER BY id;
+ id | name  |       hash
+----+-------+------------------
+  1 | alex  | 1f2e543c580cb8c5
+  2 | bob2  | 437761affbb7c605
+  3 | peter | f51b4b1b12d0354b
+(3 rows)
+
+
+openGauss=# DELETE FROM ledgernsp.usertable WHERE id = 3;
+DELETE 1
+openGauss=# SELECT *, hash FROM ledgernsp.usertable ORDER BY id;
+ id | name | hash
+----+------+------------------
+  1 | alex | 1f2e543c580cb8c5
+  2 | bob2 | 437761affbb7c605
+(2 rows)
+
+```
+
+
+# Viewing the ledger history
+## The part you know
+### [Official documentation](https://opengauss.org/zh/docs/3.0.0/docs/Developerguide/%E6%9F%A5%E7%9C%8B%E8%B4%A6%E6%9C%AC%E5%8E%86%E5%8F%B2%E6%93%8D%E4%BD%9C%E8%AE%B0%E5%BD%95.html)
+Prerequisites
+
+* `An audit administrator, or a role with audit administrator privileges, must exist in the system.`
+* `The database is running normally, and a series of inserts, deletes and updates has been run against the tamper-proof database, so the queried period contains ledger records.`
+
+## The part you don't know
+### Basic operations
+#### 1. Query the global chain table.
+```sql
+omm=# SELECT * FROM gs_global_chain;
+ blocknum | dbname | username |           starttime           | relid |  relnsp   |  relname  |     relhash      |            globalhash            |
+   txcommand
+----------+--------+----------+-------------------------------+-------+-----------+-----------+------------------+----------------------------------+----------------
+--------------------------------------------------------------
+        1 | omm    | omm      | 2022-09-17 13:59:37.84824+00  | 16404 | ledgernsp | usertable | a41714001181a294 | 83927d11ba1fd678e8f4b0723a9cd5f2 | INSERT INTO led
+gernsp.usertable VALUES(1, 'alex'), (2, 'bob'), (3, 'peter');
+        2 | omm    | omm      | 2022-09-17 13:59:51.723068+00 | 16404 | ledgernsp | usertable | b3a9ed0755131181 | b5ee73b6c20c817230182f6373c78e20 | UPDATE ledgerns
+p.usertable SET name = 'bob2' WHERE id = 2;
+        3 | omm    | omm      | 2022-09-17 13:59:58.159596+00 | 16404 | ledgernsp | usertable | 0ae4b4e4ed2fcab5 | 0cc9938cf7f1ed7f7f1a03c29954380a | DELETE FROM led
+gernsp.usertable WHERE id = 3;
+(3 rows)
+```
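+The chaining behind the globalhash column can be shown in miniature before digging into the hook machinery. A toy sketch (not openGauss code; the record contents are invented) in the spirit of gen\_global\_hash, quoted further below:
+
+```python
+import hashlib
+
+def next_global_hash(prev_hash_hex, info_string):
+    # Fold the current block's info together with the previous globalhash, then hash.
+    return hashlib.md5((info_string + prev_hash_hex).encode()).hexdigest()
+
+blocks = ["usertable|ledgernsp|INSERT ...",
+          "usertable|ledgernsp|UPDATE ...",
+          "usertable|ledgernsp|DELETE ..."]
+h = hashlib.md5(b"genesis").hexdigest()   # stand-in for the genesis rule
+for info in blocks:
+    h = next_global_hash(h, info)
+    print(h)
+# Tampering with any earlier block changes every hash after it,
+# which is exactly what breaks the chain during verification.
+```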
+* Hook registration: the hook function `ledger_ExecutorEnd` registered here is called back whenever a ledger table is modified.
+
+```cpp
+/*
+ * ledger_hook_init -- install of gchain block record hook.
+ */
+void ledger_hook_init(void)
+{
+    t_thrd.security_ledger_cxt.prev_ExecutorEnd = (void *)ExecutorEnd_hook;
+    ExecutorEnd_hook = ledger_ExecutorEnd;
+}
+```
+* How globalhash is generated
+
+> The main job of a global chain record is producing globalhash.
+
+The call flow: ledger\_ExecutorEnd --> ledger\_gchain\_append, which in turn calls:
+
+* set\_gchain\_comb\_string
+* get\_next\_g\_blocknum
+* gen\_global\_hash
+
+    * set\_gchain\_comb\_string builds the combined string: `rel_name + nsp_name + query_string + rel_hash`
+    * get\_next\_g\_blocknum keeps the block number in the global variable g\_blocknum
+    * gen\_global\_hash hashes that combined string concatenated with the previous record's hash, which is the basic blockchain principle
+
+```cpp
+bool gen_global_hash(hash32_t *hash_buffer, const char *info_string, bool exist, const hash32_t *prev_hash)
+{
+    errno_t rc = EOK;
+    int comb_strlen;
+    char *comb_string = NULL;
+    /*
+     * Previous block not exists means current insertion block is genesis,
+     * then we use global systable as origin combine string for globalhash
+     * generation. If previous block exists, we will use previous global
+     * hash as combine string to calculate globalhash.
+     */
+    if (!exist) {
+        /* generate genesis block globalhash */
+        comb_strlen = strlen(GCHAIN_NAME) + strlen(info_string) + 1;
+        comb_string = (char *)palloc0(comb_strlen);
+        rc = snprintf_s(comb_string, comb_strlen, comb_strlen - 1, "%s%s", GCHAIN_NAME, info_string);
+        securec_check_ss(rc, "", "");
+    } else {
+        /* use previous globalhash and current block info to calculate globalhash. */
+        char *pre_hash_str = DatumGetCString(DirectFunctionCall1(hash32out, HASH32GetDatum(prev_hash)));
+        comb_strlen = strlen(pre_hash_str) + strlen(info_string) + 1;
+        comb_string = (char *)palloc0(comb_strlen);
+        rc = snprintf_s(comb_string, comb_strlen, comb_strlen - 1, "%s%s", info_string, pre_hash_str);
+        securec_check_ss(rc, "", "");
+        pfree_ext(pre_hash_str);
+    }
+
+    if (!pg_md5_binary(comb_string, comb_strlen - 1, hash_buffer->data)) {
+        pfree(comb_string);
+        ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("Failed to generate globalhash, out of memory")));
+        return false;
+    }
+    pfree(comb_string);
+    return true;
+}
+```
+* The hash value of the \_hist table is updated in `src/gausskernel/runtime/executor/nodeModifyTable.cpp`.
+
+![image](images/s_HvosSmYiVHF_o0vzupAk1FjsfqLEdC1_ve7JOW2MI.png)
+
+![image](images/2lkwtu3hysA5t9rJv5BGDx5lLDeavfwJwcdfqA_crgc.png)
+
+![image](images/aNMGZYnNnS_AQJ0HBUBP8frpYKy9Uy3ZMXDDr1k6X8M.png)
+
+* The value of the ledger table's own hash column comes from set\_user\_tuple\_hash.
+
+```cpp
+/*
+ * set_user_tuple_hash -- calculate and fill the hash attribute of user table's tuple.
+ *
+ * tup: row data of user table
+ * rel: user table
+ * hash_exists: whether tuple comes with tuplehash.
+ *
+ * Note: if hash_exists is true, we should recompute
+ * tuple hash and compare with tuplehash of itself.
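+ *
+ * [annotation, not in the original source] The body below first computes the
+ * row digest with gen_user_tuple_hash and locates the reserved hash column;
+ * it then either verifies the digest a tuple already carries (hash_exists)
+ * or fills the column with the fresh digest via heap_modify_tuple.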
+ */ +HeapTuple set_user_tuple_hash(HeapTuple tup, Relation rel, bool hash_exists) +{ + uint64 row_hash = gen_user_tuple_hash(rel, tup); + int hash_attrno = user_hash_attrno(rel->rd_att); + if (hash_exists) { + bool is_null; + Datum hash = heap_getattr(tup, hash_attrno + 1, rel->rd_att, &is_null); + if (is_null || row_hash != DatumGetUInt64(hash)) { + ereport(ERROR, (errcode(ERRCODE_OPERATE_INVALID_PARAM), errmsg("Invalid tuple hash."))); + } + return tup; + } + Datum *values = NULL; + bool *nulls = NULL; + bool *replaces = NULL; + /* Build modified tuple */ + int2 nattrs = RelationGetNumberOfAttributes(rel); + values = (Datum*)palloc0(nattrs * sizeof(Datum)); + nulls = (bool*)palloc0(nattrs * sizeof(bool)); + replaces = (bool*)palloc0(nattrs * sizeof(bool)); + values[hash_attrno] = UInt64GetDatum(row_hash); + replaces[hash_attrno] = true; + HeapTuple newtup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls, replaces); + + pfree_ext(values); + pfree_ext(nulls); + pfree_ext(replaces); + return newtup; +} +``` + + +# 校验账本数据一致性 +## 你知道的那些事儿 +### [官方文档](https://opengauss.org/zh/docs/3.0.0/docs/Developerguide/%E6%A0%A1%E9%AA%8C%E8%B4%A6%E6%9C%AC%E6%95%B0%E6%8D%AE%E4%B8%80%E8%87%B4%E6%80%A7.html) +> 数据库正常运行,并且对防篡改数据库执行了一系列增、删、改等操作,保证在查询时段内有账本操作记录结果产生。 + + + +## 你不知道的那些事儿 +### 基本操作 +#### 1、校验防篡改用户表ledgernsp.usertable与其对应的历史表是否一致。 +```cpp +omm=# SELECT pg_catalog.ledger_hist_check('ledgernsp', 'usertable'); + ledger_hist_check +------------------- + t +(1 row) +``` +* 校验用户权限 `Only super user or audit admin have access right to blockchain nsp` + +```cpp + /* Only super user or audit admin have access right to blockchain nsp */ + if (nsp_oid == PG_BLOCKCHAIN_NAMESPACE) { + return gs_blockchain_aclmask(roleid, mask); + } +``` +* 校验历史表hash值 + +is\_hist\_hash\_identity --> get\_usertable\_hash\_sum + +                                 --> get\_histtable\_hash\_sum + +```cpp +/* + * is_hist_hash_identity -- check whether user table hash and history table hash are equal + * + * relid: user table oid + * res_hash: hash sum of history table + */ +bool is_hist_hash_identity(Oid relid, uint64 *res_hash) +{ + uint64 user_hash_sum; + uint64 hist_hash_sum; + char hist_name[NAMEDATALEN]; + char *rel_name = get_rel_name(relid); + if (!get_hist_name(relid, rel_name, hist_name)) { + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("get hist table name failed."))); + } + Oid histoid = get_relname_relid(hist_name, PG_BLOCKCHAIN_NAMESPACE); + if (!OidIsValid(histoid)) { + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("could not find hist table of \"%s\".", rel_name))); + } + + user_hash_sum = get_usertable_hash_sum(relid); + hist_hash_sum = get_histtable_hash_sum(histoid); + + *res_hash = hist_hash_sum; + return user_hash_sum == hist_hash_sum; +} +``` + + +#### 2、查询防篡改用户表ledgernsp.usertable与其对应的历史表以及全局区块表中关于该表的记录是否一致。 +```cpp +omm=# SELECT pg_catalog.ledger_gchain_check('ledgernsp', 'usertable'); + ledger_gchain_check +--------------------- + t +(1 row) +``` +* 校验是否为账本表`ledger_usertable_check` +* 校验用户权限`has_ledger_consistent_privilege` +* 校验历史表hash值`is_hist_hash_identity` +* 计算/校验全局表hash `get_gchain_relhash_sum` + +```cpp +/* + * get_gchain_relhash_sum -- calculate relhash from gs_global_chain + * + * relid: user table oid + */ +static uint64 get_gchain_relhash_sum(Oid relid) +{ + uint64 relhash = 0; + HeapTuple tuple = NULL; + + /* scan the gs_global_chain catalog by relid */ + Relation gchain_rel = heap_open(GsGlobalChainRelationId, AccessShareLock); + Form_gs_global_chain rdata = 
NULL; + TableScanDesc scan = heap_beginscan(gchain_rel, SnapshotNow, 0, NULL); + while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { + rdata = (Form_gs_global_chain)GETSTRUCT(tuple); + if (rdata == NULL || rdata->relid != relid) { + continue; + } + relhash += rdata->relhash; + } + heap_endscan(scan); + heap_close(gchain_rel, AccessShareLock); + return relhash; +} +``` + + +# 归档账本数据库 +## 你知道的那些事儿 +### [官方文档](https://opengauss.org/zh/docs/3.0.0/docs/Developerguide/%E5%BD%92%E6%A1%A3%E8%B4%A6%E6%9C%AC%E6%95%B0%E6%8D%AE%E5%BA%93.html) +前提条件: + +* 系统中需要有审计管理员或者具有审计管理员权限的角色。 +* 数据库正常运行,并且对防篡改数据库执行了一系列增、删、改等操作,保证在查询时段内有账本操作记录结果产生。 +* 数据库已经正确配置审计文件的存储路径audit\_directory。 + +## 你不知道的那些事儿 +### 基本操作 +#### 1、对指定用户历史表进行归档操作。 +```cpp + +omm=# SELECT pg_catalog.ledger_hist_archive('ledgernsp', 'usertable'); + ledger_hist_archive +--------------------- + t +(1 row) + +omm=# SELECT * FROM blockchain.ledgernsp_usertable_hist; + rec_num | hash_ins | hash_del | pre_hash +---------+------------------+------------------+---------------------------------- + 4 | e78e75b00d396899 | 84e8bfc3b974e9cf | 6475a497b7a272a92bab012d7f3d615b +(1 row) + +``` +主要步骤如下: + +1. Copy user history table. +2. Do unify and truncate. + 1. sum all hash\_ins and hash\_del for unification. + 2. Do real truncate.`heap_truncate_one_rel` + 3. Do insertion for unified `row.simple_heap_insert` +3. Flush history hash table cache. + + + +#### 2、执行全局区块表导出操作 +```cpp +omm=# SELECT * FROM gs_global_chain; + blocknum | dbname | username | starttime | relid | relnsp | relname | relhash | globalhash | + txcommand +----------+--------+----------+-------------------------------+-------+-----------+-----------+------------------+----------------------------------+---------------- +-------------------------------------------------------------- + 1 | omm | omm | 2022-09-17 13:59:37.84824+00 | 16404 | ledgernsp | usertable | a41714001181a294 | 83927d11ba1fd678e8f4b0723a9cd5f2 | INSERT INTO led +gernsp.usertable VALUES(1, 'alex'), (2, 'bob'), (3, 'peter'); + 2 | omm | omm | 2022-09-17 13:59:51.723068+00 | 16404 | ledgernsp | usertable | b3a9ed0755131181 | b5ee73b6c20c817230182f6373c78e20 | UPDATE ledgerns +p.usertable SET name = 'bob2' WHERE id = 2; + 3 | omm | omm | 2022-09-17 13:59:58.159596+00 | 16404 | ledgernsp | usertable | 0ae4b4e4ed2fcab5 | 0cc9938cf7f1ed7f7f1a03c29954380a | DELETE FROM led +gernsp.usertable WHERE id = 3; +(3 rows) + +omm=# SELECT pg_catalog.ledger_gchain_archive(); + ledger_gchain_archive +----------------------- + t +(1 row) + +omm=# SELECT * FROM gs_global_chain; + blocknum | dbname | username | starttime | relid | relnsp | relname | relhash | globalhash | txcommand +----------+--------+----------+------------------------------+-------+-----------+-----------+------------------+----------------------------------+----------- + 2 | omm | omm | 2022-09-17 13:59:37.84824+00 | 16404 | ledgernsp | usertable | 62a5b5ec53c47eca | 7252d09679b0b3836a2e63da17284ad5 | Archived. +(1 row) + +``` +gs\_global\_chain主要处理流程: + +1. Init and prepare bak dictionary. +2. Using CopyStmt to copy global chain. +3. Do unify and truncate. + 1. Using hash table to do unify, each hash\_entry refers to one relid informations. + 2. Split gs\_global\_chain by relid, and accumulate rel\_hash to a new record for each rel. + 3. Do rel truncate. + 4. Insert newest record to gchain order by relid. +4. Flush global\_hash cache. 
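+
+To make the "unify" step concrete, here is a toy model (a sketch, not the server code; the gs\_global\_chain rows are the three from the example above) of how per-relid relhash values are accumulated into the single "Archived." record:
+
+```python
+from collections import defaultdict
+
+# Miniature of gs_global_chain: (blocknum, relid, relhash) from the rows above.
+gchain = [(1, 16404, 0xa41714001181a294),
+          (2, 16404, 0xb3a9ed0755131181),
+          (3, 16404, 0x0ae4b4e4ed2fcab5)]
+
+# Accumulate relhash per relid with 64-bit wrap-around, as unsigned C addition does.
+acc = defaultdict(int)
+for _, relid, relhash in gchain:
+    acc[relid] = (acc[relid] + relhash) & 0xFFFFFFFFFFFFFFFF
+
+for relid, relhash in acc.items():
+    print(relid, format(relhash, 'x'), "Archived.")
+```
+Running it prints 62a5b5ec53c47eca for relid 16404, matching the relhash of the archived record shown above and the value that ledger\_gchain\_repair reports later in this article.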
+ + + +# 修复账本数据库 +## 你知道的那些事儿 +### [官方文档](https://www.opengauss.org/zh/docs/latest/docs/Developerguide/%E4%BF%AE%E5%A4%8D%E8%B4%A6%E6%9C%AC%E6%95%B0%E6%8D%AE%E5%BA%93.html) +前提条件: + +* 系统中需要有审计管理员或者具有审计管理员权限的角色。 +* 数据库正常运行,并且对防篡改数据库执行了一系列增、删、改等操作,保证在查询时段内有账本操作记录结果产生。 + +## 你不知道的那些事儿 +### 基本操作 +#### 1、执行历史表修复操作 +```sql +omm=# select * from blockchain.ledgernsp_usertable_hist; + rec_num | hash_ins | hash_del | pre_hash +---------+------------------+------------------+---------------------------------- + 4 | e78e75b00d396899 | 84e8bfc3b974e9cf | 6475a497b7a272a92bab012d7f3d615b +(1 row) + +omm=# SELECT pg_catalog.ledger_hist_repair('ledgernsp', 'usertable'); + ledger_hist_repair +-------------------- + 0000000000000000 +(1 row) + +``` +[drawio](rHmeQ8HWKS_RFXgP-oTUZINZguxBYqh2IV64Y0j5TAA.svg) + +#### 2、执行全局区块表修复操作 +```sql +omm=# select * from gs_global_chain ; + blocknum | dbname | username | starttime | relid | relnsp | relname | relhash | globalhash | txcommand +----------+--------+----------+------------------------------+-------+-----------+-----------+------------------+----------------------------------+----------- + 2 | omm | omm | 2022-09-17 13:59:37.84824+00 | 16404 | ledgernsp | usertable | 62a5b5ec53c47eca | 7252d09679b0b3836a2e63da17284ad5 | Archived. +(1 row) + +omm=# SELECT pg_catalog.ledger_gchain_repair('ledgernsp', 'usertable'); + ledger_gchain_repair +---------------------- + 62a5b5ec53c47eca +(1 row) +``` +首先判断用户权限,之后通过get\_gchain\_relhash\_sum函数计算relhash字段 + +```cpp +/* + * get_gchain_relhash_sum -- calculate relhash from gs_global_chain + * + * relid: user table oid + */ +static uint64 get_gchain_relhash_sum(Oid relid) +{ + uint64 relhash = 0; + HeapTuple tuple = NULL; + + /* scan the gs_global_chain catalog by relid */ + Relation gchain_rel = heap_open(GsGlobalChainRelationId, AccessShareLock); + Form_gs_global_chain rdata = NULL; + TableScanDesc scan = heap_beginscan(gchain_rel, SnapshotNow, 0, NULL); + while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { + rdata = (Form_gs_global_chain)GETSTRUCT(tuple); + if (rdata == NULL || rdata->relid != relid) { + continue; + } + relhash += rdata->relhash; + } + heap_endscan(scan); + heap_close(gchain_rel, AccessShareLock); + return relhash; +} +``` +主要是计算并修复`gs_global_chain中的relhash`字段。 + +![image](images/F1ACrSt9oWqMrfGvlRRLsQQcC-qNBV9jiZH3Y4RZllk.png) + +# 总结 +账本数据库其实并不像我们想象的那么复制,实际上就是利用了区块链的最基本的原理,即当前记录的特征值 + 上一条记录特征值的hash值,再进行hash。下一条与上一条记录具有数据关联性,形成“链”的结构,如果篡改了其中的数据,则会导致“链”断开,导致不能与后面数据记录形成hash关联。\_hist表记录了用户表每一步数据变化的过程,gs\_global\_chain表记录了所有防篡改模式下对用户表的操作记录。用户表结合\_hist和global表就能完整记录和校验。 + diff --git "a/content/zh/post/Frank/\346\265\213\350\257\204\346\212\245\345\221\212\357\274\232\346\226\207\344\273\266\345\257\274\345\205\245\345\223\252\345\256\266\345\274\272\357\274\237.md" "b/content/zh/post/Frank/\346\265\213\350\257\204\346\212\245\345\221\212\357\274\232\346\226\207\344\273\266\345\257\274\345\205\245\345\223\252\345\256\266\345\274\272\357\274\237.md" new file mode 100644 index 0000000000000000000000000000000000000000..06725338837f6c65230b0c8f7f19e6c908179a34 --- /dev/null +++ "b/content/zh/post/Frank/\346\265\213\350\257\204\346\212\245\345\221\212\357\274\232\346\226\207\344\273\266\345\257\274\345\205\245\345\223\252\345\256\266\345\274\272\357\274\237.md" @@ -0,0 +1,604 @@ +# 测评报告:文件导入哪家强? +# 文件导入哪家强? 
+![image](images/PNcP_TtELNoc1JnjdxdJFZJAd5FlK94zcMgv63wDs-w.webp)
+
+# Background
+> A recent business scenario needed one or more text files loaded into tables matching their structure. The requirement is simple, and most relational databases support it out of the box. Using that scenario, I compared the file-import performance of the open-source and domestic databases at hand.
+
+# Abstract
+This article compares the file-import feature across six databases: MySQL, PostgreSQL, DM8, IvorySQL, openGauss and OceanBase. All tests use the same hardware and operating system, and every database runs with its installation defaults.
+
+> ***Note:***
+
+> 1. ***Oracle provides no ARM installation media, so Oracle's sqlldr is not part of this comparison;***
+> 2. ***None of the databases under test were tuned; all run with default parameters;***
+
+# Test environment
+## Server
+|CPU|Kunpeng-920|
+| ----- | ----- |
+|Architecture|aarch64|
+|On-line CPU(s) list|0-7|
+|CPU clock|2600MHz|
+
+## Operating system
+```bash
+NAME="openEuler"
+VERSION="20.03 (LTS-SP2)"
+ID="openEuler"
+VERSION_ID="20.03"
+PRETTY_NAME="openEuler 20.03 (LTS-SP2)"
+ANSI_COLOR="0;31"
+```
+## System performance
+* unixbench scores below, for reference
+
+```bash
+------------------------------------------------------------------------
+Benchmark Run: Wed Dec 07 2022 15:48:16 - 15:57:14
+8 CPUs in system; running 1 parallel copy of tests
+
+Dhrystone 2 using register variables       40999324.8 lps   (10.0 s, 2 samples)
+Double-Precision Whetstone                     4314.8 MWIPS (9.3 s, 2 samples)
+Execl Throughput                               3762.5 lps   (29.8 s, 1 samples)
+File Copy 1024 bufsize 2000 maxblocks        685100.0 KBps  (30.0 s, 1 samples)
+File Copy 256 bufsize 500 maxblocks          189282.0 KBps  (30.0 s, 1 samples)
+File Copy 4096 bufsize 8000 maxblocks       1992147.0 KBps  (30.0 s, 1 samples)
+Pipe Throughput                             1231178.1 lps   (10.0 s, 2 samples)
+Pipe-based Context Switching                 105636.0 lps   (10.0 s, 2 samples)
+Process Creation                               8963.6 lps   (30.0 s, 1 samples)
+Shell Scripts (1 concurrent)                   7087.6 lpm   (60.0 s, 1 samples)
+Shell Scripts (8 concurrent)                   3055.4 lpm   (60.0 s, 1 samples)
+System Call Overhead                         907956.1 lps   (10.0 s, 2 samples)
+
+System Benchmarks Index Values               BASELINE       RESULT    INDEX
+Dhrystone 2 using register variables         116700.0   40999324.8   3513.2
+Double-Precision Whetstone                       55.0       4314.8    784.5
+Execl Throughput                                 43.0       3762.5    875.0
+File Copy 1024 bufsize 2000 maxblocks          3960.0     685100.0   1730.1
+File Copy 256 bufsize 500 maxblocks            1655.0     189282.0   1143.7
+File Copy 4096 bufsize 8000 maxblocks          5800.0    1992147.0   3434.7
+Pipe Throughput                               12440.0    1231178.1    989.7
+Pipe-based Context Switching                   4000.0     105636.0    264.1
+Process Creation                                126.0       8963.6    711.4
+Shell Scripts (1 concurrent)                     42.4       7087.6   1671.6
+Shell Scripts (8 concurrent)                      6.0       3055.4   5092.4
+System Call Overhead                          15000.0     907956.1    605.3
+                                                                   ========
+System Benchmarks Index Score                                        1258.0
+
+------------------------------------------------------------------------
+Benchmark Run: Wed Dec 07 2022 15:57:14 - 16:06:18
+8 CPUs in system; running 16 parallel copies of tests
+
+Dhrystone 2 using register variables      326630138.1 lps   (10.0 s, 2 samples)
+Double-Precision Whetstone                    35757.3 MWIPS (8.7 s, 2 samples)
+Execl Throughput                              23047.5 lps   (29.7 s, 1 samples)
+File Copy 1024 bufsize 2000 maxblocks        525702.0 KBps  (30.0 s, 1 samples)
+File Copy 256 bufsize 500 maxblocks          139688.0 KBps  (30.0 s, 1 samples)
+File Copy 4096 bufsize 8000 maxblocks       1834232.0 KBps  (30.0 s, 1 samples)
+Pipe Throughput                             9754452.8 lps   (10.0 s, 2 samples)
+Pipe-based Context Switching                1181849.8 lps   (10.0 s, 2 samples)
+Process Creation                              42295.7 lps   (30.0 s, 1 samples)
+Shell Scripts (1 concurrent)                  32399.6 lpm   (60.0 s, 1 samples)
+Shell Scripts (8 concurrent)                   4529.9 lpm   (60.1 s, 1 samples)
+System Call Overhead                        3386402.8 lps   (10.0 s, 2 samples)
+
+System Benchmarks Index Values               BASELINE       RESULT    INDEX
+Dhrystone 2 using register variables         116700.0  326630138.1  27988.9
+Double-Precision Whetstone                       55.0      35757.3   6501.3
+Execl Throughput                                 43.0      23047.5   5359.9
+File Copy 1024 bufsize 2000
maxblocks 3960.0 525702.0 1327.5 +File Copy 256 bufsize 500 maxblocks 1655.0 139688.0 844.0 +File Copy 4096 bufsize 8000 maxblocks 5800.0 1834232.0 3162.5 +Pipe Throughput 12440.0 9754452.8 7841.2 +Pipe-based Context Switching 4000.0 1181849.8 2954.6 +Process Creation 126.0 42295.7 3356.8 +Shell Scripts (1 concurrent) 42.4 32399.6 7641.4 +Shell Scripts (8 concurrent) 6.0 4529.9 7549.8 +System Call Overhead 15000.0 3386402.8 2257.6 + ======== +System Benchmarks Index Score 4252.8 + +``` +# 数据准备 +使用benchmarksql-5.0中的历史表作为被测表,建表脚本如下: + +```sql +create table bmsql_history ( + hist_id integer, + h_c_id integer, + h_c_d_id integer, + h_c_w_id integer, + h_d_id integer, + h_w_id integer, + h_date timestamp, + h_amount decimal(6,2), + h_data varchar(24) +); +``` + + +## 测试数据生成脚本 +根据字段类型写了一个生成测试数据的脚本,代码如下: + +```python +# -*- coding=utf-8 -*- + +import csv +import random +import time +import string + +# 创建列表,保存header内容 +header_list = ["hist_id", "h_c_id", "h_c_d_id", "h_c_w_id", "h_d_id", "h_w_id", "h_date", "h_amount", "h_data"] + +g_count = 0 + +def random_list(n): + data_list = [] + global g_count + for i in range(n): + g_count = g_count + 1 + l = [g_count, + random.randint(0,1000), + random.randint(0,1000), + random.randint(0,1000), + random.randint(0,1000), + random.randint(0,1000), + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), + #random.randint(0,1000), + round(random.uniform(0, 9999.0), 2), + ''.join(random.sample(string.ascii_letters + string.digits, 20)) + ] + data_list.append(l) + + return data_list + +# 以写方式打开文件。注意添加 newline="",否则会在两行数据之间都插入一行空白。 +with open("bmsql_history.csv", mode="w", encoding="utf-8", newline="") as f: + + # 基于打开的文件,创建 csv.writer 实例 + writer = csv.writer(f) + + # 写入 header。 + # writerow() 一次只能写入一行。 + writer.writerow(header_list) + + # 写入数据。 + # writerows() 一次写入多行。 + for i in range(10000): + writer.writerows(random_list(1000)) + +``` + + +## 测试数据 +执行脚本后会生成10000000行测试数据,具体如下图: + +![image](images/HIS6hdLiNFe6czt6hwP2pi_NnvwiFnaRrKxs1DhPu5s.png) + +# 性能测试 +## PostgreSQL COPY +### 简介 +`COPY` moves data between PostgreSQL tables and standard file-system files. `COPY TO` copies the contents of a table *to* a file, while `COPY FROM` copies data *from* a file to a table (appending the data to whatever is in the table already). `COPY TO` can also copy the results of a `SELECT` query. + +If a column list is specified, `COPY TO` copies only the data in the specified columns to the file. For `COPY FROM`, each field in the file is inserted, in order, into the specified column. Table columns not specified in the `COPY FROM` column list will receive their default values. + +`COPY` with a file name instructs the PostgreSQL server to directly read from or write to a file. The file must be accessible by the PostgreSQL user (the user ID the server runs as) and the name must be specified from the viewpoint of the server. When `PROGRAM` is specified, the server executes the given command and reads from the standard output of the program, or writes to the standard input of the program. The command must be specified from the viewpoint of the server, and be executable by the PostgreSQL user. When `STDIN` or `STDOUT` is specified, data is transmitted via the connection between the client and the server. + +### 语法 +```sql + Copy ::= COPY table_name [ ( column_name [, ...] ) ] + FROM { 'filename' | STDIN } + [ [ USING ] DELIMITERS 'delimiters' ] + [ WITHOUT ESCAPING ] + [ LOG ERRORS ] + [ REJECT LIMIT 'limit' ] + [ WITH ( option [, ...] 
) ] + | copy_option + | TRANSFORM ( { column_name [ data_type ] [ AS transform_expr ] } [, ...] ) + | FIXED FORMATTER ( { column_name( offset, length ) } [, ...] ) [ ( option [, ...] ) | copy_option [ ...] ] ; +``` + + +### 测试 +* PostgreSQL版本(编译安装) + +![image](images/WH2_Mzd71Rk3VVjf1TECjxhpzKx-tAku9kfp3MX1aS0.png) + +```sql +postgres=# copy bmsql_history from '/home/postgres/bmsql_history.csv' delimiter ',' +postgres-# ; +COPY 10000000 +Time: 19829.354 ms (00:19.829) +``` +耗时:约20s + +## IvorySQL COPY +### 简介 +参考PostgreSQL简介。 + +### 语法 +参考PostgreSQL简介。 + +### 测试 +* IvorySQL版本(通过编译安装) + +![image](images/aebKG7cndlMYcoRcq7qs5m-GYbhGj74zOR2NeJKh4GI.png) + +![image](images/Rr8WMD2FqdxOic2kFbxXJic35fdB_YAqnmerRQwqQG4.png) + +```sql +postgres=# copy bmsql_history from '/home/ivory/bmsql_history.csv' delimiter ','; +COPY 10000000 +Time: 21108.218 ms (00:21.108) +``` +![image](images/OAlETvVs-VRdXpoBOenwSil6WkhILHKdzb1OEkU1lYY.png) + +耗时:约20s + +## openGauss COPY +### 简介 +参考PostgreSQL简介。 + +### 语法 +参考PostgreSQL简介。 + +### 测试 +> 分别用编译安装和安装包安装了openGauss,测试结果基本一致。 + +#### 编译安装openGauss +```sql +openGauss=# vacuum; +VACUUM +openGauss=# analyze; +ANALYZE +openGauss=# \timing +Timing is on. +openGauss=# copy bmsql_history from '/home/omm/bmsql_history.csv' delimiter ','; +COPY 10000000 +Time: 112710.938 ms +openGauss=# select version(); + version +------------------------------------------------------------------------------------------------------------------------------------------------------------ + (openGauss 3.1.0 build 2586b083) compiled at 2022-11-28 15:46:36 commit 0 last mr debug on aarch64-unknown-linux-gnu, compiled by g++ (GCC) 7.3.0, 64-bit +(1 row) + +``` +#### 安装包安装极简版 +```sql +openGauss=# truncate bmsql_history; +TRUNCATE TABLE +openGauss=# \timing +Timing is on. +openGauss=# copy bmsql_history from '/home/omm/bmsql_history.csv' delimiter ','; +COPY 10000000 +Time: 122703.615 ms +openGauss=# select version(); + version +------------------------------------------------------------------------------------------------------------------------------------------------------- + (openGauss 3.1.0 build 4e931f9a) compiled at 2022-09-29 14:19:54 commit 0 last mr on aarch64-unknown-linux-gnu, compiled by g++ (GCC) 7.3.0, 64-bit +(1 row) + +Time: 0.568 ms + +``` +耗时:约120s + +## MySQL LOAD +### 简介 +1. MySQL load data 语句能快速将一个文本文件的内容导入到对应的数据库表中(一般文本的一行对应表的一条记录); +2. 数据库应用程序开发中,涉及大批量数据需要插入时,使用 load data 语句的效率比一般的 insert 语句的高很多; +3. 可以看成select … into outfile语句的反操作,select … into outfile将数据库表中的数据导出保存到一个文件中。参考MySQL 5.7 官方手册 ; + +### 语法 +```sql +LOAD DATA + [LOW_PRIORITY | CONCURRENT] [LOCAL] + INFILE 'file_name' + [REPLACE | IGNORE] + INTO TABLE tbl_name + [PARTITION (partition_name [, partition_name] ...)] + [CHARACTER SET charset_name] + [{FIELDS | COLUMNS} + [TERMINATED BY 'string'] + [[OPTIONALLY] ENCLOSED BY 'char'] + [ESCAPED BY 'char'] + ] + [LINES + [STARTING BY 'string'] + [TERMINATED BY 'string'] + ] + [IGNORE number {LINES | ROWS}] + [(col_name_or_user_var + [, col_name_or_user_var] ...)] + [SET col_name={expr | DEFAULT}, + [, col_name={expr | DEFAULT}] ...] 
+``` + + +### 测试 +```sql +-- LOAD DATA LOCAL INFILE '文件路径' INTO TABLE tableName FIELDS TERMINATED BY ','; + +LOAD DATA LOCAL INFILE '/root/bmsql_history.csv' INTO TABLE bmsql_history FIELDS TERMINATED BY ','; +``` +![image](images/T311n-kMrG-vp6DcsMlDiHUBfuC6BfNcHDMMYjzs1No.png) + +耗时:69.58s + +![image](images/lWgH2foPpaW6bs4tZ71JzhLuKib1-kFxJhaWd3RwyQk.png) + + + +## DM8 dmfldr +### 简介 +dmfldr(DM Fast Loader)是 DM 提供的快速数据装载命令行工具。用户通过使用dmfldr 工具能够把按照一定格式排序的文本数据以简单、快速、高效的方式载入到 DM 数据库中,或把 DM 数据库中的数据按照一定格式写入文本文件。 + +![image](images/ofpefUbUWTyuI5WDrTWmcWbLLsqGXYJM6MPgODEKgOI.png) + +### 语法/参数说明 +```bash +[dmdba@host-10-208-227-136 ~]$ dmfldr help +version: 03134283938-20221019-172201-20018 +格式: ./dmfldr KEYWORD=value + +例程: ./dmfldr SYSDBA/SYSDBA CONTROL='/opt/data/fldr.ctl' + +USERID 必须是命令行中的第一个参数 +字符串类型参数必须以引号封闭 + +关键字 说明(默认值) +-------------------------------------------------------------------------------- +USERID 用户名/口令, 格式:{[/] | /}[@][