diff --git "a/content/zh/post/zhengxue/images/om\351\203\250\347\275\262/doradoxlog\346\230\240\345\260\204.png" "b/content/zh/post/zhengxue/images/om\351\203\250\347\275\262/doradoxlog\346\230\240\345\260\204.png" new file mode 100644 index 0000000000000000000000000000000000000000..1aa58cb69c8b66fe81f920efd02229970bb05e6b Binary files /dev/null and "b/content/zh/post/zhengxue/images/om\351\203\250\347\275\262/doradoxlog\346\230\240\345\260\204.png" differ diff --git "a/content/zh/post/zhengxue/images/om\351\203\250\347\275\262/doradoxlog\346\230\240\345\260\2041.png" "b/content/zh/post/zhengxue/images/om\351\203\250\347\275\262/doradoxlog\346\230\240\345\260\2041.png" new file mode 100644 index 0000000000000000000000000000000000000000..8db70070179a887cec9c0bb218894290fd526cee Binary files /dev/null and "b/content/zh/post/zhengxue/images/om\351\203\250\347\275\262/doradoxlog\346\230\240\345\260\2041.png" differ diff --git "a/content/zh/post/zhengxue/images/om\351\203\250\347\275\262/lun\346\237\245\350\257\242.jpg" "b/content/zh/post/zhengxue/images/om\351\203\250\347\275\262/lun\346\237\245\350\257\242.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..fa356b1aed49262e23bdca9389761411fa8eb92a Binary files /dev/null and "b/content/zh/post/zhengxue/images/om\351\203\250\347\275\262/lun\346\237\245\350\257\242.jpg" differ diff --git "a/content/zh/post/zhengxue/images/om\351\203\250\347\275\262/\350\277\234\347\250\213\345\244\215\345\210\266\345\257\271\345\210\233\345\273\272.png" "b/content/zh/post/zhengxue/images/om\351\203\250\347\275\262/\350\277\234\347\250\213\345\244\215\345\210\266\345\257\271\345\210\233\345\273\272.png" new file mode 100644 index 0000000000000000000000000000000000000000..95eeed196b7d6aa9a46dd4df2b4f42c11c2154ab Binary files /dev/null and "b/content/zh/post/zhengxue/images/om\351\203\250\347\275\262/\350\277\234\347\250\213\345\244\215\345\210\266\345\257\271\345\210\233\345\273\272.png" differ diff --git "a/content/zh/post/zhengxue/images/om\351\203\250\347\275\262/\350\277\234\347\250\213\345\244\215\345\210\266\345\257\271\345\210\233\345\273\2721.jpg" "b/content/zh/post/zhengxue/images/om\351\203\250\347\275\262/\350\277\234\347\250\213\345\244\215\345\210\266\345\257\271\345\210\233\345\273\2721.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..9a7f969326c3309c22cb817caf9c965843b57015 Binary files /dev/null and "b/content/zh/post/zhengxue/images/om\351\203\250\347\275\262/\350\277\234\347\250\213\345\244\215\345\210\266\345\257\271\345\210\233\345\273\2721.jpg" differ diff --git "a/content/zh/post/zhengxue/images/\347\243\201\351\230\265\346\220\255\345\273\272/lun\346\237\245\350\257\242.jpg" "b/content/zh/post/zhengxue/images/\347\243\201\351\230\265\346\220\255\345\273\272/lun\346\237\245\350\257\242.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..89fda3aae501d23d774353a363d25c629ab78129 Binary files /dev/null and "b/content/zh/post/zhengxue/images/\347\243\201\351\230\265\346\220\255\345\273\272/lun\346\237\245\350\257\242.jpg" differ diff --git "a/content/zh/post/zhengxue/images/\347\243\201\351\230\265\346\220\255\345\273\272/\345\210\233\345\273\272lun.jpg" "b/content/zh/post/zhengxue/images/\347\243\201\351\230\265\346\220\255\345\273\272/\345\210\233\345\273\272lun.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..c21067fbcf164e465c5e9891a5fb7e22ed8c538f Binary files /dev/null and 
"b/content/zh/post/zhengxue/images/\347\243\201\351\230\265\346\220\255\345\273\272/\345\210\233\345\273\272lun.jpg" differ diff --git "a/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\345\210\207\346\215\242.md" "b/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\345\210\207\346\215\242.md" index 6c4d491e24fcdfb5bfdf6e7ab009e4f1a0cd314e..b442e38958d56304e63c51290bca244be5ee6d8e 100644 --- "a/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\345\210\207\346\215\242.md" +++ "b/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\345\210\207\346\215\242.md" @@ -37,6 +37,8 @@ times = "9:30" 基于《资源池化+同城dorado双集群》部署方式,集群间切换设计如下: ###   1.1.主备集群状态 +前提条件:已经部署资源池化同城双集群环境 + @@ -60,7 +62,7 @@ times = "9:30" - + @@ -78,10 +80,42 @@ openGauss=# select * from pg_stat_get_stream_replications(); (1 row) ``` -run mode 指数据库内核运行模式是primary还是standby还是normal,是t_thrd.postmaster_cxt.HaShmData->current_mode或t_thrd.xlog_cxt.server_mode参数指代的主备运行模式类型 +`Tips`:run mode 指数据库内核运行模式是primary还是standby还是normal,是t_thrd.postmaster_cxt.HaShmData->current_mode或t_thrd.xlog_cxt.server_mode参数指代的主备运行模式类型 ###   1.2.failover - 基于cm模拟部署方式,因此没有管控平台切换同步复制对方向的操作。 + 以下提到的/home/omm/ss_hatest/dn0为数据库dn目录,解释如下: +
 容灾中心  备端  首备节点0
-standby
+Main Standby
 standby(资源池化+传统备)
 从备节点1
<table>
    <tr>
        <th>集群中心</th>
        <th>节点类型</th>
        <th>local role</th>
        <th>dn目录</th>
    </tr>
    <tr>
        <td rowspan="2">生产中心主端</td>
        <td>主节点0</td>
        <td>primary</td>
        <td>/home/omm/ss_hatest/dn0</td>
    </tr>
    <tr>
        <td>备节点1</td>
        <td>standby</td>
        <td>/home/omm/ss_hatest/dn1</td>
    </tr>
    <tr>
        <td rowspan="2">容灾中心备端</td>
        <td>首备节点0</td>
        <td>Main Standby</td>
        <td>/home/omm/ss_hatest1/dn0</td>
    </tr>
    <tr>
        <td>从备节点1</td>
        <td>standby</td>
        <td>/home/omm/ss_hatest1/dn1</td>
    </tr>
</table>
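`Tips`: 上表四个节点的local role可以用下面的小脚本快速巡检(示例脚本,dn目录沿用上表,字段名与gs_ctl query输出一致,仅作演示):
```
for dn in /home/omm/ss_hatest/dn0 /home/omm/ss_hatest/dn1 /home/omm/ss_hatest1/dn0 /home/omm/ss_hatest1/dn1
do
    echo "=== ${dn} ==="
    # gs_ctl query输出的HA state中包含local_role和db_state字段
    gs_ctl query -D ${dn} | grep -E "local_role|db_state"
done
```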
+
+  双集群间failover即主集群故障,备集群升为主集群的过程,操作过程如下:
 (1) kill 主集群
@@ -95,18 +129,25 @@ gs_ctl stop -D /home/omm/ss_hatest1/dn1
 ```
 gs_guc set -Z datanode -D /home/omm/ss_hatest1/dn0 -c "cluster_run_mode=cluster_primary"
 ```
-(4) 以主集群模式重启备集群的节点
+
+(4) 切换远程同步复制主从端
+  如果是cm模拟部署方式(博客:资源池化同城dorado双集群部署(二)之cm模拟),不需要在管控平台切换同步复制对方向的操作。
+
+  如果是om部署方式(博客:资源池化同城dorado双集群部署(四)之om部署),则在拉起集群之前,需要在管控平台切换同步复制对方向的操作,操作如下:
+  登录到备存储管控平台,操作data protection -> luns -> remote replication pairs(远程复制对) -> 找到远程同步复制xlog对应的lun -> More -> Primary/Standby Switchover,操作完后,即可看到Local Resource从Secondary变成Primary。
+
+(5) 以主集群模式重启备集群的节点
 ```
 gs_ctl start -D /home/omm/ss_hatest1/dn0 -M primary
 gs_ctl start -D /home/omm/ss_hatest1/dn1
 ```
-(5) 查询新主集群
+
+(6) 查询新主集群
 ```
 gs_ctl query -D /home/omm/ss_hatest1/dn0
 ```

###   1.3.switchover
- 基于cm模拟部署方式,因此没有管控平台切换同步复制对方向的操作。
  双集群间switchover即主集群降为备集群,备集群升为主集群的过程,操作过程如下:
 (1) stop 主集群
@@ -123,7 +164,14 @@ gs_ctl stop -D /home/omm/ss_hatest1/dn1
 ```
 gs_guc set -Z datanode -D /home/omm/ss_hatest1/dn0 -c "cluster_run_mode=cluster_primary"
 ```
-(4) 以主集群模式重启备集群的节点
+
+(4) 切换远程同步复制主从端
+  如果是cm模拟部署方式(博客:资源池化同城dorado双集群部署(二)之cm模拟),不需要在管控平台切换同步复制对方向的操作。
+
+  如果是om部署方式(博客:资源池化同城dorado双集群部署(四)之om部署),则在拉起集群之前,需要在管控平台切换同步复制对方向的操作,操作如下:
+  登录到备存储管控平台,操作data protection -> luns -> remote replication pairs(远程复制对) -> 找到远程同步复制xlog对应的lun -> More -> Primary/Standby Switchover,操作完后,即可看到Local Resource从Secondary变成Primary。
+
+(5) 以主集群模式重启备集群的节点
 ```
 gs_ctl start -D /home/omm/ss_hatest1/dn0 -M primary
 gs_ctl start -D /home/omm/ss_hatest1/dn1
@@ -150,7 +198,7 @@ gs_ctl query -D /home/omm/ss_hatest/dn0

###   2.1.failover
- 基于cm模拟部署方式
+ 该章节介绍基于cm模拟部署方式的集群内切换,om部署方式的双集群和资源池化原有集群内切换方法一样。
  主集群内failover即主集群主节点降为备节点,备节点升为主节点的过程,操作过程如下:
  (1) 检查节点状态
@@ -203,7 +251,7 @@ No information
 备集群首备节点0
 gs_ctl query -D /home/omm/ss_hatest1/dn0
 HA state:
-    local_role : Standby
+    local_role : Main Standby
     static_connections : 1
     db_state : Normal
     detail_information : Normal
@@ -317,7 +365,7 @@ export CM_CONFIG_PATH=/opt/omm/openGauss-server/src/test/ss/cm_config.ini
 **说明**:模拟主节点0故障,REFORMER_ID模拟reform锁被备节点1抢到,即为将要做failover的节点,BITMAP_ONLINE模拟cm获取的在线节点是节点1(bitmap = 2 = 0b10)
-###   2.1.failover
+###   2.2.switchover
  基于cm模拟部署方式
  主集群内switchover即主集群主节点降为备节点,备节点升为主节点的过程,操作过程如下:
@@ -418,7 +466,7 @@ No information
 [zx@node1host54 pg_log]$ gs_ctl query -D /home/zx/ss_hatest1/dn0
 [2023-04-24 15:53:44.305][3878378][][gs_ctl]: gs_ctl query ,datadir is /home/zx/ss_hatest1/dn0
 HA state:
-    local_role : Standby
+    local_role : Main Standby
     static_connections : 2
     db_state : Normal
     detail_information : Normal
@@ -460,3 +508,4 @@ No information
 **说明**:switchover成功后,备集群的首备节点0与主集群新主节点1容灾关系自动连接成功,同步复制功能正常,备集群首备回放正常

 ***Notice:不推荐直接用于生产环境***
+***作者:Shirley_zhengx***
diff --git "a/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\270\200)\344\271\213dd\346\250\241\346\213\237.md" "b/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\270\200)\344\271\213dd\346\250\241\346\213\237.md" index d540bb323d5f621fe21d4ea2810aac94c27f1039..8fcd081ea77bae88bfff6102bbef90a1d227a35a 100644
--- "a/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\270\200)\344\271\213dd\346\250\241\346\213\237.md"
+++ "b/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\270\200)\344\271\213dd\346\250\241\346\213\237.md"
@@ -31,7 +31,11 @@ times = "9:30"
 # 资源池化支持同城dorado双集群部署(一)----dd模拟
-资源池化支持同城dorado双集群部署方式:dd模拟(手动部署+无cm)、cm模拟(手动部署dd模拟+有cm)、磁阵(手动部署)、集群管理工具部署
+资源池化支持同城dorado双集群部署方式:
+(一) dd模拟(手动部署 + 无cm)
+(二) cm模拟(手动部署dd模拟 + 有cm)
+(三) 磁阵搭建(手动部署)
+(四) 集群管理工具部署(om + cm)
 ## 1.环境描述
@@ -395,7 +399,7 @@ gs_ctl build -D /opt/omm/cluster/dn0 -b cross_cluster_full -g 0 --vgname=+data -
 参数解释:
+ -b cross_cluster_full 指定build类型为跨集群全量build
+ -g 0 指资源池化的节点0,表明是对节点0进行build
-+ -q
++ -q build成功后,不启动数据库
 (4)备集群从备节点1初始化
 @shirley_zhengx tell you in secret that is very important!@:备集群第一次初始化的时候,一定要初始化首备节点0并对首备做完build之后,再初始化备集群其它从备节点,即第(3)要在第(4)之前执行 @very very important!@:
@@ -479,7 +483,7 @@ No information
 gs_ctl query -D /opt/omm/cluster/dn0
 [2023-04-03 19:29:20.472][2720317][][gs_ctl]: gs_ctl query ,datadir is /opt/omm/cluster/dn0
 HA state:
-    local_role : Standby
+    local_role : Main Standby
     static_connections : 1
     db_state : Normal
     detail_information : Normal
@@ -549,3 +553,4 @@ select * from test01;
 ***Notice:不推荐直接用于生产环境***
+***作者:Shirley_zhengx***
diff --git "a/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\270\211)\344\271\213\347\243\201\351\230\265\346\220\255\345\273\272.md" "b/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\270\211)\344\271\213\347\243\201\351\230\265\346\220\255\345\273\272.md" new file mode 100644 index 0000000000000000000000000000000000000000..858dd3be4015eb7e418c9ef296d1695d291ad802
--- /dev/null
+++ "b/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\270\211)\344\271\213\347\243\201\351\230\265\346\220\255\345\273\272.md"
@@ -0,0 +1,371 @@
+++
title = "资源池化支持同城dorado双集群部署(三)----磁阵搭建"
date = "2023-05-18"
tags = ["资源池化支持同城dorado双集群部署(三)----磁阵搭建"]
archives = "2023-05-18"
author = "shirley_zhengx"
summary = "资源池化支持同城dorado双集群部署(三)----磁阵搭建"
img = "/zh/post/zhengxue/title/img1.png"
times = "9:30"
+++

- [1. 环境描述](#1.环境描述)
    - [1.1.组网方式](#1.1.组网方式)
    - [1.2.环境配置](#1.2.环境配置)
- [2. 环境搭建](#2.环境搭建)
    - [2.1.创建lun](#2.1.创建lun)
    - [2.2.下载源码编译](#2.2.下载源码编译)
    - [2.3.环境准备](#2.3.环境准备)
    - [2.4.dss配置-磁阵搭建](#2.4.dss配置-磁阵搭建)
    - [2.5.数据库部署](#2.5.数据库部署)

# 资源池化支持同城dorado双集群部署(三)----磁阵搭建

资源池化支持同城dorado双集群部署方式:
(一) dd模拟(手动部署 + 无cm)
(二) cm模拟(手动部署dd模拟 + 有cm)
(三) 磁阵搭建(手动部署)
(四) 集群管理工具部署(om + cm)


## 1.环境描述

 针对磁阵搭建(手动部署)作出指导,环境描述如下:

###   1.1.组网方式
<table>
    <tr>
        <td rowspan="2">生产中心主端</td>
        <td>业务计算节点0</td>
        <td rowspan="2">主存储节点</td>
        <td rowspan="2">Dorado</td>
    </tr>
    <tr>
        <td>业务计算节点1</td>
    </tr>
    <tr>
        <td rowspan="2">容灾中心备端</td>
        <td>业务计算节点0</td>
        <td rowspan="2">备存储节点</td>
        <td rowspan="2">Dorado</td>
    </tr>
    <tr>
        <td>业务计算节点1</td>
    </tr>
</table>
** 缺个图,后面补充哈!!!**
###   1.2.环境配置

 支持存储远程复制


## 2. 环境搭建

针对资源池化双集群部署之《资源池化磁阵搭建(手动部署) + dorado同步复制》作出指导,无cm部署,环境搭建如下:

###   2.1.创建lun

(1) 主存储创建lun组和lun
 主存储管控平台(DeviceManager)登录:https://主存储ip:8088
 在管控平台上创建lun组和lun,并映射到主机之后,在业务节点上查看挂好的lun
(1.1)创建主机
(1.2)创建lun组
(1.3)创建lun
如下图所示:
![](./images/磁阵搭建/创建lun.jpg)
步骤:Services -> LUN Groups -> LUN -> Create

(2) 主存储对应的业务计算节点上查看挂好的lun
```
rescan-scsi-bus.sh
upadmin show vlun
lsscsi -is
```

![](./images/磁阵搭建/lun查询.jpg)

lun说明: zx_mpp_doradoxlog 指dorado同步复制需要的lun(可以理解为共享盘,盘符/dev/sdh),zx_mpp_dssdata是资源池化主集群的数据盘(盘符/dev/sdn),zx_mpp_dssxlog0是资源池化主集群节点0对应的xlog盘(盘符/dev/sdo)

修改盘符属组
```
chown mpp:mpp /dev/sdh
chown mpp:mpp /dev/sdn
chown mpp:mpp /dev/sdo
```

###   2.2.下载源码编译
 如果用已打包好的openGauss-server包则跳过该步骤,直接进行2.3,如果修改代码开发中,则进行代码更新并编译,如下步骤:

(1) 下载三方库
 根据平台操作系统下载对应三方库,三方库下载地址:https://gitee.com/opengauss/openGauss-server 主页上README.md中查找需要的三方库binarylibs

获取master分支openEuler_x86系统对应的三方库
```
wget https://opengauss.obs.cn-south-1.myhuaweicloud.com/latest/binarylibs/openGauss-third_party_binarylibs_openEuler_x86_64.tar.gz
```
(2) 下载cbb并编译
```
git clone https://gitee.com/opengauss/CBB.git -b master cbb
cd cbb/build/linux/opengauss
sh build.sh -3rd $binarylibsDir -m Debug
```
 编译成功会自动将二进制放入三方库openGauss-third_party_binarylibs_openEuler_x86_64/kernel/component目录下
(3) 下载dss并编译
```
git clone https://gitee.com/opengauss/DSS.git -b master dss
cd dss/build/linux/opengauss
sh build.sh -m Debug -3rd $binarylibsDir
```

(4) 下载dms并编译
```
git clone https://gitee.com/opengauss/DMS.git -b master dms
cd dms/build/linux/opengauss
sh build.sh -m Debug -3rd $binarylibsDir
```

(5) 下载openGauss-server并编译
 编译过程需要cbb、dss、dms的二进制,会从openGauss-third_party_binarylibs_openEuler_x86_64/kernel/component中获取
```
git clone https://gitee.com/opengauss/openGauss-server.git -b master openGauss-server
cd openGauss-server
sh build.sh -3rd $binarylibsDir -m Debug
```
 编译完之后的二进制存放在openGauss-server/mppdb_temp_install/目录下


###   2.3.环境准备
由于机器资源不足,这里以在一个业务计算服务器上部署一主为例
(1) 二进制准备
创建一个自己用户的目录,例如/opt/mpp,将已编好的openGauss-server/mppdb_temp_install/拷贝放至/opt/mpp目录下,即/opt/mpp/mppdb_temp_install
(2) 提权
sudo setcap CAP_SYS_RAWIO+ep /opt/mpp/mppdb_temp_install/bin/perctrl
(3) 主集群主节点对应的环境变量ss_env0

```
export HOME=/opt/mpp
export GAUSSHOME=${HOME}/mppdb_temp_install/
export GAUSSLOG=${HOME}/cluster/gausslog0
export SS_DATA=${HOME}/cluster/ss_data
export DSS_HOME=${HOME}/cluster/ss_data/dss_home0
export LD_LIBRARY_PATH=$GAUSSHOME/lib:$LD_LIBRARY_PATH
export PATH=$GAUSSHOME/bin:$PATH
```
`Tips`: 环境变量里面一定要写export,即使`echo $GCC_PATH`存在,也要写export才能真正导入路径

参数说明:
HOME 为用户自己创建的工作目录;
GAUSSHOME 为编译完成的目标文件路径,包含openGauss的bin、lib等;
GAUSSLOG 为运行时的日志目录,包含dss、dms等日志;
SS_DATA 为共享存储的根目录,即dss相关配置的根目录;
DSS_HOME 为dssserver配置对应的目录


###   2.4.dss配置-磁阵搭建
配置脚本dss_autoscript.sh如下:

dss_autoscript.sh
```
#!/bin/bash

source /opt/mpp/ss_env0

DSS_HOME_ONE=${SS_DATA}/dss_home0
# 如果部署一个节点,则删除DSS_HOME_TWO
DSS_HOME_TWO=${SS_DATA}/dss_home1

function clean_dir()
{
    ps ux | grep 'dssserver -D /opt/mpp/cluster/ss_data/dss_home0' | grep -v grep | awk '{print $2}' | xargs kill -9
    ps ux | grep 'dssserver -D /opt/mpp/cluster/ss_data/dss_home1' | grep -v grep | awk '{print $2}' | xargs kill -9
    rm -rf ${SS_DATA}
}

function create_one_device()
{
    mkdir -p ${SS_DATA}
    mkdir -p ${DSS_HOME_ONE}
    mkdir -p ${DSS_HOME_ONE}/cfg
    mkdir -p ${DSS_HOME_ONE}/log
    echo "data:/dev/sdn" > ${DSS_HOME_ONE}/cfg/dss_vg_conf.ini
    echo "log0:/dev/sdo" >> 
${DSS_HOME_ONE}/cfg/dss_vg_conf.ini + # 如果部署一个节点,则删除log1这一行 + echo "log1:/dev/sdz" >> ${DSS_HOME_ONE}/cfg/dss_vg_conf.ini + echo "INST_ID = 0" > ${DSS_HOME_ONE}/cfg/dss_inst.ini + echo "_LOG_BACKUP_FILE_COUNT = 128" >> ${DSS_HOME_ONE}/cfg/dss_inst.ini + echo "_LOG_MAX_FILE_SIZE = 20M" >> ${DSS_HOME_ONE}/cfg/dss_inst.ini + echo "LSNR_PATH = ${DSS_HOME_ONE}" >> ${DSS_HOME_ONE}/cfg/dss_inst.ini + echo "STORAGE_MODE = RAID" >> ${DSS_HOME_ONE}/cfg/dss_inst.ini + echo "_SHM_KEY=42" >> ${DSS_HOME_ONE}/cfg/dss_inst.ini + echo "_log_LEVEL = 255" >> ${DSS_HOME_ONE}/cfg/dss_inst.ini +} + +# 如果部署一个节点,则不需要执行create_two_device +function create_two_device() +{ + mkdir -p ${DSS_HOME_TWO} + mkdir -p ${DSS_HOME_TWO}/cfg + mkdir -p ${DSS_HOME_TWO}/log + echo "data:/dev/sdn" > ${DSS_HOME_TWO}/cfg/dss_vg_conf.ini + echo "log0:/dev/sdo" >> ${DSS_HOME_TWO}/cfg/dss_vg_conf.ini + echo "log1:/dev/sdz" >> ${DSS_HOME_TWO}/cfg/dss_vg_conf.ini + echo "INST_ID = 1" > ${DSS_HOME_TWO}/cfg/dss_inst.ini + echo "_LOG_BACKUP_FILE_COUNT = 128" >> ${DSS_HOME_TWO}/cfg/dss_inst.ini + echo "_LOG_MAX_FILE_SIZE = 20M" >> ${DSS_HOME_TWO}/cfg/dss_inst.ini + echo "LSNR_PATH = ${DSS_HOME_TWO}" >> ${DSS_HOME_TWO}/cfg/dss_inst.ini + echo "STORAGE_MODE = RAID" >> ${DSS_HOME_TWO}/cfg/dss_inst.ini + echo "_SHM_KEY=42" >> ${DSS_HOME_TWO}/cfg/dss_inst.ini + echo "_log_LEVEL = 255" >> ${DSS_HOME_TWO}/cfg/dss_inst.ini +} + +# 无论部署几个节点,都只在第一个节点执行一次create_vg +function create_vg() +{ + dd if=/dev/zero bs=2048 count=100000 of=/dev/sdn + dd if=/dev/zero bs=2048 count=100000 of=/dev/sdo + # 如果部署一个节点,则删除log1对应的盘符/dev/sdz这一行 + dd if=/dev/zero bs=2048 count=100000 of=/dev/sdz + ${GAUSSHOME}/bin/dsscmd cv -g data -v /dev/sdn -s 2048 -D ${DSS_HOME_ONE} + ${GAUSSHOME}/bin/dsscmd cv -g log0 -v /dev/sdo -s 65536 -D ${DSS_HOME_ONE} + # 如果部署一个节点,则删除log1这一行 + ${GAUSSHOME}/bin/dsscmd cv -g log1 -v /dev/sdz -s 65536 -D ${DSS_HOME_ONE} +} + +function start_dssserver() +{ + #dssserver -D /opt/mpp/cluster/ss_data/dss_home0/ & + dssserver -D ${DSS_HOME_ONE} & + if [ $? -ne 0 ]; then + echo "dssserver startup failed." 
+ exit 1 + fi + sleep 3 +} + +function gs_initdb_dn() +{ + rm -rf /opt/mpp/cluster/dn0/* + gs_initdb -D /opt/mpp/cluster/dn0 --nodename=node0 -U mpp -w Huawei@123 --vgname=+data,+log0 --enable-dss --dms_url="0:172.16.108.23:4411" -I 0 --socketpath='UDS:/opt/mpp/cluster/ss_data/dss_home0/.dss_unix_d_socket' -d -n -g /dev/sdh +} + + +function assign_parameter() +{ + gs_guc set -Z datanode -D /opt/mpp/cluster/dn0 -c "port = 44100" + gs_guc set -Z datanode -D /opt/mpp/cluster/dn0 -c "listen_addresses = '*'" + gs_guc set -Z datanode -D /opt/mpp/cluster/dn0 -c "ss_enable_reform = off" + gs_guc set -Z datanode -D /opt/mpp/cluster/dn0 -c "xlog_file_path = '/dev/sdh'" + gs_guc set -Z datanode -D /opt/mpp/cluster/dn0 -c "xlog_lock_file_path = '/opt/mpp/cluster/shared_lock_primary.lock'" + gs_guc set -Z datanode -D /opt/mpp/cluster/dn0 -c "application_name = 'dn_master_0'" + gs_guc set -Z datanode -D /opt/mpp/cluster/dn0 -c "cross_cluster_replconninfo1='localhost=10.10.10.10 localport=25400 remotehost=20.20.20.10 remoteport=25400'" + gs_guc set -Z datanode -D /opt/mpp/cluster/dn0 -c "cross_cluster_replconninfo2='localhost=10.10.10.10 localport=25400 remotehost=20.20.20.20 remoteport=25400'" + gs_guc set -Z datanode -D /opt/mpp/cluster/dn0 -c "cluster_run_mode = 'cluster_primary'" + gs_guc set -Z datanode -D /opt/mpp/cluster/dn0 -c "ha_module_debug = off" + gs_guc set -Z datanode -D /opt/mpp/cluster/dn0 -c "ss_log_level = 255" + gs_guc set -Z datanode -D /opt/mpp/cluster/dn0 -c "ss_log_backup_file_count = 100" + gs_guc set -Z datanode -D /opt/mpp/cluster/dn0 -c "ss_log_max_file_size = 1GB" + gs_guc set -Z datanode -D /opt/mpp/cluster/dn0 -h "host all all 172.16.108.54/32 trust" + gs_guc set -Z datanode -D /opt/mpp/cluster/dn0 -h "host all all 172.16.108.55/32 trust" +} + + +if [ "$1" == "first_create" ]; then + clean_dir + create_one_device + # 如果部署一个节点,则不需要执行create_two_device + create_two_device + create_vg + start_dssserver + #gs_initdb_dn + #assign_parameter +else + echo "you can create vg" + create_vg +fi + +``` + @Notice Thing!@:主集群都执行dss_autoscript.sh脚本配置dss, 用户需要自行修改脚本中的/opt/mpp/ss_env0环境变量、DSS_HOME_ONE 和 DSS_HOME_TWO目录,将其配置成自己的目录。还需要修改create_one_device和create_two_device中data和xlog对应的盘符 + +ps x 查看dss进程,如下 +``` +[mpp@nodename dn0]$ ps x + PID TTY STAT TIME COMMAND + 69160 pts/2 S 0:00 -bash + 80294 pts/2 Sl 5:56 dssserver -D /opt/mpp/cluster/ss_data/dss_home0 + 80309 pts/2 S 0:00 perctrl 8 11 + 345361 pts/2 R+ 0:00 ps x +``` + +###   2.5 数据库部署 +####    2.5.1 主集群(生产中心) + (1) 主集群主节点0初始化 + @Precondition!@:节点0对应的dssserver必须提前拉起,即dsserver进程存在 + +使用dss的data数据卷、log0日志卷、dorado共享卷/dev/sdh 初始化主集群节点0 + +``` +gs_initdb -D /opt/mpp/cluster/dn0 --nodename=node0 -U mpp -w Huawei@123 --vgname=+data,+log0 --enable-dss --dms_url="0:10.10.10.10:4411,1:10.10.10.10:4412" -I 0 --socketpath='UDS:/opt/mpp/cluster/ss_data/dss_home0/.dss_unix_d_socket' -d -n -g /dev/sdh +``` + 参数解释: ++ --vgname 卷名,做了xlog分盘,数据卷是+data,节点0对应的日志卷是+log0 ++ --dms_url 0表示0节点,10:10:10:10指节点ip,4411是端口,这是一组参数,表示0节点的dms节点信息,如果是两节点,以逗号为分割,后面是节点1的dms节点信息,如果没有cm部署,两个节点ip可以不一样也可以一样,即可以在同一个机器上部署两个数据库,也可在不同机器上部署两个数据库,如果有cm部署,两个节点ip必须不一样。 ++ -g 指dorado同步复制共享xlog盘 + +(2)配置主集群主节点0 + postgresql.conf文件 +``` +port = 44100 +listen_addresses = '*' +ss_enable_reform = off +ss_log_level = 255 +ss_log_backup_file_count = 100 +ss_log_max_file_size = 1GB +xlog_lock_file_path = '/opt/mpp/cluster/dn0/redolog.lock' +``` + 参数解释: ++ ss_enable_reform dms reform功能,没有cm的情况下,设置该参数为off + + +(3)主集群备节点1初始化 +``` +gs_initdb -D /opt/mpp/cluster/dn1 --nodename=node1 -U mpp -w 
Huawei@123 --vgname=+data,+log1 --enable-dss --dms_url="0:10.10.10.10:4411,1:10.10.10.10:4412" -I 1 --socketpath='UDS:/opt/mpp/cluster/ss_data/dss_home1/.dss_unix_d_socket'
```

主集群备节点1配置参数
port = 48100
listen_addresses = '*'
ss_enable_reform = off
ss_log_level = 255
ss_log_backup_file_count = 100
ss_log_max_file_size = 1GB
xlog_lock_file_path = '/opt/mpp/cluster/dn0/redolog.lock'

(4)主集群启动
```
主节点0启动
gs_ctl start -D /opt/mpp/cluster/dn0 -M primary


备节点1启动
gs_ctl start -D /opt/mpp/cluster/dn1
```
 @important point@: 没有部署cm的情况下,以-M primary启动主集群主节点

ps x 查看进程,如下所示:
```
[mpp@nodename dn0]$ ps x
    PID TTY      STAT   TIME COMMAND
  69160 pts/2    S      0:00 -bash
  80294 pts/2    Sl     5:56 dssserver -D /opt/mpp/cluster/ss_data/dss_home0
  80309 pts/2    S      0:00 perctrl 8 11
 141835 ?        Ssl   18:48 /opt/mpp/mppdb_temp_install/bin/gaussdb -D /opt/mpp/cluster/dn0 -M primary
 345361 pts/2    R+     0:00 ps x
```


***Notice:不推荐直接用于生产环境***
***作者:Shirley_zhengx***
diff --git "a/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\272\214)\344\271\213cm\346\250\241\346\213\237.md" "b/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\272\214)\344\271\213cm\346\250\241\346\213\237.md" index a16c0d98a730b3c5d706cbd6fd617d11692fa146..8383d870ac7f02ee26e950beac2f291e0d5d0f00 100644
--- "a/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\272\214)\344\271\213cm\346\250\241\346\213\237.md"
+++ "b/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\344\272\214)\344\271\213cm\346\250\241\346\213\237.md"
@@ -31,7 +31,11 @@ times = "9:30"
 # 资源池化支持同城dorado双集群部署(二)----cm模拟
-资源池化支持同城dorado双集群部署方式:dd模拟(手动部署+无cm)、cm模拟(手动部署dd模拟+有cm)、磁阵(手动部署)、集群管理工具部署
+资源池化支持同城dorado双集群部署方式:
+(一) dd模拟(手动部署 + 无cm)
+(二) cm模拟(手动部署dd模拟 + 有cm)
+(三) 磁阵搭建(手动部署)
+(四) 集群管理工具部署(om + cm)
 ## 1.环境描述
@@ -186,8 +190,7 @@ sh build.sh -3rd $binarylibsDir -m Debug
 环境变量
 ```
-export HOME=/opt/omm
-export GAUSSHOME=${HOME}/openGauss-server/mppdb_temp_install/
+export GAUSSHOME=/openGauss-server/mppdb_temp_install/
 export LD_LIBRARY_PATH=$GAUSSHOME/lib:$LD_LIBRARY_PATH
 export PATH=$GAUSSHOME/bin:$PATH
 export DSS_HOME=/home/omm/ss_hatest/dss_home0
 export CM_CONFIG_PATH=/opt/omm/openGauss-server/src/test/ss/cm_config.ini
 ```
 `Tips`: 环境变量里面一定要写export,即使`echo $GCC_PATH`存在,也要写export才能真正导入路径
 参数说明:
-HOME 为用户自己创建的工作目录;
 GAUSSHOME 为编译完成的目标文件路径,包含openGauss的bin、lib等;
 CM_CONFIG_PATH 用于主集群cm模拟部署下的集群内节点切换
+目录/home/omm中omm指自己创建的用户
 (2) 备集群环境变量ss_env1
 ```
-export HOME=/opt/omm
-export GAUSSHOME=${HOME}/openGauss-server/mppdb_temp_install/
+export GAUSSHOME=/openGauss-server/mppdb_temp_install/
 export LD_LIBRARY_PATH=$GAUSSHOME/lib:$LD_LIBRARY_PATH
 export PATH=$GAUSSHOME/bin:$PATH
-export DSS_HOME=/home/omm/ss_hatest/dss_home0
+export DSS_HOME=/home/omm/ss_hatest1/dss_home0
 export CM_CONFIG_PATH=/opt/omm/openGauss-server/src/test/ss/cm_config_standby.ini
 ```
@@ -220,6 +222,19 @@ export CM_CONFIG_PATH=/opt/omm/openGauss-server/src/test/ss/cm_config_standby.ini
 sh ha_test.sh dual_cluster
 ```
+dssserver有可能存在端口冲突,执行sh ha_test.sh dual_cluster之前修改conf_start_dss_inst.sh脚本中dss端口
+```
+DSS_PORT_BASE=30000
+```
+
+数据库有可能存在端口冲突,执行sh ha_test.sh dual_cluster之前修改ha_test.sh脚本中数据库端口
+```
+PGPORT=(6600 6700)
+STANDBY_PGPORT=(9600 9700)
+
+nodedata_cfg="0:127.0.0.1:6611,1:127.0.0.1:6711"
+standby_nodedata_cfg="0:127.0.0.1:9611,1:127.0.0.1:9711"
+```
  ha_test.sh脚本适配了双集群模拟, 执行的时候带上dual_cluster就是双集群,不带就是单集群。脚本会自动将数据库拉起,执行完该脚本后,就相当于部署了2套独立的资源池化
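脚本执行完,可以先确认两套集群的dssserver和gaussdb进程都已拉起(示例命令,沿用上文ps用法):
```
ps ux | grep -E "dssserver|gaussdb" | grep -v grep
```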
 (2) 集群状态查询
@@ -289,11 +304,11 @@ No information
  postgresql.conf文件
 ```
 port = 6600
-xlog_file_path = '/home/zx/ss_hatest/dorado_shared_disk'
-xlog_lock_file_path = '/home/zx/ss_hatest/shared_lock_primary'
+xlog_file_path = '/home/omm/ss_hatest/dorado_shared_disk'
+xlog_lock_file_path = '/home/omm/ss_hatest/shared_lock_primary'
 application_name = 'dn_master_0'
 cross_cluster_replconninfo1='localhost=127.0.0.1 localport=6600 remotehost=127.0.0.1 remoteport=9600'
-cross_cluster_replconninfo1='localhost=127.0.0.1 localport=6600 remotehost=127.0.0.1 remoteport=9700'
+cross_cluster_replconninfo2='localhost=127.0.0.1 localport=6600 remotehost=127.0.0.1 remoteport=9700'
 cluster_run_mode = 'cluster_primary'
 ha_module_debug = off
 ss_log_level = 255
@@ -313,23 +328,45 @@ host all all 10.10.10.10/32 sha256
 host all all 10.10.10.20/32 sha256
 ```
-(2) 以primary模式重启主集群主节点0
+(2) 配置主集群备节点1的dorado容灾参数
+ postgresql.conf文件
+```
+port = 6700
+xlog_file_path = '/home/omm/ss_hatest/dorado_shared_disk'
+xlog_lock_file_path = '/home/omm/ss_hatest/shared_lock_primary'
+application_name = 'dn_master_1'
+cross_cluster_replconninfo1='localhost=127.0.0.1 localport=6700 remotehost=127.0.0.1 remoteport=9600'
+cross_cluster_replconninfo2='localhost=127.0.0.1 localport=6700 remotehost=127.0.0.1 remoteport=9700'
+cluster_run_mode = 'cluster_primary'
+ha_module_debug = off
+ss_log_level = 255
+ss_log_backup_file_count = 100
+ss_log_max_file_size = 1GB
+```

+(3) 以primary模式重启主集群主节点0
 ```
 gs_ctl start -D /home/omm/ss_hatest/dn0 -M primary
 ```
 执行build前一定要给主集群主节点0配置容灾参数并以primary模式重启主集群主节点0
+如果是cm模拟方式,可以不用指定-M参数,reform会自动识别模式
+(4) 启动主集群备节点1
+
+```
+gs_ctl start -D /home/omm/ss_hatest/dn1
+```
#####    2.5.1.2 备集群(容灾中心)
 (1) 配置备集群首备节点0的容灾参数
  postgresql.conf文件
 ```
 port = 9600
-xlog_file_path = '/home/zx/ss_hatest/dorado_shared_disk'
-xlog_lock_file_path = '/home/zx/ss_hatest/shared_lock_standby'
+xlog_file_path = '/home/omm/ss_hatest/dorado_shared_disk'
+xlog_lock_file_path = '/home/omm/ss_hatest/shared_lock_standby'
 application_name = 'dn_standby_0'
 cross_cluster_replconninfo1='localhost=127.0.0.1 localport=9600 remotehost=127.0.0.1 remoteport=6600'
-cross_cluster_replconninfo1='localhost=127.0.0.1 localport=9600 remotehost=127.0.0.1 remoteport=6700'
+cross_cluster_replconninfo2='localhost=127.0.0.1 localport=9600 remotehost=127.0.0.1 remoteport=6700'
 cluster_run_mode = 'cluster_standby'
 ha_module_debug = off
 ss_log_level = 255
@@ -353,10 +390,33 @@ host all all 10.10.10.20/32 sha256
 gs_ctl build -D /home/zx/ss_hatest1/dn0 -b cross_cluster_full -g 0 --vgname=+data --enable-dss --socketpath='UDS:/home/zx/ss_hatest1/dss_home0/.dss_unix_d_socket' -q
 ```
-(3) 以standby模式重启备集群首备节点0
+(3) 配置备集群从备节点1的容灾参数
+ postgresql.conf文件
+```
+port = 9700
+xlog_file_path = '/home/omm/ss_hatest/dorado_shared_disk'
+xlog_lock_file_path = '/home/omm/ss_hatest/shared_lock_standby'
+application_name = 'dn_standby_1'
+cross_cluster_replconninfo1='localhost=127.0.0.1 localport=9700 remotehost=127.0.0.1 remoteport=6600'
+cross_cluster_replconninfo2='localhost=127.0.0.1 localport=9700 remotehost=127.0.0.1 remoteport=6700'
+cluster_run_mode = 'cluster_standby'
+ha_module_debug = off
+ss_log_level = 255
+ss_log_backup_file_count = 100
+ss_log_max_file_size = 1GB
+```
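配置完可以顺手确认参数已写入从备节点1的配置文件(示例,dn目录沿用上文):
```
grep -E "cross_cluster_replconninfo|cluster_run_mode|xlog_file_path" /home/omm/ss_hatest1/dn1/postgresql.conf
```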
+
+(4) 以standby模式重启备集群首备节点0
 ```
 gs_ctl start -D /home/omm/ss_hatest1/dn0 -M standby
 ```
+如果是cm模拟方式,可以不用指定-M参数,reform会自动识别模式
+
+(5) 以standby模式重启备集群从备节点1
+```
+gs_ctl start -D /home/omm/ss_hatest1/dn1
+```
+
 ####   2.5.2 自动化容灾搭建
 同2.5.1 手动容灾搭建效果一致,只是用shell脚本自动化执行
 ```
@@ -372,7 +432,7 @@ sh standby_full_build_reconnect.sh
 [2023-04-18 09:38:34.397][1498175][][gs_ctl]: gs_ctl query ,datadir is /home/omm/ss_hatest/dn0
 HA state:
     local_role : Primary
-    static_connections : 1
+    static_connections : 2
     db_state : Normal
     detail_information : Normal
@@ -422,8 +482,8 @@ No information
 [omm@nodename pg_log]$ gs_ctl query -D /home/omm/ss_hatest1/dn0
 [2023-04-18 11:33:09.288][2760315][][gs_ctl]: gs_ctl query ,datadir is /home/omm/ss_hatest1/dn0
 HA state:
-    local_role : Standby
-    static_connections : 1
+    local_role : Main Standby
+    static_connections : 2
     db_state : Normal
     detail_information : Normal
@@ -435,14 +495,14 @@ No information
     peer_role : Primary
     peer_state : Normal
     state : Normal
-    sender_sent_location : 2/A458
-    sender_write_location : 2/A458
-    sender_flush_location : 2/A458
-    sender_replay_location : 2/A458
-    receiver_received_location : 2/A458
-    receiver_write_location : 2/A458
-    receiver_flush_location : 2/A458
-    receiver_replay_location : 2/A458
+    sender_sent_location : 2/5C8
+    sender_write_location : 2/5C8
+    sender_flush_location : 2/5C8
+    sender_replay_location : 2/5C8
+    receiver_received_location : 2/5C8
+    receiver_write_location : 2/5C8
+    receiver_flush_location : 2/5C8
+    receiver_replay_location : 2/5C8
     sync_percent : 100%
     channel : 127.0.0.1:41952<--127.0.0.1:6600
 ```
@@ -471,7 +531,7 @@ gs_ctl query -D /opt/omm/cluster/dn0
 [2023-04-03 19:29:20.472][1324519][][gs_ctl]: gs_ctl query ,datadir is /opt/omm/cluster/dn0
 HA state:
     local_role : Primary
-    static_connections : 1
+    static_connections : 2
     db_state : Normal
     detail_information : Normal
@@ -520,8 +580,8 @@ No information
 gs_ctl query -D /opt/omm/cluster/dn0
 [2023-04-03 19:29:20.472][2720317][][gs_ctl]: gs_ctl query ,datadir is /opt/omm/cluster/dn0
 HA state:
-    local_role : Standby
-    static_connections : 1
+    local_role : Main Standby
+    static_connections : 2
     db_state : Normal
     detail_information : Normal
@@ -595,5 +655,6 @@ select * from test01;
 pg_controldata -I 0 --enable-dss --socketpath=UDS:$DSS_HOME/.dss_unix_d_socket +data
 ```
-(2)
+
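容灾搭建完成后,可以用上文出现的test01表快速验证主备集群间同步复制是否生效(示例,端口沿用上文的6600/9600):
```
# 主集群主节点写入
gsql -d postgres -p 6600 -c "create table test01(id int); insert into test01 values(1);"
# 备集群首备节点查询,能查到同样的数据说明同步复制和首备回放正常
gsql -d postgres -p 9600 -c "select * from test01;"
```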
 ***Notice:不推荐直接用于生产环境***
+***作者:Shirley_zhengx***
diff --git "a/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\345\233\233)\344\271\213om\351\203\250\347\275\262.md" "b/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\345\233\233)\344\271\213om\351\203\250\347\275\262.md" new file mode 100644 index 0000000000000000000000000000000000000000..008d858778e6560964904792c9d9f9266f387b62
--- /dev/null
+++ "b/content/zh/post/zhengxue/\350\265\204\346\272\220\346\261\240\345\214\226\346\224\257\346\214\201\345\220\214\345\237\216dorado\345\217\214\351\233\206\347\276\244\351\203\250\347\275\262(\345\233\233)\344\271\213om\351\203\250\347\275\262.md"
@@ -0,0 +1,638 @@
+++
title = "资源池化支持同城dorado双集群部署(四)----集群管理工具部署"
date = "2023-05-24"
tags = ["资源池化支持同城dorado双集群部署(四)----集群管理工具部署"]
archives = "2023-05-24"
author = "shirley_zhengx"
summary = "资源池化支持同城dorado双集群部署(四)----集群管理工具部署"
img = "/zh/post/zhengxue/title/img1.png"
times = "9:30"
+++

- [1. 环境描述](#1.环境描述)
    - [1.1.组网方式](#1.1.组网方式)
    - [1.2.环境配置](#1.2.环境配置)
- [2. 环境搭建](#2.环境搭建)
    - [2.1.创建lun](#2.1.创建lun)
    - [2.2.下载源码编译](#2.2.下载源码编译)
    - [2.3.环境准备](#2.3.环境准备)
    - [2.4.om部署双集群](#2.4.om部署双集群)

# 资源池化支持同城dorado双集群部署(四)----集群管理工具部署

资源池化支持同城dorado双集群部署方式:
(一) dd模拟(手动部署 + 无cm)
(二) cm模拟(手动部署dd模拟 + 有cm)
(三) 磁阵搭建(手动部署)
(四) 集群管理工具部署(om + cm)


## 1.环境描述

 针对集群管理工具部署(om + cm)作出指导,环境描述如下:

###   1.1.组网方式
<table>
    <tr>
        <td rowspan="2">生产中心主端</td>
        <td>业务计算节点0</td>
        <td rowspan="2">主存储</td>
        <td rowspan="2">Dorado</td>
    </tr>
    <tr>
        <td>业务计算节点1</td>
    </tr>
    <tr>
        <td rowspan="2">容灾中心备端</td>
        <td>业务计算节点0</td>
        <td rowspan="2">备存储</td>
        <td rowspan="2">Dorado</td>
    </tr>
    <tr>
        <td>业务计算节点1</td>
    </tr>
</table>
举例说明:
<table>
    <tr>
        <td rowspan="2">生产中心主端</td>
        <td>业务计算节点0:10.10.10.10</td>
        <td rowspan="2">主存储:144.144.144.44</td>
        <td rowspan="2">Dorado</td>
    </tr>
    <tr>
        <td>业务计算节点1:10.10.10.20</td>
    </tr>
    <tr>
        <td rowspan="2">容灾中心备端</td>
        <td>业务计算节点0:20.20.20.10</td>
        <td rowspan="2">备存储:144.144.144.45</td>
        <td rowspan="2">Dorado</td>
    </tr>
    <tr>
        <td>业务计算节点1:20.20.20.20</td>
    </tr>
</table>
** 缺个图,后面补充哈!!!**
###   1.2.环境配置

 支持存储远程复制


## 2. 环境搭建

针对资源池化双集群部署之《资源池化om集群管理工具部署 + dorado同步复制》作出指导,环境搭建如下:

###   2.1.创建lun

(1) 主存储创建lun
 主存储管控平台(DeviceManager)登录:https://主存储ip:8088,例如对应1.1章节表格就是:https://144.144.144.44:8088
 在管控平台上创建lun组和lun,并映射到主机之后,在业务节点上查看挂好的lun
+ (1.1)创建主机
在存储管控平台上,业务计算节点都称之为主机
+ (1.2)创建lun组
由于lun需要映射,当有多个业务计算节点(主机)时,可以通过创建lun组,将主机加入到lun组中,每次创建lun的时候,只需要将lun加入到lun组中,就会自动映射到主机组中包含的所有业务计算节点ip(主机)
+ (1.3)创建lun
如下图所示:
![](./images/磁阵搭建/创建lun.jpg)
步骤:Services -> LUN Groups -> LUN -> Create

映射到业务节点的方式一:
在图中Map to Host 选择业务节点ip (例如1.1章节表格中主存储对应的业务计算节点0的ip:10.10.10.10)

映射到业务节点的方式二:
在图中Add to LUN Group 选择(1.2)创建的lun组,lun组有对应的主机组,会自动映射到主机组中包含的所有业务计算节点ip(例如1.1章节表格中主存储对应的业务计算节点0的ip:10.10.10.10和业务计算节点1的ip:10.10.10.20)

(2) 主存储对应的业务计算节点上查看挂好的lun
```
rescan-scsi-bus.sh
upadmin show vlun
lsscsi -is
```

![](./images/om部署/lun查询.jpg)

lun说明: 这里显示的是om部署资源池化需要用的lun,zx_mpp_dssdata是资源池化主集群的数据盘(盘符/dev/sdab),zx_mpp_dssxlog0是资源池化主集群节点0对应的xlog盘(盘符/dev/sdac),zx_mpp_dssxlog1是资源池化主集群节点1对应的xlog盘(盘符/dev/sdad),zx_mpp_cm0是资源池化主集群cm对应的votingDiskPath盘(盘符/dev/sdae),zx_mpp_cm1是资源池化主集群cm对应的shareDiskDir盘(盘符/dev/sdaf)

修改盘符属组
```
chown mpp:mpp /dev/sdab
chown mpp:mpp /dev/sdac
chown mpp:mpp /dev/sdad
chown mpp:mpp /dev/sdae
chown mpp:mpp /dev/sdaf
```
`Tips`: 主机组包含的主机节点都需要修改属组,全部修改

(3) 主存储创建dorado同步复制的lun
主存储上创建dorado同步复制的lun,步骤同(1.2)一样,lun名称是zx_mpp_doradoxlog,盘符/dev/sdm
```
执行upadmin show vlun查看:
Vlun ID   Disk   Name
12        sdm    zx_mpp_doradoxlog
```

(4) 在备存储上同样操作执行上面步骤(1)和(2),不操作步骤(3)
 备存储管控平台(DeviceManager)登录:https://备存储ip:8088,例如对应1.1章节表格就是:https://144.144.144.45:8088
执行步骤(1)和(2),备存储上不用操作步骤(3),可以在建立远程同步复制的时候自动创建,在《2.4.3 建立容灾关系》章节中介绍。

###   2.2.下载源码编译
 如果用已打包好的openGauss-server包则跳过该步骤,直接进行2.3,如果修改代码开发中,则进行代码更新并编译,如下步骤:

(1) 下载三方库
 根据平台操作系统下载对应三方库,三方库下载地址:https://gitee.com/opengauss/openGauss-server 主页上README.md中查找需要的三方库binarylibs

获取master分支openEuler_x86系统对应的三方库
```
wget https://opengauss.obs.cn-south-1.myhuaweicloud.com/latest/binarylibs/openGauss-third_party_binarylibs_openEuler_x86_64.tar.gz
```
(2) 下载cbb并编译
```
git clone https://gitee.com/opengauss/CBB.git -b master cbb
cd cbb/build/linux/opengauss
sh build.sh -3rd $binarylibsDir -m Debug
```
 编译成功会自动将二进制放入三方库openGauss-third_party_binarylibs_openEuler_x86_64/kernel/component目录下
(3) 下载dss并编译
```
git clone https://gitee.com/opengauss/DSS.git -b master dss
cd dss/build/linux/opengauss
sh build.sh -3rd $binarylibsDir -m Debug
```

(4) 下载dms并编译
```
git clone https://gitee.com/opengauss/DMS.git -b master dms
cd dms/build/linux/opengauss
sh build.sh -3rd $binarylibsDir -m Debug
```

(5) 下载openGauss-server并编译
 编译过程需要cbb、dss、dms的二进制,会从openGauss-third_party_binarylibs_openEuler_x86_64/kernel/component中获取
```
git clone https://gitee.com/opengauss/openGauss-server.git -b master openGauss-server
cd openGauss-server
sh build.sh -3rd $binarylibsDir -m Debug
```
 编译完之后的二进制存放在openGauss-server/mppdb_temp_install/目录下
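编译完成后,可以先粗略确认关键二进制已经生成(示例命令,目录沿用上文编译产物路径):
```
ls openGauss-server/mppdb_temp_install/bin | grep -E "gaussdb|gs_ctl|gs_guc|dsscmd|dssserver"
```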
###   2.3.环境准备
由于机器资源不足,这里以部署一主一备为例
(1) 二进制包准备
创建一个自己用户的目录,例如/opt/mpp,将已编好的包或者发布包拷贝放至/opt/mpp/gauss_package目录下,包含om、cm、server的二进制
如下所示:
```
[root@node1 gauss_package]# ll
总用量 413236
-rw-r--r-- 1 mpp mpp   6980293 5月  23 15:03 openGauss-5.1.0-openEuler-64bit-cm.tar.gz
-rw-r--r-- 1 mpp mpp  18591448 5月  23 15:25 openGauss-5.1.0-openEuler-64bit-Libpq.tar.gz
-rw-r--r-- 1 mpp mpp        65 5月  23 15:04 openGauss-5.1.0-openEuler-64bit-om.sha256
-rw-r--r-- 1 mpp mpp  11055254 5月  23 15:04 openGauss-5.1.0-openEuler-64bit-om.tar.gz
-rw-r--r-- 1 mpp mpp        65 5月  23 15:25 openGauss-5.1.0-openEuler-64bit.sha256
-rw-r--r-- 1 mpp mpp 347317866 5月  23 15:25 openGauss-5.1.0-openEuler-64bit.tar.bz2
-rw-r--r-- 1 mpp mpp  38680532 5月  23 15:25 openGauss-5.1.0-openEuler-64bit-tools.tar.gz
-rw------- 1 mpp mpp        65 5月  23 15:25 upgrade_sql.sha256
-rw------- 1 mpp mpp    493901 5月  23 15:25 upgrade_sql.tar.gz
-rw-r--r-- 1 mpp mpp        32 5月  23 15:02 version.cfg
```

(2) 准备xml文件
/opt/mpp/1p1s.xml文件配置一主一备资源池化集群

```
<?xml version="1.0" encoding="UTF-8"?>
<!-- 示例xml(一主一备资源池化集群):节点名、IP、目录、盘符、端口沿用本文环境,参数名及完整取值请以openGauss官方om文档为准 -->
<ROOT>
    <CLUSTER>
        <PARAM name="clusterName" value="ss_cluster" />
        <PARAM name="nodeNames" value="node1,node2" />
        <PARAM name="backIp1s" value="10.10.10.10,10.10.10.20" />
        <PARAM name="gaussdbAppPath" value="/opt/mpp/install/app" />
        <PARAM name="gaussdbLogPath" value="/opt/mpp/install/log" />
        <PARAM name="tmpMppdbPath" value="/opt/mpp/install/tmp" />
        <PARAM name="gaussdbToolPath" value="/opt/mpp/install/om" />
        <PARAM name="corePath" value="/opt/mpp/install/corefile" />
        <!-- 资源池化(共享存储)相关参数:dss卷、cm投票盘/共享盘使用2.1章节创建的lun -->
        <PARAM name="enable_dss" value="on" />
        <PARAM name="dss_home" value="/opt/mpp/install/dss_home" />
        <PARAM name="ss_dss_vg_name" value="data" />
        <PARAM name="dss_vg_info" value="data:/dev/sdab,p0:/dev/sdac,p1:/dev/sdad" />
        <PARAM name="votingDiskPath" value="/dev/sdae" />
        <PARAM name="shareDiskDir" value="/dev/sdaf" />
    </CLUSTER>
    <DEVICELIST>
        <DEVICE sn="node1">
            <PARAM name="name" value="node1" />
            <PARAM name="azName" value="AZ1" />
            <PARAM name="azPriority" value="1" />
            <PARAM name="backIp1" value="10.10.10.10" />
            <PARAM name="sshIp1" value="10.10.10.10" />
            <PARAM name="cmsNum" value="1" />
            <PARAM name="cmServerPortBase" value="15300" />
            <PARAM name="cmServerListenIp1" value="10.10.10.10,10.10.10.20" />
            <PARAM name="cmServerRelation" value="node1,node2" />
            <PARAM name="cmDir" value="/opt/mpp/install/cm" />
            <PARAM name="dataNum" value="1" />
            <PARAM name="dataPortBase" value="25400" />
            <PARAM name="dataNode1" value="/opt/mpp/install/data/dn" />
        </DEVICE>
        <DEVICE sn="node2">
            <PARAM name="name" value="node2" />
            <PARAM name="azName" value="AZ1" />
            <PARAM name="azPriority" value="1" />
            <PARAM name="backIp1" value="10.10.10.20" />
            <PARAM name="sshIp1" value="10.10.10.20" />
            <PARAM name="cmDir" value="/opt/mpp/install/cm" />
            <PARAM name="dataNum" value="1" />
            <PARAM name="dataPortBase" value="25400" />
            <PARAM name="dataNode1" value="/opt/mpp/install/data/dn" />
        </DEVICE>
    </DEVICELIST>
</ROOT>
```
`Tips`: 用户需要修改节点名称、节点IP、目录、盘符、端口号


###   2.4.om部署双集群
####    2.4.1 主集群(生产中心)
 (1) 解压om包
```
su - root
cd /opt/mpp/gauss_package
tar -zxvf openGauss-x.x.x-openEuler-64bit-all.tar.gz
tar -zxvf openGauss-x.x.x-openEuler-64bit-om.tar.gz
```

 (2) 执行预安装
```
cd script
./gs_preinstall -U mpp -G mpp -X /opt/mpp/1p1s.xml --sep-env-file=/home/mpp/ss_env
```
 参数解释:
+ sep-env-file 分离环境变量

(3) 执行安装
```
su - mpp
source /home/mpp/ss_env
gs_install -X /opt/mpp/1p1s.xml --dorado_config=/dev/sdm
```

--dorado_config传入参数对应2.1章节步骤(3),主存储上创建的dorado同步复制lun的盘符

(4) 查看主集群状态
```
[mpp@node1 dn_6001]$ cm_ctl query -Cvidp
[ CMServer State ]

node        node_ip         instance                          state
-------------------------------------------------------------------------
1   node1   10.10.10.10     1    /opt/mpp/install/cm/cm_server Primary
2   node2   10.10.10.20     2    /opt/mpp/install/cm/cm_server Standby


[ Defined Resource State ]

node        node_ip         res_name     instance   state
---------------------------------------------------------
1   node1   10.10.10.10     dms_res      6001       OnLine
2   node2   10.10.10.20     dms_res      6002       OnLine
1   node1   10.10.10.10     dss          20001      OnLine
2   node2   10.10.10.20     dss          20002      OnLine

[ Cluster State ]

cluster_state   : Normal
redistributing  : No
balanced        : Yes
current_az      : AZ_ALL

[ Datanode State ]

node        node_ip         instance                       state | node        node_ip         instance                       state
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1   node1   10.10.10.10     6001 25400 /opt/mpp/install/data/dn P Primary Normal | 2   node2   10.10.10.20     6002 25400 /opt/mpp/install/data/dn S Standby Normal
```

####    2.4.2 备集群(容灾中心)
需要提前做《2.1.创建lun》 和 《2.3.环境准备》

 (1) 解压om包
```
su - root
cd /opt/mpp/gauss_package
tar -zxvf openGauss-x.x.x-openEuler-64bit-all.tar.gz
tar -zxvf openGauss-x.x.x-openEuler-64bit-om.tar.gz
```

 (2) 执行预安装
```
cd script
./gs_preinstall -U mpp -G mpp -X /opt/mpp/1p1s.xml --sep-env-file=/home/mpp/ss_env
```

(3) 执行安装
```
gs_install -X /opt/mpp/1p1s.xml
```

(4) 查看备集群状态
```
[mpp@node2 dn_6002]$ cm_ctl query -Cvidp
[ CMServer State ]

node        node_ip         instance                          state
-------------------------------------------------------------------------
1   node1   20.20.20.10     1    /opt/mpp/install/cm/cm_server Primary
2   node2   20.20.20.20     2    /opt/mpp/install/cm/cm_server Standby


[ Defined Resource State ]

node        node_ip         res_name     instance   state
---------------------------------------------------------
1   node1   20.20.20.10     dms_res      6001       OnLine
2   node2   20.20.20.20     dms_res      6002       OnLine
1   node1   20.20.20.10     dss          20001      OnLine
2   node2   20.20.20.20     dss          20002      OnLine

[ Cluster State ]

cluster_state   : Normal
redistributing  : No
balanced        : Yes
current_az      : AZ_ALL

[ Datanode State ]

node        node_ip         instance                       state | node        node_ip         instance                       state
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1   node1   20.20.20.10     6001 25400 /opt/mpp/install/data/dn P Primary Normal | 2   node2   20.20.20.20     6002 25400 /opt/mpp/install/data/dn S Standby Normal
```
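进入2.4.3建立容灾关系之前,建议分别在主、备集群任一节点确认集群状态为Normal(示例,命令沿用上文):
```
cm_ctl query -Cvidp | grep cluster_state
```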
####    2.4.3 建立容灾关系
(1) 主集群(生产中心)配置参数,每个节点都配置
 节点0的postgresql.conf文件
```
xlog_file_path = '/dev/sdm'
xlog_lock_file_path = '/home/mpp/install/shared_lock_primary'
application_name = 'dn_master_0'
cross_cluster_replconninfo1='localhost=10.10.10.10 localport=25400 remotehost=20.20.20.10 remoteport=25400'
cross_cluster_replconninfo2='localhost=10.10.10.10 localport=25400 remotehost=20.20.20.20 remoteport=25400'
cluster_run_mode = 'cluster_primary'
ha_module_debug = off
ss_log_level = 255
ss_log_backup_file_count = 100
ss_log_max_file_size = 1GB
```

 节点1的postgresql.conf文件
```
xlog_file_path = '/dev/sdm'
xlog_lock_file_path = '/home/mpp/install/shared_lock_primary'
application_name = 'dn_master_1'
cross_cluster_replconninfo1='localhost=10.10.10.20 localport=25400 remotehost=20.20.20.10 remoteport=25400'
cross_cluster_replconninfo2='localhost=10.10.10.20 localport=25400 remotehost=20.20.20.20 remoteport=25400'
cluster_run_mode = 'cluster_primary'
ha_module_debug = off
ss_log_level = 255
ss_log_backup_file_count = 100
ss_log_max_file_size = 1GB
```

(2) 重启主集群,查询主集群状态
```
cm_ctl start
cm_ctl query -Cvidp
```

(3) 登录主管控平台,配置同步复制

  在主存储管控平台(DeviceManager)登录:https://主存储ip:8088
  data protection -> luns -> remote replication pairs(远程复制对) -> create -> 选择主存储需要同步复制给备存储的lun -> next
  请原谅这里截图工具的搓,标记笔太难用了,画的蓝圈圈很丑但很个性!
![](./images/om部署/远程复制对创建.png)
图中第5步选择主存储上创建的dorado同步复制的lun,lun名称是zx_mpp_doradoxlog

选择同步 -> Automatic -> 备存储的存储池名称 -> next
![](./images/om部署/远程复制对创建1.jpg)

最后会显示successful,表示成功

(4) 登录备管控平台,映射到备集群
  在备存储管控平台(DeviceManager)登录:https://备存储ip:8088
在备存储上做映射方式一:
  Services -> LUNs -> zx_mpp_doradoxlog -> More -> Map -> 选择备存储对应的业务计算节点ip(例如1.1章节表格中的20.20.20.10)
![](./images/om部署/doradoxlog映射.PNG)
选择第(3)中和主存储做了远程复制对的lun,名称是zx_mpp_doradoxlog

在备存储上做映射方式二:
Services -> LUNs -> 双击zx_mpp_doradoxlog -> Mapping -> Operation -> map -> 选择备存储对应的业务计算节点0的ip(例如1.1章节表格中的20.20.20.10)
再次点击Operation -> map -> 选择备存储对应的业务计算节点1的ip(例如1.1章节表格中的20.20.20.20)
![](./images/om部署/doradoxlog映射1.PNG)

在业务计算节点上查看lun情况:
```
[root@node1 mpp]# upadmin show vlun
--------------------------------------------------------------------------
 Vlun ID   Disk   Name                Lun WWN   Status   Capacity
 31        sdag   zx_mpp_doradoxlog   65fdfg    Normal   1.00TB
--------------------------------------------------------------------------
```

修改属组
```
chown mpp:mpp /dev/sdag
```
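`Tips`: 如果映射完成后业务计算节点上暂时看不到新盘符,可以先重新扫描scsi总线再查询(命令同2.1章节):
```
rescan-scsi-bus.sh
upadmin show vlun
```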
(5) 备集群(容灾中心)配置数据库参数,每个节点都配置
 节点0的postgresql.conf文件
```
xlog_file_path = '/dev/sdag'
xlog_lock_file_path = '/home/mpp/install/shared_lock_standby'
application_name = 'dn_standby_0'
cross_cluster_replconninfo1='localhost=20.20.20.10 localport=25400 remotehost=10.10.10.10 remoteport=25400'
cross_cluster_replconninfo2='localhost=20.20.20.10 localport=25400 remotehost=10.10.10.20 remoteport=25400'
cluster_run_mode = 'cluster_standby'
ha_module_debug = off
ss_log_level = 255
ss_log_backup_file_count = 100
ss_log_max_file_size = 1GB
```

 节点1的postgresql.conf文件
```
xlog_file_path = '/dev/sdag'
xlog_lock_file_path = '/home/mpp/install/shared_lock_standby'
application_name = 'dn_standby_1'
cross_cluster_replconninfo1='localhost=20.20.20.20 localport=25400 remotehost=10.10.10.10 remoteport=25400'
cross_cluster_replconninfo2='localhost=20.20.20.20 localport=25400 remotehost=10.10.10.20 remoteport=25400'
cluster_run_mode = 'cluster_standby'
ha_module_debug = off
ss_log_level = 255
ss_log_backup_file_count = 100
ss_log_max_file_size = 1GB
```
(6) 停备集群对首备进行build
方式一
```
# 设置该参数后,执行cm_ctl stop,dss不会被踢出集群
cm_ctl set --param --server -k delay_arbitrate_max_cluster_timeout=0

# kill cm_server 等待om_monitor重新拉起cm_server,使修改的参数生效
kill cm_server

# 先停从备,防止首备先停发生failover
cm_ctl stop -n 2 -I 6002
cm_ctl stop -n 1 -I 6001

# 执行build命令,首备节点的dssserver进程必须正常
gs_ctl build -D /opt/mpp/install/data/dn -b cross_cluster_full -g 0 --vgname=+data --enable-dss --socketpath='UDS:/opt/mpp/install/dss_home/.dss_unix_d_socket' -q

# build完成后恢复该参数默认值
cm_ctl set --param --server -k delay_arbitrate_max_cluster_timeout=10

# kill cm_server 等待om_monitor重新拉起cm_server,使修改的参数生效
kill cm_server
```

方式二
```
# 停止备集群
cm_ctl stop

# 在首备节点导入dss维护环境变量,导入该环境变量可以在cm部署的情况下手动拉起dssserver
export DSS_MAINTAIN=TRUE

# 拉起首备dssserver
dssserver -D /opt/mpp/install/dss_home &

# 执行build命令,首备节点的dssserver进程必须正常
gs_ctl build -D /opt/mpp/install/data/dn -b cross_cluster_full -g 0 --vgname=+data --enable-dss --socketpath='UDS:/opt/mpp/install/dss_home/.dss_unix_d_socket' -q
```

首备就是指备集群中还没有建立容灾关系之前,显示primary的节点,即为首备节点,对其进行build

(7) 备集群(容灾中心)配置cm参数
配置下列参数,用于区分集群模式,cm会根据不同集群模式进行状态校验
```
cm_ctl set --param --server -k backup_open=1
cm_ctl set --param --agent -k agent_backup_open=1
```

参数说明:
backup_open=1,agent_backup_open=1 使用dorado介质的集群转双集群备集群
backup_open=0,agent_backup_open=0 资源池化单集群

(8) 重启备集群
```
cm_ctl start
```

(9) 步骤(5)~(7)自动化脚本
```
#!/bin/bash

source /home/mpp/ss_env

#stop standby cluster
echo "stop standby cluster."
cm_ctl stop

#assign parameter to main standby
echo "assign parameter to main standby."
gs_guc set -N node1 -D /opt/mpp/install/data/dn -c "xlog_file_path = '/dev/sdk'"
gs_guc set -N node1 -D /opt/mpp/install/data/dn -c "xlog_lock_file_path = '/opt/mpp/install/shared_lock_standby.lock'"
gs_guc set -N node1 -D /opt/mpp/install/data/dn -c "application_name = 'dn_standby_0'"
gs_guc set -N node1 -D /opt/mpp/install/data/dn -c "cross_cluster_replconninfo1='localhost=172.16.108.54 localport=25400 remotehost=172.16.108.23 remoteport=44100'"
gs_guc set -N node1 -D /opt/mpp/install/data/dn -c "cluster_run_mode = 'cluster_standby'"
gs_guc set -N node1 -D /opt/mpp/install/data/dn -c "ha_module_debug = off"
gs_guc set -N node1 -D /opt/mpp/install/data/dn -h "host all all 172.16.108.23/32 trust"


#assign parameter to standby
echo "assign parameter to standby."
gs_guc set -N node2 -D /opt/mpp/install/data/dn -c "xlog_file_path = '/dev/sdc'"
gs_guc set -N node2 -D /opt/mpp/install/data/dn -c "xlog_lock_file_path = '/opt/mpp/install/shared_lock_standby.lock'"
gs_guc set -N node2 -D /opt/mpp/install/data/dn -c "application_name = 'dn_standby_1'"
gs_guc set -N node2 -D /opt/mpp/install/data/dn -c "cross_cluster_replconninfo1='localhost=172.16.108.55 localport=25400 remotehost=172.16.108.23 remoteport=44100'"
gs_guc set -N node2 -D /opt/mpp/install/data/dn -c "cluster_run_mode = 'cluster_standby'"
gs_guc set -N node2 -D /opt/mpp/install/data/dn -c "ha_module_debug = off"
gs_guc set -N node2 -D /opt/mpp/install/data/dn -h "host all all 172.16.108.23/32 trust"

#start dssserver
export DSS_MAINTAIN=TRUE
dssserver -D /opt/mpp/install/dss_home &

#build
#gs_ctl build -D /opt/mpp/install/data/dn -b cross_cluster_full -g 0 --vgname=+data --enable-dss --socketpath='UDS:/opt/mpp/install/dss_home/.dss_unix_d_socket' -q


#assign cm parameter
echo "assign cm parameter to all node."
cm_ctl set --param --server -k backup_open=1
cm_ctl set --param --agent -k agent_backup_open=1
```

(10) 查询状态
主集群使用cm_ctl query -Cvidp查询出来同2.4.1章节第(4)步一样
备集群查询结果如下,备集群节点0从没有建立容灾关系时的primary变成建立容灾关系之后的Main Standby
```
[mpp@node2 dn_6002]$ cm_ctl query -Cvidp
[ CMServer State ]

node        node_ip         instance                          state
-------------------------------------------------------------------------
1   node1   20.20.20.10     1    /opt/mpp/install/cm/cm_server Primary
2   node2   20.20.20.20     2    /opt/mpp/install/cm/cm_server Standby


[ Defined Resource State ]

node        node_ip         res_name     instance   state
---------------------------------------------------------
1   node1   20.20.20.10     dms_res      6001       OnLine
2   node2   20.20.20.20     dms_res      6002       OnLine
1   node1   20.20.20.10     dss          20001      OnLine
2   node2   20.20.20.20     dss          20002      OnLine

[ Cluster State ]

cluster_state   : Normal
redistributing  : No
balanced        : Yes
current_az      : AZ_ALL

[ Datanode State ]

node        node_ip         instance                       state | node        node_ip         instance                       state
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1   node1   20.20.20.10     6001 25400 /opt/mpp/install/data/dn P Main Standby Normal | 2   node2   20.20.20.20     6002 25400 /opt/mpp/install/data/dn S Standby Normal
```

***Notice:不推荐直接用于生产环境***
***作者:Shirley_zhengx***