diff --git a/content/zh/post/July/figures/11.png b/content/zh/post/July/figures/11.png new file mode 100644 index 0000000000000000000000000000000000000000..8dc69c8a98de3d5a1d629506533775eaa3d445ef Binary files /dev/null and b/content/zh/post/July/figures/11.png differ diff --git a/content/zh/post/July/figures/22.png b/content/zh/post/July/figures/22.png new file mode 100644 index 0000000000000000000000000000000000000000..2ee69b361ddabaafde14efe3c10edb2d99a4c4a1 Binary files /dev/null and b/content/zh/post/July/figures/22.png differ diff --git a/content/zh/post/July/figures/33.png b/content/zh/post/July/figures/33.png new file mode 100644 index 0000000000000000000000000000000000000000..82696999a02ab7e66b98029987edbf8ec4008d5e Binary files /dev/null and b/content/zh/post/July/figures/33.png differ diff --git a/content/zh/post/July/figures/4.png b/content/zh/post/July/figures/4.png new file mode 100644 index 0000000000000000000000000000000000000000..71c7d0bb360e00505c6cdbe3304c9d8e5a470a47 Binary files /dev/null and b/content/zh/post/July/figures/4.png differ diff --git a/content/zh/post/July/figures/5.png b/content/zh/post/July/figures/5.png new file mode 100644 index 0000000000000000000000000000000000000000..6aaa0f4d9d5560533f32a1f2aef057e8adbcc05f Binary files /dev/null and b/content/zh/post/July/figures/5.png differ diff --git "a/content/zh/post/July/figures/Gin-\347\264\242\345\274\225\347\273\223\346\236\204\347\244\272\346\204\217\345\233\276.png" "b/content/zh/post/July/figures/Gin-\347\264\242\345\274\225\347\273\223\346\236\204\347\244\272\346\204\217\345\233\276.png" new file mode 100644 index 0000000000000000000000000000000000000000..961eb26d228d46df1613dc3edc8be61c3f54d99b Binary files /dev/null and "b/content/zh/post/July/figures/Gin-\347\264\242\345\274\225\347\273\223\346\236\204\347\244\272\346\204\217\345\233\276.png" differ diff --git "a/content/zh/post/July/openGauss-Gin-\347\264\242\345\274\225.md" "b/content/zh/post/July/openGauss-Gin-\347\264\242\345\274\225.md" new file mode 100644 index 0000000000000000000000000000000000000000..34833decaa1a176547b0b68c730be1a932450dbe --- /dev/null +++ "b/content/zh/post/July/openGauss-Gin-\347\264\242\345\274\225.md" @@ -0,0 +1,352 @@ ++++ + +title = "openGauss Gin 索引" + +date = "2021-09-21" + +tags = [ "openGauss Gin 索引"] + +archives = "2021-09" + +author = "吴松" + +summary = "openGauss Gin 索引" + +img = "/zh/post/July/title/img5.png" + +times = "12:30" + ++++ + +# openGauss Gin 索引 + + + +## 概述 + +GIN(Generalized Inverted Index)通用倒排索引,是首选的文本搜索索引类型。倒排索引对应的列上的数据类型通常是一个多值类型,索引中包含每个单词的索引条目,以及所匹配的位置的压缩列表。如果搜索条件是多个单词,可以先使用第一个单词进行匹配,再在找到的结果中使用其他单词删除不匹配的项。Gin 索引的 key 是多值类型中出现的单词,叶子节点中存储了每个单词出现的 TID 的列表。如果这个 TID 列表比较小,它可以和元素放在同一个页面中(称为 posting list)。如果列表比较大,就需要用到更高效的数据结构 B-tree,这样的 B-tree 位于单独的数据页上(称为 posting tree)。 + +## 索引结构 + +Gin 索引大的组织结构是一棵B-tree 如图-1 所示 + +其中也有 meta-page、root-page 等 page,如果一个 key 对应的 tids 比较少可以和 key 放在同一个 page 中作为叶子节点; 如果对应的 tids 比较多\(占用的空间的大小\),需要将这些 tids 放到单独的数据页上,并且以 B-tree 的形式组织方便快速查找,叶子节点中记录对应的 B-tree 的 root-page 的信息。 + +**图 1** Gin 索引结构示意图 +![](figures/Gin-索引结构示意图.png "Gin-索引结构示意图") + +## 语法 + +``` +CREATE INDEX name ON table USING GIN (column); +``` + +openGauss 中创建 gin 索引时,索引列的类型必须是 tsvector 类型。 + +``` +Example: + +postgres=# create table ts(doc text, doc_tsv tsvector); + +postgres=# insert into ts(doc) values + ('Can a sheet slitter slit sheets?'), + ('How many sheets could a sheet slitter slit?'), + ('I slit a sheet, a sheet I slit.'), + ('Upon a slitted sheet I sit.'), + ('Whoever slit the sheets is a good 
sheet slitter.'),
+ ('I am a sheet slitter.'),
+ ('I slit sheets.'),
+ ('I am the sleekest sheet slitter that ever slit sheets.'),
+ ('She slits the sheet she sits on.');
+
+postgres=# update ts set doc_tsv = to_tsvector(doc);
+
+postgres=# create index on ts using gin(doc_tsv);
+```
+
+![](figures/5.png)
+
+查询一个既包含 many 又包含 slitter 的 doc 如下:
+
+![](figures/4.png)
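+
+图 4 中执行的查询大致等价于下面的写法(示意用法,假设使用默认的英文文本搜索配置;@@ 为 tsvector 与 tsquery 的匹配运算符,& 表示两个词都需要出现):
+
+```
+postgres=# select doc from ts where doc_tsv @@ to_tsquery('many & slitter');
+-- 按上面插入的数据,预期只命中 'How many sheets could a sheet slitter slit?' 这一行
+```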
+
+## 实现
+
+Gin 索引的实现主要在 src/gausskernel/storage/access/gin 下,主要文件及功能如下:
+
+**表 1**
+
+| 文件 | 功能 |
+| --- | --- |
+| ginbtree.cpp | 倒排索引 page 处理相关函数 |
+| ginarrayproc.cpp | 支持倒排索引处理各种数组类型的函数 |
+| gindatapage.cpp | 倒排索引处理 posting tree page 相关实现 |
+| gininsert.cpp | 倒排索引插入相关实现 |
+| ginpostinglist.cpp | 倒排索引处理 posting list 相关实现 |
+| ginscan.cpp | 倒排索引扫描相关实现 |
+| ginget.cpp | 倒排索引 scan 过程中获取 tuple 相关实现 |
+| ginxlog.cpp | 倒排索引 xlog 回放相关实现 |
+| ginvacuum.cpp | 倒排索引 delete 和 vacuum 相关实现 |
+
+查看 pg\_am 中 Gin 索引相关处理函数:
+
+| pg\_am 字段 | 取值 |
+| --- | --- |
+| amname | gin |
+| aminsert | gininsert |
+| ambeginscan | ginbeginscan |
+| amendscan | ginendscan |
+| amgetbitmap | gingetbitmap |
+| ambuild | ginbuild |
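+
+表中内容可以直接从系统表中查出来,类似下面的查询(示意用法,具体列以实际版本的 pg\_am 定义为准):
+
+```
+postgres=# select amname, aminsert, ambeginscan, amendscan, amgetbitmap, ambuild from pg_am where amname = 'gin';
+```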
+
+## 构建 Gin 索引
+
+```
+ginbuild
+{
+    ...
+    // 初始化工作,如创建 gin 索引的 meta page 和 root page,记 XLOG 等
+    buildInitialize(index, &buildstate);
+    // scan heap tuples,调用 ginBuildCallback 处理每个要加入索引的 tuple
+    // ginBuildCallback 会从 heap tuple 中提取 entries,如果有多个值,
+    // 会对这些值进行去重和排序。得到去重及排完序的 entries 后,调用 ginInsertBAEntries
+    // 将这些 entries 及对应的 tids 插入一棵 RB-tree
+    reltuples = tableam_index_build_scan(heap, index, indexInfo, false, ginBuildCallback, (void*)&buildstate);
+    ...
+    // 从 RB-tree 中把之前插入的 entries 和 tids scan 出来,插入到 gin index 中
+    while ((list = ginGetBAEntry(&buildstate.accum, &attnum, &key, &category, &nlist)) != NULL) {
+        /* there could be many entries, so be willing to abort here */
+        CHECK_FOR_INTERRUPTS();
+        // 如果 key 不存在,则新增一个 key entry;如果已经存在,则更新对应的 tids:
+        // 首先在 gin 索引中查找到对应 key 的叶子节点,如果 key 已经存在,更新对应的 tids,
+        // 不存在则插入一个新的叶子节点
+        ginEntryInsert(&buildstate.ginstate, attnum, key, category, list, nlist, &buildstate.buildStats);
+    }
+
+    ...
+    // 更新 meta-page 中的信息,记 XLOG
+    ginUpdateStats(index, &buildstate.buildStats);
+    ...
+    // 返回构建结果
+}
+```
+
+在向 gin 索引中插入数据时,和 B-tree 索引一样,首先需要查找对应的 key 是否存在:
+
+如果 key 已经存在,则查看当前叶子节点中 key 对应的 tids 是 posting tree 还是 posting list,并更新 tids;
+
+posting list 如果由于更新导致 tids 变多,可能转换为 posting tree;
+
+如果 key 不存在,则在叶子节点中插入这个新的 key 以及对应的 tids。
+
+```
+void ginEntryInsert(GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category,
+                    ItemPointerData *items, uint32 nitem, GinStatsData *buildStats)
+{
+    GinBtreeData btree;
+    GinBtreeEntryInsertData insertdata;
+    GinBtreeStack *stack = NULL;
+    IndexTuple itup;
+    Page page;
+
+    insertdata.isDelete = FALSE;
+
+    /* During index build, count the to-be-inserted entry */
+    if (buildStats != NULL)
+        buildStats->nEntries++;
+
+    ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
+
+    // 在 B-tree 中找到叶子节点
+    stack = ginFindLeafPage(&btree, false);
+    page = BufferGetPage(stack->buffer);
+
+    // 如果 key 已经存在
+    if (btree.findItem(&btree, stack)) {
+        /* found pre-existing entry */
+        itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, stack->off));
+        // 如果是 posting tree 结构
+        if (GinIsPostingTree(itup)) {
+            /* add entries to existing posting tree */
+            BlockNumber rootPostingTree = GinGetPostingTree(itup);
+
+            /* release all stack */
+            LockBuffer(stack->buffer, GIN_UNLOCK);
+            freeGinBtreeStack(stack);
+
+            /* insert into posting tree */
+            ginInsertItemPointers(ginstate->index, rootPostingTree, items, nitem, buildStats);
+            return;
+        }
+        // 如果是 posting list
+        /* modify an existing leaf entry */
+        itup = addItemPointersToLeafTuple(ginstate, itup, items, nitem, buildStats);
+
+        insertdata.isDelete = TRUE;
+    } else { // 对应的 key 不存在,需要新建一个叶子节点里的对象
+        /* no match, so construct a new leaf entry */
+        itup = buildFreshLeafTuple(ginstate, attnum, key, category, items, nitem, buildStats);
+    }
+
+    /* Insert the new or modified leaf tuple */
+    insertdata.entry = itup;
+    ginInsertValue(&btree, stack, &insertdata, buildStats);
+    pfree(itup);
+    itup = NULL;
+}
+```
+
+gin 的 B-tree 也会涉及页面分裂,处理方式和 B-tree 索引的分裂类似,因此在查找过程中也会有与 B-tree 索引类似的 moveright 动作,本文不展开介绍分裂相关内容。
+
+相关数据结构:
+
+```
+// 用于表示一个 key 及与其关联的 tids 的数据结构
+typedef struct GinEntryAccumulator {
+    RBNode rbnode;
+    Datum key;
+    GinNullCategory category;
+    OffsetNumber attnum;
+    bool shouldSort;
+    ItemPointerData *list;
+    uint32 maxcount; /* allocated size of list[] */
+    uint32 count;    /* current number of list[] entries */
+} GinEntryAccumulator;
+
+// Gin 索引整体为 B-tree 结构,GinBtreeStack 表示其中的一个节点
+typedef struct GinBtreeStack {
+    BlockNumber blkno;
+    Buffer buffer;
+    OffsetNumber off;
+    ItemPointerData iptr;
+    /* predictNumber contains predicted number of pages on current level */
+    uint32 predictNumber;
+    struct GinBtreeStack *parent; // 父节点
+} GinBtreeStack;
+
+typedef struct GinBtreeData *GinBtree;
+```
+
+gin 索引的查找和插入流程在构建 gin 索引的流程中都有涉及,和 B-tree 有些类似,本文不再展开介绍。
+
+另外需要注意的一点是,gin 索引是行存表和列存表都支持的索引类型,但在 pg\_am 中行存表的 gin 和列存表的 gin(cgin)是两条记录,cgin 在 pg\_am 中的相关处理函数如下所示:
+
+**表 2**
+
+| pg\_am 字段 | 取值 |
+| --- | --- |
+| amname | cgin |
+| aminsert | gininsert |
+| ambeginscan | ginbeginscan |
+| amendscan | ginendscan |
+| amgetbitmap | cgingetbitmap |
+| ambuild | cginbuild |
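+
+对比行存和列存两条记录,可以用类似下面的查询(示意用法):
+
+```
+postgres=# select amname, amgetbitmap, ambuild from pg_am where amname in ('gin', 'cgin');
+```
+
+在上面列出的处理函数中,只有 amgetbitmap 和 ambuild 两项不同。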
+ +可以看出列存表的 gin 索引大部分处理函数和行存表是共用的,但索引构建的实现和行存不同,主要差异点是行存表和列存表底层存储及访问方式的差异,gin 索引本身的实现并没有太大差别。 + +索引删除和vacuum相关的内容不在本文讨论,这块内容后面单独叙述。 + diff --git "a/content/zh/post/July/openGauss-\345\210\227\345\255\230\350\241\250PSort\347\264\242\345\274\225.md" "b/content/zh/post/July/openGauss-\345\210\227\345\255\230\350\241\250PSort\347\264\242\345\274\225.md" new file mode 100644 index 0000000000000000000000000000000000000000..d334089beff4ecac2d2c491ba98614cb6643e231 --- /dev/null +++ "b/content/zh/post/July/openGauss-\345\210\227\345\255\230\350\241\250PSort\347\264\242\345\274\225.md" @@ -0,0 +1,181 @@ ++++ + +title = "openGauss 列存表PSort索引" + +date = "2021-09-24" + +tags = [ "openGauss 列存表PSort索引"] + +archives = "2021-09" + +author = "吴松" + +summary = "openGauss 列存表PSort索引" + +img = "/zh/post/July/title/img10.png" + +times = "12:30" + ++++ + +# openGauss 列存表PSort索引 + +## 概述 + +PSort\(Partial sort\) Index是在列存表的列上建的聚簇索引。CUDesc 上有每个 CU 的 min 和 max 值,但如果业务的数据模型较为离散,查询时通过 min 和 max 值去过滤 CU 会出现大量的 CU 误读取,例如每个 CU 的 min 和 max跨度都比较大时,其查询效率接近全表扫描。例如下图中的场景,查询2基本命中所有的 CU, 此时查找近似全表扫描。 + +![](figures/11.png) + +PSort索引可以对部分区间(一般会包含多个CU覆盖的行)内的数据按照索引键进行排序,使得 CU 之间的交集尽量减少,提升查询的效率。 + +## PSort 索引使用 + +在批量插入列存表的过程中,如果发现有 PSort 索引,会先对这批数据进行排序。PSort索引表的组织形式也是 cstore 表(CUDesc 是 astore 表),表的字段包含了索引键的各个字段,加上对应的行号\(TID\)字段。插入数据的过程中如果发现有PSort索引,会将一定数量的数据按照PSort索引的索引键进行排序,与 TID 字段共同拼装成向量数组,再插入到 PSort 索引的 cstore 表中。 所以 PSort 索引数据中列数比实际的索引键要多一列,多出的这一列用于存储这条记录在数据 cstore 存储中的位置。 + +``` +// 构建 PSort 索引过程中构造索引数据 +inline void ProjectToIndexVector(VectorBatch *scanBatch, VectorBatch *outBatch, IndexInfo *indexInfo) +{ + Assert(scanBatch && outBatch && indexInfo); + int numAttrs = indexInfo->ii_NumIndexAttrs; + AttrNumber *attrNumbers = indexInfo->ii_KeyAttrNumbers; + Assert(outBatch->m_cols == (numAttrs + 1)); + + // index column + for (int i = 0; i < numAttrs; i++) { + AttrNumber attno = attrNumbers[i]; + Assert(attno > 0 && attno <= scanBatch->m_cols); + + // shallow copy + outBatch->m_arr[i].copy(&scanBatch->m_arr[attno - 1]); + } + + // ctid column + // 最后一列是 tid + outBatch->m_arr[numAttrs].copy(scanBatch->GetSysVector(-1)); + + outBatch->m_rows = scanBatch->m_rows; +} +``` + +cstore 表执行插入流程,如果有 Psort 索引,会先将数据插入排序队列 + +``` +void CStoreInsert::BatchInsert(_in_ VectorBatch* pBatch, _in_ int options) +{ + Assert(pBatch || IsEnd()); + + /* keep memory space from leaking during bulk-insert */ + MemoryContext oldCnxt = MemoryContextSwitchTo(m_tmpMemCnxt); + + // Step 1: relation has partial cluster key + // We need put data into sorter contatiner, and then do + // batchinsert data + if (NeedPartialSort()) { + Assert(m_tmpBatchRows); + + if (pBatch) { + Assert(pBatch->m_cols == m_relation->rd_att->natts); + m_sorter->PutVecBatch(m_relation, pBatch); // 插入局部排序队列 + } + + if (m_sorter->IsFull() || IsEnd()) { // 排序队列满了或者插入数据输入结束 + m_sorter->RunSort(); // 按照索引键排序 + + /* reset and fetch next batch of values */ + DoBatchInsert(options); + m_sorter->Reset(IsEnd()); + + /* reset and free all memory blocks */ + m_tmpBatchRows->reset(false); + } + } + + // Step 2: relation doesn't have partial cluster key + // We need cache data until batchrows is full + else { + Assert(m_bufferedBatchRows); + + // If batch row is full, we can do batchinsert now + if (IsEnd()) { + if (ENABLE_DELTA(m_bufferedBatchRows)) { + InsertDeltaTable(m_bufferedBatchRows, options); + } else { + BatchInsertCommon(m_bufferedBatchRows, options); + } + m_bufferedBatchRows->reset(true); + } + + // we need cache data until batchrows is full + if (pBatch) { + 
Assert(pBatch->m_rows <= BatchMaxSize); + Assert(pBatch->m_cols && m_relation->rd_att->natts); + Assert(m_bufferedBatchRows->m_rows_maxnum > 0); + Assert(m_bufferedBatchRows->m_rows_maxnum % BatchMaxSize == 0); + + int startIdx = 0; + while (m_bufferedBatchRows->append_one_vector( + RelationGetDescr(m_relation), pBatch, &startIdx, m_cstorInsertMem)) { + BatchInsertCommon(m_bufferedBatchRows, options); + m_bufferedBatchRows->reset(true); + } + Assert(startIdx == pBatch->m_rows); + } + } + + // Step 3: We must update index data for this batch data + // if end of batchInsert + FlushIndexDataIfNeed(); + + MemoryContextReset(m_tmpMemCnxt); + (void)MemoryContextSwitchTo(oldCnxt); +} +``` + +![](figures/22.png) + +图 cstore表插入流程示意图 + +插入流程中更新索引数据的代码 + +``` +void CStoreInsert::InsertIdxTableIfNeed(bulkload_rows* batchRowPtr, uint32 cuId) +{ + Assert(batchRowPtr); + + if (relation_has_indexes(m_resultRelInfo)) { + /* form all tids */ + bulkload_indexbatch_set_tids(m_idxBatchRow, cuId, batchRowPtr->m_rows_curnum); + + for (int indice = 0; indice < m_resultRelInfo->ri_NumIndices; ++indice) { + /* form index-keys data for index relation */ + for (int key = 0; key < m_idxKeyNum[indice]; ++key) { + bulkload_indexbatch_copy(m_idxBatchRow, key, batchRowPtr, m_idxKeyAttr[indice][key]); + } + + /* form tid-keys data for index relation */ + bulkload_indexbatch_copy_tids(m_idxBatchRow, m_idxKeyNum[indice]); + + /* update the actual number of used attributes */ + m_idxBatchRow->m_attr_num = m_idxKeyNum[indice] + 1; + + if (m_idxInsert[indice] != NULL) { + /* 插入PSort 索引 */ + m_idxInsert[indice]->BatchInsert(m_idxBatchRow, 0); + } else { + /* 插入 cbtree/cgin 索引 */ + CStoreInsert::InsertNotPsortIdx(indice); + } + } + } +} +``` + +索引插入流程和普通 cstore 数据插入相同。 + +使用 PSort 索引查询时,由于 PSort 索引 CU 内部已经有序,因此可以使用二分查找快速找到对应数据在 psort 索引中的行号,这一行数据的 tid 字段就是这条数据在数据 cstore 中的行号。 + +![](figures/33.png) + +图-2 PSort 索引查询示意图 + diff --git "a/content/zh/post/July/openGauss-\345\271\266\345\217\221\351\207\215\345\273\272\347\264\242\345\274\225\344\273\243\347\240\201\345\256\236\347\216\260.md" "b/content/zh/post/July/openGauss-\345\271\266\345\217\221\351\207\215\345\273\272\347\264\242\345\274\225\344\273\243\347\240\201\345\256\236\347\216\260.md" new file mode 100644 index 0000000000000000000000000000000000000000..02a52a9c1affe29c504ee585cac7d75f48068ea7 --- /dev/null +++ "b/content/zh/post/July/openGauss-\345\271\266\345\217\221\351\207\215\345\273\272\347\264\242\345\274\225\344\273\243\347\240\201\345\256\236\347\216\260.md" @@ -0,0 +1,134 @@ ++++ + +title = "openGauss 并发重建索引代码实现" + +date = "2021-09-22" + +tags = [ "openGauss 并发重建索引代码实现"] + +archives = "2021-09" + +author = "李宏达" + +summary = "openGauss 并发重建索引代码实现" + +img = "/zh/post/July/title/img9.png" + +times = "12:30" + ++++ + +# openGauss 并发重建索引代码实现 + +本文主要讲解并发创建索引过程中,索引数据追加部分的原理和代码实现。 + +先看一下代码中关于这部分功能实现的注释。 + +``` +/* + +validate_index - support code for concurrent index builds We do a concurrent index build by first inserting the catalog entry for the index via index_create(), marking it not indisready and not indisvalid. +Then we commit our transaction and start a new one, then we wait for all transactions that could have been modifying the table to terminate. Now we know that any subsequently-started transactions will see the index and honor its constraints on HOT updates; so while existing HOT-chains might be broken with respect to the index, no currently live tuple will have an incompatible HOT update done to it. 
We now build the index normally via index_build(), while holding a weak lock that allows concurrent insert/update/delete. Also, we index only tuples that are valid as of the start of the scan (see IndexBuildHeapScan), whereas a normal build takes care to include recently-dead tuples. This is OK because we won’t mark the index valid until all transactions that might be able to see those tuples are gone. The reason for doing that is to avoid bogus unique-index failures due to concurrent UPDATEs (we might see different versions of the same row as being valid when we pass over them, if we used HeapTupleSatisfiesVacuum). This leaves us with an index that does not contain any tuples added to the table while we built the index. +Next, we mark the index “indisready” (but still not “indisvalid”) and commit the second transaction and start a third. Again we wait for all transactions that could have been modifying the table to terminate. Now we know that any subsequently-started transactions will see the index and insert their new tuples into it. We then take a new reference snapshot which is passed to validate_index(). Any tuples that are valid according to this snap, but are not in the index, must be added to the index. +(Any tuples committed live after the snap will be inserted into the index by their originating transaction. Any tuples committed dead before the snap need not be indexed, because we will wait out all transactions that might care about them before we mark the index valid.) +validate_index() works by first gathering all the TIDs currently in the index, using a bulkdelete callback that just stores the TIDs and doesn’t ever say “delete it”. (This should be faster than a plain indexscan; also, not all index AMs support full-index indexscan.) Then we sort the TIDs, and finally scan the table doing a “merge join” against the TID list to see which tuples are missing from the index. Thus we will ensure that all tuples valid according to the reference snapshot are in the index. +Building a unique index this way is tricky: we might try to insert a tuple that is already dead or is in process of being deleted, and we mustn’t have a uniqueness failure against an updated version of the same row. We could try to check the tuple to see if it’s already dead and tell index_insert() not to do the uniqueness check, but that still leaves us with a race condition against an in-progress update. To handle that,we expect the index AM to recheck liveness of the to-be-inserted tuple +before it declares a uniqueness error. +After completing validate_index(), we wait until all transactions that were alive at the time of the reference snapshot are gone; this is necessary to be sure there are none left with a transaction snapshot older than the reference (and hence possibly able to see tuples we did not index). Then we mark the index “indisvalid” and commit. Subsequent transactions will be able to use it for queries. +Doing two full table scans is a brute-force strategy. We could try to be cleverer, eg storing new tuples in a special area of the table (perhaps making the table append-only by setting use_fsm). However that would add yet more locking issues. 
+*/
+```
+
+以上是代码中的官方注释,可以看出整个并发建索引过程中需要两次 table scan:
+
+第一次获取 snapshot1,然后 scan table 中 snapshot1 可见的 heap tuple,据此构建索引,然后将索引标记为可写。这部分代码相对比较容易理解,主要是 scan table 时基于 snapshot 判断 heap tuple 的可见性,然后基于 scan 出的 heap tuple,根据索引类型创建索引。代码实现主要在 index\_build 中。
+
+以 B-tree 索引为例,核心代码如下:
+
+```
+bt_build
+{
+    // table scan
+    // 表扫描,基于 snapshot 判断 heap tuple 可见性
+    if (RelationIsGlobalIndex(index)) {
+        allPartTuples = GlobalIndexBuildHeapScan(heap, index, indexInfo, btbuildCallback, (void*)&buildstate);
+    } else {
+        reltuples = tableam_index_build_scan(heap, index, indexInfo, true, btbuildCallback, (void*)&buildstate);
+    }
+    // 按照索引 key 对 tuple 进行排序
+    // 基于排完序的 tuple 构建 btree
+    _bt_leafbuild(buildstate.spool, buildstate.spool2);
+    ...
+}
+```
+
+第二次获取 snapshot2,在索引数据中追加 snapshot1 和 snapshot2 之间插入且不在索引中的数据。做法是首先获取当前索引中索引到的所有 tids(用的是 bulkdelete callback 而不是 index scan,因为前者速度更快,且不是所有的索引都支持 full-index indexscan),然后 scan table 中 snapshot2 可见的所有 heap tuple,获得 tids',最后 tids' 和 tids 的差集就是需要在索引中追加的 heap tuple 的 tids。
+
+唯一索引处理起来要更麻烦一些:一条数据存在多个版本时,不应该误报违反唯一约束,这可能需要在发现违反唯一约束的时候重新做一次检查。
+
+这部分代码的实现是 validate\_index,这里列出其中的关键代码:
+
+```
+validate_index
+{
+    ...
+    // scan index and gather all the tids into a tuplesort object
+    // 这段代码收集索引中的 tids 走的是 vacuum 流程中扫描索引的流程,是按照 physical order 扫描 index pages,
+    // 但在 callback 中只是收集 tids,并不会真正删除任何内容
+    state.tuplesort = tuplesort_begin_datum(
+        TIDOID, TIDLessOperator, InvalidOid, false, u_sess->attr.attr_memory.maintenance_work_mem, false);
+    state.htups = state.itups = state.tups_inserted = 0;
+    (void)index_bulk_delete(&ivinfo, NULL, validate_index_callback, (void*)&state);
+    /* Execute the sort */
+    // 按照 tid 大小排序
+    tuplesort_performsort(state.tuplesort);
+    /*
+     * Now scan the heap and "merge" it with the index
+     */
+    // 第二次 table scan,每个 scan 出的 tuple,如果是在 hot-chain 上则取
+    // hot-chain 的 root tuple,在索引 scan 出的 tuple 中(已经按照 tid 排序)查找,找不到则说明不在索引中,应该追加到索引中。
+    // 调用 index_insert 将这个 heap tuple 的索引数据插入索引
+    tableam_index_validate_scan(heapRelation, indexRelation, indexInfo, snapshot, &state);
+    ...
+}
+```
+
+validate\_index\_heapscan 的主要代码逻辑如下:
+
+```
+validate_index_heapscan
+{
+    ...
+    // 遍历 heap tuple
+    while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+    {
+        ...
+        // 如果在 hot-chain,用 hot-chain 的 root tuple 的 tid 在索引中查找
+        if (HeapTupleIsHeapOnly(heapTuple)) {
+            root_offnum = root_offsets[root_offnum - 1];
+            Assert(OffsetNumberIsValid(root_offnum));
+            ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
+        }
+        ...
+        // 在索引的 tids 中查找,由于索引的 tids 是有序的,
+        // 当 heap tuple 的 tid 小于索引的 tid 时继续查找,否则
+        // 1. 在索引中找到(tid 相等),不需要再插入索引
+        // 2. 不在索引中,需要插入
+        while (!tuplesort_empty && (!indexcursor || ItemPointerCompare(indexcursor, &rootTuple) < 0)) {
+            ...
+        }
+        // 没有找到对应的 tid,需要插入索引
+        if ((tuplesort_empty || ItemPointerCompare(indexcursor, &rootTuple) > 0) && !in_index[root_offnum - 1]) {
+            ...
+            // 追加索引
+            (void)index_insert(indexRelation,
+                               values,
+                               isnull,
+                               &rootTuple,
+                               heapRelation,
+                               indexInfo->ii_Unique ? 
UNIQUE_CHECK_YES : UNIQUE_CHECK_NO); + } + } +} +``` + +本文主要内容是结合代码详解 并发创建索引 过程中第二次 table scan 追加索引部分的实现,希望能对理解这部分的代码有所帮助。 + diff --git "a/content/zh/post/July/openGauss\345\244\207\344\273\275\346\201\242\345\244\215.md" "b/content/zh/post/July/openGauss\345\244\207\344\273\275\346\201\242\345\244\215.md" new file mode 100644 index 0000000000000000000000000000000000000000..be19aa1a8889047949ccfd7a5488d172f463e18c --- /dev/null +++ "b/content/zh/post/July/openGauss\345\244\207\344\273\275\346\201\242\345\244\215.md" @@ -0,0 +1,558 @@ ++++ + +title = "openGauss备份恢复" + +date = "2021-09-20" + +tags = [ "openGauss备份恢复"] + +archives = "2021-09" + +author = "李宏达" + +summary = "openGauss备份恢复" + +img = "/zh/post/July/title/img7.png" + +times = "12:30" + ++++ + +# openGauss备份恢复 + +## gs\_probackup + +- docker + + - 1.0.1 + + ``` + docker run --name brm_opengauss \ + --privileged=true -d -e GS_PASSWORD=mtkOP@123 \ + -v `pwd`/conf/brm.yaml:/etc/brm.yaml \ + -v `pwd`/var/lib/brm:/var/lib/brm \ + -v `pwd`/var/log/brm:/var/log/brm \ + enmotech/opengauss:1.0.1 + ``` + + - 1.1.0 + + ``` + docker run --name brm_opengauss_1230 \ + --privileged=true -d -e GS_PASSWORD=mtkOP@123 \ + -v `pwd`/conf/brm.yaml:/etc/brm.yaml \ + -v `pwd`/var/lib/brm:/var/lib/brm \ + -v `pwd`/var/log/brm:/var/log/brm \ + enmotech/opengauss:1.1.0 + ``` + + + +## 初始化 + +``` +export BACKUP_PATH=/home/omm/backup +gs_probackup init +``` + +## 添加实例 + +``` +gs_probackup add-instance --instance testdb --pgdata +``` + +## 备份 + +``` +gs_probackup backup --instance testdb -b full +``` + +## 配置数据库归档 + +设置参数 + +目录为 /wal/ + +``` +archive_mode = on +archive_command = 'cp %p /wal//%f' +# cp %p /usr/local/pgsql/data/pg_archive/%f' +``` + +## 查看备份 + +``` +gs_probackup show --instance testdb +``` + +- 查看归档备份 + + ``` + gs_probackup show --instance testdb --archive + ``` + + +## 恢复 + +``` +gs_probackup restore -B backup-path --instance=instance_name + [-D pgdata-path] [-i backup-id] [-j threads_num] [--progress] + [--force] [--no-sync] [--no-validate] [--skip-block-validation] + [--external-mapping=OLDDIR=NEWDIR] [-T OLDDIR=NEWDIR] + [--skip-external-dirs] [-I incremental_mode] + [--recovery-target-time=time|--recovery-target-xid=xid + |--recovery-target-lsn=lsn|--recovery-target-name=target-name] + [--recovery-target-inclusive=boolean] + [--recovery-target-timeline=timeline] + [--recovery-target=immediate|latest] + [--recovery-target-action=pause|promote|shutdown] + [--restore-command=cmdline] + [--remote-proto=protocol] [--remote-host=destination] + [--remote-path=path] [--remote-user=username] + [--remote-port=port] [--ssh-options=ssh_options] + [--log-level-console=log-level-console] + [--log-level-file=log-level-file] + [--log-filename=log-filename] + [--error-log-filename=error-log-filename] + [--log-directory=log-directory] + [--log-rotation-size=log-rotation-size] + [--log-rotation-age=log-rotation-age] + + -B, --backup-path=backup-path location of the backup storage area + --instance=instance_name name of the instance + -D, --pgdata=pgdata-path location of the database storage area + -i, --backup-id=backup-id backup to restore + -j, --threads=threads_num number of parallel threads + --progress show progress + --force ignore invalid status of the restored backup + --no-sync do not sync restored files to disk + --no-validate disable backup validation during restore + --skip-block-validation set to validate only file-level checksum + --external-mapping=OLDDIR=NEWDIR + relocate the external directory from OLDDIR to NEWDIR + -T, 
--tablespace-mapping=OLDDIR=NEWDIR + relocate the tablespace from directory OLDDIR to NEWDIR + --skip-external-dirs do not restore all external directories + -I, --incremental-mode=none|checksum|lsn + reuse valid pages available in PGDATA if they have not changed + (default: none) +``` + +## 场景 + +- 环境配置 + + ``` + gs_probackup + + export BACKUP_PATH=/var/lib/brm + gs_probackup init + gs_probackup add-instance --instance testdb01 -D /var/lib/opengauss/data + gs_probackup set-config --instance testdb01 --pgdatabase postgres + gs_probackup show-config --instance testdb01 + # Backup instance information + pgdata = /var/lib/opengauss/data + system-identifier = 6910097200378281726 + # Connection parameters + pgdatabase = postgres + # Archive parameters + archive-timeout = 5min + # Logging parameters + log-level-console = LOG + log-level-file = OFF + log-filename = pg_probackup.log + log-rotation-size = 0TB + log-rotation-age = 0d + # Retention parameters + retention-redundancy = 0 + retention-window = 0 + wal-depth = 0 + # Compression parameters + compress-algorithm = none + compress-level = 1 + # Remote access parameters + remote-proto = ssh + ``` + + +- 数据库设置 + + ``` + [omm@0150b32d2461 ~]$ gsql + gsql ((openGauss 1.0.1 build e9da9fb9) compiled at 2020-10-01 13:58:32 commit 0 last mr ) + Non-SSL connection (SSL connection is recommended when requiring high-security) + Type "help" for help. + + omm=# show archive_mode; + archive_mode + -------------- + on + (1 row) + + omm=# show archive_command; + archive_command + ------------------------------------ + cp %p /var/lib/brm/wal/testdb01/%f + (1 row) + + omm=# select pg_switch_xlog(); + pg_switch_xlog + ---------------- + 0/72000150 + (1 row) + ``` + + +- 查看归档 + + ``` + [omm@0150b32d2461 ~]$ ls -l /var/lib/brm/wal/testdb01/ + total 49152 + -rw------- 1 omm omm 16777216 Jan 11 03:30 00000001000000000000002F + -rw------- 1 omm omm 16777216 Jan 11 03:30 000000010000000000000071 + -rw------- 1 omm omm 16777216 Jan 11 03:30 000000010000000000000072 + ``` + +- 模拟基础环境 + + ``` + [omm@0150b32d2461 ~]$ gsql + gsql ((openGauss 1.0.1 build e9da9fb9) compiled at 2020-10-01 13:58:32 commit 0 last mr ) + Non-SSL connection (SSL connection is recommended when requiring high-security) + Type "help" for help. + + omm=# create table brm_test(t timestamp); + CREATE TABLE + + omm=# insert into brm_test values(now()); + INSERT 0 1 + omm=# select * from brm_test; + t + ---------------------------- + 2021-01-11 03:33:40.737837 + 2021-01-11 03:38:46.32794 + 2021-01-11 03:39:42.466014 + 2021-01-11 03:40:02.816579 + 2021-01-11 07:29:21.98839 + (5 rows) + ``` + + +- 全备份 + + ``` + [omm@0150b32d2461 ~]$ gs_probackup backup --instance testdb01 -b full + INFO: Backup start, pg_probackup version: 2.4.2, instance: testdb01, backup ID: QMRFD9, backup mode: FULL, wal mode: STREAM, remote: false, compress-algorithm: none, compress-level: 1 + LOG: Backup destination is initialized + WARNING: This PostgreSQL instance was initialized without data block checksums. pg_probackup have no way to detect data block corruption without them. Reinitialize PGDATA with option '--data-checksums'. 
+ LOG: Database backup start + LOG: started streaming WAL at 0/86000000 (timeline 1) + check identify system success + send START_REPLICATION 0/86000000 success + keepalive message is received + INFO: PGDATA size: 317MB + INFO: Start transferring data files + LOG: Creating page header map "/var/lib/brm/backups/testdb01/QMRFD9/page_header_map" + keepalive message is received + keepalive message is received + keepalive message is received + keepalive message is received + keepalive message is received + keepalive message is received + keepalive message is received + keepalive message is received + keepalive message is received + keepalive message is received + keepalive message is received + keepalive message is received + keepalive message is received + keepalive message is received + keepalive message is received + keepalive message is received + INFO: Data files are transferred, time elapsed: 31s + INFO: wait for pg_stop_backup() + keepalive message is received + INFO: pg_stop backup() successfully executed + LOG: stop_lsn: 0/860001D0 + LOG: Looking for LSN 0/860001D0 in segment: 000000010000000000000086 + LOG: Found WAL segment: /var/lib/brm/backups/testdb01/QMRFD9/database/pg_xlog/000000010000000000000086 + LOG: Thread [0]: Opening WAL segment "/var/lib/brm/backups/testdb01/QMRFD9/database/pg_xlog/000000010000000000000086" + LOG: Found LSN: 0/860001D0 + (null): not renaming 000000010000000000000087, segment is not complete. + LOG: finished streaming WAL at 0/87000130 (timeline 1) + LOG: Getting the Recovery Time from WAL + LOG: Thread [0]: Opening WAL segment "/var/lib/brm/backups/testdb01/QMRFD9/database/pg_xlog/000000010000000000000086" + INFO: Syncing backup files to disk + INFO: Backup files are synced, time elapsed: 2s + INFO: Validating backup QMRFD9 + INFO: Backup QMRFD9 data files are valid + INFO: Backup QMRFD9 resident size: 349MB + INFO: Backup QMRFD9 completed + ``` + +- 模拟增量数据 + + ``` + gsql ((openGauss 1.0.1 build e9da9fb9) compiled at 2020-10-01 13:58:32 commit 0 last mr ) + Non-SSL connection (SSL connection is recommended when requiring high-security) + Type "help" for help. 
+ omm=# select pg_current_xlog_location(), + pg_xlogfile_name(pg_current_xlog_location()), + pg_xlogfile_name(pg_current_xlog_location()), + txid_current(), + now();omm-# omm-# omm-# omm-# + pg_current_xlog_location | pg_xlogfile_name | pg_xlogfile_name | txid_current | now + --------------------------+--------------------------+--------------------------+--------------+------------------------------- + 0/87000130 | 000000010000000000000087 | 000000010000000000000087 | 11209 | 2021-01-11 07:57:25.414668+00 + (1 row) + + omm=# select pg_switch_xlog(); + pg_switch_xlog + ---------------- + 0/870001D0 + (1 row) + + omm=# insert into brm_test values(now()); + INSERT 0 1 + omm=# select pg_current_xlog_location(), + pg_xlogfile_name(pg_current_xlog_location()), + pg_xlogfile_name(pg_current_xlog_location()), + txid_current(), + now(); + omm-# omm-# omm-# omm-# pg_current_xlog_location | pg_xlogfile_name | pg_xlogfile_name | txid_current | now + --------------------------+--------------------------+--------------------------+--------------+------------------------------- + 0/88000208 | 000000010000000000000088 | 000000010000000000000088 | 11211 | 2021-01-11 07:57:40.428398+00 + (1 row) + + omm=# select pg_switch_xlog(); + pg_switch_xlog + ---------------- + 0/880002A8 + (1 row) + + omm=# select pg_switch_xlog(); + pg_switch_xlog + ---------------- + 0/89000150 + (1 row) + + omm=# insert into brm_test values(now()); + INSERT 0 1 + omm=# + omm=# select pg_current_xlog_location(), + pg_xlogfile_name(pg_current_xlog_location()), + pg_xlogfile_name(pg_current_xlog_location()), + txid_current(), + now();omm-# omm-# omm-# omm-# + pg_current_xlog_location | pg_xlogfile_name | pg_xlogfile_name | txid_current | now + --------------------------+--------------------------+--------------------------+--------------+------------------------------- + 0/8A000208 | 00000001000000000000008A | 00000001000000000000008A | 11213 | 2021-01-11 07:58:06.702327+00 + (1 row) + + omm=# select pg_switch_xlog(); + pg_switch_xlog + ---------------- + 0/8A0002A8 + (1 row) + + omm=# select pg_current_xlog_location(), + pg_xlogfile_name(pg_current_xlog_location()), + pg_xlogfile_name(pg_current_xlog_location()), + txid_current(), + now(); + omm-# omm-# omm-# omm-# pg_current_xlog_location | pg_xlogfile_name | pg_xlogfile_name | txid_current | now + --------------------------+--------------------------+--------------------------+--------------+------------------------------- + 0/8B000130 | 00000001000000000000008B | 00000001000000000000008B | 11214 | 2021-01-11 07:58:15.204024+00 + (1 row) + + omm=# + ``` + + +- 查看备份信息 + + ``` + [omm@0150b32d2461 ~]$ gs_probackup show --archive + + ARCHIVE INSTANCE 'testdb01' + =============================================================================================================================== + TLI Parent TLI Switchpoint Min Segno Max Segno N segments Size Zratio N backups Status + =============================================================================================================================== + 1 0 0/0 000000010000000000000086 00000001000000000000008A 5 80MB 1.00 1 OK + [omm@0150b32d2461 ~]$ gs_probackup show + + BACKUP INSTANCE 'testdb01' + =================================================================================================================================== + Instance Version ID Recovery Time Mode WAL Mode TLI Time Data WAL Zratio Start LSN Stop LSN Status + 
=================================================================================================================================== + testdb01 9.2 QMRFD9 2021-01-11 07:56:30+00 FULL STREAM 1/0 41s 333MB 16MB 0.95 0/86000028 0/860001D0 OK + ``` + +- 基于时间点的恢复 + + 恢复全量备份 —\> 2 .用户指定了xid/time/lsn. brm进行遍历所有备份,找出最近的备份集通过gs\_probackup进行恢复 + + ``` + [omm@0150b32d2461 ~]$ gs_probackup restore --instance testdb01 -D /home/omm/a1/ -i QMRFD9 + LOG: Restore begin. + LOG: there is no file tablespace_map + LOG: check tablespace directories of backup QMRFD9 + LOG: check external directories of backup QMRFD9 + INFO: Validating backup QMRFD9 + INFO: Backup QMRFD9 data files are valid + LOG: Thread [1]: Opening WAL segment "/var/lib/brm/backups/testdb01/QMRFD9/database/pg_xlog/000000010000000000000086" + INFO: Backup QMRFD9 WAL segments are valid + INFO: Backup QMRFD9 is valid. + INFO: Restoring the database from backup at 2021-01-11 07:55:57+00 + LOG: there is no file tablespace_map + LOG: Restore directories and symlinks... + INFO: Start restoring backup files. PGDATA size: 333MB + LOG: Start thread 1 + INFO: Backup files are restored. Transfered bytes: 349MB, time elapsed: 2s + INFO: Restore incremental ratio (less is better): 105% (349MB/333MB) + INFO: Syncing restored files to disk + INFO: Restored backup files are synced, time elapsed: 0 + INFO: Restore of backup QMRFD9 completed. + ``` + +- 编辑recover.conf —\> 3. 如果没有指定time/lsn/xid不生成recover.conf文件.如果指定了生成recover.conf. + + ``` + vi a1/recover.conf + # recovery_target_time = '2021-01-11 03:40:02+00' + recovery_target_lsn = '0/880002A8' + #recovery_target_action = 'pause' + %p --> pg_xlog/000000010000000000000001 + %f --> 000000010000000000000001 + restore_command = 'cp /var/lib/brm/wal/testdb01/%f %p' + # restore_command = 'brm get-wal -f %f -p %p' + pause_at_recovery_target = true + ``` + +- 编辑配置文件(同一台防止端口冲突关闭归档 —\> 4. 是否需要配置postgres.conf文件 + + ``` + echo "port=6433" >> a1/postgresql.conf + echo "archive_mode=off" >> a1/postgresql.conf + ``` + +- 启动实例 + + gs\_ctl start -D /home/omm/a1 —\> 5. 恢复成功进行gs\_ctl start -D 恢复目录 + + ``` + [2021-01-11 08:14:56.533][313][][gs_ctl]: gs_ctl started,datadir is -D "/home/omm/a1" + [2021-01-11 08:14:56.576][313][][gs_ctl]: port:5432 already in use. /proc/net/tcp: + sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode + 0: 00000000:1538 00000000:0000 0A 00000000:00000000 00:00000000 00000000 70 0 2236132 1 0000000000000000 100 0 0 10 0 + [2021-01-11 08:14:56.576][313][][gs_ctl]: CheckPort: popen(command:lsof -i:5432 | grep -E 'COMMAND|LISTEN'). + COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME + + GaussMast 1 omm 7u IPv4 2236132 0t0 TCP *:postgres (LISTEN) + + GaussMast 1 omm 8u IPv6 2236133 0t0 TCP *:postgres (LISTEN) + + [2021-01-11 08:14:56.673][313][][gs_ctl]: port conflict when start server + [2021-01-11 08:14:56.674][313][][gs_ctl]: waiting for server to start... + .0 LOG: The core dump path in core_pattern is an invalid directory. + 0 [BACKEND] LOG: Begin to start openGauss Database. + 2021-01-11 08:14:56.761 [unknown] [unknown] localhost 139701065868352 0 0 [BACKEND] LOG: Transparent encryption disabled. + 2021-01-11 08:14:56.763 [unknown] [unknown] localhost 139701065868352 0 0 [BACKEND] WARNING: could not create any HA TCP/IP sockets + 2021-01-11 08:14:56.765 [unknown] [unknown] localhost 139701065868352 0 0 [BACKEND] WARNING: No explicit IP is configured for listen_addresses GUC. 
+ 2021-01-11 08:14:56.765 [unknown] [unknown] localhost 139701065868352 0 0 [BACKEND] LOG: InitNuma numaNodeNum: 1 numa_distribute_mode: none inheritThreadPool: 0. + 2021-01-11 08:14:56.765 [unknown] [unknown] localhost 139701065868352 0 0 [BACKEND] LOG: shared memory 321 Mbytes, memory context 11454 Mbytes, max process memory 12288 Mbytes + 2021-01-11 08:14:56.765 [unknown] [unknown] localhost 139701065868352 0 0 [BACKEND] LOG: Initilize the memory protect with Process Chunks number 11454, change bits 20 + 2021-01-11 08:14:56.785 [unknown] [unknown] localhost 139701065868352 0 0 [CACHE] LOG: set data cache size(402653184) + 2021-01-11 08:14:56.796 [unknown] [unknown] localhost 139701065868352 0 0 [CACHE] LOG: set metadata cache size(134217728) + 2021-01-11 08:14:56.848 [unknown] [unknown] localhost 139701065868352 0 0 [BACKEND] LOG: gaussdb: fsync file "/home/omm/a1/gaussdb.state.temp" success + 2021-01-11 08:14:56.849 [unknown] [unknown] localhost 139701065868352 0 0 [BACKEND] LOG: create gaussdb state file success: db state(STARTING_STATE), server mode(Normal) + 2021-01-11 08:14:56.908 [unknown] [unknown] localhost 139701065868352 0 0 [BACKEND] LOG: max_safe_fds = 976, usable_fds = 1000, already_open = 14 + 2021-01-11 08:14:56.909 [unknown] [unknown] localhost 139701065868352 0 0 [BACKEND] LOG: The core dump path in core_pattern is an invalid directory. + 2021-01-11 08:14:56.910 [unknown] [unknown] localhost 139701065868352 0 0 [BACKEND] LOG: Success to start openGauss Database. If you specify "&", please press any key to exit... + [2021-01-11 08:14:57.675][313][][gs_ctl]: waitpid 319 failed, exitstatus is 256, ret is 2 + + [2021-01-11 08:14:57.675][313][][gs_ctl]: stopped waiting + [2021-01-11 08:14:57.675][313][][gs_ctl]: could not start server + [2021-01-11 08:14:57.675][313][][gs_ctl]: Examine the log output. + [omm@0150b32d2461 ~]$ vi a1/recovery.conf + [omm@0150b32d2461 ~]$ gs_ctl start -D /home/omm/a1/ + [2021-01-11 08:15:29.342][352][][gs_ctl]: gs_ctl started,datadir is -D "/home/omm/a1" + [2021-01-11 08:15:29.401][352][][gs_ctl]: port:5432 already in use. /proc/net/tcp: + sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode + 0: 00000000:1538 00000000:0000 0A 00000000:00000000 00:00000000 00000000 70 0 2236132 1 0000000000000000 100 0 0 10 0 + [2021-01-11 08:15:29.401][352][][gs_ctl]: CheckPort: popen(command:lsof -i:5432 | grep -E 'COMMAND|LISTEN'). + COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME + + GaussMast 1 omm 7u IPv4 2236132 0t0 TCP *:postgres (LISTEN) + + GaussMast 1 omm 8u IPv6 2236133 0t0 TCP *:postgres (LISTEN) + + [2021-01-11 08:15:29.500][352][][gs_ctl]: port conflict when start server + [2021-01-11 08:15:29.500][352][][gs_ctl]: waiting for server to start... + .0 LOG: The core dump path in core_pattern is an invalid directory. + 0 [BACKEND] LOG: Begin to start openGauss Database. + 2021-01-11 08:15:29.627 [unknown] [unknown] localhost 140439454434368 0 0 [BACKEND] LOG: Transparent encryption disabled. + 2021-01-11 08:15:29.628 [unknown] [unknown] localhost 140439454434368 0 0 [BACKEND] WARNING: could not create any HA TCP/IP sockets + 2021-01-11 08:15:29.631 [unknown] [unknown] localhost 140439454434368 0 0 [BACKEND] WARNING: No explicit IP is configured for listen_addresses GUC. + 2021-01-11 08:15:29.631 [unknown] [unknown] localhost 140439454434368 0 0 [BACKEND] LOG: InitNuma numaNodeNum: 1 numa_distribute_mode: none inheritThreadPool: 0. 
+ 2021-01-11 08:15:29.631 [unknown] [unknown] localhost 140439454434368 0 0 [BACKEND] LOG: shared memory 321 Mbytes, memory context 11454 Mbytes, max process memory 12288 Mbytes + 2021-01-11 08:15:29.631 [unknown] [unknown] localhost 140439454434368 0 0 [BACKEND] LOG: Initilize the memory protect with Process Chunks number 11454, change bits 20 + 2021-01-11 08:15:29.659 [unknown] [unknown] localhost 140439454434368 0 0 [CACHE] LOG: set data cache size(402653184) + 2021-01-11 08:15:29.674 [unknown] [unknown] localhost 140439454434368 0 0 [CACHE] LOG: set metadata cache size(134217728) + 2021-01-11 08:15:29.741 [unknown] [unknown] localhost 140439454434368 0 0 [BACKEND] LOG: gaussdb: fsync file "/home/omm/a1/gaussdb.state.temp" success + 2021-01-11 08:15:29.741 [unknown] [unknown] localhost 140439454434368 0 0 [BACKEND] LOG: create gaussdb state file success: db state(STARTING_STATE), server mode(Normal) + 2021-01-11 08:15:29.775 [unknown] [unknown] localhost 140439454434368 0 0 [BACKEND] LOG: max_safe_fds = 976, usable_fds = 1000, already_open = 14 + 2021-01-11 08:15:29.775 [unknown] [unknown] localhost 140439454434368 0 0 [BACKEND] LOG: The core dump path in core_pattern is an invalid directory. + 2021-01-11 08:15:29.777 [unknown] [unknown] localhost 140439454434368 0 0 [BACKEND] LOG: Success to start openGauss Database. If you specify "&", please press any key to exit... + + [2021-01-11 08:15:30.517][352][][gs_ctl]: done + [2021-01-11 08:15:30.517][352][][gs_ctl]: server started (/home/omm/a1) + ``` + +- 验证 + + ``` + [omm@0150b32d2461 ~]$ gsql -p6433 + gsql ((openGauss 1.0.1 build e9da9fb9) compiled at 2020-10-01 13:58:32 commit 0 last mr ) + Non-SSL connection (SSL connection is recommended when requiring high-security) + Type "help" for help. + + omm=# select * from brm_test; + t + ---------------------------- + 2021-01-11 03:33:40.737837 + 2021-01-11 03:38:46.32794 + 2021-01-11 03:39:42.466014 + 2021-01-11 03:40:02.816579 + 2021-01-11 07:29:21.98839 + 2021-01-11 07:57:36.799356 + (6 rows) + ``` + + +## lsn + +``` +[omm@7ec0d4302ea3 ~]$ gs_probackup validate --instance testdb01 -D /home/omm/a1/ --recovery-target-lsn=0/79000228 +LOG: Validate begin. +INFO: Validating backup QMR39R +INFO: Backup QMR39R data files are valid +LOG: Thread [1]: Opening WAL segment "/var/lib/brm/backups/testdb01/QMR39R/database/pg_xlog/000000010000000000000076" +LOG: Extracting pagemap from tli 1 on range from 0/760001D0 to 0/0 +LOG: Thread [1]: Opening WAL segment "/var/lib/brm/wal/testdb01/000000010000000000000076" +WARNING: Thread [1]: Could not read WAL record at 0/77000000: read xlog page failed at 0/77000028 +ERROR: Thread [1]: WAL segment "/var/lib/brm/wal/testdb01/000000010000000000000076" is absent +WARNING: Recovery can be done up to time 2021-01-11 03:35:03+00, xid 9930 and LSN 0/760001D0 +ERROR: Not enough WAL records to lsn 0/79000228 +[omm@7ec0d4302ea3 ~]$ ls -l /var/lib/brm/wal/testdb01/000000010000000000000076 +-rw------- 1 omm omm 16777216 Jan 11 03:35 /var/lib/brm/wal/testdb01/000000010000000000000076 +[omm@0150b32d2461 ~]$ gs_probackup restore --instance testdb01 --recovery-target-lsn='0/880002A8' --restore-command='cp /var/lib/brm/wal/testdb01/%f %p' --no-validate -D /home/omm/a1/ +LOG: Restore begin. +LOG: there is no file tablespace_map +LOG: check tablespace directories of backup QMRFD9 +# recovery.conf generated by pg_probackup 2.4.2 +LOG: check external directories of backup QMRFD9 +WARNING: Backup QMRFD9 is used without validation. 
+ERROR: Backup QMRFD9 was created for version 9.2 which doesn't support recovery_target_lsn --->
+```
+
diff --git "a/content/zh/post/July/openGauss\346\225\260\346\215\256\345\272\223xlog\347\233\256\345\275\225\346\273\241\351\227\256\351\242\230\345\244\204\347\220\206.md" "b/content/zh/post/July/openGauss\346\225\260\346\215\256\345\272\223xlog\347\233\256\345\275\225\346\273\241\351\227\256\351\242\230\345\244\204\347\220\206.md"
new file mode 100644
index 0000000000000000000000000000000000000000..4939d0217880765a9675cfc39afdd20854a57acb
--- /dev/null
+++ "b/content/zh/post/July/openGauss\346\225\260\346\215\256\345\272\223xlog\347\233\256\345\275\225\346\273\241\351\227\256\351\242\230\345\244\204\347\220\206.md"
@@ -0,0 +1,148 @@
++++
+
+title = "openGauss数据库xlog目录满问题处理"
+
+date = "2021-09-21"
+
+tags = [ "openGauss数据库xlog目录满问题处理"]
+
+archives = "2021-09"
+
+author = "阎书利"
+
+summary = "openGauss数据库xlog目录满问题处理"
+
+img = "/zh/post/July/title/img11.png"
+
+times = "12:30"
+
++++
+
+# openGauss数据库xlog目录满问题处理
+
+openGauss 数据库 xlog 目录满通常有以下几个原因:
+
+1.主备状态不正常,存在网络问题,集群内有宕机的节点
+
+2.xlog 保留数量过多
+
+3.逻辑复制槽失效,且未及时清理
+
+4.开启归档,但归档失败导致 xlog 不清理
+
+**首先,确认数据库状态**
+
+```
+gs_om -t query
+```
+
+确认主备状态,是否存在宕机的节点。
+
+查看是否存在 down、Standby Need repair\(WAL\) 或者 unknown 的状态。
+
+如果数据库状态不正常,且 xlog 目录已 100% 占满,需要先手动移走一部分 xlog,将库拉起并确认数据库状态后,再排查相关问题。
+
+如果数据库状态正常,仅 xlog 目录占用大,则继续排查其他问题。
+
+**清理:**
+
+1.找一个空间大的目录,例如:
+
+```
+su - omm
+cd /opengauss_bak
+mkdir xlog_mv_0919
+```
+
+2.进入 xlog 路径,准备移走部分 xlog
+
+```
+cd /ogdata/data/dn1/pg_xlog
+```
+
+查看 xlog 数量,确认是否保留过多
+
+```
+ls | wc -l
+```
+
+!!!为了尽快恢复环境,先移动一小部分 xlog,其余等问题处理之后再自行清理
+
+生成移动 xlog 的语句,并检查(前 1000 条)
+
+```
+ls -ltr | head -n 1000 | awk '{print "mv "$9 " /opengauss_bak/xlog_mv_0919/"}'
+```
+
+3.实际执行移动操作
+
+```
+ls -ltr | head -n 1000 | awk '{print "mv "$9 " /opengauss_bak/xlog_mv_0919/"}' | sh
+```
+
+4.移动之后 df -Th 查看空间是否下降
+
+5.gs\_om -t query 查看数据库状态
+
+如果不正常,需要先尝试拉起主数据库
+
+```
+gs_ctl start -D /ogdata/data/dn1
+```
+
+然后依次拉起备机数据库
+
+```
+gs_ctl start -D /ogdata/data/dn1 -M standby
+```
+
+备库拉不起来则先不处理,等找到 xlog 目录满的源头后(例如在主库删除失效逻辑复制槽后),再考虑做 build(先尝试增量,不行再用全量)
+
+```
+gs_ctl build -D /ogdata/data/dn1 -b incremental
+gs_ctl build -D /ogdata/data/dn1 -b full
+```
+
+6.登录主数据库查看逻辑复制槽状态,确认有无失效的逻辑复制槽
+
+```
+select * from pg_replication_slots;
+```
+
+7.在主库删除失效逻辑复制槽
+
+```
+select * from pg_drop_replication_slot('aohdoasdaoiodiandoan');
+--------- aohdoasdaoiodiandoan 为逻辑复制槽名字
+```
+
+删除失效的逻辑复制槽后,主库和备库的 xlog 目录应该都会释放一部分空间
+
+8.删除后 df -Th 查看空间是否下降
+
+9.参数调整
+
+(1)查看 wal\_keep\_segments 参数,该参数为 Xlog 日志文件段数量,即 "pg\_xlog" 目录下保留事务日志文件的最小数目。
+
+(2)查看 max\_size\_for\_xlog\_prune 参数,该参数在 enable\_xlog\_prune 打开时生效,如果有备机断连且 xlog 日志大小大于此阈值,则回收日志。
+
+根据实际状况,可对以上参数进行修改。
+
+(3)如果是 PG13 版本,可考虑设置 max\_slot\_wal\_keep\_size 参数,它限制 replication slot 允许保留的 wal 文件的最大大小,用于防止 wal 无限增大导致主库的文件系统空间被撑爆。设置该参数之后,一旦超过该参数值,PostgreSQL 将开始删除最早的 WAL 文件。默认值是 -1,表示禁用本功能,单位是 MB。
+
+10.检查归档模式是否开启
+
+```
+show archive_mode;
+```
+
+到归档目录下,确认开启归档参数后是否有归档文件产生,并检查归档空间,排除归档相关问题。
+
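+上面各步骤中数据库侧的检查可以汇总为类似下面的查询(示意用法,视图列和参数名以实际 openGauss 版本为准):
+
+```
+-- 查看有无失效(active = f)且 restart_lsn 长期不推进的复制槽
+select slot_name, slot_type, active, restart_lsn from pg_replication_slots;
+
+-- 查看与 xlog 保留、回收相关的参数
+show wal_keep_segments;
+show max_size_for_xlog_prune;
+
+-- 查看归档配置
+show archive_mode;
+show archive_command;
+```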