From 9eadcf9020d5df184c02d6e83418d575ad9006c3 Mon Sep 17 00:00:00 2001 From: Rock Date: Mon, 13 May 2024 14:05:56 +0800 Subject: [PATCH] Support SMP with parallel and serial path scheduling --- src/gausskernel/optimizer/path/allpaths.cpp | 30 +- src/gausskernel/optimizer/path/joinpath.cpp | 350 +++++++++++++----- src/gausskernel/optimizer/path/joinrels.cpp | 4 +- .../optimizer/path/streampath_base.cpp | 6 +- src/gausskernel/optimizer/plan/createplan.cpp | 2 - src/gausskernel/optimizer/plan/planner.cpp | 5 +- src/gausskernel/optimizer/plan/streamplan.cpp | 4 +- .../optimizer/plan/streamwalker.cpp | 5 +- src/gausskernel/optimizer/util/pathnode.cpp | 341 ++++++++++++++++- src/include/nodes/relation.h | 1 + src/include/optimizer/pathnode.h | 3 +- src/test/regress/expected/plan_hint_iud.out | 31 +- src/test/regress/pg_regress.cpp | 2 +- 13 files changed, 647 insertions(+), 137 deletions(-) diff --git a/src/gausskernel/optimizer/path/allpaths.cpp b/src/gausskernel/optimizer/path/allpaths.cpp index bf5ac06a29..1dab15bc28 100755 --- a/src/gausskernel/optimizer/path/allpaths.cpp +++ b/src/gausskernel/optimizer/path/allpaths.cpp @@ -289,7 +289,17 @@ RelOptInfo* make_one_rel(PlannerInfo* root, List* joinlist) * Generate access paths for the entire join tree. */ rel = make_rel_from_joinlist(root, joinlist); - + /* + * The generated SMP paths are stored in partial_pathlist; however, partial_pathlist + * is not used to store plans permanently. Therefore, it is necessary to compare the optimal + * plan in partial_pathlist with the serial plans already in pathlist + * and save the best one in pathlist. + */ + if (list_length(joinlist) <= 1 && rel->partial_pathlist != NULL) { + Path* partial_path = (Path*)linitial(rel->partial_pathlist); + add_path(root, rel, partial_path); + set_cheapest(rel, root); + } /* * The result should join all and only the query's base rels. */ @@ -1174,14 +1184,14 @@ static void set_plain_rel_pathlist(PlannerInfo* root, RelOptInfo* rel, RangeTblE } else { add_path(root, rel, create_cstorescan_path(root, rel)); if (can_parallel) - add_path(root, rel, create_cstorescan_path(root, rel, u_sess->opt_cxt.query_dop)); + add_partial_path(root, rel, create_cstorescan_path(root, rel, u_sess->opt_cxt.query_dop)); } break; } case REL_ROW_ORIENTED: { add_path(root, rel, create_seqscan_path(root, rel, required_outer)); if (can_parallel) - add_path(root, rel, create_seqscan_path(root, rel, required_outer, u_sess->opt_cxt.query_dop)); + add_partial_path(root, rel, create_seqscan_path(root, rel, required_outer, u_sess->opt_cxt.query_dop)); break; } default: { @@ -1219,7 +1229,18 @@ static void set_plain_rel_pathlist(PlannerInfo* root, RelOptInfo* rel, RangeTblE RelationClose(relation); } #endif - + /* + * If the rel is a partitioned table (or column-oriented), pathlist and + * partial_pathlist are no longer kept separate, so that the partition plan is still supported.
+ */ + if (rel->isPartitionedTable || rel->orientation == REL_COL_ORIENTED) { + ListCell* pathCell = NULL; + foreach(pathCell, rel->partial_pathlist) { + Path* path = (Path*)lfirst(pathCell); + add_path(root, rel, path); + } + rel->partial_pathlist = NULL; + } /* Now find the cheapest of the paths for this rel */ set_cheapest(rel); @@ -3270,7 +3291,6 @@ RelOptInfo* standard_join_search(PlannerInfo* root, int levels_needed, List* ini if (IS_STREAM_PLAN && permit_gather(root, HINT_GATHER_JOIN)) { CreateGatherPaths(root, rel, true); } - /* Find and save the cheapest paths for this rel */ set_cheapest(rel, root); diff --git a/src/gausskernel/optimizer/path/joinpath.cpp b/src/gausskernel/optimizer/path/joinpath.cpp index a8ea8bba2b..32f04e48b3 100755 --- a/src/gausskernel/optimizer/path/joinpath.cpp +++ b/src/gausskernel/optimizer/path/joinpath.cpp @@ -53,6 +53,8 @@ static void sort_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI List* restrictlist, List* mergeclause_list, JoinType jointype, JoinPathExtraData* extra, Relids param_source_rels); static void match_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel, RelOptInfo* innerrel, List* restrictlist, List* mergeclause_list, JoinType jointype, JoinPathExtraData* extra, Relids param_source_rels); +static void match_partial_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel, RelOptInfo* innerrel, + List* restrictlist, List* mergeclause_list, JoinType jointype, JoinPathExtraData* extra, Relids param_source_rels); static void hash_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel, RelOptInfo* innerrel, List* restrictlist, JoinType jointype, JoinPathExtraData* extra, Relids param_source_rels); @@ -295,7 +297,7 @@ void add_paths_to_joinrel(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* ou * (That's okay because we know that nestloop can't handle right/full * joins at all, so it wouldn't work in the prohibited cases either.) */ - if (mergejoin_allowed) + if (mergejoin_allowed) { match_unsorted_outer(root, joinrel, outerrel, @@ -305,6 +307,21 @@ void add_paths_to_joinrel(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* ou jointype, &extra, param_source_rels); + if (u_sess->opt_cxt.query_dop > 1) { + /* + *The match_partial_unsorted_outer function is used in SMP scenarios. + */ + match_partial_unsorted_outer(root, + joinrel, + outerrel, + innerrel, + restrictlist, + mergeclause_list, + jointype, + &extra, + param_source_rels); + } + } #ifdef NOT_USED @@ -613,7 +630,6 @@ static void try_nestloop_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType j if (add_path_precheck(joinrel, workspace.startup_cost, workspace.total_cost, pathkeys, required_outer) || add_path_hintcheck(root->parse->hintState, joinrel->relids, outer_path, inner_path, HINT_KEYWORD_NESTLOOP)) { -#ifdef STREAMPLAN /* check exec type of inner and outer path before generate join path. 
*/ if (IS_STREAM_PLAN && CheckJoinExecType(root, outer_path, inner_path)) { execOnCoords = true; @@ -645,47 +661,22 @@ static void try_nestloop_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType j if (u_sess->opt_cxt.query_dop > 1) nlpgen->addNestLoopPath(&cur_workspace, distribution, u_sess->opt_cxt.query_dop); } else { -#ifdef ENABLE_MULTIPLE_NODES - /* - * We choose candidate distribution list here with heuristic method, - * (1) for un-correlated query block, we will get a list of candidate Distribution - * (2) for correlated query block, we should shuffle them (inner and outer) to a correlated subplan node - * group - */ - List* candidate_distribution_list = - ng_get_join_candidate_distribution_list(outer_path, inner_path, root->is_correlated, - get_join_distribution_perference_type(joinrel, inner_path, outer_path)); - - /* - * For each candidate distribution (node group), we do nest loop join computing on it, - * if outer or inner is not in candidate node group, we should do shuffle. - */ - ListCell* lc = NULL; - foreach (lc, candidate_distribution_list) { - Distribution* distribution = (Distribution*)lfirst(lc); - JoinCostWorkspace cur_workspace; - copy_JoinCostWorkspace(&cur_workspace, &workspace); - - nlpgen->addNestLoopPath(&cur_workspace, distribution, 1); - - if (u_sess->opt_cxt.query_dop > 1) - nlpgen->addNestLoopPath(&cur_workspace, distribution, u_sess->opt_cxt.query_dop); - } -#else JoinCostWorkspace cur_workspace; copy_JoinCostWorkspace(&cur_workspace, &workspace); nlpgen->addNestLoopPath(&cur_workspace, NULL, 1); - if (u_sess->opt_cxt.query_dop > 1) + if (u_sess->opt_cxt.query_dop > 1) { nlpgen->addNestLoopPath(&cur_workspace, NULL, u_sess->opt_cxt.query_dop); -#endif + if (joinrel->partial_pathlist != NULL) { + Path* cheapest_partial_path = (Path*)linitial(joinrel->partial_pathlist); + add_path(root, joinrel, cheapest_partial_path ,false); + } + } } delete nlpgen; - } else -#endif - { + } else { /* try nestloop path single */ TryNestLoopPathSingle(root, joinrel, @@ -816,12 +807,10 @@ static void try_mergejoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType if (add_path_precheck(joinrel, workspace.startup_cost, workspace.total_cost, pathkeys, required_outer) || add_path_hintcheck(root->parse->hintState, joinrel->relids, outer_path, inner_path, HINT_KEYWORD_MERGEJOIN)) { -#ifdef STREAMPLAN /* check exec type of inner and outer path before generate join path. 
*/ if (IS_STREAM_PLAN && CheckJoinExecType(root, outer_path, inner_path)) { execOnCoords = true; } - if (IS_STREAM_PLAN && !execOnCoords) { MergeJoinPathGen* mjpgen = New(CurrentMemoryContext) MergeJoinPathGen(root, joinrel, @@ -850,45 +839,16 @@ static void try_mergejoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType if (u_sess->opt_cxt.query_dop > 1) mjpgen->addMergeJoinPath(&cur_workspace, distribution, u_sess->opt_cxt.query_dop); } else { -#ifdef ENABLE_MULTIPLE_NODES - /* - * We choose candidate distribution list here with heuristic method, - * (1) for un-correlated query block, we will get a list of candidate Distribution - * (2) for correlated query block, we should shuffle them (inner and outer) to a correlated subplan node - * group - */ - List* candidate_distribution_list = - ng_get_join_candidate_distribution_list(outer_path, inner_path, root->is_correlated, - get_join_distribution_perference_type(joinrel, inner_path, outer_path)); - - /* - * For each candidate distribution (node group), we do merge join computing on it, - * if outer or inner is not in candidate node group, we should do shuffle. - */ - ListCell* lc = NULL; - foreach (lc, candidate_distribution_list) { - Distribution* distribution = (Distribution*)lfirst(lc); - JoinCostWorkspace cur_workspace; - copy_JoinCostWorkspace(&cur_workspace, &workspace); - - mjpgen->addMergeJoinPath(&cur_workspace, distribution, 1); - if (u_sess->opt_cxt.query_dop > 1) - mjpgen->addMergeJoinPath(&cur_workspace, distribution, u_sess->opt_cxt.query_dop); - } -#else JoinCostWorkspace cur_workspace; copy_JoinCostWorkspace(&cur_workspace, &workspace); mjpgen->addMergeJoinPath(&cur_workspace, NULL, 1); if (u_sess->opt_cxt.query_dop > 1) mjpgen->addMergeJoinPath(&cur_workspace, NULL, u_sess->opt_cxt.query_dop); -#endif } delete mjpgen; - } else -#endif - { + } else { /* try mergejoin path for single */ TryMergeJoinPathSingle(root, joinrel, @@ -991,7 +951,6 @@ static void try_hashjoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType j if (add_path_precheck(joinrel, workspace.startup_cost, workspace.total_cost, NIL, required_outer) || add_path_hintcheck(root->parse->hintState, joinrel->relids, outer_path, inner_path, HINT_KEYWORD_HASHJOIN)) { -#ifdef STREAMPLAN /* check exec type of inner and outer path before generate join path. */ if (IS_STREAM_PLAN && CheckJoinExecType(root, outer_path, inner_path)) { execOnCoords = true; @@ -1022,43 +981,20 @@ static void try_hashjoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType j if (u_sess->opt_cxt.query_dop > 1) hjpgen->addHashJoinPath(&cur_workspace, distribution, u_sess->opt_cxt.query_dop); } else { -#ifdef ENABLE_MULTIPLE_NODES - /* - * We choose candidate distribution list here with heuristic method, - * (1) for un-correlated query block, we will get a list of candidate Distribution - * (2) for correlated query block, we should shuffle them (inner and outer) to a correlated subplan node - * group - */ - List* candidate_distribution_list = - ng_get_join_candidate_distribution_list(outer_path, inner_path, root->is_correlated, - get_join_distribution_perference_type(joinrel, inner_path, outer_path)); - - /* - * For each candidate distribution (node group), we do hash join computing on it, - * if outer or inner is not in candidate node group, we should do shuffle. 
- */ - ListCell* lc = NULL; - foreach (lc, candidate_distribution_list) { - Distribution* distribution = (Distribution*)lfirst(lc); - JoinCostWorkspace cur_workspace; - copy_JoinCostWorkspace(&cur_workspace, &workspace); - - hjpgen->addHashJoinPath(&cur_workspace, distribution, 1); - if (u_sess->opt_cxt.query_dop > 1) - hjpgen->addHashJoinPath(&cur_workspace, distribution, u_sess->opt_cxt.query_dop); - } -#else JoinCostWorkspace cur_workspace; copy_JoinCostWorkspace(&cur_workspace, &workspace); hjpgen->addHashJoinPath(&cur_workspace, NULL, 1); - if (u_sess->opt_cxt.query_dop > 1) + if (u_sess->opt_cxt.query_dop > 1) { hjpgen->addHashJoinPath(&cur_workspace, NULL, u_sess->opt_cxt.query_dop); -#endif + if (joinrel->partial_pathlist != NULL) { + Path* cheapest_partial_path = (Path*)linitial(joinrel->partial_pathlist); + add_path(root, joinrel, cheapest_partial_path ,false); + } + } } delete hjpgen; } else -#endif { /* try hash join single */ TryHashJoinPathSingle(root, @@ -1698,6 +1634,181 @@ static void match_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI pfree_ext(join_used); } +/* + * match_partial_unsorted_outer + * The match_partial_unsorted_outer function is used to generate SMP plans. + * Since SMP currently does not support merge join, merge join is excluded. + * Other aspects are the same as the match_unsorted_outer function. + */ +static void match_partial_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel, RelOptInfo* innerrel, + List* restrictlist, List* mergeclause_list, JoinType jointype, JoinPathExtraData* extra, Relids param_source_rels) +{ + if (outerrel->partial_pathlist == NULL || innerrel->partial_pathlist == NULL) { + return; + } + JoinType save_jointype = jointype; + bool nestjoinOK = false; + bool useallclauses = false; + Path* matpath = NULL; + ListCell* l = NULL; + List* merge_pathkeys = NULL; + /* + * Nestloop only supports inner, left, semi, and anti joins. Also, if we + * are doing a right or full mergejoin, we must use *all* the mergeclauses + * as join clauses, else we will not have a valid plan. (Although these + * two flags are currently inverses, keep them separate for clarity and + * possible future changes.) + */ + switch (jointype) { + case JOIN_INNER: + case JOIN_LEFT: + case JOIN_SEMI: + case JOIN_ANTI: + case JOIN_LEFT_ANTI_FULL: + nestjoinOK = true; + useallclauses = false; + break; + case JOIN_RIGHT: + case JOIN_FULL: + case JOIN_RIGHT_SEMI: + case JOIN_RIGHT_ANTI: + case JOIN_RIGHT_ANTI_FULL: + nestjoinOK = false; + useallclauses = true; + break; + case JOIN_UNIQUE_OUTER: + case JOIN_UNIQUE_INNER: + jointype = JOIN_INNER; + nestjoinOK = true; + useallclauses = false; + break; + default: { + ereport(ERROR, + (errmodule(MOD_OPT), + errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE), + errmsg("unrecognized join type when match unsorted outer: %d", (int)jointype))); + + nestjoinOK = false; /* keep compiler quiet */ + useallclauses = false; + } break; + } + + Path* outer_cheapest_total = (Path*)linitial(outerrel->partial_pathlist); + Path* inner_cheapest_total = (Path*)linitial(innerrel->partial_pathlist); + Path* inner_cheapest_total_orig = inner_cheapest_total; + + /* + * If inner_cheapest_total is parameterized by the outer rel, ignore it; + * we will consider it below as a member of cheapest_parameterized_paths, + * but the other possibilities considered in this routine aren't usable. 
+ */ + if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel)) + inner_cheapest_total = NULL; + + /* + * If we need to unique-ify the inner path, we will consider only the + * cheapest-total inner. + */ + if (save_jointype == JOIN_UNIQUE_INNER) { + if (inner_cheapest_total == NULL) + return; + inner_cheapest_total = (Path*)create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo); + AssertEreport(inner_cheapest_total != NULL, MOD_OPT_JOIN, "inner cheapest path is NULL"); + } else if (nestjoinOK && inner_cheapest_total != NULL) { + /* + * Consider materializing the cheapest inner path, unless + * enable_material is off or the path in question materializes its + * output anyway. + */ + if (u_sess->attr.attr_sql.enable_material && + !ExecMaterializesOutput(inner_cheapest_total->pathtype)) { + matpath = (Path*)create_material_path(inner_cheapest_total); + } else if (ExecMaterializesOutput(inner_cheapest_total->pathtype)) { + /* if inner is already materialized, we accept it */ + matpath = inner_cheapest_total; + } + } + + foreach (l, outerrel->partial_pathlist) { + Path* outerpath = (Path*)lfirst(l); + + /* for non-optimal inner, we only try outer path with the same distributed key */ + if (inner_cheapest_total_orig != linitial(innerrel->partial_pathlist) && + outerpath != outer_cheapest_total) + continue; + + /* + * We cannot use an outer path that is parameterized by the inner rel. + */ + if (PATH_PARAM_BY_REL(outerpath, innerrel)) + continue; + + /* + * If we need to unique-ify the outer path, it's pointless to consider + * any but the cheapest outer. (XXX we don't consider parameterized + * outers, nor inners, for unique-ified cases. Should we?) + */ + if (save_jointype == JOIN_UNIQUE_OUTER) { + if (outerpath != outer_cheapest_total) { + continue; + } + outerpath = (Path*)create_unique_path(root, outerrel, outerpath, extra->sjinfo); + AssertEreport(outerpath != NULL, MOD_OPT_JOIN, "outer path is NULL"); + } + + /* + * The result will have this sort order (even if it is implemented as + * a nestloop, and even if some of the mergeclauses are implemented by + * qpquals rather than as true mergeclauses): + */ + merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, outerpath->pathkeys); + + if (save_jointype == JOIN_UNIQUE_INNER) { + try_nestloop_path(root, + joinrel, + jointype, + save_jointype, + extra, + param_source_rels, + outerpath, + inner_cheapest_total, + restrictlist, + merge_pathkeys); + } else if (nestjoinOK) { + /* Parameterized-path handling is not supported for parallel plans, so it is omitted here. */ + ListCell* llc2 = NULL; + foreach (llc2, innerrel->partial_pathlist) { + Path* innerpath = (Path*)lfirst(llc2); + try_nestloop_path(root, + joinrel, + jointype, + save_jointype, + extra, + param_source_rels, + outerpath, + innerpath, + restrictlist, + merge_pathkeys); + } + + + /* Also consider materialized form of the cheapest inner path */ + if (matpath != NULL) { + try_nestloop_path(root, + joinrel, + jointype, + save_jointype, + extra, + param_source_rels, + outerpath, + matpath, + restrictlist, + merge_pathkeys); + } + } + } +} + /* * hash_inner_and_outer * Create hashjoin join paths by explicitly hashing both the outer and @@ -1926,6 +2037,53 @@ static void hash_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI } i++; } + if (u_sess->opt_cxt.query_dop > 1 && outerrel->partial_pathlist != NULL && innerrel->partial_pathlist != NULL) { + Path* cheapest_outter_path = (Path*)linitial(outerrel->partial_pathlist); + Path* cheapest_inner_path = 
(Path*)linitial(innerrel->partial_pathlist); + jointype = save_jointype; + if (jointype == JOIN_UNIQUE_OUTER) { + cheapest_outter_path = (Path*)create_unique_path(root, outerrel, cheapest_outter_path, extra->sjinfo); + AssertEreport(cheapest_outter_path != NULL, MOD_OPT_JOIN, "outer cheapest path is NULL"); + jointype = JOIN_INNER; + try_hashjoin_path(root, + joinrel, + jointype, + save_jointype, + extra, + param_source_rels, + cheapest_outter_path, + cheapest_inner_path, + restrictlist, + hashclauses); + /* no possibility of cheap startup here */ + } else if (jointype == JOIN_UNIQUE_INNER) { + cheapest_inner_path = (Path*)create_unique_path(root, innerrel, cheapest_inner_path, extra->sjinfo); + AssertEreport(cheapest_inner_path != NULL, MOD_OPT_JOIN, "inner cheapest path is NULL"); + jointype = JOIN_INNER; + try_hashjoin_path(root, + joinrel, + jointype, + save_jointype, + extra, + param_source_rels, + cheapest_outter_path, + cheapest_inner_path, + restrictlist, + hashclauses); + } else { + try_hashjoin_path(root, + joinrel, + jointype, + save_jointype, + extra, + param_source_rels, + (Path*)cheapest_outter_path, + (Path*)cheapest_inner_path, + restrictlist, + hashclauses); + } + } + if (join_used != NULL) pfree_ext(join_used); } diff --git a/src/gausskernel/optimizer/path/joinrels.cpp b/src/gausskernel/optimizer/path/joinrels.cpp index 228e96dfe3..17329dfed9 100755 --- a/src/gausskernel/optimizer/path/joinrels.cpp +++ b/src/gausskernel/optimizer/path/joinrels.cpp @@ -755,14 +755,13 @@ RelOptInfo* make_join_rel(PlannerInfo* root, RelOptInfo* rel1, RelOptInfo* rel2) u_sess->opt_cxt.op_work_mem = work_mem_orig; root->glob->estiopmem = esti_op_mem_orig; -#ifdef STREAMPLAN /* * If there are join quals that cannot generate Stream plan, we mark it and try * to get a PGXC plan instead. * e.g. cannot broadcast hashed results for inner plan of semi join when outer * plan is replicated now. */ - +#ifdef STREAMPLAN if (IS_STREAM && NIL == joinrel->pathlist) { /* * We remove the useless RelOptInfo and then try other join path if the current level @@ -782,7 +781,6 @@ RelOptInfo* make_join_rel(PlannerInfo* root, RelOptInfo* rel1, RelOptInfo* rel2) } } #endif - return joinrel; } diff --git a/src/gausskernel/optimizer/path/streampath_base.cpp b/src/gausskernel/optimizer/path/streampath_base.cpp index af8d4d505e..69b6e2ffb3 100755 --- a/src/gausskernel/optimizer/path/streampath_base.cpp +++ b/src/gausskernel/optimizer/path/streampath_base.cpp @@ -104,7 +104,11 @@ PathGen::~PathGen() */ void PathGen::addPath(Path* new_path) { - add_path(m_root, m_rel, new_path); + if (new_path->dop > 1) { + add_partial_path(m_root, m_rel, new_path); + } else { + add_path(m_root, m_rel, new_path); + } } /* diff --git a/src/gausskernel/optimizer/plan/createplan.cpp b/src/gausskernel/optimizer/plan/createplan.cpp index 71d36c4097..2260cbc264 100755 --- a/src/gausskernel/optimizer/plan/createplan.cpp +++ b/src/gausskernel/optimizer/plan/createplan.cpp @@ -1857,11 +1857,9 @@ static Plan* create_unique_plan(PlannerInfo* root, UniquePath* best_path) * minimum output tlist, without any stuff we might have added to the * subplan tlist. 
*/ -#ifdef STREAMPLAN if (IS_STREAM_PLAN && best_path->hold_tlist) { tlist = newtlist; } -#endif /* Can't figure out the real size, so give a rough estimation */ Size hashentrysize = alloc_trunk_size((subplan->plan_width) + MAXALIGN(sizeof(MinimalTupleData))); diff --git a/src/gausskernel/optimizer/plan/planner.cpp b/src/gausskernel/optimizer/plan/planner.cpp index 5c9506b829..2c614f876c 100755 --- a/src/gausskernel/optimizer/plan/planner.cpp +++ b/src/gausskernel/optimizer/plan/planner.cpp @@ -12722,10 +12722,9 @@ List* add_groupId_to_groupExpr(List* query_group, List* tlist) if (target_group_id != NULL) { List* grouplist = NIL; - /* Add gropuingId expr to group by clause */ - groupMaxLen++; + target_group_id->ressortgroupref = groupMaxLen; - /* Add gropuing() expr to group by clause */ + /* Add grouping() expr to group by clause */ SortGroupClause* grpcl = makeNode(SortGroupClause); grpcl->tleSortGroupRef = groupMaxLen; grpcl->eqop = INT4EQOID; diff --git a/src/gausskernel/optimizer/plan/streamplan.cpp b/src/gausskernel/optimizer/plan/streamplan.cpp index 15d2e1c800..ab965a8a68 100644 --- a/src/gausskernel/optimizer/plan/streamplan.cpp +++ b/src/gausskernel/optimizer/plan/streamplan.cpp @@ -1062,13 +1062,13 @@ void CreateGatherPaths(PlannerInfo* root, RelOptInfo* rel, bool isJoin) { /* Create gather path on plain rel pathlist. */ ListCell* lc = NULL; - List* pathlist = rel->pathlist; + List* partial_pathlist = rel->partial_pathlist; if (!PreCheckGatherParse(root, rel) || !PreCheckGatherOthers(root, rel, isJoin)) { return; } - foreach(lc, pathlist) { + foreach(lc, partial_pathlist) { Path* path = (Path*)lfirst(lc); /* only add gather for path which execute on datanodes */ diff --git a/src/gausskernel/optimizer/plan/streamwalker.cpp b/src/gausskernel/optimizer/plan/streamwalker.cpp index ee5c8be2ee..8ec25413c7 100755 --- a/src/gausskernel/optimizer/plan/streamwalker.cpp +++ b/src/gausskernel/optimizer/plan/streamwalker.cpp @@ -444,7 +444,10 @@ static void stream_walker_query(Query* query, shipping_context *cxt) stream_walker_query_insertinto_rep(query, cxt); /* mark shippable flag based on rte shippbility */ stream_walker_finalize_cxt(query, cxt); - + if (list_length(query->resultRelations) > 1) { + /* turn off dop for multiple modify */ + u_sess->opt_cxt.query_dop = 1; + } /* Mark query's can_push and global_shippable flag. */ query->can_push = cxt->current_shippable; cxt->global_shippable = cxt->global_shippable && cxt->current_shippable; diff --git a/src/gausskernel/optimizer/util/pathnode.cpp b/src/gausskernel/optimizer/util/pathnode.cpp index afa419a29d..57e2b1ce00 100755 --- a/src/gausskernel/optimizer/util/pathnode.cpp +++ b/src/gausskernel/optimizer/util/pathnode.cpp @@ -1332,10 +1332,10 @@ static void set_predpush_same_level_hint(HintState* hstate, RelOptInfo* rel, Pat */ void set_hint_value(RelOptInfo* join_rel, Path* new_path, HintState* hstate) { - if (hstate == NULL) { + /* if query_dop > 1, hint_value may not equal to zero */ + if (hstate == NULL || (new_path->hint_value != 0 && new_path->dop != 0)) { return; } - AssertEreport(new_path->hint_value == 0, MOD_OPT, ""); set_scan_hint(new_path, hstate); @@ -1587,7 +1587,7 @@ static bool AddPathPreCheck(Path* newPath) * * Returns nothing, but modifies parent_rel->pathlist. 
 */ -void add_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path) +void add_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path, bool is_delete_path) { bool accept_new = true; /* unless we find a superior old path */ ListCell* insert_after = NULL; /* where to insert new item */ @@ -1798,9 +1798,10 @@ void add_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path) /* * Delete the data pointed-to by the deleted cell, if possible */ - if (!IsA(old_path, IndexPath)) + if (!IsA(old_path, IndexPath) && old_path->dop <= 1) { pfree_ext(old_path); /* p1_prev does not advance */ + } } else { /* new belongs after this old path if it has cost >= old's */ if (new_path->total_cost >= old_path->total_cost && new_path->hint_value <= old_path->hint_value) @@ -1857,6 +1858,334 @@ void add_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path) lappend_cell(parent_rel->pathlist, insert_after, new_path); else parent_rel->pathlist = lcons(new_path, parent_rel->pathlist); + } else { + /* Reject and recycle the new path */ + if (!IsA(new_path, IndexPath) && is_delete_path) + pfree_ext(new_path); + } +} + +/* + * add_partial_path + * Consider a potential implementation path for the specified parent rel, + * and add it to the rel's partial_pathlist if it is worthy of consideration. + * A path is worthy if it has a better sort order (better pathkeys) or + * cheaper cost (on either dimension), or generates fewer rows, than any + * existing path that has the same or superset parameterization rels. + * + * We also remove from the rel's partial_pathlist any old paths that are dominated + * by new_path --- that is, new_path is cheaper, at least as well ordered, + * generates no more rows, and requires no outer rels not required by the + * old path. + * + * In most cases, a path with a superset parameterization will generate + * fewer rows (since it has more join clauses to apply), so that those two + * figures of merit move in opposite directions; this means that a path of + * one parameterization can seldom dominate a path of another. But such + * cases do arise, so we make the full set of checks anyway. + * + * There is one policy decision embedded in this function, along with its + * sibling add_path_precheck: we treat all parameterized paths as having + * NIL pathkeys, so that they compete only on cost. This is to reduce + * the number of parameterized paths that are kept. See discussion in + * src/backend/optimizer/README. + * + * The partial_pathlist is kept sorted by total_cost, with cheaper paths + * at the front. Within this routine, that's simply a speed hack: + * doing it that way makes it more likely that we will reject an inferior + * path after a few comparisons, rather than many comparisons. + * However, add_path_precheck relies on this ordering to exit early + * when possible. + * + * NOTE: discarded Path objects are immediately pfree'd to reduce planner + * memory consumption. We dare not try to free the substructure of a Path, + * since much of it may be shared with other Paths or the query tree itself; + * but just recycling discarded Path nodes is a very useful savings in + * a large join tree. We can recycle the List nodes of partial_pathlist, too. + * + * BUT: we do not pfree IndexPath objects, since they may be referenced as + * children of BitmapHeapPaths as well as being paths in their own right. + * + * 'parent_rel' is the relation entry to which the path corresponds. + * 'new_path' is a potential path for parent_rel. 
+ * + * Returns nothing, but modifies parent_rel->partial_pathlist. + */ +void add_partial_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path) +{ + bool accept_new = true; /* unless we find a superior old path */ + ListCell* insert_after = NULL; /* where to insert new item */ + List* new_path_pathkeys = NIL; + ListCell* p1 = NULL; + ListCell* p1_prev = NULL; + ListCell* p1_next = NULL; + bool small_fuzzy_factor_is_used = false; + + /* + * This is a convenient place to check for query cancel --- no part of the + * planner goes very long without calling add_path(). + */ + CHECK_FOR_INTERRUPTS(); + + if (!AddPathPreCheck(new_path)) { + return; + } + + /* Set path's hint_value. */ + if (root != NULL && root->parse->hintState != NULL) { + set_hint_value(parent_rel, new_path, root->parse->hintState); + } + + /*Set path's index_hint */ + if (root != NULL && root->parse->indexhintList != NULL) { + set_index_hint_value(new_path, root->parse->indexhintList); + } + + /* we will add cn gather path when cn gather hint switch on */ + if (root != NULL && EXEC_CONTAIN_COORDINATOR(new_path->exec_type) && permit_gather(root)) { + RangeTblEntry* rte = root->simple_rte_array[parent_rel->relid]; + bool isSysTable = (rte != NULL && rte->rtekind == RTE_RELATION && is_sys_table(rte->relid)); + + if (!isSysTable) { + AddGatherPath(root, parent_rel, new_path); + return; + } + } + + if (OPTIMIZE_PLAN != u_sess->attr.attr_sql.plan_mode_seed) { + parent_rel->partial_pathlist = lcons(new_path, parent_rel->partial_pathlist); + return; + } + + /* Pretend parameterized paths have no pathkeys, per comment above */ + new_path_pathkeys = new_path->param_info ? NIL : new_path->pathkeys; + + /* + * Loop to check proposed new path against old paths. Note it is possible + * for more than one old path to be tossed out because new_path dominates + * it. + * + * We can't use foreach here because the loop body may delete the current + * list cell. + */ + p1_prev = NULL; + for (p1 = list_head(parent_rel->partial_pathlist); p1 != NULL; p1 = p1_next) { + Path* old_path = (Path*)lfirst(p1); + bool remove_old = false; /* unless new proves superior */ + bool eq_diskey = true; + PathCostComparison costcmp = COSTS_DIFFERENT; + PathKeysComparison keyscmp = PATHKEYS_DIFFERENT; + BMS_Comparison outercmp = BMS_DIFFERENT; + double rowscmp; + + p1_next = lnext(p1); + + /* + * Do a fuzzy cost comparison with 1% fuzziness limit. (XXX does this + * percentage need to be user-configurable?) + */ + costcmp = compare_path_costs_fuzzily(new_path, old_path, FUZZY_FACTOR); + + /* + * If the two paths compare differently for startup and total cost, + * then we want to keep both, and we can skip comparing pathkeys and + * required_outer rels. If they compare the same, proceed with the + * other comparisons. Row count is checked last. (We make the tests + * in this order because the cost comparison is most likely to turn + * out "different", and the pathkeys comparison next most likely. As + * explained above, row count very seldom makes a difference, so even + * though it's cheap to compare there's not much point in checking it + * earlier.) + */ + if (costcmp != COSTS_DIFFERENT) { + /* Similarly check to see if either dominates on pathkeys */ + List* old_path_pathkeys = NIL; + + old_path_pathkeys = old_path->param_info ? 
NIL : old_path->pathkeys; + keyscmp = compare_pathkeys(new_path_pathkeys, old_path_pathkeys); + if (keyscmp != PATHKEYS_DIFFERENT) { + switch (costcmp) { + case COSTS_EQUAL: + outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path), PATH_REQ_OUTER(old_path)); + if (keyscmp == PATHKEYS_BETTER1) { + if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET1) && new_path->rows <= old_path->rows) + remove_old = true; /* new dominates old */ + } else if (keyscmp == PATHKEYS_BETTER2) { + if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET2) && new_path->rows >= old_path->rows) + accept_new = false; /* old dominates new */ + } else { + if (outercmp == BMS_EQUAL) { + /* + * Same pathkeys and outer rels, and fuzzily + * the same cost, so keep just one; to decide + * which, first check rows and then do a fuzzy + * cost comparison with very small fuzz limit. + * (We used to do an exact cost comparison, + * but that results in annoying + * platform-specific plan variations due to + * roundoff in the cost estimates.) If things + * are still tied, arbitrarily keep only the + * old path. Notice that we will keep only + * the old path even if the less-fuzzy + * comparison decides the startup and total + * costs compare differently. + */ + if (new_path->rows < old_path->rows) + remove_old = true; /* new dominates old */ + else if (new_path->rows > old_path->rows) + accept_new = false; /* old dominates new */ + else { + small_fuzzy_factor_is_used = true; + if (compare_path_costs_fuzzily(new_path, old_path, SMALL_FUZZY_FACTOR) == + COSTS_BETTER1) + remove_old = true; /* new dominates old */ + else + accept_new = false; /* old equals or + * dominates new */ + } + } else if (outercmp == BMS_SUBSET1 && new_path->rows <= old_path->rows) + remove_old = true; /* new dominates old */ + else if (outercmp == BMS_SUBSET2 && new_path->rows >= old_path->rows) + accept_new = false; /* old dominates new */ + /* else different parameterizations, keep both */ + } + break; + case COSTS_BETTER1: + if (keyscmp != PATHKEYS_BETTER2) { + outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path), PATH_REQ_OUTER(old_path)); + if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET1) && new_path->rows <= old_path->rows) + remove_old = true; /* new dominates old */ + } + break; + case COSTS_BETTER2: + if (keyscmp != PATHKEYS_BETTER1) { + outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path), PATH_REQ_OUTER(old_path)); + if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET2) && new_path->rows >= old_path->rows) + accept_new = false; /* old dominates new */ + } + break; + default: + + /* + * can't get here, but keep this case to keep compiler + * quiet + */ + break; + } + } + } + + if (IS_STREAM_PLAN) { + /* When a path with a single-node-group distribution is compared against a path with a + * non-single-node-group distribution, the former is kept and the latter is removed. + */ + bool is_new_path_single_node_distribution = ng_is_single_node_group_distribution(&new_path->distribution); + bool is_old_path_single_node_distribution = ng_is_single_node_group_distribution(&old_path->distribution); + /* When DN gather is enabled, only the path with the single-node-group distribution is kept in the subsequent cost comparison. */ + if ((is_new_path_single_node_distribution && !is_old_path_single_node_distribution) + || (!is_new_path_single_node_distribution && is_old_path_single_node_distribution)) { + /* Exactly one of the two paths has a single-node-group distribution; if their costs + * clearly differ, let the cost comparison decide which one is kept. 
+ */ + if (costcmp == COSTS_BETTER1 || costcmp == COSTS_BETTER2) { + eq_diskey = true; + } + } else if (!is_new_path_single_node_distribution && !is_old_path_single_node_distribution) { + eq_diskey = equal_distributekey(root, new_path->distribute_keys, old_path->distribute_keys); + } else { + // Both paths have a single-node-group distribution; treat their distribute keys as equal. + eq_diskey = true; + } + } + /* + * Remove current element from partial_pathlist if dominated by new. + */ + if (remove_old && eq_diskey) { + ereport(DEBUG1, + (errmodule(MOD_OPT_JOIN), + (errmsg("An old path is removed with cost = %lf .. %lf; rows = %lf", + old_path->startup_cost, + old_path->total_cost, + old_path->rows)))); + rowscmp = old_path->rows - new_path->rows; + if (log_min_messages <= DEBUG1) + debug1_print_compare_result( + costcmp, keyscmp, outercmp, rowscmp, root, old_path, small_fuzzy_factor_is_used); + parent_rel->partial_pathlist = list_delete_cell(parent_rel->partial_pathlist, p1, p1_prev); + + /* + * Delete the data pointed-to by the deleted cell, if possible + */ + if (!IsA(old_path, IndexPath)) { + ListCell* lc = NULL; + bool is_deleted = false; + foreach(lc, parent_rel->pathlist) { // could consider keeping a reverse mapping from path to the list(s) that contain it + Path* path_parallel = (Path*)lfirst(lc); + if (path_parallel == old_path) { + parent_rel->pathlist = list_delete_cell2(parent_rel->pathlist, lc); + is_deleted = true; + break; + } + } + if (!is_deleted) { + pfree_ext(old_path); + } + } + /* p1_prev does not advance */ + } else { + /* new belongs after this old path if it has cost >= old's */ + if (new_path->total_cost >= old_path->total_cost && new_path->hint_value <= old_path->hint_value) + insert_after = p1; + /* p1_prev advances */ + p1_prev = p1; + } + + /* we should accept the new if distribute key differs */ + if (!accept_new && !eq_diskey) { + accept_new = true; + /* new belongs after this old path if it has cost >= old's */ + if (new_path->total_cost >= old_path->total_cost && new_path->hint_value <= old_path->hint_value) + insert_after = p1; + /* p1_prev advances */ + p1_prev = p1; + } + + /* + * If we found an old path that dominates new_path, we can quit + * scanning the pathlist; we will not add new_path, and we assume + * new_path cannot dominate any other elements of the pathlist. + */ + if (!accept_new) { + ereport(DEBUG1, + (errmodule(MOD_OPT_JOIN), + (errmsg("A new path is not accepted with cost = %lf .. %lf; rows = %lf", + new_path->startup_cost, + new_path->total_cost, + new_path->rows)))); + rowscmp = old_path->rows - new_path->rows; + if (log_min_messages <= DEBUG1) { + debug1_print_new_path(root, new_path, small_fuzzy_factor_is_used); + debug1_print_compare_result( + costcmp, keyscmp, outercmp, rowscmp, root, old_path, small_fuzzy_factor_is_used); + } + break; + } + } + + if (accept_new) { + /* Accept the new path: insert it at proper place in partial_pathlist */ + ereport(DEBUG1, + (errmodule(MOD_OPT_JOIN), + (errmsg("A new path is accepted with cost = %lf .. 
%lf; rows = %lf", + new_path->startup_cost, + new_path->total_cost, + new_path->rows)))); + if (log_min_messages <= DEBUG1) + debug1_print_new_path(root, new_path, small_fuzzy_factor_is_used); + if (insert_after != NULL) + lappend_cell(parent_rel->partial_pathlist, insert_after, new_path); + else + parent_rel->partial_pathlist = lcons(new_path, parent_rel->partial_pathlist); } else { /* Reject and recycle the new path */ if (!IsA(new_path, IndexPath)) @@ -3016,7 +3345,9 @@ UniquePath* create_unique_path(PlannerInfo* root, RelOptInfo* rel, Path* subpath if (subpath == lfirst(lc)) break; } - AssertEreport(lc != NULL, MOD_OPT_JOIN, "Subpath should be one of cheapest total path of rel"); + if (subpath->dop == 1) { + AssertEreport(lc != NULL, MOD_OPT_JOIN, "Subpath should be one of cheapest total path of rel"); + } AssertEreport(subpath->parent == rel || subpath->parent->base_rel == rel, MOD_OPT_JOIN, ""); /* ... or if SpecialJoinInfo is the wrong one */ AssertEreport(sjinfo->jointype == JOIN_SEMI, MOD_OPT_JOIN, "Join type should be semi join"); diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 65a80bd29a..930dc876a8 100755 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -714,6 +714,7 @@ typedef struct RelOptInfo { List* distribute_keys; /* distribute key */ List* pathlist; /* Path structures */ List* ppilist; /* ParamPathInfos used in pathlist */ + List* partial_pathlist; /* partial Paths */ struct Path* cheapest_gather_path; struct Path* cheapest_startup_path; List* cheapest_total_path; /* contain all cheapest total paths from different distribute key */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 6ff21d6003..0fecb39ff7 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -46,7 +46,8 @@ extern int compare_fractional_path_costs(Path* path1, Path* path2, double fracti extern void set_cheapest(RelOptInfo* parent_rel, PlannerInfo* root = NULL); extern Path* get_cheapest_path(PlannerInfo* root, RelOptInfo* rel, const double* agg_groups, bool has_groupby); extern Path* find_hinted_path(Path* current_path); -extern void add_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path); +extern void add_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path, bool is_delete_path = true); +extern void add_partial_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path); extern bool add_path_precheck( RelOptInfo* parent_rel, Cost startup_cost, Cost total_cost, List* pathkeys, Relids required_outer); diff --git a/src/test/regress/expected/plan_hint_iud.out b/src/test/regress/expected/plan_hint_iud.out index 2a13cdd586..e617c9e0d5 100755 --- a/src/test/regress/expected/plan_hint_iud.out +++ b/src/test/regress/expected/plan_hint_iud.out @@ -325,14 +325,13 @@ deallocate all; ------------------------------------------------------------- Update on t1 -> Streaming(type: LOCAL GATHER dop: 1/8) - -> Nested Loop - Join Filter: (t1.c2 = t2.c2) - -> HashAggregate - Group By Key: t2.c2 + -> Hash Semi Join + Hash Cond: (t1.c2 = t2.c2) + -> Seq Scan on t1 + -> Hash -> Streaming(type: BROADCAST dop: 8/8) -> Seq Scan on t2 - -> Seq Scan on t1 -(9 rows) +(8 rows) --- No expand :EXP update t1 set c2 = 2 where c2 in (select /*+ no_expand */ c2 from t2); @@ -457,14 +456,13 @@ deallocate all; ------------------------------------------------------------- Delete on t1 -> Streaming(type: LOCAL GATHER dop: 1/8) - -> Nested Loop - Join Filter: (t1.c2 = t2.c2) - -> HashAggregate - 
Group By Key: t2.c2 + -> Hash Semi Join + Hash Cond: (t1.c2 = t2.c2) + -> Seq Scan on t1 + -> Hash -> Streaming(type: BROADCAST dop: 8/8) -> Seq Scan on t2 - -> Seq Scan on t1 -(9 rows) +(8 rows) --- No expand :EXP delete t1 where c2 in (select /*+ no_expand */ c2 from t2); @@ -577,15 +575,14 @@ deallocate all; --- Set :EXP merge /*+ set(query_dop 1008) */ into t1 using t2 on t1.c1 = t2.c1 when matched then update set t1.c2 = t2.c2 when not matched then insert values (t2.c1, t2.c2); - QUERY PLAN ----------------------------------------------------- + QUERY PLAN +-------------------------------------------- Merge on t1 -> Nested Loop Left Join - -> Streaming(type: LOCAL GATHER dop: 1/8) - -> Seq Scan on t2 + -> Seq Scan on t2 -> Index Scan using t1_pkey on t1 Index Cond: (c1 = t2.c1) -(6 rows) +(5 rows) --- Plancache prepare merge_g as merge /*+ use_gplan */ into t1 using t2 on t1.c1 = t2.c1 and t1.c1 = $1 when matched then update set t1.c2 = t2.c2 when not matched then insert values (t2.c1, t2.c2); diff --git a/src/test/regress/pg_regress.cpp b/src/test/regress/pg_regress.cpp index bdac8906af..e155f4171a 100644 --- a/src/test/regress/pg_regress.cpp +++ b/src/test/regress/pg_regress.cpp @@ -5461,7 +5461,7 @@ static void check_global_variables() } } -#define BASE_PGXC_LIKE_MACRO_NUM 1419 +#define BASE_PGXC_LIKE_MACRO_NUM 1415 static void check_pgxc_like_macros() { #ifdef BUILD_BY_CMAKE -- Gitee