From d63950964ebb6f4746a1684a738f62f45a77f6a0 Mon Sep 17 00:00:00 2001 From: yangzhizheng Date: Tue, 9 Apr 2024 09:11:38 +0800 Subject: [PATCH 1/4] Supports SMP with parallel serial scheduling --- src/gausskernel/optimizer/path/allpaths.cpp | 11 +- src/gausskernel/optimizer/path/joinpath.cpp | 318 ++++++++++++++-- src/gausskernel/optimizer/path/joinrels.cpp | 2 - .../optimizer/path/streampath_base.cpp | 6 +- src/gausskernel/optimizer/plan/createplan.cpp | 4 - src/gausskernel/optimizer/plan/streamplan.cpp | 4 +- .../optimizer/plan/streamwalker.cpp | 5 +- src/gausskernel/optimizer/util/pathnode.cpp | 348 +++++++++++++++++- src/include/nodes/relation.h | 1 + src/include/optimizer/pathnode.h | 3 +- src/test/regress/pg_regress.cpp | 2 +- 11 files changed, 649 insertions(+), 55 deletions(-) diff --git a/src/gausskernel/optimizer/path/allpaths.cpp b/src/gausskernel/optimizer/path/allpaths.cpp index bf5ac06a29..06bd742495 100755 --- a/src/gausskernel/optimizer/path/allpaths.cpp +++ b/src/gausskernel/optimizer/path/allpaths.cpp @@ -289,7 +289,11 @@ RelOptInfo* make_one_rel(PlannerInfo* root, List* joinlist) * Generate access paths for the entire join tree. */ rel = make_rel_from_joinlist(root, joinlist); - + if (list_length(joinlist) <=1 && rel->partial_pathlist != NULL) { + Path* partial_path = (Path*)linitial(rel->partial_pathlist); + add_path(root, rel, partial_path); + set_cheapest(rel, root); + } /* * The result should join all and only the query's base rels. 
*/ @@ -1174,14 +1178,14 @@ static void set_plain_rel_pathlist(PlannerInfo* root, RelOptInfo* rel, RangeTblE } else { add_path(root, rel, create_cstorescan_path(root, rel)); if (can_parallel) - add_path(root, rel, create_cstorescan_path(root, rel, u_sess->opt_cxt.query_dop)); + add_partial_path(root, rel, create_cstorescan_path(root, rel, u_sess->opt_cxt.query_dop)); } break; } case REL_ROW_ORIENTED: { add_path(root, rel, create_seqscan_path(root, rel, required_outer)); if (can_parallel) - add_path(root, rel, create_seqscan_path(root, rel, required_outer, u_sess->opt_cxt.query_dop)); + add_partial_path(root, rel, create_seqscan_path(root, rel, required_outer, u_sess->opt_cxt.query_dop)); break; } default: { @@ -3270,7 +3274,6 @@ RelOptInfo* standard_join_search(PlannerInfo* root, int levels_needed, List* ini if (IS_STREAM_PLAN && permit_gather(root, HINT_GATHER_JOIN)) { CreateGatherPaths(root, rel, true); } - /* Find and save the cheapest paths for this rel */ set_cheapest(rel, root); diff --git a/src/gausskernel/optimizer/path/joinpath.cpp b/src/gausskernel/optimizer/path/joinpath.cpp index a8ea8bba2b..30e407f724 100755 --- a/src/gausskernel/optimizer/path/joinpath.cpp +++ b/src/gausskernel/optimizer/path/joinpath.cpp @@ -53,6 +53,8 @@ static void sort_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI List* restrictlist, List* mergeclause_list, JoinType jointype, JoinPathExtraData* extra, Relids param_source_rels); static void match_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel, RelOptInfo* innerrel, List* restrictlist, List* mergeclause_list, JoinType jointype, JoinPathExtraData* extra, Relids param_source_rels); +static void match_partial_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel, RelOptInfo* innerrel, + List* restrictlist, List* mergeclause_list, JoinType jointype, JoinPathExtraData* extra, Relids param_source_rels); static void hash_inner_and_outer(PlannerInfo* root, RelOptInfo* 
joinrel, RelOptInfo* outerrel, RelOptInfo* innerrel, List* restrictlist, JoinType jointype, JoinPathExtraData* extra, Relids param_source_rels); @@ -295,7 +297,7 @@ void add_paths_to_joinrel(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* ou * (That's okay because we know that nestloop can't handle right/full * joins at all, so it wouldn't work in the prohibited cases either.) */ - if (mergejoin_allowed) + if (mergejoin_allowed) { match_unsorted_outer(root, joinrel, outerrel, @@ -305,6 +307,18 @@ void add_paths_to_joinrel(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* ou jointype, &extra, param_source_rels); + if (u_sess->opt_cxt.query_dop > 1) { + match_partial_unsorted_outer(root, + joinrel, + outerrel, + innerrel, + restrictlist, + mergeclause_list, + jointype, + &extra, + param_source_rels); + } + } #ifdef NOT_USED @@ -677,8 +691,13 @@ static void try_nestloop_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType j nlpgen->addNestLoopPath(&cur_workspace, NULL, 1); - if (u_sess->opt_cxt.query_dop > 1) + if (u_sess->opt_cxt.query_dop > 1) { nlpgen->addNestLoopPath(&cur_workspace, NULL, u_sess->opt_cxt.query_dop); + if (joinrel->partial_pathlist != NULL) { + Path* cheapest_partial_path = (Path*)linitial(joinrel->partial_pathlist); + add_path(root, joinrel, cheapest_partial_path ,false); + } + } #endif } @@ -816,7 +835,6 @@ static void try_mergejoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType if (add_path_precheck(joinrel, workspace.startup_cost, workspace.total_cost, pathkeys, required_outer) || add_path_hintcheck(root->parse->hintState, joinrel->relids, outer_path, inner_path, HINT_KEYWORD_MERGEJOIN)) { -#ifdef STREAMPLAN /* check exec type of inner and outer path before generate join path. 
*/ if (IS_STREAM_PLAN && CheckJoinExecType(root, outer_path, inner_path)) { execOnCoords = true; @@ -850,45 +868,16 @@ static void try_mergejoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType if (u_sess->opt_cxt.query_dop > 1) mjpgen->addMergeJoinPath(&cur_workspace, distribution, u_sess->opt_cxt.query_dop); } else { -#ifdef ENABLE_MULTIPLE_NODES - /* - * We choose candidate distribution list here with heuristic method, - * (1) for un-correlated query block, we will get a list of candidate Distribution - * (2) for correlated query block, we should shuffle them (inner and outer) to a correlated subplan node - * group - */ - List* candidate_distribution_list = - ng_get_join_candidate_distribution_list(outer_path, inner_path, root->is_correlated, - get_join_distribution_perference_type(joinrel, inner_path, outer_path)); - - /* - * For each candidate distribution (node group), we do merge join computing on it, - * if outer or inner is not in candidate node group, we should do shuffle. 
- */ - ListCell* lc = NULL; - foreach (lc, candidate_distribution_list) { - Distribution* distribution = (Distribution*)lfirst(lc); - JoinCostWorkspace cur_workspace; - copy_JoinCostWorkspace(&cur_workspace, &workspace); - - mjpgen->addMergeJoinPath(&cur_workspace, distribution, 1); - if (u_sess->opt_cxt.query_dop > 1) - mjpgen->addMergeJoinPath(&cur_workspace, distribution, u_sess->opt_cxt.query_dop); - } -#else JoinCostWorkspace cur_workspace; copy_JoinCostWorkspace(&cur_workspace, &workspace); mjpgen->addMergeJoinPath(&cur_workspace, NULL, 1); if (u_sess->opt_cxt.query_dop > 1) mjpgen->addMergeJoinPath(&cur_workspace, NULL, u_sess->opt_cxt.query_dop); -#endif } delete mjpgen; - } else -#endif - { + } else { /* try mergejoin path for single */ TryMergeJoinPathSingle(root, joinrel, @@ -1051,8 +1040,13 @@ static void try_hashjoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType j JoinCostWorkspace cur_workspace; copy_JoinCostWorkspace(&cur_workspace, &workspace); hjpgen->addHashJoinPath(&cur_workspace, NULL, 1); - if (u_sess->opt_cxt.query_dop > 1) + if (u_sess->opt_cxt.query_dop > 1) { hjpgen->addHashJoinPath(&cur_workspace, NULL, u_sess->opt_cxt.query_dop); + if (joinrel->partial_pathlist != NULL) { + Path* cheapest_partial_path = (Path*)linitial(joinrel->partial_pathlist); + add_path(root, joinrel, cheapest_partial_path ,false); + } + } #endif } @@ -1698,6 +1692,215 @@ static void match_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI pfree_ext(join_used); } +/* + * match_partial_unsorted_outer + * Creates possible join paths for processing a single join relation + * 'joinrel' by employing either iterative substitution or + * mergejoining on each of its possible outer paths (considering + * only outer paths that are already ordered well enough for merging). + * + * We always generate a nestloop path for each available outer path. 
+ * Concretely, for each usable outer partial path a nestloop is tried against + * every inner path in the union of the inner rel's + * cheapest_parameterized_paths and partial_pathlist, and additionally against + * the materialized form of the cheapest inner partial path when one exists. + * For JOIN_UNIQUE_INNER only the unique-ified cheapest inner partial path is used. + * + * NOTE: unlike match_unsorted_outer, from which this description was derived, + * this partial-path variant does not build any mergejoin paths. + * 'mergeclause_list' is accepted but never read, and the local + * 'useallclauses' flag is assigned but never consulted. Join types for which + * nestloop is not allowed (right, full, right-semi and right-anti joins) + * therefore produce no paths here. The function also returns without doing + * anything when either input rel has an empty partial_pathlist. + * TODO(review): either add mergejoin generation for partial paths or drop + * the unused 'mergeclause_list' parameter and 'useallclauses' local.
+ * + * 'joinrel' is the join relation + * 'outerrel' is the outer join relation + * 'innerrel' is the inner join relation + * 'restrictlist' contains all of the RestrictInfo nodes for restriction + * clauses that apply to this join + * 'mergeclause_list' is a list of RestrictInfo nodes for available + * mergejoin clauses in this join + * 'jointype' is the type of join to do + * 'extra' carries additional join information; extra->sjinfo is what gets + * passed to create_unique_path for the unique-ified join types + * 'param_source_rels' are OK targets for parameterization of result paths + */ +static void match_partial_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel, RelOptInfo* innerrel, + List* restrictlist, List* mergeclause_list, JoinType jointype, JoinPathExtraData* extra, Relids param_source_rels) +{ + if (outerrel->partial_pathlist == NULL || innerrel->partial_pathlist == NULL) { + return; + } + JoinType save_jointype = jointype; + bool nestjoinOK = false; + bool useallclauses = false; + Path* matpath = NULL; + ListCell* l = NULL; + List* merge_pathkeys = NULL; + /* + * Nestloop only supports inner, left, semi, and anti joins. Also, if we + * are doing a right or full mergejoin, we must use *all* the mergeclauses + * as join clauses, else we will not have a valid plan. (Although these + * two flags are currently inverses, keep them separate for clarity and + * possible future changes.)
+ */ + switch (jointype) { + case JOIN_INNER: + case JOIN_LEFT: + case JOIN_SEMI: + case JOIN_ANTI: + case JOIN_LEFT_ANTI_FULL: + nestjoinOK = true; + useallclauses = false; + break; + case JOIN_RIGHT: + case JOIN_FULL: + case JOIN_RIGHT_SEMI: + case JOIN_RIGHT_ANTI: + case JOIN_RIGHT_ANTI_FULL: + nestjoinOK = false; + useallclauses = true; + break; + case JOIN_UNIQUE_OUTER: + case JOIN_UNIQUE_INNER: + jointype = JOIN_INNER; + nestjoinOK = true; + useallclauses = false; + break; + default: { + ereport(ERROR, + (errmodule(MOD_OPT), + errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE), + errmsg("unrecognized join type when match unsorted outer: %d", (int)jointype))); + + nestjoinOK = false; /* keep compiler quiet */ + useallclauses = false; + } break; + } + + Path* outer_cheapest_total = (Path*)linitial(outerrel->partial_pathlist); + Path* inner_cheapest_total = (Path*)linitial(innerrel->partial_pathlist); + Path* inner_cheapest_total_orig = inner_cheapest_total; + + /* + * If inner_cheapest_total is parameterized by the outer rel, ignore it; + * we will consider it below as a member of cheapest_parameterized_paths, + * but the other possibilities considered in this routine aren't usable. + */ + if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel)) + inner_cheapest_total = NULL; + + /* + * If we need to unique-ify the inner path, we will consider only the + * cheapest-total inner. + */ + if (save_jointype == JOIN_UNIQUE_INNER) { + if (inner_cheapest_total == NULL) + return; + inner_cheapest_total = (Path*)create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo); + AssertEreport(inner_cheapest_total != NULL, MOD_OPT_JOIN, "inner cheapest path is NULL"); + } else if (nestjoinOK && inner_cheapest_total != NULL ) { + /* + * Consider materializing the cheapest inner path, unless + * enable_material is off or the path in question materializes its + * output anyway. 
+ */ + if (u_sess->attr.attr_sql.enable_material && + !ExecMaterializesOutput(inner_cheapest_total->pathtype)) { + matpath = (Path*)create_material_path(inner_cheapest_total); + } else if (ExecMaterializesOutput(inner_cheapest_total->pathtype)) { + /* if inner is already materialized, we accept it */ + matpath = inner_cheapest_total; + } + } + + foreach (l, outerrel->partial_pathlist) { + Path* outerpath = (Path*)lfirst(l); + + /* for non-optimal inner, we only try outer path with the same distributed key */ + if (inner_cheapest_total_orig != linitial(innerrel->partial_pathlist) && + outerpath != outer_cheapest_total) + continue; + + /* + * We cannot use an outer path that is parameterized by the inner rel. + */ + if (PATH_PARAM_BY_REL(outerpath, innerrel)) + continue; + + /* + * If we need to unique-ify the outer path, it's pointless to consider + * any but the cheapest outer. (XXX we don't consider parameterized + * outers, nor inners, for unique-ified cases. Should we?) + */ + if (save_jointype == JOIN_UNIQUE_OUTER) { + if (outerpath != outer_cheapest_total) { + continue; + } + outerpath = (Path*)create_unique_path(root, outerrel, outerpath, extra->sjinfo); + AssertEreport(outerpath != NULL, MOD_OPT_JOIN, "outer path is NULL"); + } + + /* + * The result will have this sort order (even if it is implemented as + * a nestloop, and even if some of the mergeclauses are implemented by + * qpquals rather than as true mergeclauses): + */ + merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, outerpath->pathkeys); + + if (save_jointype == JOIN_UNIQUE_INNER) { + try_nestloop_path(root, + joinrel, + jointype, + save_jointype, + extra, + param_source_rels, + outerpath, + inner_cheapest_total, + restrictlist, + merge_pathkeys); + } else if (nestjoinOK) { + // TODO: this part needs to be changed; it did not pass code review + List* all_paths = + list_union_ptr(innerrel->cheapest_parameterized_paths, innerrel->partial_pathlist); + ListCell* llc2 = NULL; + + foreach (llc2, all_paths) { + Path* innerpath =
(Path*)lfirst(llc2); + + try_nestloop_path(root, + joinrel, + jointype, + save_jointype, + extra, + param_source_rels, + outerpath, + innerpath, + restrictlist, + merge_pathkeys); + } + + list_free_ext(all_paths); + + /* Also consider materialized form of the cheapest inner path */ + if (matpath != NULL) + try_nestloop_path(root, + joinrel, + jointype, + save_jointype, + extra, + param_source_rels, + outerpath, + matpath, + restrictlist, + merge_pathkeys); + } + } +} + /* * hash_inner_and_outer * Create hashjoin join paths by explicitly hashing both the outer and @@ -1926,6 +2129,53 @@ static void hash_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI } i++; } + if (u_sess->opt_cxt.query_dop > 1 && outerrel->partial_pathlist != NULL && innerrel->partial_pathlist != NULL) { + Path* cheapest_outter_path = (Path*)linitial(outerrel->partial_pathlist); + Path* cheapest_inner_path = (Path*)linitial(innerrel->partial_pathlist); + jointype = save_jointype; + if (jointype == JOIN_UNIQUE_OUTER) { + cheapest_outter_path = (Path*)create_unique_path(root, outerrel, cheapest_outter_path, extra->sjinfo); + AssertEreport(cheapest_outter_path != NULL, MOD_OPT_JOIN, "outer cheapest path is NULL"); + jointype = JOIN_INNER; + try_hashjoin_path(root, + joinrel, + jointype, + save_jointype, + extra, + param_source_rels, + cheapest_outter_path, + cheapest_inner_path, + restrictlist, + hashclauses); + /* no possibility of cheap startup here */ + } else if (jointype == JOIN_UNIQUE_INNER) { + cheapest_inner_path = (Path*)create_unique_path(root, innerrel, cheapest_inner_path, extra->sjinfo); + AssertEreport(cheapest_inner_path != NULL, MOD_OPT_JOIN, "inner cheapest path is NULL"); + jointype = JOIN_INNER; + try_hashjoin_path(root, + joinrel, + jointype, + save_jointype, + extra, + param_source_rels, + cheapest_outter_path, + cheapest_inner_path, + restrictlist, + hashclauses); + } else { + try_hashjoin_path(root, + joinrel, + jointype, + save_jointype, + extra, + 
param_source_rels, + (Path*)cheapest_outter_path, + (Path*)cheapest_inner_path, + restrictlist, + hashclauses); + } + } + if (join_used != NULL) pfree_ext(join_used); } diff --git a/src/gausskernel/optimizer/path/joinrels.cpp b/src/gausskernel/optimizer/path/joinrels.cpp index 228e96dfe3..19b20d06db 100755 --- a/src/gausskernel/optimizer/path/joinrels.cpp +++ b/src/gausskernel/optimizer/path/joinrels.cpp @@ -755,7 +755,6 @@ RelOptInfo* make_join_rel(PlannerInfo* root, RelOptInfo* rel1, RelOptInfo* rel2) u_sess->opt_cxt.op_work_mem = work_mem_orig; root->glob->estiopmem = esti_op_mem_orig; -#ifdef STREAMPLAN /* * If there are join quals that cannot generate Stream plan, we mark it and try * to get a PGXC plan instead. @@ -781,7 +780,6 @@ RelOptInfo* make_join_rel(PlannerInfo* root, RelOptInfo* rel1, RelOptInfo* rel2) mark_dummy_rel(joinrel); } } -#endif return joinrel; } diff --git a/src/gausskernel/optimizer/path/streampath_base.cpp b/src/gausskernel/optimizer/path/streampath_base.cpp index af8d4d505e..69b6e2ffb3 100755 --- a/src/gausskernel/optimizer/path/streampath_base.cpp +++ b/src/gausskernel/optimizer/path/streampath_base.cpp @@ -104,7 +104,11 @@ PathGen::~PathGen() */ void PathGen::addPath(Path* new_path) { - add_path(m_root, m_rel, new_path); + if (new_path->dop > 1) { + add_partial_path(m_root, m_rel, new_path); + } else { + add_path(m_root, m_rel, new_path); + } } /* diff --git a/src/gausskernel/optimizer/plan/createplan.cpp b/src/gausskernel/optimizer/plan/createplan.cpp index 71d36c4097..ca4dbe1eb8 100755 --- a/src/gausskernel/optimizer/plan/createplan.cpp +++ b/src/gausskernel/optimizer/plan/createplan.cpp @@ -400,16 +400,12 @@ static Plan* create_plan_recurse(PlannerInfo* root, Path* best_path) case T_PartIterator: plan = (Plan*)create_globalpartInterator_plan(root, (PartIteratorPath*)best_path); break; -#ifdef PGXC case T_RemoteQuery: plan = create_remotequery_plan(root, (RemoteQueryPath*)best_path); break; -#endif -#ifdef STREAMPLAN case T_Stream: 
plan = create_stream_plan(root, (StreamPath*)best_path); break; -#endif default: { ereport(ERROR, (errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE), diff --git a/src/gausskernel/optimizer/plan/streamplan.cpp b/src/gausskernel/optimizer/plan/streamplan.cpp index 15d2e1c800..ab965a8a68 100644 --- a/src/gausskernel/optimizer/plan/streamplan.cpp +++ b/src/gausskernel/optimizer/plan/streamplan.cpp @@ -1062,13 +1062,13 @@ void CreateGatherPaths(PlannerInfo* root, RelOptInfo* rel, bool isJoin) { /* Create gather path on plain rel pathlist. */ ListCell* lc = NULL; - List* pathlist = rel->pathlist; + List* partial_pathlist = rel->partial_pathlist; if (!PreCheckGatherParse(root, rel) || !PreCheckGatherOthers(root, rel, isJoin)) { return; } - foreach(lc, pathlist) { + foreach(lc, partial_pathlist) { Path* path = (Path*)lfirst(lc); /* only add gather for path which execute on datanodes */ diff --git a/src/gausskernel/optimizer/plan/streamwalker.cpp b/src/gausskernel/optimizer/plan/streamwalker.cpp index ee5c8be2ee..8ec25413c7 100755 --- a/src/gausskernel/optimizer/plan/streamwalker.cpp +++ b/src/gausskernel/optimizer/plan/streamwalker.cpp @@ -444,7 +444,10 @@ static void stream_walker_query(Query* query, shipping_context *cxt) stream_walker_query_insertinto_rep(query, cxt); /* mark shippable flag based on rte shippbility */ stream_walker_finalize_cxt(query, cxt); - + if (list_length(query->resultRelations) > 1) { + /* turn off dop for multiple modify */ + u_sess->opt_cxt.query_dop = 1; + } /* Mark query's can_push and global_shippable flag. 
*/ query->can_push = cxt->current_shippable; cxt->global_shippable = cxt->global_shippable && cxt->current_shippable; diff --git a/src/gausskernel/optimizer/util/pathnode.cpp b/src/gausskernel/optimizer/util/pathnode.cpp index afa419a29d..9d9921b701 100755 --- a/src/gausskernel/optimizer/util/pathnode.cpp +++ b/src/gausskernel/optimizer/util/pathnode.cpp @@ -1332,10 +1332,9 @@ static void set_predpush_same_level_hint(HintState* hstate, RelOptInfo* rel, Pat */ void set_hint_value(RelOptInfo* join_rel, Path* new_path, HintState* hstate) { - if (hstate == NULL) { + if (hstate == NULL || (new_path->hint_value != 0 && new_path->dop != 0)) { return; } - AssertEreport(new_path->hint_value == 0, MOD_OPT, ""); set_scan_hint(new_path, hstate); @@ -1587,7 +1586,7 @@ static bool AddPathPreCheck(Path* newPath) * * Returns nothing, but modifies parent_rel->pathlist. */ -void add_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path) +void add_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path, bool is_delete_path) { bool accept_new = true; /* unless we find a superior old path */ ListCell* insert_after = NULL; /* where to insert new item */ @@ -1798,9 +1797,10 @@ void add_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path) /* * Delete the data pointed-to by the deleted cell, if possible */ - if (!IsA(old_path, IndexPath)) + if (!IsA(old_path, IndexPath) && old_path->dop <= 1){ pfree_ext(old_path); /* p1_prev does not advance */ + } } else { /* new belongs after this old path if it has cost >= old's */ if (new_path->total_cost >= old_path->total_cost && new_path->hint_value <= old_path->hint_value) @@ -1857,6 +1857,342 @@ void add_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path) lappend_cell(parent_rel->pathlist, insert_after, new_path); else parent_rel->pathlist = lcons(new_path, parent_rel->pathlist); + } else { + /* Reject and recycle the new path */ + if (!IsA(new_path, IndexPath) && is_delete_path) + pfree_ext(new_path); + } +} 
+ +/* + * add_partial_path + * Consider a potential implementation path for the specified parent rel, + * and add it to the rel's partial_pathlist if it is worthy of consideration. + * A path is worthy if it has a better sort order (better pathkeys) or + * cheaper cost (on either dimension), or generates fewer rows, than any + * existing path that has the same or superset parameterization rels. + * + * We also remove from the rel's partial_pathlist any old paths that are dominated + * by new_path --- that is, new_path is cheaper, at least as well ordered, + * generates no more rows, and requires no outer rels not required by the + * old path. + * + * In most cases, a path with a superset parameterization will generate + * fewer rows (since it has more join clauses to apply), so that those two + * figures of merit move in opposite directions; this means that a path of + * one parameterization can seldom dominate a path of another. But such + * cases do arise, so we make the full set of checks anyway. + * + * There is one policy decision embedded in this function, along with its + * sibling add_path_precheck: we treat all parameterized paths as having + * NIL pathkeys, so that they compete only on cost. This is to reduce + * the number of parameterized paths that are kept. See discussion in + * src/backend/optimizer/README. + * + * The partial_pathlist is kept sorted by total_cost, with cheaper paths + * at the front. Within this routine, that's simply a speed hack: + * doing it that way makes it more likely that we will reject an inferior + * path after a few comparisons, rather than many comparisons. + * However, add_path_precheck relies on this ordering to exit early + * when possible. + * + * NOTE: discarded Path objects are immediately pfree'd to reduce planner + * memory consumption.
We dare not try to free the substructure of a Path, + * since much of it may be shared with other Paths or the query tree itself; + * but just recycling discarded Path nodes is a very useful savings in + * a large join tree. We can recycle the List nodes of partial_pathlist, too. + * + * BUT: we do not pfree IndexPath objects, since they may be referenced as + * children of BitmapHeapPaths as well as being paths in their own right. + * + * 'parent_rel' is the relation entry to which the path corresponds. + * 'new_path' is a potential path for parent_rel. + * + * Returns nothing, but modifies parent_rel->partial_pathlist. + */ +void add_partial_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path) +{ + bool accept_new = true; /* unless we find a superior old path */ + ListCell* insert_after = NULL; /* where to insert new item */ + List* new_path_pathkeys = NIL; + ListCell* p1 = NULL; + ListCell* p1_prev = NULL; + ListCell* p1_next = NULL; + bool small_fuzzy_factor_is_used = false; + + /* + * This is a convenient place to check for query cancel --- no part of the + * planner goes very long without calling add_path(). + */ + CHECK_FOR_INTERRUPTS(); + + if (!AddPathPreCheck(new_path)) { + return; + } + + /* Set path's hint_value. 
*/ + if (root != NULL && root->parse->hintState != NULL) { + set_hint_value(parent_rel, new_path, root->parse->hintState); + } + + /*Set path's index_hint */ + if (root != NULL && root->parse->indexhintList != NULL) { + set_index_hint_value(new_path, root->parse->indexhintList); + } + + /* we will add cn gather path when cn gather hint switch on */ + if (root != NULL && EXEC_CONTAIN_COORDINATOR(new_path->exec_type) && permit_gather(root)) { + RangeTblEntry* rte = root->simple_rte_array[parent_rel->relid]; + bool isSysTable = (rte != NULL && rte->rtekind == RTE_RELATION && is_sys_table(rte->relid)); + + if (!isSysTable) { + AddGatherPath(root, parent_rel, new_path); + return; + } + } + + if (OPTIMIZE_PLAN != u_sess->attr.attr_sql.plan_mode_seed) { + parent_rel->partial_pathlist = lcons(new_path, parent_rel->partial_pathlist); + return; + } + + /* Pretend parameterized paths have no pathkeys, per comment above */ + new_path_pathkeys = new_path->param_info ? NIL : new_path->pathkeys; + + /* + * Loop to check proposed new path against old paths. Note it is possible + * for more than one old path to be tossed out because new_path dominates + * it. + * + * We can't use foreach here because the loop body may delete the current + * list cell. + */ + p1_prev = NULL; + for (p1 = list_head(parent_rel->partial_pathlist); p1 != NULL; p1 = p1_next) { + Path* old_path = (Path*)lfirst(p1); + bool remove_old = false; /* unless new proves superior */ + bool eq_diskey = true; + PathCostComparison costcmp = COSTS_DIFFERENT; + PathKeysComparison keyscmp = PATHKEYS_DIFFERENT; + BMS_Comparison outercmp = BMS_DIFFERENT; + double rowscmp; + + p1_next = lnext(p1); + + /* + * Do a fuzzy cost comparison with 1% fuzziness limit. (XXX does this + * percentage need to be user-configurable?) 
+ */ + costcmp = compare_path_costs_fuzzily(new_path, old_path, FUZZY_FACTOR); + + /* + * If the two paths compare differently for startup and total cost, + * then we want to keep both, and we can skip comparing pathkeys and + * required_outer rels. If they compare the same, proceed with the + * other comparisons. Row count is checked last. (We make the tests + * in this order because the cost comparison is most likely to turn + * out "different", and the pathkeys comparison next most likely. As + * explained above, row count very seldom makes a difference, so even + * though it's cheap to compare there's not much point in checking it + * earlier.) + */ + if (costcmp != COSTS_DIFFERENT) { + /* Similarly check to see if either dominates on pathkeys */ + List* old_path_pathkeys = NIL; + + old_path_pathkeys = old_path->param_info ? NIL : old_path->pathkeys; + keyscmp = compare_pathkeys(new_path_pathkeys, old_path_pathkeys); + if (keyscmp != PATHKEYS_DIFFERENT) { + switch (costcmp) { + case COSTS_EQUAL: + outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path), PATH_REQ_OUTER(old_path)); + if (keyscmp == PATHKEYS_BETTER1) { + if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET1) && new_path->rows <= old_path->rows) + remove_old = true; /* new dominates old */ + } else if (keyscmp == PATHKEYS_BETTER2) { + if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET2) && new_path->rows >= old_path->rows) + accept_new = false; /* old dominates new */ + } else { + if (outercmp == BMS_EQUAL) { + /* + * Same pathkeys and outer rels, and fuzzily + * the same cost, so keep just one; to decide + * which, first check rows and then do a fuzzy + * cost comparison with very small fuzz limit. + * (We used to do an exact cost comparison, + * but that results in annoying + * platform-specific plan variations due to + * roundoff in the cost estimates.) If things + * are still tied, arbitrarily keep only the + * old path. 
Notice that we will keep only + * the old path even if the less-fuzzy + * comparison decides the startup and total + * costs compare differently. + */ + if (new_path->rows < old_path->rows) + remove_old = true; /* new dominates old */ + else if (new_path->rows > old_path->rows) + accept_new = false; /* old dominates new */ + else { + small_fuzzy_factor_is_used = true; + if (compare_path_costs_fuzzily(new_path, old_path, SMALL_FUZZY_FACTOR) == + COSTS_BETTER1) + remove_old = true; /* new dominates old */ + else + accept_new = false; /* old equals or + * dominates new */ + } + } else if (outercmp == BMS_SUBSET1 && new_path->rows <= old_path->rows) + remove_old = true; /* new dominates old */ + else if (outercmp == BMS_SUBSET2 && new_path->rows >= old_path->rows) + accept_new = false; /* old dominates new */ + /* else different parameterizations, keep both */ + } + break; + case COSTS_BETTER1: + if (keyscmp != PATHKEYS_BETTER2) { + outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path), PATH_REQ_OUTER(old_path)); + if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET1) && new_path->rows <= old_path->rows) + remove_old = true; /* new dominates old */ + } + break; + case COSTS_BETTER2: + if (keyscmp != PATHKEYS_BETTER1) { + outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path), PATH_REQ_OUTER(old_path)); + if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET2) && new_path->rows >= old_path->rows) + accept_new = false; /* old dominates new */ + } + break; + default: + + /* + * can't get here, but keep this case to keep compiler + * quiet + */ + break; + } + } + } + +#ifdef STREAMPLAN + if (IS_STREAM_PLAN) { + /* When compare the path with single node distribution with other path with non-single + * node distribution, the former will be kept and the latter will be removed. 
+ */ + bool is_new_path_single_node_distribution = ng_is_single_node_group_distribution(&new_path->distribution); + bool is_old_path_single_node_distribution = ng_is_single_node_group_distribution(&old_path->distribution); + /*When open dn gather, only the path with single node distribution will be kept for the cost after.*/ + if ((is_new_path_single_node_distribution && !is_old_path_single_node_distribution) + || (!is_new_path_single_node_distribution && is_old_path_single_node_distribution)) { + /* When compare the path with single node distribution with other path with non-single + * node distribution, the former will be kept and the latter will be removed. + */ + if (costcmp == COSTS_BETTER1 || costcmp == COSTS_BETTER2) { + eq_diskey = true; + } + } else if (!is_new_path_single_node_distribution && !is_old_path_single_node_distribution) { + eq_diskey = equal_distributekey(root, new_path->distribute_keys, old_path->distribute_keys); + } else { + // Remove when they are all single node distribution. + eq_diskey = true; + } + } +#endif + /* + * Remove current element from pathlist if dominated by new. + */ +#ifdef STREAMPLAN + if (remove_old && eq_diskey) { +#else + if (remove_old) { +#endif + ereport(DEBUG1, + (errmodule(MOD_OPT_JOIN), + (errmsg("An old path is removed with cost = %lf .. 
%lf; rows = %lf", + old_path->startup_cost, + old_path->total_cost, + old_path->rows)))); + rowscmp = old_path->rows - new_path->rows; + if (log_min_messages <= DEBUG1) + debug1_print_compare_result( + costcmp, keyscmp, outercmp, rowscmp, root, old_path, small_fuzzy_factor_is_used); + parent_rel->partial_pathlist = list_delete_cell(parent_rel->partial_pathlist, p1, p1_prev); + + /* + * Delete the data pointed-to by the deleted cell, if possible + */ + if (!IsA(old_path, IndexPath)) { + ListCell* lc = NULL; + bool is_deleted = false; + foreach(lc, parent_rel->pathlist) { //可以考虑方向记path->(list of exists) + Path* path_parallel = (Path*)lfirst(lc); + if (path_parallel == old_path) { + parent_rel->pathlist = list_delete_cell2(parent_rel->pathlist, lc); + is_deleted = true; + break; + } + } + if (!is_deleted) { + pfree_ext(old_path); + } + } + /* p1_prev does not advance */ + } else { + /* new belongs after this old path if it has cost >= old's */ + if (new_path->total_cost >= old_path->total_cost && new_path->hint_value <= old_path->hint_value) + insert_after = p1; + /* p1_prev advances */ + p1_prev = p1; + } + +#ifdef STREAMPLAN + /* we should accept the new if distribute key differs */ + if (!accept_new && !eq_diskey) { + accept_new = true; + /* new belongs after this old path if it has cost >= old's */ + if (new_path->total_cost >= old_path->total_cost && new_path->hint_value <= old_path->hint_value) + insert_after = p1; + /* p1_prev advances */ + p1_prev = p1; + } +#endif + + /* + * If we found an old path that dominates new_path, we can quit + * scanning the pathlist; we will not add new_path, and we assume + * new_path cannot dominate any other elements of the pathlist. + */ + if (!accept_new) { + ereport(DEBUG1, + (errmodule(MOD_OPT_JOIN), + (errmsg("A new path is not accepted with cost = %lf .. 
%lf; rows = %lf", + new_path->startup_cost, + new_path->total_cost, + new_path->rows)))); + rowscmp = old_path->rows - new_path->rows; + if (log_min_messages <= DEBUG1) { + debug1_print_new_path(root, new_path, small_fuzzy_factor_is_used); + debug1_print_compare_result( + costcmp, keyscmp, outercmp, rowscmp, root, old_path, small_fuzzy_factor_is_used); + } + break; + } + } + + if (accept_new) { + /* Accept the new path: insert it at proper place in pathlist */ + ereport(DEBUG1, + (errmodule(MOD_OPT_JOIN), + (errmsg("A new path is accepted with cost = %lf .. %lf; rows = %lf", + new_path->startup_cost, + new_path->total_cost, + new_path->rows)))); + if (log_min_messages <= DEBUG1) + debug1_print_new_path(root, new_path, small_fuzzy_factor_is_used); + if (insert_after != NULL) + lappend_cell(parent_rel->partial_pathlist, insert_after, new_path); + else + parent_rel->partial_pathlist = lcons(new_path, parent_rel->partial_pathlist); } else { /* Reject and recycle the new path */ if (!IsA(new_path, IndexPath)) @@ -3016,7 +3352,9 @@ UniquePath* create_unique_path(PlannerInfo* root, RelOptInfo* rel, Path* subpath if (subpath == lfirst(lc)) break; } - AssertEreport(lc != NULL, MOD_OPT_JOIN, "Subpath should be one of cheapest total path of rel"); + if (subpath->dop == 1) { + AssertEreport(lc != NULL, MOD_OPT_JOIN, "Subpath should be one of cheapest total path of rel"); + } AssertEreport(subpath->parent == rel || subpath->parent->base_rel == rel, MOD_OPT_JOIN, ""); /* ... 
or if SpecialJoinInfo is the wrong one */ AssertEreport(sjinfo->jointype == JOIN_SEMI, MOD_OPT_JOIN, "Join type should be semi join"); diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 65a80bd29a..930dc876a8 100755 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -714,6 +714,7 @@ typedef struct RelOptInfo { List* distribute_keys; /* distribute key */ List* pathlist; /* Path structures */ List* ppilist; /* ParamPathInfos used in pathlist */ + List* partial_pathlist; /* partial Paths */ struct Path* cheapest_gather_path; struct Path* cheapest_startup_path; List* cheapest_total_path; /* contain all cheapest total paths from different distribute key */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 6ff21d6003..0fecb39ff7 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -46,7 +46,8 @@ extern int compare_fractional_path_costs(Path* path1, Path* path2, double fracti extern void set_cheapest(RelOptInfo* parent_rel, PlannerInfo* root = NULL); extern Path* get_cheapest_path(PlannerInfo* root, RelOptInfo* rel, const double* agg_groups, bool has_groupby); extern Path* find_hinted_path(Path* current_path); -extern void add_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path); +extern void add_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path, bool is_delete_path = true); +extern void add_partial_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path); extern bool add_path_precheck( RelOptInfo* parent_rel, Cost startup_cost, Cost total_cost, List* pathkeys, Relids required_outer); diff --git a/src/test/regress/pg_regress.cpp b/src/test/regress/pg_regress.cpp index bdac8906af..cdfa27862d 100644 --- a/src/test/regress/pg_regress.cpp +++ b/src/test/regress/pg_regress.cpp @@ -5461,7 +5461,7 @@ static void check_global_variables() } } -#define BASE_PGXC_LIKE_MACRO_NUM 1419 +#define BASE_PGXC_LIKE_MACRO_NUM 1418 static void 
check_pgxc_like_macros() { #ifdef BUILD_BY_CMAKE -- Gitee From 2567c1a1404da0f85e61e7c110e33f41172880e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=A8=E5=BF=97=E9=93=AE?= <8033306+zhizhengyang@user.noreply.gitee.com> Date: Mon, 15 Apr 2024 10:39:22 +0800 Subject: [PATCH 2/4] fix code --- src/gausskernel/optimizer/path/joinpath.cpp | 110 ++---------------- src/gausskernel/optimizer/path/joinrels.cpp | 4 +- src/gausskernel/optimizer/plan/createplan.cpp | 6 +- src/test/regress/expected/plan_hint_iud.out | 22 ++-- 4 files changed, 25 insertions(+), 117 deletions(-) diff --git a/src/gausskernel/optimizer/path/joinpath.cpp b/src/gausskernel/optimizer/path/joinpath.cpp index 30e407f724..02f4dfdb3e 100755 --- a/src/gausskernel/optimizer/path/joinpath.cpp +++ b/src/gausskernel/optimizer/path/joinpath.cpp @@ -308,6 +308,9 @@ void add_paths_to_joinrel(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* ou &extra, param_source_rels); if (u_sess->opt_cxt.query_dop > 1) { + /* + *The match_partial_unsorted_outer function is used in SMP scenarios. + */ match_partial_unsorted_outer(root, joinrel, outerrel, @@ -627,7 +630,6 @@ static void try_nestloop_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType j if (add_path_precheck(joinrel, workspace.startup_cost, workspace.total_cost, pathkeys, required_outer) || add_path_hintcheck(root->parse->hintState, joinrel->relids, outer_path, inner_path, HINT_KEYWORD_NESTLOOP)) { -#ifdef STREAMPLAN /* check exec type of inner and outer path before generate join path. 
*/ if (IS_STREAM_PLAN && CheckJoinExecType(root, outer_path, inner_path)) { execOnCoords = true; @@ -659,33 +661,6 @@ static void try_nestloop_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType j if (u_sess->opt_cxt.query_dop > 1) nlpgen->addNestLoopPath(&cur_workspace, distribution, u_sess->opt_cxt.query_dop); } else { -#ifdef ENABLE_MULTIPLE_NODES - /* - * We choose candidate distribution list here with heuristic method, - * (1) for un-correlated query block, we will get a list of candidate Distribution - * (2) for correlated query block, we should shuffle them (inner and outer) to a correlated subplan node - * group - */ - List* candidate_distribution_list = - ng_get_join_candidate_distribution_list(outer_path, inner_path, root->is_correlated, - get_join_distribution_perference_type(joinrel, inner_path, outer_path)); - - /* - * For each candidate distribution (node group), we do nest loop join computing on it, - * if outer or inner is not in candidate node group, we should do shuffle. 
- */ - ListCell* lc = NULL; - foreach (lc, candidate_distribution_list) { - Distribution* distribution = (Distribution*)lfirst(lc); - JoinCostWorkspace cur_workspace; - copy_JoinCostWorkspace(&cur_workspace, &workspace); - - nlpgen->addNestLoopPath(&cur_workspace, distribution, 1); - - if (u_sess->opt_cxt.query_dop > 1) - nlpgen->addNestLoopPath(&cur_workspace, distribution, u_sess->opt_cxt.query_dop); - } -#else JoinCostWorkspace cur_workspace; copy_JoinCostWorkspace(&cur_workspace, &workspace); @@ -698,13 +673,10 @@ static void try_nestloop_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType j add_path(root, joinrel, cheapest_partial_path ,false); } } -#endif } delete nlpgen; - } else -#endif - { + } else { /* try nestloop path single */ TryNestLoopPathSingle(root, joinrel, @@ -839,7 +811,6 @@ static void try_mergejoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType if (IS_STREAM_PLAN && CheckJoinExecType(root, outer_path, inner_path)) { execOnCoords = true; } - if (IS_STREAM_PLAN && !execOnCoords) { MergeJoinPathGen* mjpgen = New(CurrentMemoryContext) MergeJoinPathGen(root, joinrel, @@ -980,7 +951,6 @@ static void try_hashjoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType j if (add_path_precheck(joinrel, workspace.startup_cost, workspace.total_cost, NIL, required_outer) || add_path_hintcheck(root->parse->hintState, joinrel->relids, outer_path, inner_path, HINT_KEYWORD_HASHJOIN)) { -#ifdef STREAMPLAN /* check exec type of inner and outer path before generate join path. 
*/ if (IS_STREAM_PLAN && CheckJoinExecType(root, outer_path, inner_path)) { execOnCoords = true; @@ -1011,32 +981,6 @@ static void try_hashjoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType j if (u_sess->opt_cxt.query_dop > 1) hjpgen->addHashJoinPath(&cur_workspace, distribution, u_sess->opt_cxt.query_dop); } else { -#ifdef ENABLE_MULTIPLE_NODES - /* - * We choose candidate distribution list here with heuristic method, - * (1) for un-correlated query block, we will get a list of candidate Distribution - * (2) for correlated query block, we should shuffle them (inner and outer) to a correlated subplan node - * group - */ - List* candidate_distribution_list = - ng_get_join_candidate_distribution_list(outer_path, inner_path, root->is_correlated, - get_join_distribution_perference_type(joinrel, inner_path, outer_path)); - - /* - * For each candidate distribution (node group), we do hash join computing on it, - * if outer or inner is not in candidate node group, we should do shuffle. 
- */ - ListCell* lc = NULL; - foreach (lc, candidate_distribution_list) { - Distribution* distribution = (Distribution*)lfirst(lc); - JoinCostWorkspace cur_workspace; - copy_JoinCostWorkspace(&cur_workspace, &workspace); - - hjpgen->addHashJoinPath(&cur_workspace, distribution, 1); - if (u_sess->opt_cxt.query_dop > 1) - hjpgen->addHashJoinPath(&cur_workspace, distribution, u_sess->opt_cxt.query_dop); - } -#else JoinCostWorkspace cur_workspace; copy_JoinCostWorkspace(&cur_workspace, &workspace); hjpgen->addHashJoinPath(&cur_workspace, NULL, 1); @@ -1047,12 +991,10 @@ static void try_hashjoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType j add_path(root, joinrel, cheapest_partial_path ,false); } } -#endif } delete hjpgen; } else -#endif { /* try hash join single */ TryHashJoinPathSingle(root, @@ -1694,39 +1636,9 @@ static void match_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI /* * match_partial_unsorted_outer - * Creates possible join paths for processing a single join relation - * 'joinrel' by employing either iterative substitution or - * mergejoining on each of its possible outer paths (considering - * only outer paths that are already ordered well enough for merging). - * - * We always generate a nestloop path for each available outer path. - * In fact we may generate as many as five: one on the cheapest-total-cost - * inner path, one on the same with materialization, one on the - * cheapest-startup-cost inner path (if different), one on the - * cheapest-total inner-indexscan path (if any), and one on the - * cheapest-startup inner-indexscan path (if different). - * - * We also consider mergejoins if mergejoin clauses are available. We have - * two ways to generate the inner path for a mergejoin: sort the cheapest - * inner path, or use an inner path that is already suitably ordered for the - * merge. 
If we have several mergeclauses, it could be that there is no inner - * path (or only a very expensive one) for the full list of mergeclauses, but - * better paths exist if we truncate the mergeclause list (thereby discarding - * some sort key requirements). So, we consider truncations of the - * mergeclause list as well as the full list. (Ideally we'd consider all - * subsets of the mergeclause list, but that seems way too expensive.) - * - * 'joinrel' is the join relation - * 'outerrel' is the outer join relation - * 'innerrel' is the inner join relation - * 'restrictlist' contains all of the RestrictInfo nodes for restriction - * clauses that apply to this join - * 'mergeclause_list' is a list of RestrictInfo nodes for available - * mergejoin clauses in this join - * 'jointype' is the type of join to do - * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if jointype is SEMI or ANTI - * 'param_source_rels' are OK targets for parameterization of result paths + * The match_partial_unsorted_outer function is used to generate SMP plans. + * Since SMP currently does not support merge join, merge join is excluded. + * Other aspects are the same as the match_unsorted_outer function. 
*/ static void match_partial_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel, RelOptInfo* innerrel, List* restrictlist, List* mergeclause_list, JoinType jointype, JoinPathExtraData* extra, Relids param_source_rels) @@ -1863,12 +1775,9 @@ static void match_partial_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, restrictlist, merge_pathkeys); } else if (nestjoinOK) { - //这边要改,检视不通过 - List* all_paths = - list_union_ptr(innerrel->cheapest_parameterized_paths, innerrel->partial_pathlist); + /* Parameter info does not support parallel plans, so we remove it. */ ListCell* llc2 = NULL; - - foreach (llc2, all_paths) { + foreach (llc2, innerrel->partial_pathlist) { Path* innerpath = (Path*)lfirst(llc2); try_nestloop_path(root, @@ -1883,7 +1792,6 @@ static void match_partial_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, merge_pathkeys); } - list_free_ext(all_paths); /* Also consider materialized form of the cheapest inner path */ if (matpath != NULL) diff --git a/src/gausskernel/optimizer/path/joinrels.cpp b/src/gausskernel/optimizer/path/joinrels.cpp index 19b20d06db..17329dfed9 100755 --- a/src/gausskernel/optimizer/path/joinrels.cpp +++ b/src/gausskernel/optimizer/path/joinrels.cpp @@ -761,7 +761,7 @@ RelOptInfo* make_join_rel(PlannerInfo* root, RelOptInfo* rel1, RelOptInfo* rel2) * e.g. cannot broadcast hashed results for inner plan of semi join when outer * plan is replicated now. 
*/ - +#ifdef STREAMPLAN if (IS_STREAM && NIL == joinrel->pathlist) { /* * We remove the useless RelOptInfo and then try other join path if the current level @@ -780,7 +780,7 @@ RelOptInfo* make_join_rel(PlannerInfo* root, RelOptInfo* rel1, RelOptInfo* rel2) mark_dummy_rel(joinrel); } } - +#endif return joinrel; } diff --git a/src/gausskernel/optimizer/plan/createplan.cpp b/src/gausskernel/optimizer/plan/createplan.cpp index ca4dbe1eb8..2260cbc264 100755 --- a/src/gausskernel/optimizer/plan/createplan.cpp +++ b/src/gausskernel/optimizer/plan/createplan.cpp @@ -400,12 +400,16 @@ static Plan* create_plan_recurse(PlannerInfo* root, Path* best_path) case T_PartIterator: plan = (Plan*)create_globalpartInterator_plan(root, (PartIteratorPath*)best_path); break; +#ifdef PGXC case T_RemoteQuery: plan = create_remotequery_plan(root, (RemoteQueryPath*)best_path); break; +#endif +#ifdef STREAMPLAN case T_Stream: plan = create_stream_plan(root, (StreamPath*)best_path); break; +#endif default: { ereport(ERROR, (errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE), @@ -1853,11 +1857,9 @@ static Plan* create_unique_plan(PlannerInfo* root, UniquePath* best_path) * minimum output tlist, without any stuff we might have added to the * subplan tlist. 
*/ -#ifdef STREAMPLAN if (IS_STREAM_PLAN && best_path->hold_tlist) { tlist = newtlist; } -#endif /* Can't figure out the real size, so give a rough estimation */ Size hashentrysize = alloc_trunk_size((subplan->plan_width) + MAXALIGN(sizeof(MinimalTupleData))); diff --git a/src/test/regress/expected/plan_hint_iud.out b/src/test/regress/expected/plan_hint_iud.out index 2a13cdd586..9ef39c3ea4 100755 --- a/src/test/regress/expected/plan_hint_iud.out +++ b/src/test/regress/expected/plan_hint_iud.out @@ -325,14 +325,13 @@ deallocate all; ------------------------------------------------------------- Update on t1 -> Streaming(type: LOCAL GATHER dop: 1/8) - -> Nested Loop - Join Filter: (t1.c2 = t2.c2) - -> HashAggregate - Group By Key: t2.c2 + -> Hash Semi Join + Hash Cond: (t1.c2 = t2.c2) + -> Seq Scan on t1 + -> Hash -> Streaming(type: BROADCAST dop: 8/8) -> Seq Scan on t2 - -> Seq Scan on t1 -(9 rows) +(8 rows) --- No expand :EXP update t1 set c2 = 2 where c2 in (select /*+ no_expand */ c2 from t2); @@ -457,14 +456,13 @@ deallocate all; ------------------------------------------------------------- Delete on t1 -> Streaming(type: LOCAL GATHER dop: 1/8) - -> Nested Loop - Join Filter: (t1.c2 = t2.c2) - -> HashAggregate - Group By Key: t2.c2 + -> Hash Semi Join + Hash Cond: (t1.c2 = t2.c2) + -> Seq Scan on t1 + -> Hash -> Streaming(type: BROADCAST dop: 8/8) -> Seq Scan on t2 - -> Seq Scan on t1 -(9 rows) +(8 rows) --- No expand :EXP delete t1 where c2 in (select /*+ no_expand */ c2 from t2); -- Gitee From 5d8e4c1f52e106b24715e71395b4fe7e18167c92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=A8=E5=BF=97=E9=93=AE?= <8033306+zhizhengyang@user.noreply.gitee.com> Date: Mon, 22 Apr 2024 14:45:32 +0800 Subject: [PATCH 3/4] add comment --- src/gausskernel/optimizer/path/allpaths.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/gausskernel/optimizer/path/allpaths.cpp b/src/gausskernel/optimizer/path/allpaths.cpp index 06bd742495..2a1cb1f5ad 100755 --- 
a/src/gausskernel/optimizer/path/allpaths.cpp +++ b/src/gausskernel/optimizer/path/allpaths.cpp @@ -289,6 +289,12 @@ RelOptInfo* make_one_rel(PlannerInfo* root, List* joinlist) * Generate access paths for the entire join tree. */ rel = make_rel_from_joinlist(root, joinlist); + /* + * The generated SMP paths are stored in partial_pathlist; however, partial_pathlist + * is not used to store plans permanently. Therefore, it is necessary to compare the optimal + * plan in partial_pathlist with the plans in pathlist from the serial process + * and save the best one in pathlist. + */ if (list_length(joinlist) <=1 && rel->partial_pathlist != NULL) { Path* partial_path = (Path*)linitial(rel->partial_pathlist); add_path(root, rel, partial_path); -- Gitee From 27ad6c43b0ad27176c359417955804336f674af9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=A8=E5=BF=97=E9=93=AE?= <8033306+zhizhengyang@user.noreply.gitee.com> Date: Wed, 24 Apr 2024 10:23:23 +0800 Subject: [PATCH 4/4] bug fix --- src/gausskernel/optimizer/path/allpaths.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/gausskernel/optimizer/path/allpaths.cpp b/src/gausskernel/optimizer/path/allpaths.cpp index 2a1cb1f5ad..0e2ad642fc 100755 --- a/src/gausskernel/optimizer/path/allpaths.cpp +++ b/src/gausskernel/optimizer/path/allpaths.cpp @@ -1229,7 +1229,18 @@ static void set_plain_rel_pathlist(PlannerInfo* root, RelOptInfo* rel, RangeTblE RelationClose(relation); } #endif - + /* + * If it's a partitioned table, then to support the partition plan, + * we will no longer separate pathlist and partial_pathlist. + */ + if (rel->isPartitionedTable) { + ListCell* pathCell = NULL; + foreach(pathCell, rel->partial_pathlist) { + Path* path = (Path*)lfirst(pathCell); + add_path(root, rel, path); + } + rel->partial_pathlist = NULL; + } /* Now find the cheapest of the paths for this rel */ set_cheapest(rel); -- Gitee