diff --git a/src/bin/gs_guc/cluster_guc.conf b/src/bin/gs_guc/cluster_guc.conf index c07dd9ae94a041c6b61b2772ce0a14582058e67c..ca0cb0bcd099ff648d2ff44821afd69f05e15f27 100755 --- a/src/bin/gs_guc/cluster_guc.conf +++ b/src/bin/gs_guc/cluster_guc.conf @@ -228,6 +228,7 @@ enable_indexscan|bool|0,0|NULL|NULL| enable_inner_unique_opt|bool|0,0|NULL|NULL| enable_kill_query|bool|0,0|NULL|NULL| enable_material|bool|0,0|NULL|NULL| +enable_memoize|bool|0,0|NULL|NULL| enable_memory_limit|bool|0,0|NULL|NULL| enable_memory_context_control|bool|0,0|NULL|NULL| enable_memory_context_check_debug|bool|0,0|NULL|NULL| diff --git a/src/common/backend/nodes/nodes.cpp b/src/common/backend/nodes/nodes.cpp index ce21ef1b05b244479423ec0786bdcac9866e3ac7..e213704736a590bd4ff7cc1fbd96f589d06b39cc 100755 --- a/src/common/backend/nodes/nodes.cpp +++ b/src/common/backend/nodes/nodes.cpp @@ -73,6 +73,7 @@ static const TagStr g_tagStrArr[] = {{T_Invalid, "Invalid"}, {T_MergeJoin, "MergeJoin"}, {T_HashJoin, "HashJoin"}, {T_Material, "Material"}, + {T_Memoize, "Memoize"}, {T_Sort, "Sort"}, {T_SortGroup, "SortGroup"}, {T_Group, "Group"}, @@ -147,6 +148,7 @@ static const TagStr g_tagStrArr[] = {{T_Invalid, "Invalid"}, {T_MergeJoinState, "MergeJoinState"}, {T_HashJoinState, "HashJoinState"}, {T_MaterialState, "MaterialState"}, + {T_MemoizeState, "MemoizeState"}, {T_SortState, "SortState"}, {T_SortGroupState, "SortGroupState"}, {T_GroupState, "GroupState"}, @@ -277,6 +279,7 @@ static const TagStr g_tagStrArr[] = {{T_Invalid, "Invalid"}, {T_MergeAppendPath, "MergeAppendPath"}, {T_ResultPath, "ResultPath"}, {T_MaterialPath, "MaterialPath"}, + {T_MemoizePath, "MemoizePath"}, {T_UniquePath, "UniquePath"}, {T_PartIteratorPath, "PartIteratorPath"}, {T_EquivalenceClass, "EquivalenceClass"}, diff --git a/src/common/backend/utils/adt/datum.cpp b/src/common/backend/utils/adt/datum.cpp index ad858fd5ab6f3212498c84566972b3e036fb574c..8ee9fb019686c90c4df4ea0712c4d40041c4f91d 100644 --- a/src/common/backend/utils/adt/datum.cpp +++ b/src/common/backend/utils/adt/datum.cpp @@ -42,6 +42,7 @@ #include "utils/datum.h" #include "access/tuptoaster.h" #include "fmgr.h" +#include "access/hash.h" /* ------------------------------------------------------------------------- * datumGetSize @@ -252,6 +253,50 @@ bool DatumImageEq(Datum value1, Datum value2, bool typByVal, int typLen) return result; } +/*------------------------------------------------------------------------- + * datum_image_hash + * + * Generate a hash value based on the binary representation of 'value'. Most + * use cases will want to use the hash function specific to the Datum's type, + * however, some corner cases require generating a hash value based on the + * actual bits rather than the logical value. + *------------------------------------------------------------------------- + */ +uint32 +datum_image_hash(Datum value, bool typByVal, int typLen) +{ + Size len; + uint32 result; + + if (typByVal) + result = hash_any((unsigned char *) &value, sizeof(Datum)); + else if (typLen > 0) + result = hash_any((unsigned char *) DatumGetPointer(value), typLen); + else if (typLen == -1) { + struct varlena *val; + + len = toast_raw_datum_size(value); + + val = PG_DETOAST_DATUM_PACKED(value); + + result = hash_any((unsigned char *) VARDATA_ANY(val), len - VARHDRSZ); + + /* Only free memory if it's a copy made here. 
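+         * PG_DETOAST_DATUM_PACKED hands back the original datum when no
+         * detoasting or decompression was needed, so the pointer comparison
+         * below tells whether a separate copy was allocated here.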
*/ + if ((Pointer) val != (Pointer) value) + pfree(val); + } else if (typLen == -2) { + char *s; + + s = DatumGetCString(value); + len = strlen(s) + 1; + + result = hash_any((unsigned char *) s, len); + } else { + elog(ERROR, "unexpected typLen: %d", typLen); + result = 0; /* keep compiler quiet */ + } + return result; +} Datum btequalimage(PG_FUNCTION_ARGS) { @@ -275,3 +320,4 @@ Datum btequalimage(PG_FUNCTION_ARGS) PG_RETURN_BOOL(false); } + diff --git a/src/common/backend/utils/misc/guc/guc_sql.cpp b/src/common/backend/utils/misc/guc/guc_sql.cpp index cf4f2fb7d286c640f8ed0348cc39f355199cab1d..eadf689417516f6f48ddee0960227860b2b386de 100755 --- a/src/common/backend/utils/misc/guc/guc_sql.cpp +++ b/src/common/backend/utils/misc/guc/guc_sql.cpp @@ -819,6 +819,17 @@ static void InitSqlConfigureNamesBool() NULL, NULL, NULL}, + {{"enable_memoize", + PGC_USERSET, + NODE_ALL, + QUERY_TUNING_METHOD, + gettext_noop("Enables the planner's use of memoize."), + NULL}, + &u_sess->attr.attr_sql.enable_memoize, + false, + NULL, + NULL, + NULL}, {{"enable_startwith_debug", PGC_USERSET, NODE_ALL, diff --git a/src/gausskernel/optimizer/path/costsize.cpp b/src/gausskernel/optimizer/path/costsize.cpp index 44c0afe96ef929edf51cbf76efe86f079d361839..9072895299f2fb263393e49579bf0b4bd6d7c8c7 100755 --- a/src/gausskernel/optimizer/path/costsize.cpp +++ b/src/gausskernel/optimizer/path/costsize.cpp @@ -2503,7 +2503,194 @@ void cost_merge_append(Path* path, PlannerInfo* root, List* pathkeys, int n_stre path->startup_cost = startup_cost + input_startup_cost; path->total_cost = startup_cost + run_cost + input_total_cost; } +/* + * get_expr_width + * Estimate the width of the given expr attempting to use the width + * cached in a Var's owning RelOptInfo, else fallback on the type's + * average width when unable to or when the given Node is not a Var. + */ +static int32 +get_expr_width(PlannerInfo *root, const Node *expr) +{ + int32 width; + + if (IsA(expr, Var)) + { + const Var *var = (const Var *) expr; + + /* We should not see any upper-level Vars here */ + Assert(var->varlevelsup == 0); + /* Try to get data from RelOptInfo cache */ + if (!IS_SPECIAL_VARNO(var->varno) && + var->varno < root->simple_rel_array_size) + { + RelOptInfo *rel = root->simple_rel_array[var->varno]; + + if (rel != NULL && + var->varattno >= rel->min_attr && + var->varattno <= rel->max_attr) + { + int ndx = var->varattno - rel->min_attr; + + if (rel->attr_widths[ndx] > 0) + return rel->attr_widths[ndx]; + } + } + + /* + * No cached data available, so estimate using just the type info. + */ + width = get_typavgwidth(var->vartype, var->vartypmod); + Assert(width > 0); + + return width; + } + + width = get_typavgwidth(exprType(expr), exprTypmod(expr)); + Assert(width > 0); + return width; +} +/* + * cost_memoize_rescan + * Determines the estimated cost of rescanning a Memoize node. + * + * In order to estimate this, we must gain knowledge of how often we expect to + * be called and how many distinct sets of parameters we are likely to be + * called with. If we expect a good cache hit ratio, then we can set our + * costs to account for that hit ratio, plus a little bit of cost for the + * caching itself. Caching will not work out well if we expect to be called + * with too many distinct parameter values. The worst-case here is that we + * never see any parameter value twice, in which case we'd never get a cache + * hit and caching would be a complete waste of effort. 
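+ *
+ * As a rough sketch of the model used below: with 'calls' expected rescans,
+ * 'ndistinct' distinct parameter sets and room for 'est_cache_entries'
+ * cache entries, the expected hit ratio is
+ *     ((calls - ndistinct) / calls) *
+ *         (est_cache_entries / Max(ndistinct, est_cache_entries))
+ * and only the remaining (1.0 - hit_ratio) fraction of the subpath's cost
+ * is charged per rescan, plus small per-lookup, eviction and caching costs.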
+ */ +void cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath, + Cost *rescan_startup_cost, Cost *rescan_total_cost) +{ + ListCell *lc; + Cost input_startup_cost = mpath->subpath->startup_cost; + Cost input_total_cost = mpath->subpath->total_cost; + double tuples = mpath->subpath->rows; + double calls = mpath->calls; + int width = mpath->subpath->pathtarget->width; + + double hash_mem_bytes; + double est_entry_bytes; + double est_cache_entries; + double ndistinct; + double evict_ratio; + double hit_ratio; + Cost startup_cost; + Cost total_cost; + + /* available cache space */ + hash_mem_bytes = get_hash_memory_limit(); + + /* + * Set the number of bytes each cache entry should consume in the cache. + * To provide us with better estimations on how many cache entries we can + * store at once, we make a call to the executor here to ask it what + * memory overheads there are for a single cache entry. + */ + est_entry_bytes = relation_byte_size(tuples, width, false); // TODO cyw + + /* include the estimated width for the cache keys */ + foreach(lc, mpath->param_exprs) + est_entry_bytes += get_expr_width(root, (Node *) lfirst(lc)); + + /* estimate on the upper limit of cache entries we can hold at once */ + est_cache_entries = floor(hash_mem_bytes / est_entry_bytes); + + /* estimate on the distinct number of parameter values */ + ndistinct = estimate_num_groups(root, mpath->param_exprs, calls, 1); + + /* + * When the estimation fell back on using a default value, it's a bit too + * risky to assume that it's ok to use a Memoize node. The use of a + * default could cause us to use a Memoize node when it's really + * inappropriate to do so. If we see that this has been done, then we'll + * assume that every call will have unique parameters, which will almost + * certainly mean a MemoizePath will never survive add_path(). + */ + /* + if ((estinfo.flags & SELFLAG_USED_DEFAULT) != 0) + ndistinct = calls; + */ + + /* + * Since we've already estimated the maximum number of entries we can + * store at once and know the estimated number of distinct values we'll be + * called with, we'll take this opportunity to set the path's est_entries. + * This will ultimately determine the hash table size that the executor + * will use. If we leave this at zero, the executor will just choose the + * size itself. Really this is not the right place to do this, but it's + * convenient since everything is already calculated. + */ + mpath->est_entries = Min(Min(ndistinct, est_cache_entries), + PG_UINT32_MAX); + + /* + * When the number of distinct parameter values is above the amount we can + * store in the cache, then we'll have to evict some entries from the + * cache. This is not free. Here we estimate how often we'll incur the + * cost of that eviction. + */ + evict_ratio = 1.0 - Min(est_cache_entries, ndistinct) / ndistinct; + + /* + * In order to estimate how costly a single scan will be, we need to + * attempt to estimate what the cache hit ratio will be. To do that we + * must look at how many scans are estimated in total for this node and + * how many of those scans we expect to get a cache hit. + */ + hit_ratio = ((calls - ndistinct) / calls) * + (est_cache_entries / Max(ndistinct, est_cache_entries)); + + Assert(hit_ratio >= 0 && hit_ratio <= 1.0); + + /* + * Set the total_cost accounting for the expected cache hit ratio. We + * also add on a cpu_operator_cost to account for a cache lookup. This + * will happen regardless of whether it's a cache hit or not. 
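+     * For example, with an estimated hit_ratio of 0.9 only a tenth of the
+     * subpath's total cost is charged here, before the lookup, eviction and
+     * caching charges added below.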
+ */ + total_cost = input_total_cost * (1.0 - hit_ratio) + u_sess->attr.attr_sql.cpu_operator_cost; + + /* Now adjust the total cost to account for cache evictions */ + + /* Charge a cpu_tuple_cost for evicting the actual cache entry */ + total_cost += u_sess->attr.attr_sql.cpu_tuple_cost * evict_ratio; + + /* + * Charge a 10th of cpu_operator_cost to evict every tuple in that entry. + * The per-tuple eviction is really just a pfree, so charging a whole + * cpu_operator_cost seems a little excessive. + */ + total_cost += u_sess->attr.attr_sql.cpu_operator_cost / 10.0 * evict_ratio * tuples; + + /* + * Now adjust for storing things in the cache, since that's not free + * either. Everything must go in the cache. We don't proportion this + * over any ratio, just apply it once for the scan. We charge a + * cpu_tuple_cost for the creation of the cache entry and also a + * cpu_operator_cost for each tuple we expect to cache. + */ + total_cost += u_sess->attr.attr_sql.cpu_tuple_cost + u_sess->attr.attr_sql.cpu_operator_cost * tuples; + + /* + * Getting the first row must be also be proportioned according to the + * expected cache hit ratio. + */ + startup_cost = input_startup_cost * (1.0 - hit_ratio); + + /* + * Additionally we charge a cpu_tuple_cost to account for cache lookups, + * which we'll do regardless of whether it was a cache hit or not. + */ + startup_cost += u_sess->attr.attr_sql.cpu_tuple_cost; + + *rescan_startup_cost = startup_cost; + *rescan_total_cost = total_cost; +} /* * cost_material * Determines and returns the cost of materializing a relation, including @@ -3184,6 +3371,7 @@ void final_cost_nestloop(PlannerInfo* root, NestPath* path, JoinCostWorkspace* w if (path->innerjoinpath->pathtype == T_Material) copy_mem_info(&((MaterialPath*)path->innerjoinpath)->mem_info, &workspace->inner_mem_info); + ereport(DEBUG2, (errmodule(MOD_OPT_JOIN), errmsg("final cost nest loop: stream_cost: %lf, startup_cost: %lf, total_cost: %lf", @@ -4695,6 +4883,11 @@ void cost_rescan(PlannerInfo* root, Path* path, Cost* rescan_startup_cost, /* ou root->glob->vectorized, dop); } break; + case T_Memoize: { + /* All the hard work is done by cost_memoize_rescan */ + cost_memoize_rescan(root, (MemoizePath *) path, + rescan_startup_cost, rescan_total_cost); + } break; default: *rescan_startup_cost = path->startup_cost; *rescan_total_cost = path->total_cost; diff --git a/src/gausskernel/optimizer/path/joinpath.cpp b/src/gausskernel/optimizer/path/joinpath.cpp index b951ae1b45ee7d3d0aa705946365f93c97ae9812..96092f82ec47312f9036d9284ff69cb47468f169 100755 --- a/src/gausskernel/optimizer/path/joinpath.cpp +++ b/src/gausskernel/optimizer/path/joinpath.cpp @@ -40,6 +40,7 @@ #include "utils/rel_gs.h" #include "utils/selfuncs.h" #include "utils/syscache.h" +#include "utils/typcache.h" #include "optimizer/streamplan.h" #include "pgxc/pgxc.h" #include "parser/parsetree.h" @@ -565,6 +566,281 @@ static void TryNestLoopPathSingle(PlannerInfo* root, RelOptInfo* joinrel, JoinTy return; } +/* + * paraminfo_get_equal_hashops + * Determine if the clauses in param_info and innerrel's lateral_vars + * can be hashed. + * Returns true if hashing is possible, otherwise false. + * + * Additionally, on success we collect the outer expressions and the + * appropriate equality operators for each hashable parameter to innerrel. + * These are returned in parallel lists in *param_exprs and *operators. + * We also set *binary_mode to indicate whether strict binary matching is + * required. 
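+ *
+ * For example (illustrative only), given a parameterized clause such as
+ * inner.y = outer.x, the outer-side expression outer.x is collected into
+ * *param_exprs and the hash equality operator for its type into *operators.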
+ */ +static bool +paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info, + RelOptInfo *outerrel, RelOptInfo *innerrel, + List **param_exprs, List **operators, + bool *binary_mode) + +{ + ListCell *lc; + + *param_exprs = NIL; + *operators = NIL; + *binary_mode = false; + + /* Add join clauses from param_info to the hash key */ + if (param_info != NULL) { + List *clauses = param_info->ppi_clauses; + + foreach(lc, clauses) { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + OpExpr *opexpr; + Node *expr; + Oid hasheqoperator; + + opexpr = (OpExpr *) rinfo->clause; + + /* + * Bail if the rinfo is not compatible. We need a join OpExpr + * with 2 args. + */ + if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 || + !clause_sides_match_join(rinfo, outerrel, innerrel)) { + list_free(*operators); + list_free(*param_exprs); + return false; + } + + if (rinfo->outer_is_left) { + expr = (Node *) linitial(opexpr->args); + hasheqoperator = rinfo->left_hasheqoperator; + } else { + expr = (Node *) lsecond(opexpr->args); + hasheqoperator = rinfo->right_hasheqoperator; + } + + /* can't do memoize if we can't hash the outer type */ + if (!OidIsValid(hasheqoperator)) { + list_free(*operators); + list_free(*param_exprs); + return false; + } + + /* + * 'expr' may already exist as a parameter from a previous item in + * ppi_clauses. No need to include it again, however we'd better + * ensure we do switch into binary mode if required. See below. + */ + if (!list_member(*param_exprs, expr)) { + *operators = lappend_oid(*operators, hasheqoperator); + *param_exprs = lappend(*param_exprs, expr); + } + + /* + * When the join operator is not hashable then it's possible that + * the operator will be able to distinguish something that the + * hash equality operator could not. For example with floating + * point types -0.0 and +0.0 are classed as equal by the hash + * function and equality function, but some other operator may be + * able to tell those values apart. This means that we must put + * memoize into binary comparison mode so that it does bit-by-bit + * comparisons rather than a "logical" comparison as it would + * using the hash equality operator. + */ + if (!OidIsValid(rinfo->hashjoinoperator)) + *binary_mode = true; + } + } + + /* Now add any lateral vars to the cache key too */ + foreach(lc, innerrel->lateral_vars) { + Node *expr = (Node *) lfirst(lc); + TypeCacheEntry *typentry; + + /* Reject if there are any volatile functions in lateral vars */ + if (contain_volatile_functions(expr)) { + list_free(*operators); + list_free(*param_exprs); + return false; + } + + typentry = lookup_type_cache(exprType(expr), + TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR); + + /* can't use memoize without a valid hash proc and equals operator */ + if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr)) { + list_free(*operators); + list_free(*param_exprs); + return false; + } + + /* + * 'expr' may already exist as a parameter from the ppi_clauses. No + * need to include it again, however we'd better ensure we do switch + * into binary mode. + */ + if (!list_member(*param_exprs, expr)) { + *operators = lappend_oid(*operators, typentry->eq_opr); + *param_exprs = lappend(*param_exprs, expr); + } + + /* + * We must go into binary mode as we don't have too much of an idea of + * how these lateral Vars are being used. See comment above when we + * set *binary_mode for the non-lateral Var case. 
This could be + * relaxed a bit if we had the RestrictInfos and knew the operators + * being used, however for cases like Vars that are arguments to + * functions we must operate in binary mode as we don't have + * visibility into what the function is doing with the Vars. + */ + *binary_mode = true; + } + + /* We're okay to use memoize */ + return true; +} +/* + * get_memoize_path + * If possible, make and return a Memoize path atop of 'inner_path'. + * Otherwise return NULL. + */ +static Path * +get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel, + RelOptInfo *outerrel, Path *inner_path, + Path *outer_path, JoinType jointype, + JoinPathExtraData *extra) +{ + List *param_exprs; + List *hash_operators; + ListCell *lc; + bool binary_mode; + + /* Obviously not if it's disabled */ + if (!u_sess->attr.attr_sql.enable_memoize) + return NULL; + + /* + * We can safely not bother with all this unless we expect to perform more + * than one inner scan. The first scan is always going to be a cache + * miss. This would likely fail later anyway based on costs, so this is + * really just to save some wasted effort. + */ + if (outer_path->parent->rows < 2) + return NULL; + + /* + * We can only have a memoize node when there's some kind of cache key, + * either parameterized path clauses or lateral Vars. No cache key sounds + * more like something a Materialize node might be more useful for. + */ + if ((inner_path->param_info == NULL || + inner_path->param_info->ppi_clauses == NIL) && + innerrel->lateral_vars == NIL) + return NULL; + + /* + * Currently we don't do this for SEMI and ANTI joins unless they're + * marked as inner_unique. This is because nested loop SEMI/ANTI joins + * don't scan the inner node to completion, which will mean memoize cannot + * mark the cache entry as complete. + * + * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique + * = true. Should we? See add_paths_to_joinrel() + */ + if (!extra->inner_unique && (jointype == JOIN_SEMI || + jointype == JOIN_ANTI)) + return NULL; + + /* + * Memoize normally marks cache entries as complete when it runs out of + * tuples to read from its subplan. However, with unique joins, Nested + * Loop will skip to the next outer tuple after finding the first matching + * inner tuple. This means that we may not read the inner side of the + * join to completion which leaves no opportunity to mark the cache entry + * as complete. To work around that, when the join is unique we + * automatically mark cache entries as complete after fetching the first + * tuple. This works when the entire join condition is parameterized. + * Otherwise, when the parameterization is only a subset of the join + * condition, we can't be sure which part of it causes the join to be + * unique. This means there are no guarantees that only 1 tuple will be + * read. We cannot mark the cache entry as complete after reading the + * first tuple without that guarantee. This means the scope of Memoize + * node's usefulness is limited to only outer rows that have no join + * partner as this is the only case where Nested Loop would exhaust the + * inner scan of a unique join. Since the scope is limited to that, we + * just don't bother making a memoize path in this case. + * + * Lateral vars needn't be considered here as they're not considered when + * determining if the join is unique. + * + * XXX this could be enabled if the remaining join quals were made part of + * the inner scan's filter instead of the join filter. Maybe it's worth + * considering doing that? 
+ */ + /* + if (extra->inner_unique && + (inner_path->param_info == NULL || + bms_num_members(inner_path->param_info->ppi_serials) < + list_length(extra->restrictlist))) + return NULL; + */ + + /* + * We can't use a memoize node if there are volatile functions in the + * inner rel's target list or restrict list. A cache hit could reduce the + * number of calls to these functions. + */ + if (contain_volatile_functions((Node *) innerrel->reltarget)) + return NULL; + + foreach(lc, innerrel->baserestrictinfo) { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + if (contain_volatile_functions((Node *) rinfo)) + return NULL; + } + + /* + * Also check the parameterized path restrictinfos for volatile functions. + * Indexed functions must be immutable so shouldn't have any volatile + * functions, however, with a lateral join the inner scan may not be an + * index scan. + */ + if (inner_path->param_info != NULL) { + foreach(lc, inner_path->param_info->ppi_clauses) { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + if (contain_volatile_functions((Node *) rinfo)) + return NULL; + } + } + + /* Check if we have hash ops for each parameter to the path */ + if (paraminfo_get_equal_hashops(root, + inner_path->param_info, + /* + outerrel->top_parent ? + outerrel->top_parent : outerrel, + */ + outerrel, + innerrel, + ¶m_exprs, + &hash_operators, + &binary_mode)) { + return (Path *) create_memoize_path(root, + innerrel, + inner_path, + param_exprs, + hash_operators, + extra->inner_unique, + binary_mode, + outer_path->rows); + } + + return NULL; +} /* * try_nestloop_path @@ -1483,6 +1759,7 @@ static void match_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI foreach (llc2, all_paths) { Path* innerpath = (Path*)lfirst(llc2); + Path* mpath; try_nestloop_path(root, joinrel, @@ -1494,6 +1771,18 @@ static void match_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI innerpath, restrictlist, merge_pathkeys); + mpath = get_memoize_path(root,innerrel, outerrel, innerpath, outerpath, jointype, extra); + if (mpath != NULL) + try_nestloop_path(root, + joinrel, + jointype, + save_jointype, + extra, + param_source_rels, + outerpath, + mpath, + restrictlist, + merge_pathkeys); } list_free_ext(all_paths); diff --git a/src/gausskernel/optimizer/plan/createplan.cpp b/src/gausskernel/optimizer/plan/createplan.cpp index 5c63dc03d0a7deae729847c22c261345122b2cd7..6a8b6fa37ef6977e2e79fefbb280fdc315aee48f 100755 --- a/src/gausskernel/optimizer/plan/createplan.cpp +++ b/src/gausskernel/optimizer/plan/createplan.cpp @@ -96,6 +96,7 @@ static void adjust_scan_targetlist(ResultPath* best_path, Plan* subplan); static Plan* create_projection_plan(PlannerInfo* root, ProjectionPath* best_path); static ProjectSet* create_project_set_plan(PlannerInfo* root, ProjectSetPath* best_path); static Material* create_material_plan(PlannerInfo* root, MaterialPath* best_path); +static Memoize* create_memoize_plan(PlannerInfo *root, MemoizePath *best_path);//, int flags); static Plan* create_unique_plan(PlannerInfo* root, UniquePath* best_path); static SeqScan* create_seqscan_plan(PlannerInfo* root, Path* best_path, List* tlist, List* scan_clauses); static CStoreScan* create_cstorescan_plan(PlannerInfo* root, Path* best_path, List* tlist, List* scan_clauses); @@ -133,6 +134,9 @@ static void copy_path_costsize(Plan* dest, Path* src); static void copy_generic_path_info(Plan *dest, Path *src); static SeqScan* make_seqscan(List* qptlist, List* qpqual, Index scanrelid); static CStoreScan* make_cstorescan(List* qptlist, 
List* qpqual, Index scanrelid); +static Memoize * make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations, + List *param_exprs, bool singlerow, bool binary_mode, + uint32 est_entries, Bitmapset *keyparamids); #ifdef ENABLE_MULTIPLE_NODES static TsStoreScan* make_tsstorescan(List* qptlist, List* qpqual, Index scanrelid); #endif /* ENABLE_MULTIPLE_NODES */ @@ -401,6 +405,9 @@ static Plan* create_plan_recurse(PlannerInfo* root, Path* best_path) case T_ProjectSet: plan = (Plan *) create_project_set_plan(root, (ProjectSetPath *) best_path); break; + case T_Memoize: + plan = (Plan*)create_memoize_plan(root, (MemoizePath*)best_path); + break; case T_Material: plan = (Plan*)create_material_plan(root, (MaterialPath*)best_path); break; @@ -1677,7 +1684,57 @@ static Material* create_material_plan(PlannerInfo* root, MaterialPath* best_path return plan; } +/* + * create_memoize_plan + * Create a Memoize plan for 'best_path' and (recursively) plans for its + * subpaths. + * + * Returns a Plan node. + */ +static Memoize * +create_memoize_plan(PlannerInfo *root, MemoizePath *best_path)//;, int flags) +{ + Memoize *plan; + Bitmapset *keyparamids; + Plan *subplan; + Oid *operators; + Oid *collations; + List *param_exprs = NIL; + ListCell *lc; + ListCell *lc2; + int nkeys; + int i; + + subplan = create_plan_recurse(root, best_path->subpath); + + param_exprs = (List *) replace_nestloop_params(root, (Node *) + best_path->param_exprs); + + nkeys = list_length(param_exprs); + Assert(nkeys > 0); + operators = (Oid*)palloc(nkeys * sizeof(Oid)); + collations = (Oid*)palloc(nkeys * sizeof(Oid)); + + i = 0; + forboth(lc, param_exprs, lc2, best_path->hash_operators) { + Expr *param_expr = (Expr *) lfirst(lc); + Oid opno = lfirst_oid(lc2); + + operators[i] = opno; + collations[i] = exprCollation((Node *) param_expr); + i++; + } + + keyparamids = pull_paramids((Expr *) param_exprs); + plan = make_memoize(subplan, operators, collations, param_exprs, + best_path->singlerow, best_path->binary_mode, + best_path->est_entries, keyparamids); + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + return plan; +} /* * create_unique_plan * Create a Unique plan for 'best_path' and (recursively) plans @@ -10879,10 +10936,32 @@ bool is_projection_capable_path(Path *path) } return true; } +static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations, + List *param_exprs, bool singlerow, bool binary_mode, + uint32 est_entries, Bitmapset *keyparamids) +{ + Memoize *node = makeNode(Memoize); + Plan *plan = &node->plan; + + plan->targetlist = lefttree->targetlist; + plan->qual = NIL; + plan->lefttree = lefttree; + plan->righttree = NULL; + + node->numKeys = list_length(param_exprs); + node->hashOperators = hashoperators; + node->collations = collations; + node->param_exprs = param_exprs; + node->singlerow = singlerow; + node->binary_mode = binary_mode; + node->est_entries = est_entries; + node->keyparamids = keyparamids; + return node; +} #ifdef USE_SPQ List* spq_make_null_eq_clause(List* joinqual, List** otherqual, List* nullinfo) { return make_null_eq_clause(joinqual, otherqual, nullinfo); } -#endif \ No newline at end of file +#endif diff --git a/src/gausskernel/optimizer/plan/initsplan.cpp b/src/gausskernel/optimizer/plan/initsplan.cpp index d99fd5b8de9f95f2db3c61ade05f91ed409c5216..0bfeeae27c3e27c12fb9437ab4b52e990eb5ee8d 100644 --- a/src/gausskernel/optimizer/plan/initsplan.cpp +++ b/src/gausskernel/optimizer/plan/initsplan.cpp @@ -33,6 +33,7 @@ #include "optimizer/var.h" #include 
"parser/parsetree.h" #include "rewrite/rewriteManip.h" +#include "utils/typcache.h" #include "utils/lsyscache.h" #include "catalog/index.h" #include "catalog/pg_amop.h" @@ -52,6 +53,7 @@ static bool check_outerjoin_delay(PlannerInfo* root, Relids* relids_p, Relids* n static bool check_equivalence_delay(PlannerInfo* root, RestrictInfo* restrictinfo); static bool check_redundant_nullability_qual(PlannerInfo* root, Node* clause); static void check_mergejoinable(RestrictInfo* restrictinfo); +static void check_memoizable(RestrictInfo *restrictinfo); /***************************************************************************** * @@ -1904,6 +1906,9 @@ void distribute_restrictinfo_to_rels(PlannerInfo* root, RestrictInfo* restrictin */ check_hashjoinable(restrictinfo); + + check_memoizable(restrictinfo); + /* * Add clause to the join lists of all the relevant relations. */ @@ -2082,6 +2087,7 @@ RestrictInfo* build_implied_join_equality( /* Set mergejoinability/hashjoinability flags */ check_mergejoinable(restrictinfo); check_hashjoinable(restrictinfo); + check_memoizable(restrictinfo); return restrictinfo; } @@ -2243,3 +2249,45 @@ void check_plan_correlation(PlannerInfo* root, Node* expr) list_free_ext(param_expr); } } +/* + * check_memoizable + * If the restrictinfo's clause is suitable to be used for a Memoize node, + * set the left_hasheqoperator and right_hasheqoperator to the hash equality + * operator that will be needed during caching. + */ +static void +check_memoizable(RestrictInfo *restrictinfo) +{ + TypeCacheEntry *typentry; + Expr *clause = restrictinfo->clause; + Oid lefttype; + Oid righttype; + + if (restrictinfo->pseudoconstant) + return; + if (!is_opclause(clause)) + return; + if (list_length(((OpExpr *) clause)->args) != 2) + return; + + lefttype = exprType((Node*)linitial(((OpExpr *) clause)->args)); + + typentry = lookup_type_cache(lefttype, TYPECACHE_HASH_PROC | + TYPECACHE_EQ_OPR); + + if (OidIsValid(typentry->hash_proc) && OidIsValid(typentry->eq_opr)) + restrictinfo->left_hasheqoperator = typentry->eq_opr; + + righttype = exprType((Node*)lsecond(((OpExpr *) clause)->args)); + + /* + * Lookup the right type, unless it's the same as the left type, in which + * case typentry is already pointing to the required TypeCacheEntry. 
+ */ + if (lefttype != righttype) + typentry = lookup_type_cache(righttype, TYPECACHE_HASH_PROC | + TYPECACHE_EQ_OPR); + + if (OidIsValid(typentry->hash_proc) && OidIsValid(typentry->eq_opr)) + restrictinfo->right_hasheqoperator = typentry->eq_opr; +} diff --git a/src/gausskernel/optimizer/plan/setrefs.cpp b/src/gausskernel/optimizer/plan/setrefs.cpp index ec5db1b956d047e662a47f96b9188e14efb035b6..f55e7cc68fe3b489cc6a43172938ca62a538201a 100644 --- a/src/gausskernel/optimizer/plan/setrefs.cpp +++ b/src/gausskernel/optimizer/plan/setrefs.cpp @@ -558,6 +558,7 @@ static Plan* set_plan_refs(PlannerInfo* root, Plan* plan, int rtoffset) case T_Hash: case T_Material: + case T_Memoize: case T_VecMaterial: case T_Sort: case T_SortGroup: @@ -2812,4 +2813,4 @@ void spq_extract_plan_dependencies(PlannerInfo *root, Plan *plan) (void)spq_extract_plan_dependencies_walker((Node *)plan, &context); } -#endif \ No newline at end of file +#endif diff --git a/src/gausskernel/optimizer/plan/subselect.cpp b/src/gausskernel/optimizer/plan/subselect.cpp index 75756c6c12330242afcf1c3d5d62f1c7cdd52d69..694f8f2907768d0eb410644cbbcfc677edce8734 100644 --- a/src/gausskernel/optimizer/plan/subselect.cpp +++ b/src/gausskernel/optimizer/plan/subselect.cpp @@ -3327,6 +3327,7 @@ static Bitmapset* finalize_plan(PlannerInfo* root, Plan* plan, Bitmapset* valid_ case T_ProjectSet: case T_Hash: case T_Material: + case T_Memoize: case T_Sort: case T_SortGroup: case T_Unique: diff --git a/src/gausskernel/optimizer/util/clauses.cpp b/src/gausskernel/optimizer/util/clauses.cpp index f910103eca806f8bdad266a8c8eb0679767f7135..28f24fb606c689f9c85c0c0428e8cbe5f7058f8d 100644 --- a/src/gausskernel/optimizer/util/clauses.cpp +++ b/src/gausskernel/optimizer/util/clauses.cpp @@ -139,6 +139,8 @@ static bool is_operator_pushdown(Oid opno); static bool contain_var_unsubstitutable_functions_walker(Node* node, void* context); static bool is_accurate_estimatable_func(Oid funcId); +static bool pull_paramids_walker(Node *node, Bitmapset **context); + /***************************************************************************** * OPERATOR clause functions *****************************************************************************/ @@ -1194,6 +1196,63 @@ static bool contain_specified_functions_walker(Node* node, check_function_contex } else if (IsA(node, UserSetElem)) { /* UserSetElem is volatile */ return context->checktype == CONTAIN_VOLATILE_FUNTION; + } else if (IsA(node, PathTarget)) { + PathTarget *target = (PathTarget *) node; + + /* + * We also do caching for PathTarget the same as we do above for + * RestrictInfos. + */ + /* + if (target->has_volatile_expr == VOLATILITY_NOVOLATILE) + return false; + else if (target->has_volatile_expr == VOLATILITY_VOLATILE) + return true; + else + { + */ + bool hasvolatile; + + hasvolatile = contain_volatile_functions((Node *) target->exprs); + + /* + if (hasvolatile) + target->has_volatile_expr = VOLATILITY_VOLATILE; + else + target->has_volatile_expr = VOLATILITY_NOVOLATILE; + */ + + return hasvolatile; + } else if (IsA(node, RestrictInfo)) + { + RestrictInfo *rinfo = (RestrictInfo *) node; + + /* + * For RestrictInfo, check if we've checked the volatility of it + * before. If so, we can just use the cached value and not bother + * checking it again. Otherwise, check it and cache if whether we + * found any volatile functions. 
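+         * (The cached-flag fast path is commented out in this port, so the
+         * clause's volatility is recomputed on every call.)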
+ */ + /* + if (rinfo->has_volatile == VOLATILITY_NOVOLATILE) + return false; + else if (rinfo->has_volatile == VOLATILITY_VOLATILE) + return true; + else + { + */ + bool hasvolatile; + + hasvolatile = contain_volatile_functions((Node *) rinfo->clause); + + /* + if (hasvolatile) + rinfo->has_volatile = VOLATILITY_VOLATILE; + else + rinfo->has_volatile = VOLATILITY_NOVOLATILE; + + */ + return hasvolatile; } return expression_tree_walker(node, (bool (*)())contain_specified_functions_walker, context); } @@ -5562,7 +5621,34 @@ List *get_quals_lists(Node *jtnode) return quallist; } +/* + * pull_paramids + * Returns a Bitmapset containing the paramids of all Params in 'expr'. + */ +Bitmapset * +pull_paramids(Expr *expr) +{ + Bitmapset *result = NULL; + + (void) pull_paramids_walker((Node *) expr, &result); + return result; +} + +static bool +pull_paramids_walker(Node *node, Bitmapset **context) +{ + if (node == NULL) + return false; + if (IsA(node, Param)) { + Param *param = (Param *) node; + + *context = bms_add_member(*context, param->paramid); + return false; + } + return expression_tree_walker(node,(bool(*)()) pull_paramids_walker, + (void *) context); +} #ifdef USE_SPQ /* diff --git a/src/gausskernel/optimizer/util/optcommon.cpp b/src/gausskernel/optimizer/util/optcommon.cpp index f5d96f95bb2d770e6796d57ed9a0e8777eefa267..0c3000c575e30fa1748e6d602f800fec9d3de812 100755 --- a/src/gausskernel/optimizer/util/optcommon.cpp +++ b/src/gausskernel/optimizer/util/optcommon.cpp @@ -361,6 +361,9 @@ void GetPlanNodePlainText( else *pname = *pt_operation = *sname = "Extensible Plan"; break; + case T_Memoize: + *pname = *sname = *pt_operation = "Memoize"; + break; case T_Material: *pname = *sname = *pt_operation = "Materialize"; break; diff --git a/src/gausskernel/optimizer/util/pathnode.cpp b/src/gausskernel/optimizer/util/pathnode.cpp index 8a5a43f98b293de87c2909489a1ae868e3cb0dc0..9208063d6e903453c8d469bcc0472f420e24eaee 100755 --- a/src/gausskernel/optimizer/util/pathnode.cpp +++ b/src/gausskernel/optimizer/util/pathnode.cpp @@ -2996,6 +2996,55 @@ MaterialPath* create_material_path(Path* subpath, bool materialize_all) return pathnode; } +/* + * create_memoize_path + * Creates a path corresponding to a Memoize plan, returning the pathnode. + */ +MemoizePath *create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, + List *param_exprs, List *hash_operators, + bool singlerow, bool binary_mode, double calls) +{ + MemoizePath *pathnode = makeNode(MemoizePath); + + Assert(subpath->parent == rel); + + pathnode->path.pathtype = T_Memoize; + pathnode->path.parent = rel; + pathnode->path.pathtarget = rel->reltarget; + pathnode->path.param_info = subpath->param_info; + /* + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe; + pathnode->path.parallel_workers = subpath->parallel_workers; + */ + pathnode->path.pathkeys = subpath->pathkeys; + + pathnode->subpath = subpath; + pathnode->hash_operators = hash_operators; + pathnode->param_exprs = param_exprs; + pathnode->singlerow = singlerow; + pathnode->binary_mode = binary_mode; + pathnode->calls = calls; + + /* + * For now we set est_entries to 0. cost_memoize_rescan() does all the + * hard work to determine how many cache entries there are likely to be, + * so it seems best to leave it up to that function to fill this field in. + * If left at 0, the executor will make a guess at a good value. 
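+     * (build_hash_table() in nodeMemoize.cpp currently falls back to a size
+     * of 1024 when est_entries is left at 0.)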
+ */ + pathnode->est_entries = 0; + + /* + * Add a small additional charge for caching the first entry. All the + * harder calculations for rescans are performed in cost_memoize_rescan(). + */ + pathnode->path.startup_cost = subpath->startup_cost + u_sess->attr.attr_sql.cpu_tuple_cost; + pathnode->path.total_cost = subpath->total_cost + u_sess->attr.attr_sql.cpu_tuple_cost; + pathnode->path.rows = subpath->rows; + + return pathnode; +} /* * create_unique_path diff --git a/src/gausskernel/runtime/executor/Makefile b/src/gausskernel/runtime/executor/Makefile index 474227e2bd8f5e2c50ac2bc4c787c133f3211cd3..84aef60bdb868f9995475124ee22eb73d72a71cd 100644 --- a/src/gausskernel/runtime/executor/Makefile +++ b/src/gausskernel/runtime/executor/Makefile @@ -40,7 +40,7 @@ OBJS = execAmi.o execCurrent.o execGrouping.o execJunk.o execMain.o \ nodeBitmapAnd.o nodeBitmapOr.o \ nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeHash.o \ nodeHashjoin.o nodeIndexscan.o nodeIndexonlyscan.o \ - nodeLimit.o nodeLockRows.o \ + nodeLimit.o nodeLockRows.o nodeMemoize.o \ nodeMaterial.o nodeMergeAppend.o nodeMergejoin.o nodeModifyTable.o \ nodeNestloop.o nodeFunctionscan.o nodeRecursiveunion.o nodeResult.o \ nodeSamplescan.o nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \ diff --git a/src/gausskernel/runtime/executor/execAmi.cpp b/src/gausskernel/runtime/executor/execAmi.cpp index e860c8c84077c1809bfaf8718495628bbeeee9a8..dbc8b8a24235ea33c36d89bf028c69795792637e 100755 --- a/src/gausskernel/runtime/executor/execAmi.cpp +++ b/src/gausskernel/runtime/executor/execAmi.cpp @@ -36,6 +36,7 @@ #include "executor/node/nodeLimit.h" #include "executor/node/nodeLockRows.h" #include "executor/node/nodeMaterial.h" +#include "executor/node/nodeMemoize.h" #include "executor/node/nodeMergeAppend.h" #include "executor/node/nodeMergejoin.h" #include "executor/node/nodeModifyTable.h" @@ -242,6 +243,10 @@ void ExecReScanByType(PlanState* node) ExecReScanHashJoin((HashJoinState*)node); break; + case T_MemoizeState: + ExecReScanMemoize((MemoizeState*)(node)); + break; + case T_MaterialState: ExecReScanMaterial((MaterialState*)node); break; @@ -622,6 +627,7 @@ bool ExecSupportsBackwardScan(Plan* node) #endif case T_Material: + case T_Memoize: case T_Sort: /* these don't evaluate tlist */ return true; @@ -700,6 +706,7 @@ bool ExecMaterializesOutput(NodeTag plantype) { switch (plantype) { case T_Material: + case T_Memoize: case T_FunctionScan: case T_CteScan: case T_WorkTableScan: diff --git a/src/gausskernel/runtime/executor/execProcnode.cpp b/src/gausskernel/runtime/executor/execProcnode.cpp index fc5be0b57c547a188e0e91d1cd2d234b2ac392c6..1b6ebef2c6090ca5e110db61cc5a99883c3c583d 100755 --- a/src/gausskernel/runtime/executor/execProcnode.cpp +++ b/src/gausskernel/runtime/executor/execProcnode.cpp @@ -93,6 +93,7 @@ #include "executor/node/nodeLimit.h" #include "executor/node/nodeLockRows.h" #include "executor/node/nodeMaterial.h" +#include "executor/node/nodeMemoize.h" #include "executor/node/nodeMergeAppend.h" #include "executor/node/nodeMergejoin.h" #include "executor/node/nodeModifyTable.h" @@ -356,6 +357,8 @@ PlanState* ExecInitNodeByType(Plan* node, EState* estate, int eflags) return (PlanState*)ExecInitHashJoin((HashJoin*)node, estate, eflags); case T_Material: return (PlanState*)ExecInitMaterial((Material*)node, estate, eflags); + case T_Memoize: + return (PlanState*)ExecInitMemoize((Memoize*)node, estate, eflags); case T_Sort: return (PlanState*)ExecInitSort((Sort*)node, estate, eflags); case T_SortGroup: @@ -1225,6 +1228,10 @@ 
static void ExecEndNodeByType(PlanState* node) ExecEndMaterial((MaterialState*)node); break; + case T_MemoizeState: + ExecEndMemoize((MemoizeState*)node); + break; + case T_SortState: ExecEndSort((SortState*)node); break; diff --git a/src/gausskernel/runtime/executor/execTuples.cpp b/src/gausskernel/runtime/executor/execTuples.cpp index 5534048c6e7426219a1df8468392a33f5c2549ed..8f530e7432171fc98b31bb5b6c332b164fd21a5b 100644 --- a/src/gausskernel/runtime/executor/execTuples.cpp +++ b/src/gausskernel/runtime/executor/execTuples.cpp @@ -816,7 +816,37 @@ static TupleDesc ExecTypeFromTLInternal(List* target_list, bool has_oid, bool sk return type_info; } +/* + * ExecTypeFromExprList - build a tuple descriptor from a list of Exprs + * + * This is roughly like ExecTypeFromTL, but we work from bare expressions + * not TargetEntrys. No names are attached to the tupledesc's columns. + */ +TupleDesc pg_ExecTypeFromExprList(List *exprList) +{ + TupleDesc typeInfo; + ListCell *lc; + int cur_resno = 1; + typeInfo = CreateTemplateTupleDesc(list_length(exprList), false); + + foreach(lc, exprList) { + Node *e = (Node*)lfirst(lc); + + TupleDescInitEntry(typeInfo, + cur_resno, + NULL, + exprType(e), + exprTypmod(e), + 0); + TupleDescInitEntryCollation(typeInfo, + cur_resno, + exprCollation(e)); + cur_resno++; + } + + return typeInfo; +} /* * ExecTypeFromExprList - build a tuple descriptor from a list of Exprs * diff --git a/src/gausskernel/runtime/executor/nodeHash.cpp b/src/gausskernel/runtime/executor/nodeHash.cpp index 13e73d97ba499e7c47810b7a733b4991f64501d2..28d7ac4cbc995ebcbf982fa4669719321e6bc250 100644 --- a/src/gausskernel/runtime/executor/nodeHash.cpp +++ b/src/gausskernel/runtime/executor/nodeHash.cpp @@ -2318,3 +2318,27 @@ static void* dense_alloc(HashJoinTable hashtable, Size size) /* return pointer to the start of the tuple memory */ return ptr; } +/* + * Calculate the limit on how much memory can be used by Hash and similar + * plan types. This is work_mem times hash_mem_multiplier, and is + * expressed in bytes. + * + * Exported for use by the planner, as well as other hash-like executor + * nodes. This is a rather random place for this, but there is no better + * place. + */ +double hash_mem_multiplier = 2.0; +size_t +get_hash_memory_limit(void) +{ + double mem_limit; + + /* Do initial calculation in double arithmetic */ + mem_limit = (double) u_sess->attr.attr_memory.work_mem * hash_mem_multiplier * 1024.0; + + /* Clamp in case it doesn't fit in size_t */ + mem_limit = Min(mem_limit, (double) SIZE_MAX); + + return (size_t) mem_limit; +} + diff --git a/src/gausskernel/runtime/executor/nodeMemoize.cpp b/src/gausskernel/runtime/executor/nodeMemoize.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9929e8f8fd328c6554bade522bae48d776851d44 --- /dev/null +++ b/src/gausskernel/runtime/executor/nodeMemoize.cpp @@ -0,0 +1,1277 @@ +/*------------------------------------------------------------------------- + * + * nodeMemoize.c + * Routines to handle caching of results from parameterized nodes + * + * Portions Copyright (c) 2021-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/executor/nodeMemoize.c + * + * Memoize nodes are intended to sit above parameterized nodes in the plan + * tree in order to cache results from them. 
The intention here is that a + * repeat scan with a parameter value that has already been seen by the node + * can fetch tuples from the cache rather than having to re-scan the inner + * node all over again. The query planner may choose to make use of one of + * these when it thinks rescans for previously seen values are likely enough + * to warrant adding the additional node. + * + * The method of cache we use is a hash table. When the cache fills, we never + * spill tuples to disk, instead, we choose to evict the least recently used + * cache entry from the cache. We remember the least recently used entry by + * always pushing new entries and entries we look for onto the tail of a + * doubly linked list. This means that older items always bubble to the top + * of this LRU list. + * + * Sometimes our callers won't run their scans to completion. For example a + * semi-join only needs to run until it finds a matching tuple, and once it + * does, the join operator skips to the next outer tuple and does not execute + * the inner side again on that scan. Because of this, we must keep track of + * when a cache entry is complete, and by default, we know it is when we run + * out of tuples to read during the scan. However, there are cases where we + * can mark the cache entry as complete without exhausting the scan of all + * tuples. One case is unique joins, where the join operator knows that there + * will only be at most one match for any given outer tuple. In order to + * support such cases we allow the "singlerow" option to be set for the cache. + * This option marks the cache entry as complete after we read the first tuple + * from the subnode. + * + * It's possible when we're filling the cache for a given set of parameters + * that we're unable to free enough memory to store any more tuples. If this + * happens then we'll have already evicted all other cache entries. When + * caching another tuple would cause us to exceed our memory budget, we must + * free the entry that we're currently populating and move the state machine + * into MEMO_CACHE_BYPASS_MODE. This means that we'll not attempt to cache + * any further tuples for this particular scan. We don't have the memory for + * it. The state machine will be reset again on the next rescan. If the + * memory requirements to cache the next parameter's tuples are less + * demanding, then that may allow us to start putting useful entries back into + * the cache again. 
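+ *
+ * Roughly, each scan starts in MEMO_CACHE_LOOKUP; a cache hit moves to
+ * MEMO_CACHE_FETCH_NEXT_TUPLE, a miss to MEMO_FILLING_CACHE, and running out
+ * of cache memory for the current entry to MEMO_CACHE_BYPASS_MODE.  Each of
+ * these ends in MEMO_END_OF_SCAN, which the next rescan resets.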
+ * + * + * INTERFACE ROUTINES + * ExecMemoize - lookup cache, exec subplan when not found + * ExecInitMemoize - initialize node and subnodes + * ExecEndMemoize - shutdown node and subnodes + * ExecReScanMemoize - rescan the memoize node + * + * ExecMemoizeEstimate estimates DSM space needed for parallel plan + * ExecMemoizeInitializeDSM initialize DSM for parallel plan + * ExecMemoizeInitializeWorker attach to DSM info in parallel worker + * ExecMemoizeRetrieveInstrumentation get instrumentation from worker + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "executor/executor.h" +#include "executor/node/nodeMemoize.h" +#include "lib/ilist.h" +#include "miscadmin.h" +#include "utils/datum.h" +#include "utils/lsyscache.h" +#include "utils/hashfn.h" +#include "access/tableam.h" + +/* States of the ExecMemoize state machine */ +#define MEMO_CACHE_LOOKUP 1 /* Attempt to perform a cache lookup */ +#define MEMO_CACHE_FETCH_NEXT_TUPLE 2 /* Get another tuple from the cache */ +#define MEMO_FILLING_CACHE 3 /* Read outer node to fill cache */ +#define MEMO_CACHE_BYPASS_MODE 4 /* Bypass mode. Just read from our + * subplan without caching anything */ +#define MEMO_END_OF_SCAN 5 /* Ready for rescan */ + + +/* Helper macros for memory accounting */ +#define EMPTY_ENTRY_MEMORY_BYTES(e) (sizeof(MemoizeEntry) + \ + sizeof(MemoizeKey) + \ + (e)->key->params->t_len); +#define CACHE_TUPLE_BYTES(t) (sizeof(MemoizeTuple) + \ + (t)->mintuple->t_len) + + /* MemoizeTuple Stores an individually cached tuple */ +typedef struct MemoizeTuple +{ + MinimalTuple mintuple; /* Cached tuple */ + struct MemoizeTuple *next; /* The next tuple with the same parameter + * values or NULL if it's the last one */ +} MemoizeTuple; + +/* + * MemoizeKey + * The hash table key for cached entries plus the LRU list link + */ +typedef struct MemoizeKey +{ + MinimalTuple params; + dlist_node lru_node; /* Pointer to next/prev key in LRU list */ +} MemoizeKey; + +/* + * MemoizeEntry + * The data struct that the cache hash table stores + */ +typedef struct MemoizeEntry +{ + MemoizeKey *key; /* Hash key for hash table lookups */ + MemoizeTuple *tuplehead; /* Pointer to the first tuple or NULL if no + * tuples are cached for this entry */ + uint32 hash; /* Hash value (cached) */ + char status; /* Hash status */ + bool complete; /* Did we read the outer plan to completion? */ +} MemoizeEntry; + + +#define SH_PREFIX memoize +#define SH_ELEMENT_TYPE MemoizeEntry +#define SH_KEY_TYPE MemoizeKey * +#define SH_SCOPE static inline +#define SH_DECLARE +#include "lib/simplehash.h" + +static uint32 MemoizeHash_hash(struct memoize_hash *tb, + const MemoizeKey *key); +static bool MemoizeHash_equal(struct memoize_hash *tb, + const MemoizeKey *key1, + const MemoizeKey *key2); + +#define SH_PREFIX memoize +#define SH_ELEMENT_TYPE MemoizeEntry +#define SH_KEY_TYPE MemoizeKey * +#define SH_KEY key +#define SH_HASH_KEY(tb, key) MemoizeHash_hash(tb, key) +#define SH_EQUAL(tb, a, b) MemoizeHash_equal(tb, a, b) +#define SH_SCOPE static inline +#define SH_STORE_HASH +#define SH_GET_HASH(tb, a) a->hash +#define SH_DEFINE +#include "lib/simplehash.h" + +/* + * MemoizeHash_hash + * Hash function for simplehash hashtable. 'key' is unused here as we + * require that all table lookups first populate the MemoizeState's + * probeslot with the key values to be looked up. 
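+ *
+ * In binary mode the hash is built with datum_image_hash() over the raw
+ * datum bytes; otherwise the cache key types' hash functions are used.
+ * Per-key hashes are combined by rotate-and-xor and the result is finished
+ * with murmurhash32().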
+ */ +static uint32 +MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key) +{ + MemoizeState *mstate = (MemoizeState *) tb->private_data; + ExprContext *econtext = mstate->ss.ps.ps_ExprContext; + MemoryContext oldcontext; + TupleTableSlot *pslot = mstate->probeslot; + uint32 hashkey = 0; + int numkeys = mstate->nkeys; + + oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + + if (mstate->binary_mode) + { + for (int i = 0; i < numkeys; i++) + { + /* combine successive hashkeys by rotating */ + hashkey = pg_rotate_left32(hashkey, 1); + + if (!pslot->tts_isnull[i]) /* treat nulls as having hash key 0 */ + { + FormData_pg_attribute *attr; + uint32 hkey; + + attr = &pslot->tts_tupleDescriptor->attrs[i]; + + hkey = datum_image_hash(pslot->tts_values[i], attr->attbyval, attr->attlen); + + hashkey ^= hkey; + } + } + } + else + { + FmgrInfo *hashfunctions = mstate->hashfunctions; + Oid *collations = mstate->collations; + + for (int i = 0; i < numkeys; i++) + { + /* combine successive hashkeys by rotating */ + hashkey = pg_rotate_left32(hashkey, 1); + + if (!pslot->tts_isnull[i]) /* treat nulls as having hash key 0 */ + { + uint32 hkey; + + hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], + collations[i], pslot->tts_values[i])); + hashkey ^= hkey; + } + } + } + + MemoryContextSwitchTo(oldcontext); + return murmurhash32(hashkey); +} + +/* + * MemoizeHash_equal + * Equality function for confirming hash value matches during a hash + * table lookup. 'key2' is never used. Instead the MemoizeState's + * probeslot is always populated with details of what's being looked up. + */ +static bool +MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1, + const MemoizeKey *key2) +{ + MemoizeState *mstate = (MemoizeState *) tb->private_data; + ExprContext *econtext = mstate->ss.ps.ps_ExprContext; + TupleTableSlot *tslot = mstate->tableslot; + TupleTableSlot *pslot = mstate->probeslot; + + /* probeslot should have already been prepared by prepare_probe_slot() */ + ExecStoreMinimalTuple(key1->params, tslot, false); + + if (mstate->binary_mode) + { + MemoryContext oldcontext; + int numkeys = mstate->nkeys; + bool match = true; + + oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + + tableam_tslot_getallattrs(tslot); + tableam_tslot_getallattrs(pslot); + + for (int i = 0; i < numkeys; i++) + { + FormData_pg_attribute *attr; + + if (tslot->tts_isnull[i] != pslot->tts_isnull[i]) + { + match = false; + break; + } + + /* both NULL? they're equal */ + if (tslot->tts_isnull[i]) + continue; + + /* perform binary comparison on the two datums */ + attr = &tslot->tts_tupleDescriptor->attrs[i]; + if (!DatumImageEq(tslot->tts_values[i], pslot->tts_values[i], + attr->attbyval, attr->attlen)) + { + match = false; + break; + } + } + + MemoryContextSwitchTo(oldcontext); + return match; + } + else + { + econtext->ecxt_innertuple = tslot; + econtext->ecxt_outertuple = pslot; + return ExecQual(mstate->quals, econtext); + } +} + +/* + * Initialize the hash table to empty. The MemoizeState's hashtable field + * must point to NULL. + */ +static void +build_hash_table(MemoizeState *mstate, uint32 size) +{ + Assert(mstate->hashtable == NULL); + + /* Make a guess at a good size when we're not given a valid size. 
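+     * est_entries comes from the planner (cost_memoize_rescan()); zero means
+     * the planner left the choice to the executor.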
*/ + if (size == 0) + size = 1024; + + /* memoize_create will convert the size to a power of 2 */ + mstate->hashtable = memoize_create(mstate->tableContext, size, mstate); +} + +/* + * prepare_probe_slot + * Populate mstate's probeslot with the values from the tuple stored + * in 'key'. If 'key' is NULL, then perform the population by evaluating + * mstate's param_exprs. + */ +static inline void +prepare_probe_slot(MemoizeState *mstate, MemoizeKey *key) +{ + TupleTableSlot *pslot = mstate->probeslot; + TupleTableSlot *tslot = mstate->tableslot; + int numKeys = mstate->nkeys; + + ExecClearTuple(pslot); + + if (key == NULL) + { + ExprContext *econtext = mstate->ss.ps.ps_ExprContext; + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + + /* Set the probeslot's values based on the current parameter values */ + for (int i = 0; i < numKeys; i++) + pslot->tts_values[i] = ExecEvalExpr(mstate->param_exprs[i], + econtext, + &pslot->tts_isnull[i]); + + MemoryContextSwitchTo(oldcontext); + } + else + { + /* Process the key's MinimalTuple and store the values in probeslot */ + ExecStoreMinimalTuple(key->params, tslot, false); + tableam_tslot_getallattrs(tslot); + memcpy(pslot->tts_values, tslot->tts_values, sizeof(Datum) * numKeys); + memcpy(pslot->tts_isnull, tslot->tts_isnull, sizeof(bool) * numKeys); + } + + ExecStoreVirtualTuple(pslot); +} + +/* + * entry_purge_tuples + * Remove all tuples from the cache entry pointed to by 'entry'. This + * leaves an empty cache entry. Also, update the memory accounting to + * reflect the removal of the tuples. + */ +static inline void +entry_purge_tuples(MemoizeState *mstate, MemoizeEntry *entry) +{ + MemoizeTuple *tuple = entry->tuplehead; + uint64 freed_mem = 0; + + while (tuple != NULL) + { + MemoizeTuple *next = tuple->next; + + freed_mem += CACHE_TUPLE_BYTES(tuple); + + /* Free memory used for this tuple */ + pfree(tuple->mintuple); + pfree(tuple); + + tuple = next; + } + + entry->complete = false; + entry->tuplehead = NULL; + + /* Update the memory accounting */ + mstate->mem_used -= freed_mem; +} + +/* + * remove_cache_entry + * Remove 'entry' from the cache and free memory used by it. + */ +static void +remove_cache_entry(MemoizeState *mstate, MemoizeEntry *entry) +{ + MemoizeKey *key = entry->key; + + dlist_delete(&entry->key->lru_node); + + /* Remove all of the tuples from this entry */ + entry_purge_tuples(mstate, entry); + + /* + * Update memory accounting. entry_purge_tuples should have already + * subtracted the memory used for each cached tuple. Here we just update + * the amount used by the entry itself. + */ + mstate->mem_used -= EMPTY_ENTRY_MEMORY_BYTES(entry); + + /* Remove the entry from the cache */ + memoize_delete_item(mstate->hashtable, entry); + + pfree(key->params); + pfree(key); +} + +/* + * cache_purge_all + * Remove all items from the cache + */ +static void +cache_purge_all(MemoizeState *mstate) +{ + uint64 evictions = 0; + + if (mstate->hashtable != NULL) + evictions = mstate->hashtable->members; + + /* + * Likely the most efficient way to remove all items is to just reset the + * memory context for the cache and then rebuild a fresh hash table. 
This + * saves having to remove each item one by one and pfree each cached tuple + */ + MemoryContextReset(mstate->tableContext); + + /* NULLify so we recreate the table on the next call */ + mstate->hashtable = NULL; + + /* reset the LRU list */ + dlist_init(&mstate->lru_list); + mstate->last_tuple = NULL; + mstate->entry = NULL; + + mstate->mem_used = 0; + + /* XXX should we add something new to track these purges? */ + mstate->stats.cache_evictions += evictions; /* Update Stats */ +} + +/* + * cache_reduce_memory + * Evict older and less recently used items from the cache in order to + * reduce the memory consumption back to something below the + * MemoizeState's mem_limit. + * + * 'specialkey', if not NULL, causes the function to return false if the entry + * which the key belongs to is removed from the cache. + */ +static bool +cache_reduce_memory(MemoizeState *mstate, MemoizeKey *specialkey) +{ + bool specialkey_intact = true; /* for now */ + dlist_mutable_iter iter; + uint64 evictions = 0; + + /* Update peak memory usage */ + if (mstate->mem_used > mstate->stats.mem_peak) + mstate->stats.mem_peak = mstate->mem_used; + + /* We expect only to be called when we've gone over budget on memory */ + Assert(mstate->mem_used > mstate->mem_limit); + + /* Start the eviction process starting at the head of the LRU list. */ + dlist_foreach_modify(iter, &mstate->lru_list) + { + MemoizeKey *key = dlist_container(MemoizeKey, lru_node, iter.cur); + MemoizeEntry *entry; + + /* + * Populate the hash probe slot in preparation for looking up this LRU + * entry. + */ + prepare_probe_slot(mstate, key); + + /* + * Ideally the LRU list pointers would be stored in the entry itself + * rather than in the key. Unfortunately, we can't do that as the + * simplehash.h code may resize the table and allocate new memory for + * entries which would result in those pointers pointing to the old + * buckets. However, it's fine to use the key to store this as that's + * only referenced by a pointer in the entry, which of course follows + * the entry whenever the hash table is resized. Since we only have a + * pointer to the key here, we must perform a hash table lookup to + * find the entry that the key belongs to. + */ + entry = memoize_lookup(mstate->hashtable, NULL); + + /* + * Sanity check that we found the entry belonging to the LRU list + * item. A misbehaving hash or equality function could cause the + * entry not to be found or the wrong entry to be found. + */ + if (unlikely(entry == NULL || entry->key != key)) + elog(ERROR, "could not find memoization table entry"); + + /* + * If we're being called to free memory while the cache is being + * populated with new tuples, then we'd better take some care as we + * could end up freeing the entry which 'specialkey' belongs to. + * Generally callers will pass 'specialkey' as the key for the cache + * entry which is currently being populated, so we must set + * 'specialkey_intact' to false to inform the caller the specialkey + * entry has been removed. + */ + if (key == specialkey) + specialkey_intact = false; + + /* + * Finally remove the entry. This will remove from the LRU list too. + */ + remove_cache_entry(mstate, entry); + + evictions++; + + /* Exit if we've freed enough memory */ + if (mstate->mem_used <= mstate->mem_limit) + break; + } + + mstate->stats.cache_evictions += evictions; /* Update Stats */ + + return specialkey_intact; +} + +/* + * cache_lookup + * Perform a lookup to see if we've already cached tuples based on the + * scan's current parameters. 
If we find an existing entry we move it to + * the end of the LRU list, set *found to true then return it. If we + * don't find an entry then we create a new one and add it to the end of + * the LRU list. We also update cache memory accounting and remove older + * entries if we go over the memory budget. If we managed to free enough + * memory we return the new entry, else we return NULL. + * + * Callers can assume we'll never return NULL when *found is true. + */ +static MemoizeEntry * +cache_lookup(MemoizeState *mstate, bool *found) +{ + MemoizeKey *key; + MemoizeEntry *entry; + MemoryContext oldcontext; + + /* prepare the probe slot with the current scan parameters */ + prepare_probe_slot(mstate, NULL); + + /* + * Add the new entry to the cache. No need to pass a valid key since the + * hash function uses mstate's probeslot, which we populated above. + */ + entry = memoize_insert(mstate->hashtable, NULL, found); + + if (*found) + { + /* + * Move existing entry to the tail of the LRU list to mark it as the + * most recently used item. + */ + dlist_move_tail(&mstate->lru_list, &entry->key->lru_node); + + return entry; + } + + oldcontext = MemoryContextSwitchTo(mstate->tableContext); + + /* Allocate a new key */ + entry->key = key = (MemoizeKey *) palloc(sizeof(MemoizeKey)); + key->params = ExecCopySlotMinimalTuple(mstate->probeslot); + + /* Update the total cache memory utilization */ + mstate->mem_used += EMPTY_ENTRY_MEMORY_BYTES(entry); + + /* Initialize this entry */ + entry->complete = false; + entry->tuplehead = NULL; + + /* + * Since this is the most recently used entry, push this entry onto the + * end of the LRU list. + */ + dlist_push_tail(&mstate->lru_list, &entry->key->lru_node); + + mstate->last_tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + /* + * If we've gone over our memory budget, then we'll free up some space in + * the cache. + */ + if (mstate->mem_used > mstate->mem_limit) + { + /* + * Try to free up some memory. It's highly unlikely that we'll fail + * to do so here since the entry we've just added is yet to contain + * any tuples and we're able to remove any other entry to reduce the + * memory consumption. + */ + if (unlikely(!cache_reduce_memory(mstate, key))) + return NULL; + + /* + * The process of removing entries from the cache may have caused the + * code in simplehash.h to shuffle elements to earlier buckets in the + * hash table. If it has, we'll need to find the entry again by + * performing a lookup. Fortunately, we can detect if this has + * happened by seeing if the entry is still in use and that the key + * pointer matches our expected key. + */ + if (entry->status != memoize_SH_IN_USE || entry->key != key) + { + /* + * We need to repopulate the probeslot as lookups performed during + * the cache evictions above will have stored some other key. + */ + prepare_probe_slot(mstate, key); + + /* Re-find the newly added entry */ + entry = memoize_lookup(mstate->hashtable, NULL); + Assert(entry != NULL); + } + } + + return entry; +} + +/* + * cache_store_tuple + * Add the tuple stored in 'slot' to the mstate's current cache entry. + * The cache entry must have already been made with cache_lookup(). + * mstate's last_tuple field must point to the tail of mstate->entry's + * list of tuples. 
+ */ +static bool +cache_store_tuple(MemoizeState *mstate, TupleTableSlot *slot) +{ + MemoizeTuple *tuple; + MemoizeEntry *entry = mstate->entry; + MemoryContext oldcontext; + + Assert(slot != NULL); + Assert(entry != NULL); + + oldcontext = MemoryContextSwitchTo(mstate->tableContext); + + tuple = (MemoizeTuple *) palloc(sizeof(MemoizeTuple)); + tuple->mintuple = ExecCopySlotMinimalTuple(slot); + tuple->next = NULL; + + /* Account for the memory we just consumed */ + mstate->mem_used += CACHE_TUPLE_BYTES(tuple); + + if (entry->tuplehead == NULL) + { + /* + * This is the first tuple for this entry, so just point the list head + * to it. + */ + entry->tuplehead = tuple; + } + else + { + /* push this tuple onto the tail of the list */ + mstate->last_tuple->next = tuple; + } + + mstate->last_tuple = tuple; + MemoryContextSwitchTo(oldcontext); + + /* + * If we've gone over our memory budget then free up some space in the + * cache. + */ + if (mstate->mem_used > mstate->mem_limit) + { + MemoizeKey *key = entry->key; + + if (!cache_reduce_memory(mstate, key)) + return false; + + /* + * The process of removing entries from the cache may have caused the + * code in simplehash.h to shuffle elements to earlier buckets in the + * hash table. If it has, we'll need to find the entry again by + * performing a lookup. Fortunately, we can detect if this has + * happened by seeing if the entry is still in use and that the key + * pointer matches our expected key. + */ + if (entry->status != memoize_SH_IN_USE || entry->key != key) + { + /* + * We need to repopulate the probeslot as lookups performed during + * the cache evictions above will have stored some other key. + */ + prepare_probe_slot(mstate, key); + + /* Re-find the entry */ + mstate->entry = entry = memoize_lookup(mstate->hashtable, NULL); + Assert(entry != NULL); + } + } + + return true; +} + +static TupleTableSlot * +ExecMemoize(PlanState *pstate) +{ + MemoizeState *node = castNode(MemoizeState, pstate); + ExprContext *econtext = node->ss.ps.ps_ExprContext; + PlanState *outerNode; + TupleTableSlot *slot; + + CHECK_FOR_INTERRUPTS(); + + /* + * Reset per-tuple memory context to free any expression evaluation + * storage allocated in the previous tuple cycle. + */ + ResetExprContext(econtext); + + switch (node->mstatus) + { + case MEMO_CACHE_LOOKUP: + { + MemoizeEntry *entry; + TupleTableSlot *outerslot; + bool found; + + Assert(node->entry == NULL); + + /* first call? we'll need a hash table. */ + if (unlikely(node->hashtable == NULL)) + build_hash_table(node, ((Memoize *) pstate->plan)->est_entries); + + /* + * We're only ever in this state for the first call of the + * scan. Here we have a look to see if we've already seen the + * current parameters before and if we have already cached a + * complete set of records that the outer plan will return for + * these parameters. + * + * When we find a valid cache entry, we'll return the first + * tuple from it. If not found, we'll create a cache entry and + * then try to fetch a tuple from the outer scan. If we find + * one there, we'll try to cache it. + */ + + /* see if we've got anything cached for the current parameters */ + entry = cache_lookup(node, &found); + + if (found && entry->complete) + { + node->stats.cache_hits += 1; /* stats update */ + + /* + * Set last_tuple and entry so that the state + * MEMO_CACHE_FETCH_NEXT_TUPLE can easily find the next + * tuple for these parameters. 
+ */ + node->last_tuple = entry->tuplehead; + node->entry = entry; + + /* Fetch the first cached tuple, if there is one */ + if (entry->tuplehead) + { + node->mstatus = MEMO_CACHE_FETCH_NEXT_TUPLE; + + slot = node->ss.ps.ps_ResultTupleSlot; + ExecStoreMinimalTuple(entry->tuplehead->mintuple, + slot, false); + + return slot; + } + + /* The cache entry is void of any tuples. */ + node->mstatus = MEMO_END_OF_SCAN; + return NULL; + } + + /* Handle cache miss */ + node->stats.cache_misses += 1; /* stats update */ + + if (found) + { + /* + * A cache entry was found, but the scan for that entry + * did not run to completion. We'll just remove all + * tuples and start again. It might be tempting to + * continue where we left off, but there's no guarantee + * the outer node will produce the tuples in the same + * order as it did last time. + */ + entry_purge_tuples(node, entry); + } + + /* Scan the outer node for a tuple to cache */ + outerNode = outerPlanState(node); + outerslot = ExecProcNode(outerNode); + if (TupIsNull(outerslot)) + { + /* + * cache_lookup may have returned NULL due to failure to + * free enough cache space, so ensure we don't do anything + * here that assumes it worked. There's no need to go into + * bypass mode here as we're setting mstatus to end of + * scan. + */ + if (likely(entry)) + entry->complete = true; + + node->mstatus = MEMO_END_OF_SCAN; + return NULL; + } + + node->entry = entry; + + /* + * If we failed to create the entry or failed to store the + * tuple in the entry, then go into bypass mode. + */ + if (unlikely(entry == NULL || + !cache_store_tuple(node, outerslot))) + { + node->stats.cache_overflows += 1; /* stats update */ + + node->mstatus = MEMO_CACHE_BYPASS_MODE; + + /* + * No need to clear out last_tuple as we'll stay in bypass + * mode until the end of the scan. + */ + } + else + { + /* + * If we only expect a single row from this scan then we + * can mark that we're not expecting more. This allows + * cache lookups to work even when the scan has not been + * executed to completion. + */ + entry->complete = node->singlerow; + node->mstatus = MEMO_FILLING_CACHE; + } + + slot = node->ss.ps.ps_ResultTupleSlot; + ExecCopySlot(slot, outerslot); + return slot; + } + + case MEMO_CACHE_FETCH_NEXT_TUPLE: + { + /* We shouldn't be in this state if these are not set */ + Assert(node->entry != NULL); + Assert(node->last_tuple != NULL); + + /* Skip to the next tuple to output */ + node->last_tuple = node->last_tuple->next; + + /* No more tuples in the cache */ + if (node->last_tuple == NULL) + { + node->mstatus = MEMO_END_OF_SCAN; + return NULL; + } + + slot = node->ss.ps.ps_ResultTupleSlot; + ExecStoreMinimalTuple(node->last_tuple->mintuple, slot, + false); + + return slot; + } + + case MEMO_FILLING_CACHE: + { + TupleTableSlot *outerslot; + MemoizeEntry *entry = node->entry; + + /* entry should already have been set by MEMO_CACHE_LOOKUP */ + Assert(entry != NULL); + + /* + * When in the MEMO_FILLING_CACHE state, we've just had a + * cache miss and are populating the cache with the current + * scan tuples. + */ + outerNode = outerPlanState(node); + outerslot = ExecProcNode(outerNode); + if (TupIsNull(outerslot)) + { + /* No more tuples. Mark it as complete */ + entry->complete = true; + node->mstatus = MEMO_END_OF_SCAN; + return NULL; + } + + /* + * Validate if the planner properly set the singlerow flag. It + * should only set that if each cache entry can, at most, + * return 1 row. 
+ */ + if (unlikely(entry->complete)) + elog(ERROR, "cache entry already complete"); + + /* Record the tuple in the current cache entry */ + if (unlikely(!cache_store_tuple(node, outerslot))) + { + /* Couldn't store it? Handle overflow */ + node->stats.cache_overflows += 1; /* stats update */ + + node->mstatus = MEMO_CACHE_BYPASS_MODE; + + /* + * No need to clear out entry or last_tuple as we'll stay + * in bypass mode until the end of the scan. + */ + } + + slot = node->ss.ps.ps_ResultTupleSlot; + ExecCopySlot(slot, outerslot); + return slot; + } + + case MEMO_CACHE_BYPASS_MODE: + { + TupleTableSlot *outerslot; + + /* + * When in bypass mode we just continue to read tuples without + * caching. We need to wait until the next rescan before we + * can come out of this mode. + */ + outerNode = outerPlanState(node); + outerslot = ExecProcNode(outerNode); + if (TupIsNull(outerslot)) + { + node->mstatus = MEMO_END_OF_SCAN; + return NULL; + } + + slot = node->ss.ps.ps_ResultTupleSlot; + ExecCopySlot(slot, outerslot); + return slot; + } + + case MEMO_END_OF_SCAN: + + /* + * We've already returned NULL for this scan, but just in case + * something calls us again by mistake. + */ + return NULL; + + default: + elog(ERROR, "unrecognized memoize state: %d", + (int) node->mstatus); + return NULL; + } /* switch */ +} + +MemoizeState * +ExecInitMemoize(Memoize *node, EState *estate, int eflags) +{ + MemoizeState *mstate = makeNode(MemoizeState); + Plan *outerNode; + int i; + int nkeys; + Oid *eqfuncoids; + + /* check for unsupported flags */ + Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); + + mstate->ss.ps.plan = (Plan *) node; + mstate->ss.ps.state = estate; + mstate->ss.ps.ExecProcNode = ExecMemoize; + + /* + * Miscellaneous initialization + * + * create expression context for node + */ + ExecAssignExprContext(estate, &mstate->ss.ps); + + outerNode = outerPlan(node); + outerPlanState(mstate) = ExecInitNode(outerNode, estate, eflags); + + /* + * Initialize return slot and type. No need to initialize projection info + * because this node doesn't do projections. + */ + ExecInitResultTupleSlot(estate, &mstate->ss.ps); + TupleDesc resultDesc = ExecGetResultType(outerPlanState(mstate)); + ExecAssignResultTypeFromTL(&mstate->ss.ps, resultDesc->td_tam_ops); + mstate->ss.ps.ps_ProjInfo = NULL; + + /* + * Initialize scan slot and type. + */ + //ExecCreateScanSlotFromOuterPlan(estate, &mstate->ss); + PlanState *outerPlan; + TupleDesc tupDesc; + outerPlan = (PlanState*)(outerPlanState(&mstate->ss.ps)); + tupDesc = ExecGetResultType(outerPlan); + + ExecInitScanTupleSlot(estate, &mstate->ss);//, tupDesc);//, tts_ops); + + /* + * Set the state machine to lookup the cache. We won't find anything + * until we cache something, but this saves a special case to create the + * first entry. 
+ */ + mstate->mstatus = MEMO_CACHE_LOOKUP; + + mstate->nkeys = nkeys = node->numKeys; + mstate->hashkeydesc = pg_ExecTypeFromExprList(node->param_exprs); + mstate->tableslot = MakeSingleTupleTableSlot(mstate->hashkeydesc); + mstate->probeslot = MakeSingleTupleTableSlot(mstate->hashkeydesc); + + mstate->param_exprs = (ExprState **) palloc(nkeys * sizeof(ExprState *)); + mstate->collations = node->collations; /* Just point directly to the plan + * data */ + mstate->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); + + eqfuncoids = (Oid*)palloc(nkeys * sizeof(Oid)); + + for (i = 0; i < nkeys; i++) + { + Oid hashop = node->hashOperators[i]; + Oid left_hashfn; + Oid right_hashfn; + Expr *param_expr = (Expr *) list_nth(node->param_exprs, i); + + if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn)) + elog(ERROR, "could not find hash function for hash operator %u", + hashop); + + fmgr_info(left_hashfn, &mstate->hashfunctions[i]); + + mstate->param_exprs[i] = ExecInitExpr(param_expr, (PlanState *) mstate); + eqfuncoids[i] = get_opcode(hashop); + } + + /* + mstate->cache_eq_expr = ExecBuildParamSetEqual(mstate->hashkeydesc, + &TTSOpsMinimalTuple, + &TTSOpsVirtual, + eqfuncoids, + node->collations, + node->param_exprs, + (PlanState *) mstate); + */ + mstate->quals = (List*)ExecInitExpr((Expr*)NULL, (PlanState*)mstate); + + pfree(eqfuncoids); + mstate->mem_used = 0; + + /* Limit the total memory consumed by the cache to this */ + mstate->mem_limit = get_hash_memory_limit(); + + /* A memory context dedicated for the cache */ + mstate->tableContext = AllocSetContextCreate(CurrentMemoryContext, + "MemoizeHashTable", + ALLOCSET_DEFAULT_SIZES); + + dlist_init(&mstate->lru_list); + mstate->last_tuple = NULL; + mstate->entry = NULL; + + /* + * Mark if we can assume the cache entry is completed after we get the + * first record for it. Some callers might not call us again after + * getting the first match. e.g. A join operator performing a unique join + * is able to skip to the next outer tuple after getting the first + * matching inner tuple. In this case, the cache entry is complete after + * getting the first tuple. This allows us to mark it as so. + */ + mstate->singlerow = node->singlerow; + mstate->keyparamids = node->keyparamids; + + /* + * Record if the cache keys should be compared bit by bit, or logically + * using the type's hash equality operator + */ + mstate->binary_mode = node->binary_mode; + + /* Zero the statistics counters */ + memset(&mstate->stats, 0, sizeof(MemoizeInstrumentation)); + + /* + * Because it may require a large allocation, we delay building of the + * hash table until executor run. + */ + mstate->hashtable = NULL; + + return mstate; +} + +void +ExecEndMemoize(MemoizeState *node) +{ +#ifdef USE_ASSERT_CHECKING + /* Validate the memory accounting code is correct in assert builds. 
*/ + if (node->hashtable != NULL) + { + int count; + uint64 mem = 0; + memoize_iterator i; + MemoizeEntry *entry; + + memoize_start_iterate(node->hashtable, &i); + + count = 0; + while ((entry = memoize_iterate(node->hashtable, &i)) != NULL) + { + MemoizeTuple *tuple = entry->tuplehead; + + mem += EMPTY_ENTRY_MEMORY_BYTES(entry); + while (tuple != NULL) + { + mem += CACHE_TUPLE_BYTES(tuple); + tuple = tuple->next; + } + count++; + } + + Assert(count == node->hashtable->members); + Assert(mem == node->mem_used); + } +#endif + + /* + * When ending a parallel worker, copy the statistics gathered by the + * worker back into shared memory so that it can be picked up by the main + * process to report in EXPLAIN ANALYZE. + */ +#ifdef CYW_COMMENT + if (node->shared_info != NULL && IsParallelWorker()) + { + MemoizeInstrumentation *si; + + /* Make mem_peak available for EXPLAIN */ + if (node->stats.mem_peak == 0) + node->stats.mem_peak = node->mem_used; + + Assert(ParallelWorkerNumber <= node->shared_info->num_workers); + si = &node->shared_info->sinstrument[ParallelWorkerNumber]; + memcpy(si, &node->stats, sizeof(MemoizeInstrumentation)); + } +#endif /* 0 */ + + /* Remove the cache context */ + MemoryContextDelete(node->tableContext); + + /* + * shut down the subplan + */ + ExecEndNode(outerPlanState(node)); +} + +void +ExecReScanMemoize(MemoizeState *node) +{ + PlanState *outerPlan = outerPlanState(node); + + /* Mark that we must lookup the cache for a new set of parameters */ + node->mstatus = MEMO_CACHE_LOOKUP; + + /* nullify pointers used for the last scan */ + node->entry = NULL; + node->last_tuple = NULL; + + /* + * if chgParam of subnode is not null then plan will be re-scanned by + * first ExecProcNode. + */ + if (outerPlan->chgParam == NULL) + ExecReScan(outerPlan); + + /* + * Purge the entire cache if a parameter changed that is not part of the + * cache key. + */ + if (bms_nonempty_difference(outerPlan->chgParam, node->keyparamids)) + cache_purge_all(node); +} + +/* + * ExecEstimateCacheEntryOverheadBytes + * For use in the query planner to help it estimate the amount of memory + * required to store a single entry in the cache. + */ +double +ExecEstimateCacheEntryOverheadBytes(double ntuples) +{ + return sizeof(MemoizeEntry) + sizeof(MemoizeKey) + sizeof(MemoizeTuple) * + ntuples; +} + +/* ---------------------------------------------------------------- + * Parallel Query Support + * ---------------------------------------------------------------- + */ + + /* ---------------------------------------------------------------- + * ExecMemoizeEstimate + * + * Estimate space required to propagate memoize statistics. + * ---------------------------------------------------------------- + */ +void +ExecMemoizeEstimate(MemoizeState *node, ParallelContext *pcxt) +{ + return; +#ifdef CYW_COMMENT + Size size; + + /* don't need this if not instrumenting or no workers */ + if (!node->ss.ps.instrument || pcxt->nworkers == 0) + return; + + size = mul_size(pcxt->nworkers, sizeof(MemoizeInstrumentation)); + size = add_size(size, offsetof(SharedMemoizeInfo, sinstrument)); + shm_toc_estimate_chunk(&pcxt->estimator, size); + shm_toc_estimate_keys(&pcxt->estimator, 1); +#endif +} + +/* ---------------------------------------------------------------- + * ExecMemoizeInitializeDSM + * + * Initialize DSM space for memoize statistics. 
+ * ---------------------------------------------------------------- + */ +void +ExecMemoizeInitializeDSM(MemoizeState *node, ParallelContext *pcxt) +{ +#ifdef CYW_COMMENT + Size size; + + /* don't need this if not instrumenting or no workers */ + if (!node->ss.ps.instrument || pcxt->nworkers == 0) + return; + + size = offsetof(SharedMemoizeInfo, sinstrument) + + pcxt->nworkers * sizeof(MemoizeInstrumentation); + node->shared_info = shm_toc_allocate(pcxt->toc, size); + /* ensure any unfilled slots will contain zeroes */ + memset(node->shared_info, 0, size); + node->shared_info->num_workers = pcxt->nworkers; + shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, + node->shared_info); +#endif +} + +/* ---------------------------------------------------------------- + * ExecMemoizeInitializeWorker + * + * Attach worker to DSM space for memoize statistics. + * ---------------------------------------------------------------- + */ +void +ExecMemoizeInitializeWorker(MemoizeState *node, ParallelWorkerContext *pwcxt) +{ +} + +/* ---------------------------------------------------------------- + * ExecMemoizeRetrieveInstrumentation + * + * Transfer memoize statistics from DSM to private memory. + * ---------------------------------------------------------------- + */ +void +ExecMemoizeRetrieveInstrumentation(MemoizeState *node) +{ + Size size; + SharedMemoizeInfo *si; + + if (node->shared_info == NULL) + return; + + size = offsetof(SharedMemoizeInfo, sinstrument) + + node->shared_info->num_workers * sizeof(MemoizeInstrumentation); + si = (SharedMemoizeInfo*)palloc(size); + memcpy(si, node->shared_info, size); + node->shared_info = si; +} diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index bf15e8d6bef07ad7fb20683e94164c8ac3b73c53..473416fa7bb895b02ef9b79730ce5b51b9f2d018 100755 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -521,6 +521,7 @@ extern TupleTableSlot *ExecInitNullTupleSlot(EState *estate, TupleDesc tupType); extern TupleDesc ExecTypeFromTL(List *targetList, bool hasoid, bool markdropped = false, const TableAmRoutine *tam_ops = TableAmHeap); extern TupleDesc ExecCleanTypeFromTL(List *targetList, bool hasoid, const TableAmRoutine *tam_ops = TableAmHeap); +extern TupleDesc pg_ExecTypeFromExprList(List *exprList); extern TupleDesc ExecTypeFromExprList(List *exprList, List *namesList, const TableAmRoutine *tam_ops = TableAmHeap); extern void UpdateChangedParamSet(PlanState *node, Bitmapset *newchg); extern void InitOutputValues(RightRefState* refState, Datum* values, bool* isnull, bool* hasExecs); diff --git a/src/include/executor/node/nodeMemoize.h b/src/include/executor/node/nodeMemoize.h new file mode 100644 index 0000000000000000000000000000000000000000..69b92809924f5b5e8b3a80945f7d2e536538b50f --- /dev/null +++ b/src/include/executor/node/nodeMemoize.h @@ -0,0 +1,39 @@ +/*------------------------------------------------------------------------- + * + * nodeMemoize.h + * + * + * + * Portions Copyright (c) 2021-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/executor/nodeMemoize.h + * + *------------------------------------------------------------------------- + */ +#ifndef NODEMEMOIZE_H +#define NODEMEMOIZE_H + +//#include "access/parallel.h" +#include "nodes/execnodes.h" +#include "lib/ilist.h" + +extern uint32 +datum_image_hash(Datum value, bool typByVal, int typLen); + +typedef struct ParallelContext +{ +} ParallelContext; + +struct 
ParallelWorkerContext; + + + +extern MemoizeState *ExecInitMemoize(Memoize *node, EState *estate, int eflags); +extern void ExecEndMemoize(MemoizeState *node); +extern void ExecReScanMemoize(MemoizeState *node); +extern double ExecEstimateCacheEntryOverheadBytes(double ntuples); +extern void ExecMemoizeEstimate(MemoizeState *node); +extern void ExecMemoizeRetrieveInstrumentation(MemoizeState *node); + +#endif /* NODEMEMOIZE_H */ diff --git a/src/include/knl/knl_guc/knl_session_attr_sql.h b/src/include/knl/knl_guc/knl_session_attr_sql.h index 414ea8c709fbf2acea65200a19dee5b49e29bfea..07ca3717d37d8c915e9fdc2fe856b59acb9ad37c 100644 --- a/src/include/knl/knl_guc/knl_session_attr_sql.h +++ b/src/include/knl/knl_guc/knl_session_attr_sql.h @@ -72,6 +72,7 @@ typedef struct knl_session_attr_sql { bool enable_hashagg; bool enable_sortgroup_agg; bool enable_material; + bool enable_memoize; bool enable_nestloop; bool enable_mergejoin; bool enable_hashjoin; diff --git a/src/include/lib/ilist.h b/src/include/lib/ilist.h index 91b7fd597c61a1fea50ce9fbe057de5be320f800..3062308d18c4326a57bc02704972b498e8bdf9ba 100644 --- a/src/include/lib/ilist.h +++ b/src/include/lib/ilist.h @@ -390,6 +390,24 @@ static inline void dlist_move_head(dlist_head* head, dlist_node* node) dlist_check(head); } +/* + * Move element from its current position in the list to the tail position in + * the same list. + * + * Undefined behaviour if 'node' is not already part of the list. + */ +static inline void +dlist_move_tail(dlist_head *head, dlist_node *node) +{ + /* fast path if it's already at the tail */ + if (head->head.prev == node) + return; + + dlist_delete(node); + dlist_push_tail(head, node); + + dlist_check(head); +} /* * Check whether 'node' has a following node. diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index a1c124319ca5ce6f32b5ae014dd06d7aae937d44..cb6afe24b2886ea77f4b24da09ee6f42e765dada 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -720,6 +720,8 @@ extern void CancelBackup(void); extern void EarlyBindingTLSVariables(void); +/* in executor/nodeHash.c */ +extern size_t get_hash_memory_limit(void); /* * converts the 64 bits unsigned integer between host byte order and network byte order. * Note that the network byte order is BIG ENDIAN. diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 3240dc40b827ee342f25f1c0fd60b7f5da2fc9f8..0580debff65626fcd6c123a3767930462427b3c1 100755 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1423,6 +1423,7 @@ typedef struct PlanState { List* plan_issues; } PlanState; +#define outerPlanState(node) (((PlanState *)(node))->lefttree) static inline bool planstate_need_stub(PlanState* ps) { return ps->stubType != PST_None; @@ -1436,7 +1437,6 @@ static inline bool planstate_need_stub(PlanState* ps) * ---------------- */ #define innerPlanState(node) (((PlanState*)(node))->righttree) -#define outerPlanState(node) (((PlanState*)(node))->lefttree) /* Macros for inline access to certain instrumentation counters */ #define InstrCountFiltered1(node, delta) \ @@ -2510,6 +2510,76 @@ typedef struct MaterialState { Tuplestorestate* tuplestorestate; } MaterialState; +struct MemoizeEntry; +struct MemoizeTuple; +struct MemoizeKey; + +typedef struct MemoizeInstrumentation +{ + uint64 cache_hits; /* number of rescans where we've found the + * scan parameter values to be cached */ + uint64 cache_misses; /* number of rescans where we've not found the + * scan parameter values to be cached. 
*/ + uint64 cache_evictions; /* number of cache entries removed due to + * the need to free memory */ + uint64 cache_overflows; /* number of times we've had to bypass the + * cache when filling it due to not being + * able to free enough space to store the + * current scan's tuples. */ + uint64 mem_peak; /* peak memory usage in bytes */ +} MemoizeInstrumentation; + +/* ---------------- + * Shared memory container for per-worker memoize information + * ---------------- + */ +typedef struct SharedMemoizeInfo +{ + int num_workers; + MemoizeInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]; +} SharedMemoizeInfo; + +/* ---------------- + * MemoizeState information + * + * memoize nodes are used to cache recent and commonly seen results from + * a parameterized scan. + * ---------------- + */ +typedef struct MemoizeState +{ + ScanState ss; /* its first field is NodeTag */ + int mstatus; /* value of ExecMemoize state machine */ + int nkeys; /* number of cache keys */ + struct memoize_hash *hashtable; /* hash table for cache entries */ + TupleDesc hashkeydesc; /* tuple descriptor for cache keys */ + TupleTableSlot *tableslot; /* min tuple slot for existing cache entries */ + TupleTableSlot *probeslot; /* virtual slot used for hash lookups */ + ExprState *cache_eq_expr; /* Compare exec params to hash key */ + ExprState **param_exprs; /* exprs containing the parameters to this + * node */ + FmgrInfo *hashfunctions; /* lookup data for hash funcs nkeys in size */ + Oid *collations; /* collation for comparisons nkeys in size */ + uint64 mem_used; /* bytes of memory used by cache */ + uint64 mem_limit; /* memory limit in bytes for the cache */ + MemoryContext tableContext; /* memory context to store cache data */ + dlist_head lru_list; /* least recently used entry list */ + struct MemoizeTuple *last_tuple; /* Used to point to the last tuple + * returned during a cache hit and the + * tuple we last stored when + * populating the cache. */ + struct MemoizeEntry *entry; /* the entry that 'last_tuple' belongs to or + * NULL if 'last_tuple' is NULL. */ + bool singlerow; /* true if the cache entry is to be marked as + * complete after caching the first tuple. 
*/ + bool binary_mode; /* true when cache key should be compared bit + * by bit, false when using hash equality ops */ + MemoizeInstrumentation stats; /* execution statistics */ + SharedMemoizeInfo *shared_info; /* statistics for parallel workers */ + Bitmapset *keyparamids; /* Param->paramids of expressions belonging to + * param_exprs */ + List* quals; +} MemoizeState; /* ---------------- * SortState information * ---------------- diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index c5d5b08220fa23bdd8b7afc49e0d8b56f537cc0d..7f58f9d664d64bd837dfd88e2da36046e2e163c0 100755 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -84,6 +84,7 @@ typedef enum NodeTag { T_MergeJoin, T_HashJoin, T_Material, + T_Memoize, T_Sort, T_SortGroup, T_Group, @@ -203,6 +204,7 @@ typedef enum NodeTag { T_MergeJoinState, T_HashJoinState, T_MaterialState, + T_MemoizeState, T_SortState, T_SortGroupState, T_GroupState, @@ -370,6 +372,7 @@ typedef enum NodeTag { T_ProjectionPath, T_ProjectSetPath, T_MaterialPath, + T_MemoizePath, T_UniquePath, T_PartIteratorPath, T_EquivalenceClass, diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 86aa0cc067b5b2c1335dc54e4c9de0bed7eb791b..d955eaf3564ed40dba30a74fe69c7f85409b4880 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -1186,6 +1186,47 @@ typedef struct Material { typedef struct VecMaterial : public Material { } VecMaterial; +/* ---------------- + * memoize node + * ---------------- + */ +typedef struct Memoize +{ + Plan plan; + + /* size of the two arrays below */ + int numKeys; + + /* hash operators for each key */ + Oid *hashOperators;// pg_node_attr(array_size(numKeys)); + + /* collations for each key */ + Oid *collations;// pg_node_attr(array_size(numKeys)); + + /* cache keys in the form of exprs containing parameters */ + List *param_exprs; + + /* + * true if the cache entry should be marked as complete after we store the + * first tuple in it. + */ + bool singlerow; + + /* + * true when cache key should be compared bit by bit, false when using + * hash equality ops + */ + bool binary_mode; + + /* + * The maximum number of entries that the planner expects will fit in the + * cache, or 0 if unknown + */ + uint32 est_entries; + + /* paramids from param_exprs */ + Bitmapset *keyparamids; +} Memoize; /* ---------------- * sort node diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 855c207da85279d7c27d81de31a8caa9c827df20..062306f0bb8a160b0ff8cb6d8c2a976e850d7415 100755 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1427,7 +1427,26 @@ typedef struct MaterialPath { bool materialize_all; /* true for materialize above streamed subplan */ OpMemInfo mem_info; /* Memory info for materialize */ } MaterialPath; - +/* + * MemoizePath represents a Memoize plan node, i.e., a cache that caches + * tuples from parameterized paths to save the underlying node from having to + * be rescanned for parameter values which are already cached. + */ +typedef struct MemoizePath +{ + Path path; + Path *subpath; /* outerpath to cache tuples from */ + List *hash_operators; /* OIDs of hash equality ops for cache keys */ + List *param_exprs; /* expressions that are cache keys */ + bool singlerow; /* true if the cache entry is to be marked as + * complete after caching the first record. 
*/ + bool binary_mode; /* true when cache key should be compared bit + * by bit, false when using hash equality ops */ + double calls; /* expected number of rescans */ + uint32 est_entries; /* The maximum number of entries that the + * planner expects will fit in the cache, or 0 + * if unknown */ +} MemoizePath; /* * UniquePath represents elimination of distinct rows from the output of * its subpath. @@ -1840,6 +1859,9 @@ typedef struct RestrictInfo { /* cache space for hashclause processing; -1 if not yet set */ BucketSize left_bucketsize; /* avg bucketsize of left side */ BucketSize right_bucketsize; /* avg bucketsize of right side */ + /* hash equality operators used for memoize nodes, else InvalidOid */ + Oid left_hasheqoperator; //pg_node_attr(equal_ignore); + Oid right_hasheqoperator; //pg_node_attr(equal_ignore); } RestrictInfo; /* diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index 3ab6fc7a97a4f037dc5440c307777274a3f0be7b..1770e9fe9f85e7ab69136457e1fb1e6ba18c5654 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -162,7 +162,7 @@ extern List* get_quals_lists(Node *jtnode); extern bool isTableofType(Oid typeOid, Oid* base_oid, Oid* indexbyType); extern Expr* simplify_function(Oid funcid, Oid result_type, int32 result_typmod, Oid result_collid, Oid input_collid, List** args_p, bool process_args, bool allow_non_const, eval_const_expressions_context* context); - +extern Bitmapset *pull_paramids(Expr *expr); #ifdef USE_SPQ extern Query *fold_constants(PlannerInfo *root, Query *q, ParamListInfo boundParams, Size max_size); extern Query *flatten_join_alias_var_optimizer(Query *query, int queryLevel); diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 91dee74f2aee80516120186deebbed7663193154..c9bdad958df019ef2afb7d246600724372da98bf 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -79,6 +79,14 @@ extern MergeAppendPath* create_merge_append_path( PlannerInfo* root, RelOptInfo* rel, List* subpaths, List* pathkeys, Relids required_outer); extern ResultPath* create_result_path(PlannerInfo *root, RelOptInfo *rel, List* quals, Path* subpath = NULL, Bitmapset *upper_params = NULL); extern MaterialPath* create_material_path(Path* subpath, bool materialize_all = false); +extern MemoizePath *create_memoize_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + List *param_exprs, + List *hash_operators, + bool singlerow, + bool binary_mode, + double calls); extern UniquePath* create_unique_path(PlannerInfo* root, RelOptInfo* rel, Path* subpath, SpecialJoinInfo* sjinfo); extern Path* create_subqueryscan_path(PlannerInfo* root, RelOptInfo* rel, List* pathkeys, Relids required_outer, List *subplan_params); extern Path* create_subqueryscan_path_reparam(PlannerInfo* root, RelOptInfo* rel, List* pathkeys, Relids required_outer, List *subplan_params); diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index 584030dce8138dbf12d42c2589f581944b4c2c11..43d173d2e3389ba7eeba4867dbfaadd86fcfd940 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -182,6 +182,12 @@ static inline uint32 pg_rotate_right32(uint32 word, int n) return (word >> n) | (word << (sizeof(word) * BITS_PER_BYTE - n)); } +static inline uint32 +pg_rotate_left32(uint32 word, int n) +{ + return (word << n) | (word >> (32 - n)); +} + /* calculate ceil(log base 2) of num */ static inline uint64 sh_log2(uint64 num) { diff --git 
a/src/include/utils/hashfn.h b/src/include/utils/hashfn.h new file mode 100644 index 0000000000000000000000000000000000000000..18cd9ebb50c7440544f64b38a603432fe2b17e94 --- /dev/null +++ b/src/include/utils/hashfn.h @@ -0,0 +1,120 @@ +/* + * Utilities for working with hash values. + * + * Portions Copyright (c) 2017-2024, PostgreSQL Global Development Group + */ + +#ifndef HASHFN_H +#define HASHFN_H + + +/* + * Rotate the high 32 bits and the low 32 bits separately. The standard + * hash function sometimes rotates the low 32 bits by one bit when + * combining elements. We want extended hash functions to be compatible with + * that algorithm when the seed is 0, so we can't just do a normal rotation. + * This works, though. + */ +#define ROTATE_HIGH_AND_LOW_32BITS(v) \ + ((((v) << 1) & UINT64CONST(0xfffffffefffffffe)) | \ + (((v) >> 31) & UINT64CONST(0x100000001))) + + +extern uint32 hash_bytes(const unsigned char *k, int keylen); +extern uint64 hash_bytes_extended(const unsigned char *k, + int keylen, uint64 seed); +extern uint32 hash_bytes_uint32(uint32 k); +extern uint64 hash_bytes_uint32_extended(uint32 k, uint64 seed); + +#ifndef FRONTEND +static inline Datum +hash_any(const unsigned char *k, int keylen) +{ + return UInt32GetDatum(hash_bytes(k, keylen)); +} + +static inline Datum +hash_any_extended(const unsigned char *k, int keylen, uint64 seed) +{ + return UInt64GetDatum(hash_bytes_extended(k, keylen, seed)); +} + +static inline Datum +hash_uint32(uint32 k) +{ + return UInt32GetDatum(hash_bytes_uint32(k)); +} + +static inline Datum +hash_uint32_extended(uint32 k, uint64 seed) +{ + return UInt64GetDatum(hash_bytes_uint32_extended(k, seed)); +} +#endif + +extern uint32 string_hash(const void *key, Size keysize); +extern uint32 tag_hash(const void *key, Size keysize); +extern uint32 uint32_hash(const void *key, Size keysize); + +#define oid_hash uint32_hash /* Remove me eventually */ + +/* + * Combine two 32-bit hash values, resulting in another hash value, with + * decent bit mixing. + * + * Similar to boost's hash_combine(). + */ +static inline uint32 +hash_combine(uint32 a, uint32 b) +{ + a ^= b + 0x9e3779b9 + (a << 6) + (a >> 2); + return a; +} + +/* + * Combine two 64-bit hash values, resulting in another hash value, using the + * same kind of technique as hash_combine(). Testing shows that this also + * produces good bit mixing. + */ +static inline uint64 +hash_combine64(uint64 a, uint64 b) +{ + /* 0x49a0f4dd15e5a8e3 is 64bit random data */ + a ^= b + UINT64CONST(0x49a0f4dd15e5a8e3) + (a << 54) + (a >> 7); + return a; +} + +/* + * Simple inline murmur hash implementation hashing a 32 bit integer, for + * performance. + */ +static inline uint32 +murmurhash32(uint32 data) +{ + uint32 h = data; + + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; +} + +/* 64-bit variant */ +static inline uint64 +murmurhash64(uint64 data) +{ + uint64 h = data; + + h ^= h >> 33; + h *= 0xff51afd7ed558ccd; + h ^= h >> 33; + h *= 0xc4ceb9fe1a85ec53; + h ^= h >> 33; + + return h; +} + +#endif /* HASHFN_H */ +
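
A minimal standalone sketch of how the hash_combine() and murmurhash32() helpers added in hashfn.h above can be chained to fold several per-column hash values into a single 32-bit cache-key hash. combine_key_hashes() and the sample inputs are hypothetical, invented for this illustration; the memoize node's own key-hashing routine (not shown in this excerpt) may mix the per-key hashes differently, for instance via pg_rotate_left32() and XOR.

/*
 * hash_combine() and murmurhash32() are copied verbatim from the hashfn.h
 * hunk above; combine_key_hashes() is a hypothetical helper for this
 * example only.
 */
#include <cstdint>
#include <cstdio>

typedef std::uint32_t uint32;

static inline uint32
hash_combine(uint32 a, uint32 b)
{
    a ^= b + 0x9e3779b9 + (a << 6) + (a >> 2);
    return a;
}

static inline uint32
murmurhash32(uint32 data)
{
    uint32 h = data;

    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;
    return h;
}

/* Fold an array of per-column hash values into one 32-bit key hash. */
static uint32
combine_key_hashes(const uint32 *colhashes, int ncols)
{
    uint32 hash = 0;

    for (int i = 0; i < ncols; i++)
        hash = hash_combine(hash, murmurhash32(colhashes[i]));

    return hash;
}

int
main(void)
{
    uint32 cols[2] = {42, 7};   /* stand-ins for two per-column datum hashes */

    printf("combined cache-key hash: %u\n", combine_key_hashes(cols, 2));
    return 0;
}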
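
For readers new to this executor node, the following is a conceptual analogue, under heavy simplification, of the bookkeeping that cache_lookup(), cache_store_tuple() and cache_reduce_memory() perform: a hash table keyed by the scan parameters, an LRU list with the most recently used entry at the tail, and eviction from the head once a memory budget is exceeded. The class and member names and the std::unordered_map/std::list containers below are purely illustrative; the patch itself stores MinimalTuples in a simplehash table inside a dedicated memory context, and it additionally reports the case where the entry currently being filled (the 'specialkey') is itself evicted.

#include <cstdint>
#include <iostream>
#include <list>
#include <string>
#include <unordered_map>
#include <vector>

class LruMemoCache
{
public:
    explicit LruMemoCache(std::uint64_t mem_limit) : mem_limit_(mem_limit) {}

    /* Look up 'key'; on a hit move it to the MRU end, on a miss create an
     * empty entry (mirroring cache_lookup()). */
    std::vector<std::string> *lookup(const std::string &key, bool *found)
    {
        auto it = entries_.find(key);
        if (it != entries_.end())
        {
            /* cache hit: mark the entry as most recently used */
            lru_.splice(lru_.end(), lru_, it->second.lru_pos);
            *found = true;
            return &it->second.tuples;
        }

        *found = false;
        lru_.push_back(key);
        Entry &e = entries_[key];
        e.lru_pos = std::prev(lru_.end());
        mem_used_ += key.size();
        reduce_memory(key);
        return &entries_[key].tuples;
    }

    /* Append a cached tuple for 'key'; assumes lookup() created the entry,
     * as the real cache_store_tuple() requires. */
    void store(const std::string &key, const std::string &tuple)
    {
        entries_[key].tuples.push_back(tuple);
        mem_used_ += tuple.size();
        reduce_memory(key);
    }

private:
    struct Entry
    {
        std::vector<std::string> tuples;
        std::list<std::string>::iterator lru_pos;
    };

    /* Evict least recently used entries until back under budget; stop early
     * rather than evicting 'keep' (the real code instead keeps going and
     * reports that the special key was removed). */
    void reduce_memory(const std::string &keep)
    {
        while (mem_used_ > mem_limit_ && lru_.front() != keep)
        {
            const std::string victim = lru_.front();
            Entry &e = entries_[victim];

            for (const std::string &t : e.tuples)
                mem_used_ -= t.size();
            mem_used_ -= victim.size();

            entries_.erase(victim);
            lru_.pop_front();
        }
    }

    std::uint64_t mem_limit_;
    std::uint64_t mem_used_ = 0;
    std::list<std::string> lru_;                      /* MRU entry at the back */
    std::unordered_map<std::string, Entry> entries_;
};

int main()
{
    LruMemoCache cache(64);     /* tiny budget to force evictions */
    bool found;

    cache.lookup("param=1", &found);    /* miss: creates an empty entry */
    cache.store("param=1", "tuple-a");
    cache.lookup("param=1", &found);    /* hit: moved to the MRU position */
    std::cout << (found ? "hit" : "miss") << std::endl;
    return 0;
}

A hit moves the entry to the MRU end just as dlist_move_tail() does in cache_lookup(); a store that pushes memory over the limit triggers eviction from the LRU head just as cache_store_tuple() does via cache_reduce_memory().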