greenplumn cdbpathlocus 源码
greenplumn cdbpathlocus 代码
文件路径:/src/include/cdb/cdbpathlocus.h
/*-------------------------------------------------------------------------
*
* cdbpathlocus.h
*
*
* Portions Copyright (c) 2005-2008, Greenplum inc
* Portions Copyright (c) 2012-Present VMware, Inc. or its affiliates.
*
*
* IDENTIFICATION
* src/include/cdb/cdbpathlocus.h
*
*-------------------------------------------------------------------------
*/
#ifndef CDBPATHLOCUS_H
#define CDBPATHLOCUS_H
#include "nodes/pg_list.h" /* List */
#include "nodes/bitmapset.h" /* Bitmapset */
struct Path; /* defined in relation.h */
struct RelOptInfo; /* defined in relation.h */
struct PlannerInfo; /* defined in relation.h */
struct GpPolicy; /* defined in gp_distribution_policy.h */
struct PathTarget;
/*
* CdbLocusType
*
* The difference between SegmentGeneral and Replicated is that a path
* with Replicated locus *must* be executed on all of the segments, whereas
* a SegmentGeneral *may* be executed on all of the segments, or just one
* of them, as is convenient. Replicated is used as the locus for
* UPDATEs/DELETEs/INSERTs to replicated tables; it's important that the
* plan gets executed on all segments so that all segments are updated.
* SegmentGeneral is used when querying replicated tables.
*/
typedef enum CdbLocusType
{
	/* No locus; excluded by CdbLocusType_IsValid(). */
	CdbLocusType_Null,

	/*
	 * A single backend process on the entry db: usually the qDisp itself,
	 * but could be a qExec started by the entry postmaster.
	 */
	CdbLocusType_Entry,

	/*
	 * A single backend process on any db: the qDisp itself, or a qExec
	 * started by a segment postmaster or the entry postmaster.
	 */
	CdbLocusType_SingleQE,

	/*
	 * Compatible with any locus (data is self-contained in the query plan
	 * or generally available in any qExec or qDisp).
	 */
	CdbLocusType_General,

	/* Generally available in any qExec, but not available in qDisp. */
	CdbLocusType_SegmentGeneral,

	/*
	 * Generally available in any qExec or qDisp, but contains correlated
	 * vars from outer query, so must not be redistributed.
	 */
	CdbLocusType_OuterQuery,

	/* Replicated over all qExecs of an N-gang. */
	CdbLocusType_Replicated,

	/* Hash partitioned over all qExecs of N-gang. */
	CdbLocusType_Hashed,

	/* Result of hash partitioned outer join, NULLs can be anywhere. */
	CdbLocusType_HashedOJ,

	/* Partitioned on no known function. */
	CdbLocusType_Strewn,

	/* Sentinel: last valid CdbLocusType + 1. */
	CdbLocusType_End
} CdbLocusType;
/*
 * CdbLocusType_IsValid
 *
 * True when 'locustype' lies strictly between CdbLocusType_Null and
 * CdbLocusType_End.  The argument may be evaluated more than once.
 */
#define CdbLocusType_IsValid(locustype) \
	(CdbLocusType_Null < (locustype) && (locustype) < CdbLocusType_End)

/*
 * CdbLocusType_IsValidOrNull
 *
 * Same range check, except that CdbLocusType_Null itself is accepted too.
 * The argument may be evaluated more than once.
 */
#define CdbLocusType_IsValidOrNull(locustype) \
	(CdbLocusType_Null <= (locustype) && (locustype) < CdbLocusType_End)
/*
* CdbPathLocus
*
* Specifies a distribution of tuples across segments.
*
* If locustype is CdbLocusType_Hashed or CdbLocusType_HashedOJ:
* Rows are distributed based on values of a partitioning key. The
* partitioning key is often called a "distribution key", to avoid
* confusion with table partitioning.
*
* A tuple's partitioning key consists of one or more key columns.
* When CdbPathLocus represents the distribution of a table, the
* partitioning key corresponds to the columns listed in DISTRIBUTED BY
* of the table, but the planner can distribute intermediate results
* based on arbitrary expressions.
*
* The partitioning key is represented by a List of DistributionKeys,
* one for each key column. Each DistributionKey contains a list of
* EquivalenceClasses, which contain expressions that can be used
* to compute the value for the key column. Any of the expressions
* can be used to compute the value, depending on what relations are
* available at that part of the plan.
*
* For example, if the query contains a "WHERE a=b" clause, the planner
* would form an EquivalenceClass that contains two members, "a" and "b".
* Because of the WHERE clause, either "a" or "b" can be used to
* compute the hash value. Usually, a DistributionKey contains only one
* EquivalenceClass, because whenever there is an equijoin on two
* expressions, the planner puts them in the same EquivalenceClass.
* However, if there are FULL JOINs in the query, the FULL JOIN quals do
* not form equivalence classes with other quals, because the NULL
* handling is different. See src/backend/optimizer/README for
* discussion on "outerjoin delayed" equivalence classes.
*
* When a path locus is constructed for a FULL JOIN, CdbLocusType_HashedOJ
* is used instead of CdbLocusType_Hashed. The important distinction
* between Hashed and HashedOJ is the semantics for NULLs. In a Hashed
* distribution, a NULL is hashed like any other value, and all NULLs are
* located on a particular segment, based on the hash value of a NULL
* datum. But with HashedOJ, NULL values can legitimately appear on any
* segment!
*
* For join optimization, Hashed and HashedOJ can both be used. In an inner
* join on A=B, NULL rows won't match anyway. And for an OUTER JOIN, it
* doesn't matter which segment the NULL rows appear on, as long as we
* correctly mark the resulting locus also as HashedOJ. But for grouping,
* HashedOJ can not be used, because you might end up with multiple
* NULL groups, one for each segment!
*
* If locustype == CdbLocusType_Strewn:
* Rows are distributed according to a criterion that is unknown or
* may depend on inputs that are unknown or unavailable in the present
* context. The 'distkey' field is NIL.
*
* If the distribution is not partitioned, then the 'distkey' field is NIL.
*
* The numsegments attribute specifies how many segments the tuples are
* distributed on, from segment 0 to segment `numsegments-1`. In the future
* we might further change it to a range or list so discontinuous segments
* can be described. This numsegments has different meaning for different
* locustype:
* - Null: numsegments is usually meaningless in a Null locus, as the locus
* will be remade into another locus type later. However, there are also
* cases where we set a valid numsegments in a Null locus; that value is kept
* when the locus is remade into another type, and becomes meaningful then;
* - Entry: numsegments in Entry locus specify the candidate segments to put
* the Entry node on, it's master and all the primary segments in current
* implementation;
* - SingleQE: numsegments in SingleQE locus specify the candidate segments
* to put the SingleQE node on, although SingleQE is always executed on one
* segment but numsegments usually have a value > 1;
* - General: similar with Entry and SingleQE;
* - SegmentGeneral, Replicated, Hashed, HashedOJ, Strewn: numsegments in
* these locus types specify the segments that contain the tuples;
*/
typedef struct CdbPathLocus
{
	/* Which kind of distribution this locus describes. */
	CdbLocusType locustype;

	/*
	 * List of DistributionKeys, one per key column, for Hashed and HashedOJ
	 * loci.  NIL for Strewn and for every non-partitioned locus.
	 */
	List	   *distkey;

	/*
	 * Number of segments the locus refers to (segment 0 through
	 * numsegments-1).  The CdbPathLocus_Make* macros store -1 here for
	 * Null, Entry, General and OuterQuery loci.
	 */
	int			numsegments;
} CdbPathLocus;
/*
 * CdbPathLocus_NumSegments
 *
 * Accessor for the numsegments field of 'locus'.  'locus' is a
 * CdbPathLocus value, not a pointer to one.
 */
#define CdbPathLocus_NumSegments(locus)	((locus).numsegments)
/*
 * CdbPathLocus_IsEqual
 *
 * Exact comparison of two CdbPathLocus values: true only when the locus
 * type, the segment count and the distkey pointer are all identical.  Note
 * that distkey is compared by pointer, not by content; for a test of
 * logical equivalence call cdbpathlocus_equal() instead.
 */
#define CdbPathLocus_IsEqual(lhs, rhs) \
	((lhs).locustype == (rhs).locustype && \
	 (lhs).numsegments == (rhs).numsegments && \
	 (lhs).distkey == (rhs).distkey)
/*
 * CdbPathLocus_CommonSegments
 *
 * Applies Min() to the numsegments fields of two loci.  Both arguments are
 * CdbPathLocus values and may be evaluated more than once.
 */
#define CdbPathLocus_CommonSegments(lhs, rhs) \
	(Min((lhs).numsegments, (rhs).numsegments))
/*
 * Single-type tests, one per CdbLocusType value, listed in the order the
 * enum declares them.  Each takes a CdbPathLocus value (not a pointer).
 */
#define CdbPathLocus_IsNull(locus) \
	((locus).locustype == CdbLocusType_Null)
#define CdbPathLocus_IsEntry(locus) \
	((locus).locustype == CdbLocusType_Entry)
#define CdbPathLocus_IsSingleQE(locus) \
	((locus).locustype == CdbLocusType_SingleQE)
#define CdbPathLocus_IsGeneral(locus) \
	((locus).locustype == CdbLocusType_General)
#define CdbPathLocus_IsSegmentGeneral(locus) \
	((locus).locustype == CdbLocusType_SegmentGeneral)
#define CdbPathLocus_IsOuterQuery(locus) \
	((locus).locustype == CdbLocusType_OuterQuery)
#define CdbPathLocus_IsReplicated(locus) \
	((locus).locustype == CdbLocusType_Replicated)
#define CdbPathLocus_IsHashed(locus) \
	((locus).locustype == CdbLocusType_Hashed)
#define CdbPathLocus_IsHashedOJ(locus) \
	((locus).locustype == CdbLocusType_HashedOJ)
#define CdbPathLocus_IsStrewn(locus) \
	((locus).locustype == CdbLocusType_Strewn)

/*
 * CdbPathLocus_IsBottleneck
 *
 * True if the locus indicates confinement to a single process: either a
 * singleton qExec (1-gang) on a segment db or the entry db, or the qDisp
 * process.  Concretely, the Entry, OuterQuery and SingleQE locus types
 * qualify.
 */
#define CdbPathLocus_IsBottleneck(locus) \
	(CdbPathLocus_IsEntry(locus) || \
	 CdbPathLocus_IsOuterQuery(locus) || \
	 CdbPathLocus_IsSingleQE(locus))

/*
 * CdbPathLocus_IsPartitioned
 *
 * True if the locus indicates partitioning across an N-gang, such that each
 * qExec has a disjoint subset of the rows.  Concretely, the Hashed, HashedOJ
 * and Strewn locus types qualify.
 */
#define CdbPathLocus_IsPartitioned(locus) \
	(CdbPathLocus_IsHashed(locus) || \
	 CdbPathLocus_IsHashedOJ(locus) || \
	 CdbPathLocus_IsStrewn(locus))
/*
 * CdbPathLocus_MakeSimple
 *
 * Fill in *plocus with the given locus type and numsegments, and with an
 * empty (NIL) distribution key.  'plocus' is a CdbPathLocus pointer and is
 * evaluated exactly once.
 *
 * The scratch pointer deliberately has an unusual name.  With a short name
 * such as "_locus", a caller whose argument mentions a variable of that same
 * name would have it captured by the declaration inside the macro, and the
 * scratch pointer would end up initialized from itself.
 */
#define CdbPathLocus_MakeSimple(plocus, locustype_, numsegments_) \
	do { \
		CdbPathLocus *cdbpl_simple_dst_ = (plocus); \
		cdbpl_simple_dst_->locustype = (locustype_); \
		cdbpl_simple_dst_->numsegments = (numsegments_); \
		cdbpl_simple_dst_->distkey = NIL; \
	} while (0)

/*
 * Convenience wrappers around CdbPathLocus_MakeSimple, one per locus type
 * that carries no distribution key.  Null, Entry, General and OuterQuery
 * loci get numsegments = -1; the others take it from the caller.
 */
#define CdbPathLocus_MakeNull(plocus) \
	CdbPathLocus_MakeSimple((plocus), CdbLocusType_Null, -1)
#define CdbPathLocus_MakeEntry(plocus) \
	CdbPathLocus_MakeSimple((plocus), CdbLocusType_Entry, -1)
#define CdbPathLocus_MakeSingleQE(plocus, numsegments_) \
	CdbPathLocus_MakeSimple((plocus), CdbLocusType_SingleQE, (numsegments_))
#define CdbPathLocus_MakeGeneral(plocus) \
	CdbPathLocus_MakeSimple((plocus), CdbLocusType_General, -1)
#define CdbPathLocus_MakeSegmentGeneral(plocus, numsegments_) \
	CdbPathLocus_MakeSimple((plocus), CdbLocusType_SegmentGeneral, (numsegments_))
#define CdbPathLocus_MakeReplicated(plocus, numsegments_) \
	CdbPathLocus_MakeSimple((plocus), CdbLocusType_Replicated, (numsegments_))

/*
 * CdbPathLocus_MakeHashed
 *
 * Fill in *plocus as a Hashed locus with the given distribution key list
 * and numsegments, then Assert that the result passes
 * cdbpathlocus_is_valid().  The scratch pointer is given an unusual name
 * for the same capture-avoidance reason as in CdbPathLocus_MakeSimple;
 * this matters here for 'distkey_' as well as for 'plocus'.
 */
#define CdbPathLocus_MakeHashed(plocus, distkey_, numsegments_) \
	do { \
		CdbPathLocus *cdbpl_hashed_dst_ = (plocus); \
		cdbpl_hashed_dst_->locustype = CdbLocusType_Hashed; \
		cdbpl_hashed_dst_->numsegments = (numsegments_); \
		cdbpl_hashed_dst_->distkey = (distkey_); \
		Assert(cdbpathlocus_is_valid(*cdbpl_hashed_dst_)); \
	} while (0)

/*
 * CdbPathLocus_MakeHashedOJ
 *
 * Same as CdbPathLocus_MakeHashed, but the locus type stored is
 * CdbLocusType_HashedOJ.
 */
#define CdbPathLocus_MakeHashedOJ(plocus, distkey_, numsegments_) \
	do { \
		CdbPathLocus *cdbpl_hashedoj_dst_ = (plocus); \
		cdbpl_hashedoj_dst_->locustype = CdbLocusType_HashedOJ; \
		cdbpl_hashedoj_dst_->numsegments = (numsegments_); \
		cdbpl_hashedoj_dst_->distkey = (distkey_); \
		Assert(cdbpathlocus_is_valid(*cdbpl_hashedoj_dst_)); \
	} while (0)

/* Remaining key-less wrappers; see the note on the wrappers above. */
#define CdbPathLocus_MakeStrewn(plocus, numsegments_) \
	CdbPathLocus_MakeSimple((plocus), CdbLocusType_Strewn, (numsegments_))
#define CdbPathLocus_MakeOuterQuery(plocus) \
	CdbPathLocus_MakeSimple((plocus), CdbLocusType_OuterQuery, -1)
/************************************************************************/
/*
 * cdbpathlocus_equal
 *
 * Tests two loci for logical equivalence.  (CdbPathLocus_IsEqual is the
 * exact, field-by-field comparison.)
 */
extern bool cdbpathlocus_equal(CdbPathLocus a,
							   CdbPathLocus b);

/************************************************************************/

/*
 * Functions that produce a CdbPathLocus.
 *
 * NOTE(review): their contracts are not described in this header; consult
 * the definitions before relying on anything beyond the signatures.
 */
extern CdbPathLocus cdbpathlocus_for_insert(struct PlannerInfo *root,
											struct GpPolicy *policy,
											struct PathTarget *pathtarget);

extern CdbPathLocus cdbpathlocus_from_policy(struct PlannerInfo *root,
											 Index rti,
											 struct GpPolicy *policy);

extern CdbPathLocus cdbpathlocus_from_baserel(struct PlannerInfo *root,
											  struct RelOptInfo *rel);

extern CdbPathLocus cdbpathlocus_from_exprs(struct PlannerInfo *root,
											struct RelOptInfo *rel,
											List *hash_on_exprs,
											List *hash_opclasses,
											List *hash_sortrefs,
											int numsegments);

extern CdbPathLocus cdbpathlocus_from_subquery(struct PlannerInfo *root,
											   struct RelOptInfo *rel,
											   struct Path *subpath);

extern CdbPathLocus cdbpathlocus_join(JoinType jointype,
									  CdbPathLocus a,
									  CdbPathLocus b);
/************************************************************************/
/*
 * cdbpathlocus_get_distkey_exprs
 *
 * Produces a List with one Expr for each distkey column. Each item either is
 * in the given targetlist, or has no Var nodes that are not in the targetlist;
 * and uses only rels in the given set of relids.
 *
 * The function itself returns void and has two List ** out-parameters,
 * 'exprs_p' and 'opfamilies_p'.
 *
 * NOTE(review): an earlier wording of this comment said the function
 * "returns" the expression list, and NIL if the distkey cannot be expressed
 * in terms of the given relids and targetlist. Presumably that list (or NIL)
 * is now stored in *exprs_p, and *opfamilies_p receives one operator family
 * per expression -- confirm against the definition.
 */
extern void cdbpathlocus_get_distkey_exprs(CdbPathLocus locus,
Bitmapset *relids,
List *targetlist,
List **exprs_p,
List **opfamilies_p);
/*
 * cdbpathlocus_pull_above_projection
 *
 * Translates a locus across a projection: given a targetlist and a locus
 * that can be evaluated before the projection is applied, the result is an
 * equivalent locus that can be evaluated after it.  If the inputs the locus
 * needs are not projected, a strewn locus is returned instead.
 *
 * Parameters:
 *	relids		set of relids that may occur in the targetlist exprs
 *	targetlist	the projection; a List of TargetEntry, or merely a List
 *				of Expr
 *	newvarlist	optional List of Expr in 1-1 correspondence with
 *				'targetlist'.  When given, a copy of the corresponding
 *				newvarlist item is plugged in instead of creating a Var
 *				node to reference a targetlist item.
 *	newrelid	RTE index of the projected result, for finding or
 *				building Var nodes that reference the projected columns.
 *				Ignored if 'newvarlist' is specified.
 */
extern CdbPathLocus cdbpathlocus_pull_above_projection(struct PlannerInfo *root,
													   CdbPathLocus locus,
													   Bitmapset *relids,
													   List *targetlist,
													   List *newvarlist,
													   Index newrelid);
/************************************************************************/
/*
 * cdbpathlocus_is_hashed_on_exprs
 *
 * Tests whether grouping on a given set of exprs can be done in place,
 * without motion.
 *
 * For a hashed locus the answer is false if the distkey has a column whose
 * equivalence class contains no expr belonging to 'exprlist'.
 *
 * With 'ignore_constants' set, any constants in the locus are ignored.
 */
extern bool cdbpathlocus_is_hashed_on_exprs(CdbPathLocus locus,
											List *exprlist,
											bool ignore_constants);

/*
 * cdbpathlocus_is_hashed_on_eclasses
 *
 * Tests whether grouping on a given set of exprs can be done in place,
 * without motion; here the grouping is described by equivalence classes.
 *
 * For a hashed locus the answer is false if the distkey has any column
 * whose equivalence class is not in the 'eclasses' list.
 *
 * With 'ignore_constants' set, any constants in the locus are ignored.
 */
extern bool cdbpathlocus_is_hashed_on_eclasses(CdbPathLocus locus,
											   List *eclasses,
											   bool ignore_constants);

/*
 * cdbpathlocus_is_hashed_on_tlist
 *
 * NOTE(review): not described in this header.  Judging by the signature it
 * is presumably the same kind of test as the two functions above, driven by
 * a target list -- confirm against the definition.
 */
extern bool cdbpathlocus_is_hashed_on_tlist(CdbPathLocus locus,
											List *tlist,
											bool ignore_constants);
/*
 * cdbpathlocus_is_hashed_on_relids
 *
 * Tests whether a join result, whose locus is given, can be deduped in
 * place without motion.
 *
 * Background: when a subquery predicate (such as "x IN (SELECT ...)") has
 * been flattened into a join with the tables of the current query, a row of
 * the cross-product of the current query's tables might join with more than
 * one row from the subquery and thus be duplicated.  Removing such
 * duplicates after the join is called deduping.  If a row's duplicates
 * might occur in more than one partition, a Motion operator will be needed
 * to bring them together.
 *
 * For a hashed locus the answer is false if the distkey has a column whose
 * equivalence class contains no Var node belonging to the given set of
 * relids.  The caller should pass the relids of the non-subquery tables.
 */
extern bool cdbpathlocus_is_hashed_on_relids(CdbPathLocus locus,
											 Bitmapset *relids);
/*
 * cdbpathlocus_is_valid
 *
 * Reports whether 'locus' appears structurally valid.  The
 * CdbPathLocus_MakeHashed and CdbPathLocus_MakeHashedOJ macros Assert on
 * this for the locus they have just built.
 */
extern bool cdbpathlocus_is_valid(CdbPathLocus locus);
#endif /* CDBPATHLOCUS_H */
相关信息
相关文章
greenplumn cdbappendonlyblockdirectory 源码
greenplumn cdbappendonlystorage 源码
greenplumn cdbappendonlystorage_int 源码
greenplumn cdbappendonlystorageformat 源码
greenplumn cdbappendonlystoragelayer 源码
greenplumn cdbappendonlystorageread 源码
0
赞
热门推荐
-
2、 - 优质文章
-
3、 gate.io
-
8、 golang
-
9、 openharmony
-
10、 Vue中input框自动聚焦