greenplumn CBucket 源码
greenplumn CBucket 代码
文件路径:/src/backend/gporca/libnaucrates/include/naucrates/statistics/CBucket.h
//---------------------------------------------------------------------------
//	Greenplum Database
//	Copyright (C) 2011 EMC Corp.
//
//	@filename:
//		CBucket.h
//
//	@doc:
//		Bucket in a histogram
//---------------------------------------------------------------------------
#ifndef GPNAUCRATES_CBucket_H
#define GPNAUCRATES_CBucket_H
#include "gpos/base.h"
#include "gpos/common/DbgPrintMixin.h"
#include "gpos/error/CAutoTrace.h"
#include "gpos/task/CTask.h"
#include "naucrates/statistics/CPoint.h"
#include "naucrates/statistics/IBucket.h"
#define GPOPT_BUCKET_DEFAULT_FREQ 1.0
#define GPOPT_BUCKET_DEFAULT_DISTINCT 1.0
namespace gpnaucrates
{
using namespace gpos;
using namespace gpmd;
// forward decl
class CBucket;
// dynamic array of buckets
using CBucketArray = CDynamicPtrArray<CBucket, CleanupDelete>;
//---------------------------------------------------------------------------
//	@class:
//		CBucket
//
//	@doc:
//		Represents a bucket in a histogram
//
//---------------------------------------------------------------------------
class CBucket : public IBucket, public gpos::DbgPrintMixin<CBucket>
{
private:
	// lower bound of bucket
	CPoint *m_bucket_lower_bound;
	// upper bound of bucket
	CPoint *m_bucket_upper_bound;
	// is lower bound closed (does bucket include boundary value)
	BOOL m_is_lower_closed;
	// is upper bound closed (does bucket includes boundary value)
	BOOL m_is_upper_closed;
	// frequency corresponding to bucket
	CDouble m_frequency;
	// number of distinct elements in bucket
	CDouble m_distinct;
public:
	CBucket &operator=(const CBucket &) = delete;
	CBucket(const CBucket &) = delete;
	// ctor
	CBucket(CPoint *bucket_lower_bound, CPoint *bucket_upper_bound,
			BOOL is_lower_closed, BOOL is_upper_closed, CDouble frequency,
			CDouble distinct);
	// dtor
	~CBucket() override;
	// does bucket contain point
	BOOL Contains(const CPoint *point) const;
	// is the point before the lower bound of the bucket
	BOOL IsBefore(const CPoint *point) const;
	// is the point after the upper bound of the bucket
	BOOL IsAfter(const CPoint *point) const;
	// what percentage of bucket is covered by [lb,pp]
	CDouble GetOverlapPercentage(const CPoint *point,
								 BOOL include_point = true) const;
	// frequency associated with bucket
	CDouble
	GetFrequency() const
	{
		return m_frequency;
	}
	// width of bucket
	CDouble Width() const;
	// set frequency
	void
	SetFrequency(CDouble frequency)
	{
		GPOS_ASSERT(CDouble(0) <= frequency);
		GPOS_ASSERT(CDouble(1.0) >= frequency);
		m_frequency = frequency;
	}
	// set number of distinct values
	void
	SetDistinct(CDouble distinct)
	{
		GPOS_ASSERT(CDouble(0) <= distinct);
		m_distinct = distinct;
	}
	// number of distinct values in bucket
	CDouble
	GetNumDistinct() const
	{
		return m_distinct;
	}
	// lower point
	CPoint *
	GetLowerBound() const override
	{
		return m_bucket_lower_bound;
	}
	// upper point
	CPoint *
	GetUpperBound() const override
	{
		return m_bucket_upper_bound;
	}
	// is lower bound closed (does bucket includes boundary value)
	BOOL
	IsLowerClosed() const
	{
		return m_is_lower_closed;
	}
	// is upper bound closed (does bucket includes boundary value)
	BOOL
	IsUpperClosed() const
	{
		return m_is_upper_closed;
	}
	// does bucket's range intersect another's
	BOOL Intersects(const CBucket *bucket) const;
	// does bucket's range subsume another's
	BOOL Subsumes(const CBucket *bucket) const;
	// does bucket occur before another
	BOOL IsBefore(const CBucket *bucket) const;
	// does bucket occur after another
	BOOL IsAfter(const CBucket *bucket) const;
	// print function
	IOstream &OsPrint(IOstream &os) const;
	// construct new bucket with lower bound greater than given point
	CBucket *MakeBucketGreaterThan(CMemoryPool *mp, CPoint *point) const;
	// scale down version of bucket adjusting upper boundary
	CBucket *MakeBucketScaleUpper(CMemoryPool *mp, CPoint *bucket_upper_bound,
								  BOOL include_upper) const;
	// scale down version of bucket adjusting lower boundary
	CBucket *MakeBucketScaleLower(CMemoryPool *mp, CPoint *bucket_lower_bound,
								  BOOL include_lower) const;
	// extract singleton bucket at given point
	// use_width to calculate the scaling ratio instead of default (ndv)
	CBucket *MakeBucketSingleton(CMemoryPool *mp,
								 CPoint *point_singleton) const;
	// create a new bucket by intersecting with another and return the percentage of each of the buckets that intersect
	CBucket *MakeBucketIntersect(CMemoryPool *mp, CBucket *bucket,
								 CDouble *result_freq_intersect1,
								 CDouble *result_freq_intersect2) const;
	// Remove a bucket range. This produces lower and upper split
	void Difference(CMemoryPool *mp, CBucket *bucket_other,
					CBucket **result_bucket_lower,
					CBucket **result_bucket_upper);
	// return copy of bucket
	CBucket *MakeBucketCopy(CMemoryPool *mp);
	// return a copy of the bucket with updated frequency based on the new total number of rows
	CBucket *MakeBucketUpdateFrequency(CMemoryPool *mp, CDouble rows_old,
									   CDouble rows_new);
	// Attempt a merge with another bucket and return leftovers
	CBucket *SplitAndMergeBuckets(CMemoryPool *mp, CBucket *bucket_other,
								  CDouble rows, CDouble rows_other,
								  CBucket **bucket1_new, CBucket **bucket2_new,
								  CDouble *result_rows,
								  BOOL is_union_all = true);
	// does bucket support sampling
	BOOL
	CanSample() const
	{
		return m_bucket_lower_bound->GetDatum()->StatsMappable();
	}
	BOOL Equals(const CBucket *bucket) const;
	// generate a random data point within bucket boundaries
	CDouble GetSample(ULONG *seed) const;
	// compare lower bucket boundaries
	static INT CompareLowerBounds(const CBucket *bucket1,
								  const CBucket *bucket2);
	// compare upper bucket boundaries
	static INT CompareUpperBounds(const CBucket *bucket1,
								  const CBucket *bucket2);
	// compare lower bound of first bucket to upper bound of second bucket
	static INT CompareLowerBoundToUpperBound(const CBucket *bucket1,
											 const CBucket *bucket2);
	// create a new singleton bucket with the given datum as it lower and upper bounds
	static CBucket *MakeBucketSingleton(CMemoryPool *mp, IDatum *datum);
};
}  // namespace gpnaucrates
#endif	// !GPNAUCRATES_CBucket_H
// EOF
相关信息
相关文章
greenplumn CFilterStatsProcessor 源码
greenplumn CGroupByStatsProcessor 源码
greenplumn CInnerJoinStatsProcessor 源码
greenplumn CJoinStatsProcessor 源码
greenplumn CLeftAntiSemiJoinStatsProcessor 源码
greenplumn CLeftOuterJoinStatsProcessor 源码
greenplumn CLeftSemiJoinStatsProcessor 源码
                        
                            0
                        
                        
                             赞
                        
                    
                    
                热门推荐
- 
                        2、 - 优质文章
- 
                        3、 gate.io
- 
                        8、 openharmony
- 
                        9、 golang