greenplumn rangetypes_gist 源码

2022-08-18
浏览 (248)

greenplumn rangetypes_gist 代码

文件路径：/src/backend/utils/adt/rangetypes_gist.c

/*-------------------------------------------------------------------------
 *
 * rangetypes_gist.c
 *	  GiST support for range types.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/rangetypes_gist.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/gist.h"
#include "access/stratnum.h"
#include "utils/float.h"
#include "utils/fmgrprotos.h"
#include "utils/datum.h"
#include "utils/rangetypes.h"


/*
 * Range class properties used to segregate different classes of ranges in
 * GiST.  Each unique combination of properties is a class.  CLS_EMPTY cannot
 * be combined with anything else.
 */
#define CLS_NORMAL			0	/* Ordinary finite range (no bits set) */
#define CLS_LOWER_INF		1	/* Lower bound is infinity */
#define CLS_UPPER_INF		2	/* Upper bound is infinity */
#define CLS_CONTAIN_EMPTY	4	/* Contains underlying empty ranges */
#define CLS_EMPTY			8	/* Special class for empty ranges */

#define CLS_COUNT			9	/* # of classes; includes all combinations of
								 * properties. CLS_EMPTY doesn't combine with
								 * anything else, so it's only 2^3 + 1. */

/*
 * Minimum accepted ratio of split for items of the same class.  If the items
 * are of different classes, we will separate along those lines regardless of
 * the ratio.
 */
#define LIMIT_RATIO  0.3

/* Constants for fixed penalty values */
#define INFINITE_BOUND_PENALTY	2.0
#define CONTAIN_EMPTY_PENALTY  1.0
#define DEFAULT_SUBTYPE_DIFF_PENALTY  1.0

/*
 * Per-item data for range_gist_single_sorting_split.
 */
typedef struct
{
	int			index;
	RangeBound	bound;
} SingleBoundSortItem;

/* place on left or right side of split? */
typedef enum
{
	SPLIT_LEFT = 0,				/* makes initialization to SPLIT_LEFT easier */
	SPLIT_RIGHT
} SplitLR;

/*
 * Context for range_gist_consider_split.
 */
typedef struct
{
	TypeCacheEntry *typcache;	/* typcache for range type */
	bool		has_subtype_diff;	/* does it have subtype_diff? */
	int			entries_count;	/* total number of entries being split */

	/* Information about currently selected split follows */

	bool		first;			/* true if no split was selected yet */

	RangeBound *left_upper;		/* upper bound of left interval */
	RangeBound *right_lower;	/* lower bound of right interval */

	float4		ratio;			/* split ratio */
	float4		overlap;		/* overlap between left and right predicate */
	int			common_left;	/* # common entries destined for each side */
	int			common_right;
} ConsiderSplitContext;

/*
 * Bounds extracted from a non-empty range, for use in
 * range_gist_double_sorting_split.
 */
typedef struct
{
	RangeBound	lower;
	RangeBound	upper;
} NonEmptyRange;

/*
 * Represents information about an entry that can be placed in either group
 * without affecting overlap over selected axis ("common entry").
 */
typedef struct
{
	/* Index of entry in the initial array */
	int			index;
	/* Delta between closeness of range to each of the two groups */
	double		delta;
} CommonEntry;

/* Helper macros to place an entry in the left or right group during split */
/* Note direct access to variables v, typcache, left_range, right_range */
#define PLACE_LEFT(range, off)					\
	do {										\
		if (v->spl_nleft > 0)					\
			left_range = range_super_union(typcache, left_range, range); \
		else									\
			left_range = (range);				\
		v->spl_left[v->spl_nleft++] = (off);	\
	} while(0)

#define PLACE_RIGHT(range, off)					\
	do {										\
		if (v->spl_nright > 0)					\
			right_range = range_super_union(typcache, right_range, range); \
		else									\
			right_range = (range);				\
		v->spl_right[v->spl_nright++] = (off);	\
	} while(0)

/* Copy a RangeType datum (hardwires typbyval and typlen for ranges...) */
#define rangeCopy(r) \
	((RangeType *) DatumGetPointer(datumCopy(PointerGetDatum(r), \
											 false, -1)))

static RangeType *range_super_union(TypeCacheEntry *typcache, RangeType *r1,
									RangeType *r2);
static bool range_gist_consistent_int(TypeCacheEntry *typcache,
									  StrategyNumber strategy, RangeType *key,
									  Datum query);
static bool range_gist_consistent_leaf(TypeCacheEntry *typcache,
									   StrategyNumber strategy, RangeType *key,
									   Datum query);
static void range_gist_fallback_split(TypeCacheEntry *typcache,
									  GistEntryVector *entryvec,
									  GIST_SPLITVEC *v);
static void range_gist_class_split(TypeCacheEntry *typcache,
								   GistEntryVector *entryvec,
								   GIST_SPLITVEC *v,
								   SplitLR *classes_groups);
static void range_gist_single_sorting_split(TypeCacheEntry *typcache,
											GistEntryVector *entryvec,
											GIST_SPLITVEC *v,
											bool use_upper_bound);
static void range_gist_double_sorting_split(TypeCacheEntry *typcache,
											GistEntryVector *entryvec,
											GIST_SPLITVEC *v);
static void range_gist_consider_split(ConsiderSplitContext *context,
									  RangeBound *right_lower, int min_left_count,
									  RangeBound *left_upper, int max_left_count);
static int	get_gist_range_class(RangeType *range);
static int	single_bound_cmp(const void *a, const void *b, void *arg);
static int	interval_cmp_lower(const void *a, const void *b, void *arg);
static int	interval_cmp_upper(const void *a, const void *b, void *arg);
static int	common_entry_cmp(const void *i1, const void *i2);
static float8 call_subtype_diff(TypeCacheEntry *typcache,
								Datum val1, Datum val2);


/* GiST query consistency check */
Datum
range_gist_consistent(PG_FUNCTION_ARGS)
{
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
	Datum		query = PG_GETARG_DATUM(1);
	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);

	/* Oid subtype = PG_GETARG_OID(3); */
	bool	   *recheck = (bool *) PG_GETARG_POINTER(4);
	RangeType  *key = DatumGetRangeTypeP(entry->key);
	TypeCacheEntry *typcache;

	/* All operators served by this function are exact */
	*recheck = false;

	typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key));

	if (GIST_LEAF(entry))
		PG_RETURN_BOOL(range_gist_consistent_leaf(typcache, strategy,
												  key, query));
	else
		PG_RETURN_BOOL(range_gist_consistent_int(typcache, strategy,
												 key, query));
}

/* form union range */
Datum
range_gist_union(PG_FUNCTION_ARGS)
{
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
	GISTENTRY  *ent = entryvec->vector;
	RangeType  *result_range;
	TypeCacheEntry *typcache;
	int			i;

	result_range = DatumGetRangeTypeP(ent[0].key);

	typcache = range_get_typcache(fcinfo, RangeTypeGetOid(result_range));

	for (i = 1; i < entryvec->n; i++)
	{
		result_range = range_super_union(typcache, result_range,
										 DatumGetRangeTypeP(ent[i].key));
	}

	PG_RETURN_RANGE_P(result_range);
}

/*
 * We store ranges as ranges in GiST indexes, so we do not need
 * compress, decompress, or fetch functions.  Note this implies a limit
 * on the size of range values that can be indexed.
 */

/*
 * GiST page split penalty function.
 *
 * The penalty function has the following goals (in order from most to least
 * important):
 * - Keep normal ranges separate
 * - Avoid broadening the class of the original predicate
 * - Avoid broadening (as determined by subtype_diff) the original predicate
 * - Favor adding ranges to narrower original predicates
 */
Datum
range_gist_penalty(PG_FUNCTION_ARGS)
{
	GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
	GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
	float	   *penalty = (float *) PG_GETARG_POINTER(2);
	RangeType  *orig = DatumGetRangeTypeP(origentry->key);
	RangeType  *new = DatumGetRangeTypeP(newentry->key);
	TypeCacheEntry *typcache;
	bool		has_subtype_diff;
	RangeBound	orig_lower,
				new_lower,
				orig_upper,
				new_upper;
	bool		orig_empty,
				new_empty;

	if (RangeTypeGetOid(orig) != RangeTypeGetOid(new))
		elog(ERROR, "range types do not match");

	typcache = range_get_typcache(fcinfo, RangeTypeGetOid(orig));

	has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);

	range_deserialize(typcache, orig, &orig_lower, &orig_upper, &orig_empty);
	range_deserialize(typcache, new, &new_lower, &new_upper, &new_empty);

	/*
	 * Distinct branches for handling distinct classes of ranges.  Note that
	 * penalty values only need to be commensurate within the same class of
	 * new range.
	 */
	if (new_empty)
	{
		/* Handle insertion of empty range */
		if (orig_empty)
		{
			/*
			 * The best case is to insert it to empty original range.
			 * Insertion here means no broadening of original range. Also
			 * original range is the most narrow.
			 */
			*penalty = 0.0;
		}
		else if (RangeIsOrContainsEmpty(orig))
		{
			/*
			 * The second case is to insert empty range into range which
			 * contains at least one underlying empty range.  There is still
			 * no broadening of original range, but original range is not as
			 * narrow as possible.
			 */
			*penalty = CONTAIN_EMPTY_PENALTY;
		}
		else if (orig_lower.infinite && orig_upper.infinite)
		{
			/*
			 * Original range requires broadening.  (-inf; +inf) is most far
			 * from normal range in this case.
			 */
			*penalty = 2 * CONTAIN_EMPTY_PENALTY;
		}
		else if (orig_lower.infinite || orig_upper.infinite)
		{
			/*
			 * (-inf, x) or (x, +inf) original ranges are closer to normal
			 * ranges, so it's worse to mix it with empty ranges.
			 */
			*penalty = 3 * CONTAIN_EMPTY_PENALTY;
		}
		else
		{
			/*
			 * The least preferred case is broadening of normal range.
			 */
			*penalty = 4 * CONTAIN_EMPTY_PENALTY;
		}
	}
	else if (new_lower.infinite && new_upper.infinite)
	{
		/* Handle insertion of (-inf, +inf) range */
		if (orig_lower.infinite && orig_upper.infinite)
		{
			/*
			 * Best case is inserting to (-inf, +inf) original range.
			 */
			*penalty = 0.0;
		}
		else if (orig_lower.infinite || orig_upper.infinite)
		{
			/*
			 * When original range is (-inf, x) or (x, +inf) it requires
			 * broadening of original range (extension of one bound to
			 * infinity).
			 */
			*penalty = INFINITE_BOUND_PENALTY;
		}
		else
		{
			/*
			 * Insertion to normal original range is least preferred.
			 */
			*penalty = 2 * INFINITE_BOUND_PENALTY;
		}

		if (RangeIsOrContainsEmpty(orig))
		{
			/*
			 * Original range is narrower when it doesn't contain empty
			 * ranges. Add additional penalty otherwise.
			 */
			*penalty += CONTAIN_EMPTY_PENALTY;
		}
	}
	else if (new_lower.infinite)
	{
		/* Handle insertion of (-inf, x) range */
		if (!orig_empty && orig_lower.infinite)
		{
			if (orig_upper.infinite)
			{
				/*
				 * (-inf, +inf) range won't be extended by insertion of (-inf,
				 * x) range. It's a less desirable case than insertion to
				 * (-inf, y) original range without extension, because in that
				 * case original range is narrower. But we can't express that
				 * in single float value.
				 */
				*penalty = 0.0;
			}
			else
			{
				if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0)
				{
					/*
					 * Get extension of original range using subtype_diff. Use
					 * constant if subtype_diff unavailable.
					 */
					if (has_subtype_diff)
						*penalty = call_subtype_diff(typcache,
													 new_upper.val,
													 orig_upper.val);
					else
						*penalty = DEFAULT_SUBTYPE_DIFF_PENALTY;
				}
				else
				{
					/* No extension of original range */
					*penalty = 0.0;
				}
			}
		}
		else
		{
			/*
			 * If lower bound of original range is not -inf, then extension of
			 * it is infinity.
			 */
			*penalty = get_float4_infinity();
		}
	}
	else if (new_upper.infinite)
	{
		/* Handle insertion of (x, +inf) range */
		if (!orig_empty && orig_upper.infinite)
		{
			if (orig_lower.infinite)
			{
				/*
				 * (-inf, +inf) range won't be extended by insertion of (x,
				 * +inf) range. It's a less desirable case than insertion to
				 * (y, +inf) original range without extension, because in that
				 * case original range is narrower. But we can't express that
				 * in single float value.
				 */
				*penalty = 0.0;
			}
			else
			{
				if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0)
				{
					/*
					 * Get extension of original range using subtype_diff. Use
					 * constant if subtype_diff unavailable.
					 */
					if (has_subtype_diff)
						*penalty = call_subtype_diff(typcache,
													 orig_lower.val,
													 new_lower.val);
					else
						*penalty = DEFAULT_SUBTYPE_DIFF_PENALTY;
				}
				else
				{
					/* No extension of original range */
					*penalty = 0.0;
				}
			}
		}
		else
		{
			/*
			 * If upper bound of original range is not +inf, then extension of
			 * it is infinity.
			 */
			*penalty = get_float4_infinity();
		}
	}
	else
	{
		/* Handle insertion of normal (non-empty, non-infinite) range */
		if (orig_empty || orig_lower.infinite || orig_upper.infinite)
		{
			/*
			 * Avoid mixing normal ranges with infinite and empty ranges.
			 */
			*penalty = get_float4_infinity();
		}
		else
		{
			/*
			 * Calculate extension of original range by calling subtype_diff.
			 * Use constant if subtype_diff unavailable.
			 */
			float8		diff = 0.0;

			if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0)
			{
				if (has_subtype_diff)
					diff += call_subtype_diff(typcache,
											  orig_lower.val,
											  new_lower.val);
				else
					diff += DEFAULT_SUBTYPE_DIFF_PENALTY;
			}
			if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0)
			{
				if (has_subtype_diff)
					diff += call_subtype_diff(typcache,
											  new_upper.val,
											  orig_upper.val);
				else
					diff += DEFAULT_SUBTYPE_DIFF_PENALTY;
			}
			*penalty = diff;
		}
	}

	PG_RETURN_POINTER(penalty);
}

/*
 * The GiST PickSplit method for ranges
 *
 * Primarily, we try to segregate ranges of different classes.  If splitting
 * ranges of the same class, use the appropriate split method for that class.
 */
Datum
range_gist_picksplit(PG_FUNCTION_ARGS)
{
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
	TypeCacheEntry *typcache;
	OffsetNumber i;
	RangeType  *pred_left;
	int			nbytes;
	OffsetNumber maxoff;
	int			count_in_classes[CLS_COUNT];
	int			j;
	int			non_empty_classes_count = 0;
	int			biggest_class = -1;
	int			biggest_class_count = 0;
	int			total_count;

	/* use first item to look up range type's info */
	pred_left = DatumGetRangeTypeP(entryvec->vector[FirstOffsetNumber].key);
	typcache = range_get_typcache(fcinfo, RangeTypeGetOid(pred_left));

	maxoff = entryvec->n - 1;
	nbytes = (maxoff + 1) * sizeof(OffsetNumber);
	v->spl_left = (OffsetNumber *) palloc(nbytes);
	v->spl_right = (OffsetNumber *) palloc(nbytes);

	/*
	 * Get count distribution of range classes.
	 */
	memset(count_in_classes, 0, sizeof(count_in_classes));
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		RangeType  *range = DatumGetRangeTypeP(entryvec->vector[i].key);

		count_in_classes[get_gist_range_class(range)]++;
	}

	/*
	 * Count non-empty classes and find biggest class.
	 */
	total_count = maxoff;
	for (j = 0; j < CLS_COUNT; j++)
	{
		if (count_in_classes[j] > 0)
		{
			if (count_in_classes[j] > biggest_class_count)
			{
				biggest_class_count = count_in_classes[j];
				biggest_class = j;
			}
			non_empty_classes_count++;
		}
	}

	Assert(non_empty_classes_count > 0);

	if (non_empty_classes_count == 1)
	{
		/* One non-empty class, so split inside class */
		if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_NORMAL)
		{
			/* double sorting split for normal ranges */
			range_gist_double_sorting_split(typcache, entryvec, v);
		}
		else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_LOWER_INF)
		{
			/* upper bound sorting split for (-inf, x) ranges */
			range_gist_single_sorting_split(typcache, entryvec, v, true);
		}
		else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_UPPER_INF)
		{
			/* lower bound sorting split for (x, +inf) ranges */
			range_gist_single_sorting_split(typcache, entryvec, v, false);
		}
		else
		{
			/* trivial split for all (-inf, +inf) or all empty ranges */
			range_gist_fallback_split(typcache, entryvec, v);
		}
	}
	else
	{
		/*
		 * Class based split.
		 *
		 * To which side of the split should each class go?  Initialize them
		 * all to go to the left side.
		 */
		SplitLR		classes_groups[CLS_COUNT];

		memset(classes_groups, 0, sizeof(classes_groups));

		if (count_in_classes[CLS_NORMAL] > 0)
		{
			/* separate normal ranges if any */
			classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
		}
		else
		{
			/*----------
			 * Try to split classes in one of two ways:
			 *	1) containing infinities - not containing infinities
			 *	2) containing empty - not containing empty
			 *
			 * Select the way which balances the ranges between left and right
			 * the best. If split in these ways is not possible, there are at
			 * most 3 classes, so just separate biggest class.
			 *----------
			 */
			int			infCount,
						nonInfCount;
			int			emptyCount,
						nonEmptyCount;

			nonInfCount =
				count_in_classes[CLS_NORMAL] +
				count_in_classes[CLS_CONTAIN_EMPTY] +
				count_in_classes[CLS_EMPTY];
			infCount = total_count - nonInfCount;

			nonEmptyCount =
				count_in_classes[CLS_NORMAL] +
				count_in_classes[CLS_LOWER_INF] +
				count_in_classes[CLS_UPPER_INF] +
				count_in_classes[CLS_LOWER_INF | CLS_UPPER_INF];
			emptyCount = total_count - nonEmptyCount;

			if (infCount > 0 && nonInfCount > 0 &&
				(Abs(infCount - nonInfCount) <=
				 Abs(emptyCount - nonEmptyCount)))
			{
				classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
				classes_groups[CLS_CONTAIN_EMPTY] = SPLIT_RIGHT;
				classes_groups[CLS_EMPTY] = SPLIT_RIGHT;
			}
			else if (emptyCount > 0 && nonEmptyCount > 0)
			{
				classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
				classes_groups[CLS_LOWER_INF] = SPLIT_RIGHT;
				classes_groups[CLS_UPPER_INF] = SPLIT_RIGHT;
				classes_groups[CLS_LOWER_INF | CLS_UPPER_INF] = SPLIT_RIGHT;
			}
			else
			{
				/*
				 * Either total_count == emptyCount or total_count ==
				 * infCount.
				 */
				classes_groups[biggest_class] = SPLIT_RIGHT;
			}
		}

		range_gist_class_split(typcache, entryvec, v, classes_groups);
	}

	PG_RETURN_POINTER(v);
}

/* equality comparator for GiST */
Datum
range_gist_same(PG_FUNCTION_ARGS)
{
	RangeType  *r1 = PG_GETARG_RANGE_P(0);
	RangeType  *r2 = PG_GETARG_RANGE_P(1);
	bool	   *result = (bool *) PG_GETARG_POINTER(2);

	/*
	 * range_eq will ignore the RANGE_CONTAIN_EMPTY flag, so we have to check
	 * that for ourselves.  More generally, if the entries have been properly
	 * normalized, then unequal flags bytes must mean unequal ranges ... so
	 * let's just test all the flag bits at once.
	 */
	if (range_get_flags(r1) != range_get_flags(r2))
		*result = false;
	else
	{
		TypeCacheEntry *typcache;

		typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));

		*result = range_eq_internal(typcache, r1, r2);
	}

	PG_RETURN_POINTER(result);
}

/*
 *----------------------------------------------------------
 * STATIC FUNCTIONS
 *----------------------------------------------------------
 */

/*
 * Return the smallest range that contains r1 and r2
 *
 * This differs from regular range_union in two critical ways:
 * 1. It won't throw an error for non-adjacent r1 and r2, but just absorb
 * the intervening values into the result range.
 * 2. We track whether any empty range has been union'd into the result,
 * so that contained_by searches can be indexed.  Note that this means
 * that *all* unions formed within the GiST index must go through here.
 */
static RangeType *
range_super_union(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2)
{
	RangeType  *result;
	RangeBound	lower1,
				lower2;
	RangeBound	upper1,
				upper2;
	bool		empty1,
				empty2;
	char		flags1,
				flags2;
	RangeBound *result_lower;
	RangeBound *result_upper;

	range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
	range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
	flags1 = range_get_flags(r1);
	flags2 = range_get_flags(r2);

	if (empty1)
	{
		/* We can return r2 as-is if it already is or contains empty */
		if (flags2 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY))
			return r2;
		/* Else we'd better copy it (modify-in-place isn't safe) */
		r2 = rangeCopy(r2);
		range_set_contain_empty(r2);
		return r2;
	}
	if (empty2)
	{
		/* We can return r1 as-is if it already is or contains empty */
		if (flags1 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY))
			return r1;
		/* Else we'd better copy it (modify-in-place isn't safe) */
		r1 = rangeCopy(r1);
		range_set_contain_empty(r1);
		return r1;
	}

	if (range_cmp_bounds(typcache, &lower1, &lower2) <= 0)
		result_lower = &lower1;
	else
		result_lower = &lower2;

	if (range_cmp_bounds(typcache, &upper1, &upper2) >= 0)
		result_upper = &upper1;
	else
		result_upper = &upper2;

	/* optimization to avoid constructing a new range */
	if (result_lower == &lower1 && result_upper == &upper1 &&
		((flags1 & RANGE_CONTAIN_EMPTY) || !(flags2 & RANGE_CONTAIN_EMPTY)))
		return r1;
	if (result_lower == &lower2 && result_upper == &upper2 &&
		((flags2 & RANGE_CONTAIN_EMPTY) || !(flags1 & RANGE_CONTAIN_EMPTY)))
		return r2;

	result = make_range(typcache, result_lower, result_upper, false);

	if ((flags1 & RANGE_CONTAIN_EMPTY) || (flags2 & RANGE_CONTAIN_EMPTY))
		range_set_contain_empty(result);

	return result;
}

/*
 * GiST consistent test on an index internal page
 */
static bool
range_gist_consistent_int(TypeCacheEntry *typcache, StrategyNumber strategy,
						  RangeType *key, Datum query)
{
	switch (strategy)
	{
		case RANGESTRAT_BEFORE:
			if (RangeIsEmpty(key) || RangeIsEmpty(DatumGetRangeTypeP(query)))
				return false;
			return (!range_overright_internal(typcache, key,
											  DatumGetRangeTypeP(query)));
		case RANGESTRAT_OVERLEFT:
			if (RangeIsEmpty(key) || RangeIsEmpty(DatumGetRangeTypeP(query)))
				return false;
			return (!range_after_internal(typcache, key,
										  DatumGetRangeTypeP(query)));
		case RANGESTRAT_OVERLAPS:
			return range_overlaps_internal(typcache, key,
										   DatumGetRangeTypeP(query));
		case RANGESTRAT_OVERRIGHT:
			if (RangeIsEmpty(key) || RangeIsEmpty(DatumGetRangeTypeP(query)))
				return false;
			return (!range_before_internal(typcache, key,
										   DatumGetRangeTypeP(query)));
		case RANGESTRAT_AFTER:
			if (RangeIsEmpty(key) || RangeIsEmpty(DatumGetRangeTypeP(query)))
				return false;
			return (!range_overleft_internal(typcache, key,
											 DatumGetRangeTypeP(query)));
		case RANGESTRAT_ADJACENT:
			if (RangeIsEmpty(key) || RangeIsEmpty(DatumGetRangeTypeP(query)))
				return false;
			if (range_adjacent_internal(typcache, key,
										DatumGetRangeTypeP(query)))
				return true;
			return range_overlaps_internal(typcache, key,
										   DatumGetRangeTypeP(query));
		case RANGESTRAT_CONTAINS:
			return range_contains_internal(typcache, key,
										   DatumGetRangeTypeP(query));
		case RANGESTRAT_CONTAINED_BY:

			/*
			 * Empty ranges are contained by anything, so if key is or
			 * contains any empty ranges, we must descend into it.  Otherwise,
			 * descend only if key overlaps the query.
			 */
			if (RangeIsOrContainsEmpty(key))
				return true;
			return range_overlaps_internal(typcache, key,
										   DatumGetRangeTypeP(query));
		case RANGESTRAT_CONTAINS_ELEM:
			return range_contains_elem_internal(typcache, key, query);
		case RANGESTRAT_EQ:

			/*
			 * If query is empty, descend only if the key is or contains any
			 * empty ranges.  Otherwise, descend if key contains query.
			 */
			if (RangeIsEmpty(DatumGetRangeTypeP(query)))
				return RangeIsOrContainsEmpty(key);
			return range_contains_internal(typcache, key,
										   DatumGetRangeTypeP(query));
		default:
			elog(ERROR, "unrecognized range strategy: %d", strategy);
			return false;		/* keep compiler quiet */
	}
}

/*
 * GiST consistent test on an index leaf page
 */
static bool
range_gist_consistent_leaf(TypeCacheEntry *typcache, StrategyNumber strategy,
						   RangeType *key, Datum query)
{
	switch (strategy)
	{
		case RANGESTRAT_BEFORE:
			return range_before_internal(typcache, key,
										 DatumGetRangeTypeP(query));
		case RANGESTRAT_OVERLEFT:
			return range_overleft_internal(typcache, key,
										   DatumGetRangeTypeP(query));
		case RANGESTRAT_OVERLAPS:
			return range_overlaps_internal(typcache, key,
										   DatumGetRangeTypeP(query));
		case RANGESTRAT_OVERRIGHT:
			return range_overright_internal(typcache, key,
											DatumGetRangeTypeP(query));
		case RANGESTRAT_AFTER:
			return range_after_internal(typcache, key,
										DatumGetRangeTypeP(query));
		case RANGESTRAT_ADJACENT:
			return range_adjacent_internal(typcache, key,
										   DatumGetRangeTypeP(query));
		case RANGESTRAT_CONTAINS:
			return range_contains_internal(typcache, key,
										   DatumGetRangeTypeP(query));
		case RANGESTRAT_CONTAINED_BY:
			return range_contained_by_internal(typcache, key,
											   DatumGetRangeTypeP(query));
		case RANGESTRAT_CONTAINS_ELEM:
			return range_contains_elem_internal(typcache, key, query);
		case RANGESTRAT_EQ:
			return range_eq_internal(typcache, key, DatumGetRangeTypeP(query));
		default:
			elog(ERROR, "unrecognized range strategy: %d", strategy);
			return false;		/* keep compiler quiet */
	}
}

/*
 * Trivial split: half of entries will be placed on one page
 * and the other half on the other page.
 */
static void
range_gist_fallback_split(TypeCacheEntry *typcache,
						  GistEntryVector *entryvec,
						  GIST_SPLITVEC *v)
{
	RangeType  *left_range = NULL;
	RangeType  *right_range = NULL;
	OffsetNumber i,
				maxoff,
				split_idx;

	maxoff = entryvec->n - 1;
	/* Split entries before this to left page, after to right: */
	split_idx = (maxoff - FirstOffsetNumber) / 2 + FirstOffsetNumber;

	v->spl_nleft = 0;
	v->spl_nright = 0;
	for (i = FirstOffsetNumber; i <= maxoff; i++)
	{
		RangeType  *range = DatumGetRangeTypeP(entryvec->vector[i].key);

		if (i < split_idx)
			PLACE_LEFT(range, i);
		else
			PLACE_RIGHT(range, i);
	}

	v->spl_ldatum = RangeTypePGetDatum(left_range);
	v->spl_rdatum = RangeTypePGetDatum(right_range);
}

/*
 * Split based on classes of ranges.
 *
 * See get_gist_range_class for class definitions.
 * classes_groups is an array of length CLS_COUNT indicating the side of the
 * split to which each class should go.
 */
static void
range_gist_class_split(TypeCacheEntry *typcache,
					   GistEntryVector *entryvec,
					   GIST_SPLITVEC *v,
					   SplitLR *classes_groups)
{
	RangeType  *left_range = NULL;
	RangeType  *right_range = NULL;
	OffsetNumber i,
				maxoff;

	maxoff = entryvec->n - 1;

	v->spl_nleft = 0;
	v->spl_nright = 0;
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		RangeType  *range = DatumGetRangeTypeP(entryvec->vector[i].key);
		int			class;

		/* Get class of range */
		class = get_gist_range_class(range);

		/* Place range to appropriate page */
		if (classes_groups[class] == SPLIT_LEFT)
			PLACE_LEFT(range, i);
		else
		{
			Assert(classes_groups[class] == SPLIT_RIGHT);
			PLACE_RIGHT(range, i);
		}
	}

	v->spl_ldatum = RangeTypePGetDatum(left_range);
	v->spl_rdatum = RangeTypePGetDatum(right_range);
}

/*
 * Sorting based split. First half of entries according to the sort will be
 * placed to one page, and second half of entries will be placed to other
 * page. use_upper_bound parameter indicates whether to use upper or lower
 * bound for sorting.
 */
static void
range_gist_single_sorting_split(TypeCacheEntry *typcache,
								GistEntryVector *entryvec,
								GIST_SPLITVEC *v,
								bool use_upper_bound)
{
	SingleBoundSortItem *sortItems;
	RangeType  *left_range = NULL;
	RangeType  *right_range = NULL;
	OffsetNumber i,
				maxoff,
				split_idx;

	maxoff = entryvec->n - 1;

	sortItems = (SingleBoundSortItem *)
		palloc(maxoff * sizeof(SingleBoundSortItem));

	/*
	 * Prepare auxiliary array and sort the values.
	 */
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		RangeType  *range = DatumGetRangeTypeP(entryvec->vector[i].key);
		RangeBound	bound2;
		bool		empty;

		sortItems[i - 1].index = i;
		/* Put appropriate bound into array */
		if (use_upper_bound)
			range_deserialize(typcache, range, &bound2,
							  &sortItems[i - 1].bound, &empty);
		else
			range_deserialize(typcache, range, &sortItems[i - 1].bound,
							  &bound2, &empty);
		Assert(!empty);
	}

	qsort_arg(sortItems, maxoff, sizeof(SingleBoundSortItem),
			  single_bound_cmp, typcache);

	split_idx = maxoff / 2;

	v->spl_nleft = 0;
	v->spl_nright = 0;

	for (i = 0; i < maxoff; i++)
	{
		int			idx = sortItems[i].index;
		RangeType  *range = DatumGetRangeTypeP(entryvec->vector[idx].key);

		if (i < split_idx)
			PLACE_LEFT(range, idx);
		else
			PLACE_RIGHT(range, idx);
	}

	v->spl_ldatum = RangeTypePGetDatum(left_range);
	v->spl_rdatum = RangeTypePGetDatum(right_range);
}

/*
 * Double sorting split algorithm.
 *
 * The algorithm considers dividing ranges into two groups. The first (left)
 * group contains general left bound. The second (right) group contains
 * general right bound. The challenge is to find upper bound of left group
 * and lower bound of right group so that overlap of groups is minimal and
 * ratio of distribution is acceptable. Algorithm finds for each lower bound of
 * right group minimal upper bound of left group, and for each upper bound of
 * left group maximal lower bound of right group. For each found pair
 * range_gist_consider_split considers replacement of currently selected
 * split with the new one.
 *
 * After that, all the entries are divided into three groups:
 * 1) Entries which should be placed to the left group
 * 2) Entries which should be placed to the right group
 * 3) "Common entries" which can be placed to either group without affecting
 *	  amount of overlap.
 *
 * The common ranges are distributed by difference of distance from lower
 * bound of common range to lower bound of right group and distance from upper
 * bound of common range to upper bound of left group.
 *
 * For details see:
 * "A new double sorting-based node splitting algorithm for R-tree",
 * A. Korotkov
 * http://syrcose.ispras.ru/2011/files/SYRCoSE2011_Proceedings.pdf#page=36
 */
static void
range_gist_double_sorting_split(TypeCacheEntry *typcache,
								GistEntryVector *entryvec,
								GIST_SPLITVEC *v)
{
	ConsiderSplitContext context;
	OffsetNumber i,
				maxoff;
	RangeType  *range,
			   *left_range = NULL,
			   *right_range = NULL;
	int			common_entries_count;
	NonEmptyRange *by_lower,
			   *by_upper;
	CommonEntry *common_entries;
	int			nentries,
				i1,
				i2;
	RangeBound *right_lower,
			   *left_upper;

	memset(&context, 0, sizeof(ConsiderSplitContext));
	context.typcache = typcache;
	context.has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);

	maxoff = entryvec->n - 1;
	nentries = context.entries_count = maxoff - FirstOffsetNumber + 1;
	context.first = true;

	/* Allocate arrays for sorted range bounds */
	by_lower = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange));
	by_upper = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange));

	/* Fill arrays of bounds */
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		RangeType  *range = DatumGetRangeTypeP(entryvec->vector[i].key);
		bool		empty;

		range_deserialize(typcache, range,
						  &by_lower[i - FirstOffsetNumber].lower,
						  &by_lower[i - FirstOffsetNumber].upper,
						  &empty);
		Assert(!empty);
	}

	/*
	 * Make two arrays of range bounds: one sorted by lower bound and another
	 * sorted by upper bound.
	 */
	memcpy(by_upper, by_lower, nentries * sizeof(NonEmptyRange));
	qsort_arg(by_lower, nentries, sizeof(NonEmptyRange),
			  interval_cmp_lower, typcache);
	qsort_arg(by_upper, nentries, sizeof(NonEmptyRange),
			  interval_cmp_upper, typcache);

	/*----------
	 * The goal is to form a left and right range, so that every entry
	 * range is contained by either left or right interval (or both).
	 *
	 * For example, with the ranges (0,1), (1,3), (2,3), (2,4):
	 *
	 * 0 1 2 3 4
	 * +-+
	 *	 +---+
	 *	   +-+
	 *	   +---+
	 *
	 * The left and right ranges are of the form (0,a) and (b,4).
	 * We first consider splits where b is the lower bound of an entry.
	 * We iterate through all entries, and for each b, calculate the
	 * smallest possible a. Then we consider splits where a is the
	 * upper bound of an entry, and for each a, calculate the greatest
	 * possible b.
	 *
	 * In the above example, the first loop would consider splits:
	 * b=0: (0,1)-(0,4)
	 * b=1: (0,1)-(1,4)
	 * b=2: (0,3)-(2,4)
	 *
	 * And the second loop:
	 * a=1: (0,1)-(1,4)
	 * a=3: (0,3)-(2,4)
	 * a=4: (0,4)-(2,4)
	 *----------
	 */

	/*
	 * Iterate over lower bound of right group, finding smallest possible
	 * upper bound of left group.
	 */
	i1 = 0;
	i2 = 0;
	right_lower = &by_lower[i1].lower;
	left_upper = &by_upper[i2].lower;
	while (true)
	{
		/*
		 * Find next lower bound of right group.
		 */
		while (i1 < nentries &&
			   range_cmp_bounds(typcache, right_lower,
								&by_lower[i1].lower) == 0)
		{
			if (range_cmp_bounds(typcache, &by_lower[i1].upper,
								 left_upper) > 0)
				left_upper = &by_lower[i1].upper;
			i1++;
		}
		if (i1 >= nentries)
			break;
		right_lower = &by_lower[i1].lower;

		/*
		 * Find count of ranges which anyway should be placed to the left
		 * group.
		 */
		while (i2 < nentries &&
			   range_cmp_bounds(typcache, &by_upper[i2].upper,
								left_upper) <= 0)
			i2++;

		/*
		 * Consider found split to see if it's better than what we had.
		 */
		range_gist_consider_split(&context, right_lower, i1, left_upper, i2);
	}

	/*
	 * Iterate over upper bound of left group finding greatest possible lower
	 * bound of right group.
	 */
	i1 = nentries - 1;
	i2 = nentries - 1;
	right_lower = &by_lower[i1].upper;
	left_upper = &by_upper[i2].upper;
	while (true)
	{
		/*
		 * Find next upper bound of left group.
		 */
		while (i2 >= 0 &&
			   range_cmp_bounds(typcache, left_upper,
								&by_upper[i2].upper) == 0)
		{
			if (range_cmp_bounds(typcache, &by_upper[i2].lower,
								 right_lower) < 0)
				right_lower = &by_upper[i2].lower;
			i2--;
		}
		if (i2 < 0)
			break;
		left_upper = &by_upper[i2].upper;

		/*
		 * Find count of intervals which anyway should be placed to the right
		 * group.
		 */
		while (i1 >= 0 &&
			   range_cmp_bounds(typcache, &by_lower[i1].lower,
								right_lower) >= 0)
			i1--;

		/*
		 * Consider found split to see if it's better than what we had.
		 */
		range_gist_consider_split(&context, right_lower, i1 + 1,
								  left_upper, i2 + 1);
	}

	/*
	 * If we failed to find any acceptable splits, use trivial split.
	 */
	if (context.first)
	{
		range_gist_fallback_split(typcache, entryvec, v);
		return;
	}

	/*
	 * Ok, we have now selected bounds of the groups. Now we have to
	 * distribute entries themselves. At first we distribute entries which can
	 * be placed unambiguously and collect "common entries" to array.
	 */

	/* Allocate vectors for results */
	v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
	v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
	v->spl_nleft = 0;
	v->spl_nright = 0;

	/*
	 * Allocate an array for "common entries" - entries which can be placed to
	 * either group without affecting overlap along selected axis.
	 */
	common_entries_count = 0;
	common_entries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));

	/*
	 * Distribute entries which can be distributed unambiguously, and collect
	 * common entries.
	 */
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		RangeBound	lower,
					upper;
		bool		empty;

		/*
		 * Get upper and lower bounds along selected axis.
		 */
		range = DatumGetRangeTypeP(entryvec->vector[i].key);

		range_deserialize(typcache, range, &lower, &upper, &empty);

		if (range_cmp_bounds(typcache, &upper, context.left_upper) <= 0)
		{
			/* Fits in the left group */
			if (range_cmp_bounds(typcache, &lower, context.right_lower) >= 0)
			{
				/* Fits also in the right group, so "common entry" */
				common_entries[common_entries_count].index = i;
				if (context.has_subtype_diff)
				{
					/*
					 * delta = (lower - context.right_lower) -
					 * (context.left_upper - upper)
					 */
					common_entries[common_entries_count].delta =
						call_subtype_diff(typcache,
										  lower.val,
										  context.right_lower->val) -
						call_subtype_diff(typcache,
										  context.left_upper->val,
										  upper.val);
				}
				else
				{
					/* Without subtype_diff, take all deltas as zero */
					common_entries[common_entries_count].delta = 0;
				}
				common_entries_count++;
			}
			else
			{
				/* Doesn't fit to the right group, so join to the left group */
				PLACE_LEFT(range, i);
			}
		}
		else
		{
			/*
			 * Each entry should fit on either left or right group. Since this
			 * entry didn't fit in the left group, it better fit in the right
			 * group.
			 */
			Assert(range_cmp_bounds(typcache, &lower,
									context.right_lower) >= 0);
			PLACE_RIGHT(range, i);
		}
	}

	/*
	 * Distribute "common entries", if any.
	 */
	if (common_entries_count > 0)
	{
		/*
		 * Sort "common entries" by calculated deltas in order to distribute
		 * the most ambiguous entries first.
		 */
		qsort(common_entries, common_entries_count, sizeof(CommonEntry),
			  common_entry_cmp);

		/*
		 * Distribute "common entries" between groups according to sorting.
		 */
		for (i = 0; i < common_entries_count; i++)
		{
			int			idx = common_entries[i].index;

			range = DatumGetRangeTypeP(entryvec->vector[idx].key);

			/*
			 * Check if we have to place this entry in either group to achieve
			 * LIMIT_RATIO.
			 */
			if (i < context.common_left)
				PLACE_LEFT(range, idx);
			else
				PLACE_RIGHT(range, idx);
		}
	}

	v->spl_ldatum = PointerGetDatum(left_range);
	v->spl_rdatum = PointerGetDatum(right_range);
}

/*
 * Consider replacement of currently selected split with a better one
 * during range_gist_double_sorting_split.
 */
static void
range_gist_consider_split(ConsiderSplitContext *context,
						  RangeBound *right_lower, int min_left_count,
						  RangeBound *left_upper, int max_left_count)
{
	int			left_count,
				right_count;
	float4		ratio,
				overlap;

	/*
	 * Calculate entries distribution ratio assuming most uniform distribution
	 * of common entries.
	 */
	if (min_left_count >= (context->entries_count + 1) / 2)
		left_count = min_left_count;
	else if (max_left_count <= context->entries_count / 2)
		left_count = max_left_count;
	else
		left_count = context->entries_count / 2;
	right_count = context->entries_count - left_count;

	/*
	 * Ratio of split: quotient between size of smaller group and total
	 * entries count.  This is necessarily 0.5 or less; if it's less than
	 * LIMIT_RATIO then we will never accept the new split.
	 */
	ratio = ((float4) Min(left_count, right_count)) /
		((float4) context->entries_count);

	if (ratio > LIMIT_RATIO)
	{
		bool		selectthis = false;

		/*
		 * The ratio is acceptable, so compare current split with previously
		 * selected one. We search for minimal overlap (allowing negative
		 * values) and minimal ratio secondarily.  If subtype_diff is
		 * available, it's used for overlap measure.  Without subtype_diff we
		 * use number of "common entries" as an overlap measure.
		 */
		if (context->has_subtype_diff)
			overlap = call_subtype_diff(context->typcache,
										left_upper->val,
										right_lower->val);
		else
			overlap = max_left_count - min_left_count;

		/* If there is no previous selection, select this split */
		if (context->first)
			selectthis = true;
		else
		{
			/*
			 * Choose the new split if it has a smaller overlap, or same
			 * overlap but better ratio.
			 */
			if (overlap < context->overlap ||
				(overlap == context->overlap && ratio > context->ratio))
				selectthis = true;
		}

		if (selectthis)
		{
			/* save information about selected split */
			context->first = false;
			context->ratio = ratio;
			context->overlap = overlap;
			context->right_lower = right_lower;
			context->left_upper = left_upper;
			context->common_left = max_left_count - left_count;
			context->common_right = left_count - min_left_count;
		}
	}
}

/*
 * Find class number for range.
 *
 * The class number is a valid combination of the properties of the
 * range.  Note: the highest possible number is 8, because CLS_EMPTY
 * can't be combined with anything else.
 */
static int
get_gist_range_class(RangeType *range)
{
	int			classNumber;
	char		flags;

	flags = range_get_flags(range);
	if (flags & RANGE_EMPTY)
	{
		classNumber = CLS_EMPTY;
	}
	else
	{
		classNumber = 0;
		if (flags & RANGE_LB_INF)
			classNumber |= CLS_LOWER_INF;
		if (flags & RANGE_UB_INF)
			classNumber |= CLS_UPPER_INF;
		if (flags & RANGE_CONTAIN_EMPTY)
			classNumber |= CLS_CONTAIN_EMPTY;
	}
	return classNumber;
}

/*
 * Comparison function for range_gist_single_sorting_split.
 */
static int
single_bound_cmp(const void *a, const void *b, void *arg)
{
	SingleBoundSortItem *i1 = (SingleBoundSortItem *) a;
	SingleBoundSortItem *i2 = (SingleBoundSortItem *) b;
	TypeCacheEntry *typcache = (TypeCacheEntry *) arg;

	return range_cmp_bounds(typcache, &i1->bound, &i2->bound);
}

/*
 * Compare NonEmptyRanges by lower bound.
 */
static int
interval_cmp_lower(const void *a, const void *b, void *arg)
{
	NonEmptyRange *i1 = (NonEmptyRange *) a;
	NonEmptyRange *i2 = (NonEmptyRange *) b;
	TypeCacheEntry *typcache = (TypeCacheEntry *) arg;

	return range_cmp_bounds(typcache, &i1->lower, &i2->lower);
}

/*
 * Compare NonEmptyRanges by upper bound.
 */
static int
interval_cmp_upper(const void *a, const void *b, void *arg)
{
	NonEmptyRange *i1 = (NonEmptyRange *) a;
	NonEmptyRange *i2 = (NonEmptyRange *) b;
	TypeCacheEntry *typcache = (TypeCacheEntry *) arg;

	return range_cmp_bounds(typcache, &i1->upper, &i2->upper);
}

/*
 * Compare CommonEntrys by their deltas.
 */
static int
common_entry_cmp(const void *i1, const void *i2)
{
	double		delta1 = ((CommonEntry *) i1)->delta;
	double		delta2 = ((CommonEntry *) i2)->delta;

	if (delta1 < delta2)
		return -1;
	else if (delta1 > delta2)
		return 1;
	else
		return 0;
}

/*
 * Convenience function to invoke type-specific subtype_diff function.
 * Caller must have already checked that there is one for the range type.
 */
static float8
call_subtype_diff(TypeCacheEntry *typcache, Datum val1, Datum val2)
{
	float8		value;

	value = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
											 typcache->rng_collation,
											 val1, val2));
	/* Cope with buggy subtype_diff function by returning zero */
	if (value >= 0.0)
		return value;
	return 0.0;
}

相关信息

greenplumn 源码目录

相关文章

greenplumn acl 源码

greenplumn amutils 源码

greenplumn array_expanded 源码

greenplumn array_selfuncs 源码

greenplumn array_typanalyze 源码

greenplumn array_userfuncs 源码

greenplumn arrayfuncs 源码

greenplumn arrayutils 源码

greenplumn ascii 源码

greenplumn bool 源码

0 赞

所属分类： 大数据
本文标签： greenplum
版权声明： 原创文章如转载，请注明本文链接: https://m.seaxiang.com/blog/90ff12f2cbcd4e508ca699bd6422bd8f

热门推荐

1、直接访问google.com
2、优质文章
3、 gt
4、 harmony 鸿蒙hdc使用指导
5、 harmony 鸿蒙ArkUI组件（ArkTS）开发常见问题
6、 harmony 鸿蒙初识ArkTS语言
7、 openharmony
8、 flink kafka connector scan.startup.mode 的几个选项
9、 golang
10、 Vue中input框自动聚焦

Loading...