4 #include "access/gist.h"
5 #include "access/skey.h"
6 #include "access/tuptoaster.h"
7 #include "utils/memutils.h"
9 typedef struct SmlSign {
10 int32 vl_len_; /* varlena header (do not touch directly!) */
17 #define SMLSIGNHDRSZ (offsetof(SmlSign, data))
21 #define SIGLEN ( sizeof(int)*SIGLENINT )
22 #define SIGLENBIT (SIGLEN*BITBYTE - 1) /* see makesign */
23 typedef char BITVEC[SIGLEN];
24 typedef char *BITVECP;
28 #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
29 #define GETBITBYTE(x,i) ( ((char)(x)) >> i & 0x01 )
30 #define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
31 #define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )
32 #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
34 #define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
35 #define HASH(sign, val) SETBIT((sign), HASHVAL(val))
39 #define ALLISTRUE 0x04
41 #define ISARRKEY(x) ( ((SmlSign*)x)->flag & ARRKEY )
42 #define ISSIGNKEY(x) ( ((SmlSign*)x)->flag & SIGNKEY )
43 #define ISALLTRUE(x) ( ((SmlSign*)x)->flag & ALLISTRUE )
45 #define CALCGTSIZE(flag, len) ( SMLSIGNHDRSZ + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(uint32)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
46 #define GETSIGN(x) ( (BITVECP)( (char*)x+SMLSIGNHDRSZ ) )
47 #define GETARR(x) ( (uint32*)( (char*)x+SMLSIGNHDRSZ ) )
49 #define GETENTRY(vec,pos) ((SmlSign *) DatumGetPointer((vec)->vector[(pos)].key))
54 PG_FUNCTION_INFO_V1(gsmlsign_in);
55 Datum gsmlsign_in(PG_FUNCTION_ARGS);
57 gsmlsign_in(PG_FUNCTION_ARGS)
59 elog(ERROR, "not implemented");
63 PG_FUNCTION_INFO_V1(gsmlsign_out);
64 Datum gsmlsign_out(PG_FUNCTION_ARGS);
66 gsmlsign_out(PG_FUNCTION_ARGS)
68 elog(ERROR, "not implemented");
76 /* Number of one-bits in an unsigned byte */
77 static const uint8 number_of_ones[256] = {
78 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
79 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
80 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
81 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
82 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
83 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
84 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
85 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
86 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
87 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
88 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
89 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
90 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
91 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
92 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
93 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
97 compareint(const void *va, const void *vb)
99 uint32 a = *((uint32 *) va);
100 uint32 b = *((uint32 *) vb);
104 return (a > b) ? 1 : -1;
108 * Removes duplicates from an array of int4. 'l' is
109 * size of the input array. Returns the new size of the array.
112 uniqueint(uint32 *a, int4 l, int4 *max)
125 qsort((void *) a, l, sizeof(uint32), compareint);
148 Array2HashedArray(ProcTypeInfo info, ArrayType *a)
150 SimpleArray *s = Array2SimpleArray(info, a);
155 len = CALCGTSIZE( ARRKEY, s->nelems );
157 getFmgrInfoHash(s->info);
158 if (s->info->tupDesc)
159 elog(ERROR, "GiST doesn't support composite (weighted) type");
161 sign = palloc( len );
163 sign->size = s->nelems;
166 for(i=0;i<s->nelems;i++)
167 ptr[i] = DatumGetUInt32( FunctionCall1( &s->info->hashFunc, s->elems[i] ) );
170 * there is a collision of hash-function; len is always equal or less than
173 sign->size = uniqueint( GETARR(sign), sign->size, &sign->maxrepeat );
174 len = CALCGTSIZE( ARRKEY, sign->size );
175 SET_VARSIZE(sign, len);
181 HashedElemCmp(const void *va, const void *vb)
183 uint32 a = ((HashedElem *) va)->hash;
184 uint32 b = ((HashedElem *) vb)->hash;
188 double ma = ((HashedElem *) va)->idfMin;
189 double mb = ((HashedElem *) va)->idfMin;
194 return ( ma > mb ) ? 1 : -1;
197 return (a > b) ? 1 : -1;
201 uniqueHashedElem(HashedElem *a, int4 l)
211 qsort(a, l, sizeof(HashedElem), HashedElemCmp);
214 if (ptr->hash != res->hash)
218 res->idfMax = ptr->idfMax;
226 getHashedCache(void *cache)
228 StatCache *stat = getStat(cache, SIGLENBIT);
230 if ( stat->nhelems < 0 )
237 if (stat->info->tupDesc)
238 elog(ERROR, "GiST doesn't support composite (weighted) type");
239 getFmgrInfoHash(stat->info);
240 for(i=0;i<stat->nelems;i++)
242 uint32 hash = DatumGetUInt32( FunctionCall1( &stat->info->hashFunc, stat->elems[i].datum ) );
243 int index = HASHVAL(hash);
245 stat->helems[i].hash = hash;
246 stat->helems[i].idfMin = stat->helems[i].idfMax = stat->elems[i].idf;
247 if ( stat->selems[index].idfMin == 0.0 )
248 stat->selems[index].idfMin = stat->selems[index].idfMax = stat->elems[i].idf;
249 else if ( stat->selems[index].idfMin > stat->elems[i].idf )
250 stat->selems[index].idfMin = stat->elems[i].idf;
251 else if ( stat->selems[index].idfMax < stat->elems[i].idf )
252 stat->selems[index].idfMax = stat->elems[i].idf;
255 stat->nhelems = uniqueHashedElem( stat->helems, stat->nelems);
262 getHashedElemIdf(StatCache *stat, uint32 hash, HashedElem *StopLow)
264 HashedElem *StopMiddle,
265 *StopHigh = stat->helems + stat->nhelems;
268 StopLow = stat->helems;
270 while (StopLow < StopHigh) {
271 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
273 if ( StopMiddle->hash == hash )
275 else if ( StopMiddle->hash < hash )
276 StopLow = StopMiddle + 1;
278 StopHigh = StopMiddle;
285 fillHashVal(void *cache, SimpleArray *a)
292 allocateHash(cache, a);
294 if (a->info->tupDesc)
295 elog(ERROR, "GiST doesn't support composite (weighted) type");
296 getFmgrInfoHash(a->info);
298 for(i=0;i<a->nelems;i++)
299 a->hash[i] = DatumGetUInt32( FunctionCall1( &a->info->hashFunc, a->elems[i] ) );
304 hasHashedElem(SmlSign *a, uint32 h)
306 uint32 *StopLow = GETARR(a),
307 *StopHigh = GETARR(a) + a->size,
310 while (StopLow < StopHigh) {
311 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
313 if ( *StopMiddle == h )
315 else if ( *StopMiddle < h )
316 StopLow = StopMiddle + 1;
318 StopHigh = StopMiddle;
325 makesign(BITVECP sign, SmlSign *a)
328 uint32 *ptr = GETARR(a);
330 MemSet((void *) sign, 0, sizeof(BITVEC));
331 SETBIT(sign, SIGLENBIT); /* set last unused bit */
333 for (i = 0; i < a->size; i++)
338 sizebitvec(BITVECP sign)
344 size += number_of_ones[(unsigned char) sign[i]];
349 PG_FUNCTION_INFO_V1(gsmlsign_compress);
350 Datum gsmlsign_compress(PG_FUNCTION_ARGS);
352 gsmlsign_compress(PG_FUNCTION_ARGS)
354 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
355 GISTENTRY *retval = entry;
357 if (entry->leafkey) /* new key */
360 ArrayType *a = DatumGetArrayTypeP(entry->key);
362 sign = Array2HashedArray(NULL, a);
364 if ( VARSIZE(sign) > TOAST_INDEX_TARGET )
365 { /* make signature due to its big size */
369 len = CALCGTSIZE( SIGNKEY, sign->size );
370 tmpsign = palloc( len );
371 tmpsign->flag = SIGNKEY;
372 SET_VARSIZE(tmpsign, len);
374 makesign(GETSIGN(tmpsign), sign);
375 tmpsign->size = sizebitvec(GETSIGN(tmpsign));
376 tmpsign->maxrepeat = sign->maxrepeat;
380 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
381 gistentryinit(*retval, PointerGetDatum(sign),
382 entry->rel, entry->page,
383 entry->offset, FALSE);
385 else if ( ISSIGNKEY(DatumGetPointer(entry->key)) &&
386 !ISALLTRUE(DatumGetPointer(entry->key)) )
388 SmlSign *sign = (SmlSign*)DatumGetPointer(entry->key);
390 Assert( sign->size == sizebitvec(GETSIGN(sign)) );
392 if ( sign->size == SIGLENBIT )
394 int4 len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
395 int4 maxrepeat = sign->maxrepeat;
397 sign = (SmlSign *) palloc(len);
398 SET_VARSIZE(sign, len);
399 sign->flag = SIGNKEY | ALLISTRUE;
400 sign->size = SIGLENBIT;
401 sign->maxrepeat = maxrepeat;
403 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
405 gistentryinit(*retval, PointerGetDatum(sign),
406 entry->rel, entry->page,
407 entry->offset, FALSE);
411 PG_RETURN_POINTER(retval);
414 PG_FUNCTION_INFO_V1(gsmlsign_decompress);
415 Datum gsmlsign_decompress(PG_FUNCTION_ARGS);
417 gsmlsign_decompress(PG_FUNCTION_ARGS)
419 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
420 SmlSign *key = (SmlSign*)DatumGetPointer(PG_DETOAST_DATUM(entry->key));
422 if (key != (SmlSign *) DatumGetPointer(entry->key))
424 GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
426 gistentryinit(*retval, PointerGetDatum(key),
427 entry->rel, entry->page,
428 entry->offset, FALSE);
430 PG_RETURN_POINTER(retval);
433 PG_RETURN_POINTER(entry);
440 unionkey(BITVECP sbase, SmlSign *add)
446 BITVECP sadd = GETSIGN(add);
456 uint32 *ptr = GETARR(add);
458 for (i = 0; i < add->size; i++)
465 PG_FUNCTION_INFO_V1(gsmlsign_union);
466 Datum gsmlsign_union(PG_FUNCTION_ARGS);
468 gsmlsign_union(PG_FUNCTION_ARGS)
470 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
471 int *size = (int *) PG_GETARG_POINTER(1);
479 MemSet((void *) base, 0, sizeof(BITVEC));
480 for (i = 0; i < entryvec->n; i++)
482 if (GETENTRY(entryvec, i)->maxrepeat > maxrepeat)
483 maxrepeat = GETENTRY(entryvec, i)->maxrepeat;
484 if (unionkey(base, GETENTRY(entryvec, i)))
492 len = CALCGTSIZE(flag, 0);
493 result = (SmlSign *) palloc(len);
495 SET_VARSIZE(result, len);
497 result->maxrepeat = maxrepeat;
499 if (!ISALLTRUE(result))
501 memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
502 result->size = sizebitvec(GETSIGN(result));
505 result->size = SIGLENBIT;
507 PG_RETURN_POINTER(result);
514 PG_FUNCTION_INFO_V1(gsmlsign_same);
515 Datum gsmlsign_same(PG_FUNCTION_ARGS);
517 gsmlsign_same(PG_FUNCTION_ARGS)
519 SmlSign *a = (SmlSign*)PG_GETARG_POINTER(0);
520 SmlSign *b = (SmlSign*)PG_GETARG_POINTER(1);
521 bool *result = (bool *) PG_GETARG_POINTER(2);
523 if (a->size != b->size)
527 else if (ISSIGNKEY(a))
528 { /* then b also ISSIGNKEY */
531 /* in this case b is all true too - other cases is catched
538 BITVECP sa = GETSIGN(a),
558 uint32 *ptra = GETARR(a),
563 for (i = 0; i < a->size; i++)
565 if ( ptra[i] != ptrb[i])
573 PG_RETURN_POINTER(result);
580 hemdistsign(BITVECP a, BITVECP b)
588 diff = (unsigned char) (a[i] ^ b[i]);
589 dist += number_of_ones[diff];
595 hemdist(SmlSign *a, SmlSign *b)
602 return SIGLENBIT - b->size;
604 else if (ISALLTRUE(b))
605 return SIGLENBIT - b->size;
607 return hemdistsign(GETSIGN(a), GETSIGN(b));
610 PG_FUNCTION_INFO_V1(gsmlsign_penalty);
611 Datum gsmlsign_penalty(PG_FUNCTION_ARGS);
613 gsmlsign_penalty(PG_FUNCTION_ARGS)
615 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
616 GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
617 float *penalty = (float *) PG_GETARG_POINTER(2);
618 SmlSign *origval = (SmlSign *) DatumGetPointer(origentry->key);
619 SmlSign *newval = (SmlSign *) DatumGetPointer(newentry->key);
620 BITVECP orig = GETSIGN(origval);
624 if (ISARRKEY(newval))
628 makesign(sign, newval);
630 if (ISALLTRUE(origval))
631 *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
633 *penalty = hemdistsign(sign, orig);
636 *penalty = hemdist(origval, newval);
638 PG_RETURN_POINTER(penalty);
653 fillcache(CACHESIGN *item, SmlSign *key)
655 item->allistrue = false;
656 item->size = key->size;
660 makesign(item->sign, key);
661 item->size = sizebitvec( item->sign );
663 else if (ISALLTRUE(key))
664 item->allistrue = true;
666 memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
669 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
678 comparecost(const void *va, const void *vb)
680 SPLITCOST *a = (SPLITCOST *) va;
681 SPLITCOST *b = (SPLITCOST *) vb;
683 if (a->cost == b->cost)
686 return (a->cost > b->cost) ? 1 : -1;
690 hemdistcache(CACHESIGN *a, CACHESIGN *b)
697 return SIGLENBIT - b->size;
699 else if (b->allistrue)
700 return SIGLENBIT - a->size;
702 return hemdistsign(a->sign, b->sign);
705 PG_FUNCTION_INFO_V1(gsmlsign_picksplit);
706 Datum gsmlsign_picksplit(PG_FUNCTION_ARGS);
708 gsmlsign_picksplit(PG_FUNCTION_ARGS)
710 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
711 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
723 OffsetNumber seed_1 = 0,
731 SPLITCOST *costvector;
733 maxoff = entryvec->n - 2;
734 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
735 v->spl_left = (OffsetNumber *) palloc(nbytes);
736 v->spl_right = (OffsetNumber *) palloc(nbytes);
738 cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
739 fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
741 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
743 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
745 if (k == FirstOffsetNumber)
746 fillcache(&cache[j], GETENTRY(entryvec, j));
748 size_waste = hemdistcache(&(cache[j]), &(cache[k]));
749 if (size_waste > waste)
760 right = v->spl_right;
763 if (seed_1 == 0 || seed_2 == 0)
769 /* form initial .. */
770 if (cache[seed_1].allistrue)
772 datum_l = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
773 SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
774 datum_l->flag = SIGNKEY | ALLISTRUE;
775 datum_l->size = SIGLENBIT;
779 datum_l = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY, 0));
780 SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
781 datum_l->flag = SIGNKEY;
782 memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
783 datum_l->size = cache[seed_1].size;
785 if (cache[seed_2].allistrue)
787 datum_r = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
788 SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
789 datum_r->flag = SIGNKEY | ALLISTRUE;
790 datum_r->size = SIGLENBIT;
794 datum_r = (SmlSign *) palloc(CALCGTSIZE(SIGNKEY, 0));
795 SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
796 datum_r->flag = SIGNKEY;
797 memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
798 datum_r->size = cache[seed_2].size;
801 union_l = GETSIGN(datum_l);
802 union_r = GETSIGN(datum_r);
803 maxoff = OffsetNumberNext(maxoff);
804 fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
805 /* sort before ... */
806 costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
807 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
809 costvector[j - 1].pos = j;
810 size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
811 size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
812 costvector[j - 1].cost = Abs(size_alpha - size_beta);
814 qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
816 datum_l->maxrepeat = datum_r->maxrepeat = 1;
818 for (k = 0; k < maxoff; k++)
820 j = costvector[k].pos;
827 else if (j == seed_2)
834 if (ISALLTRUE(datum_l) || cache[j].allistrue)
836 if (ISALLTRUE(datum_l) && cache[j].allistrue)
839 size_alpha = SIGLENBIT - (
840 (cache[j].allistrue) ? datum_l->size : cache[j].size
844 size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));
846 if (ISALLTRUE(datum_r) || cache[j].allistrue)
848 if (ISALLTRUE(datum_r) && cache[j].allistrue)
851 size_beta = SIGLENBIT - (
852 (cache[j].allistrue) ? datum_r->size : cache[j].size
856 size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));
858 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
860 if (ISALLTRUE(datum_l) || cache[j].allistrue)
862 if (!ISALLTRUE(datum_l))
863 MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
864 datum_l->size = SIGLENBIT;
870 union_l[i] |= ptr[i];
871 datum_l->size = sizebitvec(union_l);
878 if (ISALLTRUE(datum_r) || cache[j].allistrue)
880 if (!ISALLTRUE(datum_r))
881 MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
882 datum_r->size = SIGLENBIT;
888 union_r[i] |= ptr[i];
889 datum_r->size = sizebitvec(union_r);
895 *right = *left = FirstOffsetNumber;
896 v->spl_ldatum = PointerGetDatum(datum_l);
897 v->spl_rdatum = PointerGetDatum(datum_r);
899 Assert( datum_l->size = sizebitvec(GETSIGN(datum_l)) );
900 Assert( datum_r->size = sizebitvec(GETSIGN(datum_r)) );
902 PG_RETURN_POINTER(v);
906 getIdfMaxLimit(SmlSign *key)
908 switch( getTFMethod() )
914 return (double)(key->maxrepeat);
917 return 1.0 + log( (double)(key->maxrepeat) );
920 elog(ERROR,"Unknown TF method: %d", getTFMethod());
927 * Consistent function
929 PG_FUNCTION_INFO_V1(gsmlsign_consistent);
930 Datum gsmlsign_consistent(PG_FUNCTION_ARGS);
932 gsmlsign_consistent(PG_FUNCTION_ARGS)
934 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
935 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
936 bool *recheck = (bool *) PG_GETARG_POINTER(4);
938 SmlSign *key = (SmlSign*)DatumGetPointer(entry->key);
944 fcinfo->flinfo->fn_extra = SearchArrayCache(
945 fcinfo->flinfo->fn_extra,
946 fcinfo->flinfo->fn_mcxt,
947 PG_GETARG_DATUM(1), &a, &s, &query);
954 if ( strategy == SmlarOverlapStrategy )
960 else if (ISARRKEY(key))
962 uint32 *kptr = GETARR(key),
963 *qptr = GETARR(query);
965 while( kptr - GETARR(key) < key->size && qptr - GETARR(query) < query->size )
969 else if ( *kptr > *qptr )
981 BITVECP sign = GETSIGN(key);
983 fillHashVal(fcinfo->flinfo->fn_extra, s);
985 for(i=0; i<s->nelems; i++)
987 if ( GETBIT(sign, HASHVAL(s->hash[i])) )
996 * SmlarSimilarityStrategy
998 else if (ISALLTRUE(key))
1000 if ( GIST_LEAF(entry) )
1003 * With TF/IDF similarity we cannot say anything useful
1005 if ( query->size < SIGLENBIT && getSmlType() != ST_TFIDF )
1007 double power = ((double)(query->size)) * ((double)(SIGLENBIT));
1009 if ( ((double)(query->size)) / sqrt(power) >= GetSmlarLimit() )
1020 else if (ISARRKEY(key))
1022 uint32 *kptr = GETARR(key),
1023 *qptr = GETARR(query);
1025 Assert( GIST_LEAF(entry) );
1027 switch(getSmlType())
1031 StatCache *stat = getHashedCache(fcinfo->flinfo->fn_extra);
1035 double maxKTF = getIdfMaxLimit(key);
1039 fillHashVal(fcinfo->flinfo->fn_extra, s);
1040 if ( stat->info != s->info )
1041 elog(ERROR,"Statistic and actual argument have different type");
1043 for(i=0;i<s->nelems;i++)
1045 sumQ += s->df[i] * s->df[i];
1047 h = getHashedElemIdf(stat, s->hash[i], NULL);
1048 if ( h && hasHashedElem(key, s->hash[i]) )
1050 sumK += h->idfMin * h->idfMin;
1051 sumU += h->idfMax * maxKTF * s->df[i];
1055 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1058 * More precisely calculate sumK
1063 for(i=0;i<key->size;i++)
1065 h = getHashedElemIdf(stat, GETARR(key)[i], h);
1067 sumK += h->idfMin * h->idfMin;
1070 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1078 power = sqrt( ((double)(key->size)) * ((double)(s->nelems)) );
1080 if ( ((double)Min(key->size, s->nelems)) / power >= GetSmlarLimit() )
1084 while( kptr - GETARR(key) < key->size && qptr - GETARR(query) < query->size )
1086 if ( *kptr < *qptr )
1088 else if ( *kptr > *qptr )
1098 if ( ((double)cnt) / power >= GetSmlarLimit() )
1104 elog(ERROR,"GiST doesn't support current formula type of similarity");
1109 BITVECP sign = GETSIGN(key);
1112 fillHashVal(fcinfo->flinfo->fn_extra, s);
1114 if ( GIST_LEAF(entry) )
1116 switch(getSmlType())
1120 StatCache *stat = getHashedCache(fcinfo->flinfo->fn_extra);
1124 double maxKTF = getIdfMaxLimit(key);
1127 if ( stat->info != s->info )
1128 elog(ERROR,"Statistic and actual argument have different type");
1130 for(i=0;i<s->nelems;i++)
1132 int32 hbit = HASHVAL(s->hash[i]);
1134 sumQ += s->df[i] * s->df[i];
1135 if ( GETBIT(sign, hbit) )
1137 sumK += stat->selems[ hbit ].idfMin * stat->selems[ hbit ].idfMin;
1138 sumU += stat->selems[ hbit ].idfMax * maxKTF * s->df[i];
1142 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1145 * More precisely calculate sumK
1149 for(i=0;i<SIGLENBIT;i++)
1150 if ( GETBIT(sign,i) )
1151 sumK += stat->selems[ i ].idfMin * stat->selems[ i ].idfMin;
1153 if ( sumK > 0.0 && sumQ > 0.0 && sumU / sqrt( sumK * sumQ ) >= GetSmlarLimit() )
1162 power = sqrt( ((double)(key->size)) * ((double)(s->nelems)) );
1164 for(i=0; i<s->nelems; i++)
1165 count += GETBIT(sign, HASHVAL(s->hash[i]));
1167 if ( ((double)count) / power >= GetSmlarLimit() )
1172 elog(ERROR,"GiST doesn't support current formula type of similarity");
1175 else /* non-leaf page */
1177 switch(getSmlType())
1181 StatCache *stat = getHashedCache(fcinfo->flinfo->fn_extra);
1185 double maxKTF = getIdfMaxLimit(key);
1188 if ( stat->info != s->info )
1189 elog(ERROR,"Statistic and actual argument have different type");
1191 for(i=0;i<s->nelems;i++)
1193 int32 hbit = HASHVAL(s->hash[i]);
1195 sumQ += s->df[i] * s->df[i];
1196 if ( GETBIT(sign, hbit) )
1198 sumU += stat->selems[ hbit ].idfMax * maxKTF * s->df[i];
1199 if ( minK > stat->selems[ hbit ].idfMin || minK < 0.0 )
1200 minK = stat->selems[ hbit ].idfMin;
1204 if ( sumQ > 0.0 && minK > 0.0 && sumU / sqrt( sumQ * minK ) >= GetSmlarLimit() )
1210 for(i=0; i<s->nelems; i++)
1211 count += GETBIT(sign, HASHVAL(s->hash[i]));
1213 if ( s->nelems == count || ((double)count) / ((double)(s->nelems)) >= GetSmlarLimit() )
1218 elog(ERROR,"GiST doesn't support current formula type of similarity");
1225 static int nnres = 0;
1226 static int nres = 0;
1227 if ( GIST_LEAF(entry) ) {
1232 elog(NOTICE,"%s nn:%d n:%d", (ISARRKEY(key)) ? "ARR" : ( (ISALLTRUE(key)) ? "TRUE" : "SIGN" ), nnres, nres );
1237 PG_RETURN_BOOL(res);