README
[hstore.git] / hstore_gist.c
1 /*
2  * contrib/hstore/hstore_gist.c
3  */
4 #include "postgres.h"
5
6 #include "access/gist.h"
7 #include "access/skey.h"
8 #include "catalog/pg_type.h"
9 #include "utils/builtins.h"
10 #include "utils/pg_crc.h"
11
12 #include "hstore.h"
13
14 /* bigint defines */
15 #define BITBYTE 8
16 #define SIGLENINT  4                    /* >122 => key will toast, so very slow!!! */
17 #define SIGLEN  ( sizeof(int)*SIGLENINT )
18 #define SIGLENBIT (SIGLEN*BITBYTE)
19
20 typedef char BITVEC[SIGLEN];
21 typedef char *BITVECP;
22
23 #define SIGPTR(x)  ( (BITVECP) ARR_DATA_PTR(x) )
24
25
26 #define LOOPBYTE \
27                         for(i=0;i<SIGLEN;i++)
28
29 #define LOOPBIT \
30                         for(i=0;i<SIGLENBIT;i++)
31
32 /* beware of multiple evaluation of arguments to these macros! */
33 #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
34 #define GETBITBYTE(x,i) ( (*((char*)(x)) >> (i)) & 0x01 )
35 #define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
36 #define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
37 #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
38 #define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
39 #define HASH(sign, val) SETBIT((sign), HASHVAL(val))
40
41 typedef struct
42 {
43         int32           vl_len_;                /* varlena header (do not touch directly!) */
44         int32           flag;
45         char            data[1];
46 } GISTTYPE;
47
48 #define ALLISTRUE               0x04
49
50 #define ISALLTRUE(x)    ( ((GISTTYPE*)x)->flag & ALLISTRUE )
51
52 #define GTHDRSIZE               (VARHDRSZ + sizeof(int32))
53 #define CALCGTSIZE(flag) ( GTHDRSIZE+(((flag) & ALLISTRUE) ? 0 : SIGLEN) )
54
55 #define GETSIGN(x)              ( (BITVECP)( (char*)x+GTHDRSIZE ) )
56
57 #define SUMBIT(val) (           \
58         GETBITBYTE((val),0) + \
59         GETBITBYTE((val),1) + \
60         GETBITBYTE((val),2) + \
61         GETBITBYTE((val),3) + \
62         GETBITBYTE((val),4) + \
63         GETBITBYTE((val),5) + \
64         GETBITBYTE((val),6) + \
65         GETBITBYTE((val),7)   \
66 )
67
68 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
69
70 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
71
72 PG_FUNCTION_INFO_V1(ghstore_in);
73 Datum           ghstore_in(PG_FUNCTION_ARGS);
74
75 PG_FUNCTION_INFO_V1(ghstore_out);
76 Datum           ghstore_out(PG_FUNCTION_ARGS);
77
78
79 Datum
80 ghstore_in(PG_FUNCTION_ARGS)
81 {
82         elog(ERROR, "Not implemented");
83         PG_RETURN_DATUM(0);
84 }
85
86 Datum
87 ghstore_out(PG_FUNCTION_ARGS)
88 {
89         elog(ERROR, "Not implemented");
90         PG_RETURN_DATUM(0);
91 }
92
93 PG_FUNCTION_INFO_V1(ghstore_consistent);
94 PG_FUNCTION_INFO_V1(ghstore_compress);
95 PG_FUNCTION_INFO_V1(ghstore_decompress);
96 PG_FUNCTION_INFO_V1(ghstore_penalty);
97 PG_FUNCTION_INFO_V1(ghstore_picksplit);
98 PG_FUNCTION_INFO_V1(ghstore_union);
99 PG_FUNCTION_INFO_V1(ghstore_same);
100
101 Datum           ghstore_consistent(PG_FUNCTION_ARGS);
102 Datum           ghstore_compress(PG_FUNCTION_ARGS);
103 Datum           ghstore_decompress(PG_FUNCTION_ARGS);
104 Datum           ghstore_penalty(PG_FUNCTION_ARGS);
105 Datum           ghstore_picksplit(PG_FUNCTION_ARGS);
106 Datum           ghstore_union(PG_FUNCTION_ARGS);
107 Datum           ghstore_same(PG_FUNCTION_ARGS);
108
109 static int
110 crc32_HStoreValue(HStoreValue *v, uint32 r)
111 {
112         int             crc;
113         char    flag = '\0';
114
115         INIT_CRC32(crc);
116
117         switch(r)
118         {
119                 case WHS_KEY:
120                         flag = KEYFLAG;
121                         break;
122                 case WHS_VALUE:
123                         flag = VALFLAG;
124                         break;
125                 case WHS_ELEM:
126                         flag = ELEMFLAG;
127                         break;
128                 default:
129                         break;
130         }
131
132         COMP_CRC32(crc, &flag, 1);
133
134         switch(v->type)
135         {
136                 case hsvString:
137                         COMP_CRC32(crc, v->string.val, v->string.len);
138                         break;
139                 case hsvBool:
140                         flag = (v->boolean) ? 't' : 'f';
141                         COMP_CRC32(crc, &flag, 1);
142                         break;
143                 case hsvNumeric:
144                         crc = DatumGetInt32(DirectFunctionCall1(hash_numeric,
145                                                                 NumericGetDatum(v->numeric)));
146                         break;
147                 default:
148                         elog(PANIC, "impossible value %d", v->type);
149         }
150
151         FIN_CRC32(crc);
152         return crc;
153 }
154
155 static int
156 crc32_Key(char *buf, int sz)
157 {
158         int     crc;
159         char    flag = KEYFLAG;
160
161         INIT_CRC32(crc);
162
163         COMP_CRC32(crc, &flag, 1);
164         COMP_CRC32(crc, buf, sz);
165
166         FIN_CRC32(crc);
167         return crc;
168 }
169
170 Datum
171 ghstore_compress(PG_FUNCTION_ARGS)
172 {
173         GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
174         GISTENTRY  *retval = entry;
175
176         if (entry->leafkey)
177         {
178                 GISTTYPE                *res = (GISTTYPE *) palloc0(CALCGTSIZE(0));
179                 HStore                  *val = DatumGetHStoreP(entry->key);
180
181                 SET_VARSIZE(res, CALCGTSIZE(0));
182
183                 if (!HS_ISEMPTY(val))
184                 {
185                         int                             r;
186                         HStoreIterator  *it = HStoreIteratorInit(VARDATA(val));
187                         HStoreValue             v;
188
189                         while((r = HStoreIteratorGet(&it, &v, false)) != 0)
190                         {
191                                 if ((r == WHS_ELEM || r == WHS_KEY || r == WHS_VALUE) &&
192                                         v.type != hsvNull)
193                                 {
194                                         int   h = crc32_HStoreValue(&v, r);
195
196                                         HASH(GETSIGN(res), h);
197                                 }
198                         }
199                 }
200
201                 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
202                 gistentryinit(*retval, PointerGetDatum(res),
203                                           entry->rel, entry->page,
204                                           entry->offset,
205                                           FALSE);
206         }
207         else if (!ISALLTRUE(DatumGetPointer(entry->key)))
208         {
209                 int32           i;
210                 GISTTYPE   *res;
211                 BITVECP         sign = GETSIGN(DatumGetPointer(entry->key));
212
213                 LOOPBYTE
214                 {
215                         if ((sign[i] & 0xff) != 0xff)
216                                 PG_RETURN_POINTER(retval);
217                 }
218
219                 res = (GISTTYPE *) palloc(CALCGTSIZE(ALLISTRUE));
220                 SET_VARSIZE(res, CALCGTSIZE(ALLISTRUE));
221                 res->flag = ALLISTRUE;
222
223                 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
224                 gistentryinit(*retval, PointerGetDatum(res),
225                                           entry->rel, entry->page,
226                                           entry->offset,
227                                           FALSE);
228         }
229
230         PG_RETURN_POINTER(retval);
231 }
232
233 /*
234  * Since type ghstore isn't toastable (and doesn't need to be),
235  * this function can be a no-op.
236  */
237 Datum
238 ghstore_decompress(PG_FUNCTION_ARGS)
239 {
240         PG_RETURN_POINTER(PG_GETARG_POINTER(0));
241 }
242
243 Datum
244 ghstore_same(PG_FUNCTION_ARGS)
245 {
246         GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
247         GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
248         bool       *result = (bool *) PG_GETARG_POINTER(2);
249
250         if (ISALLTRUE(a) && ISALLTRUE(b))
251                 *result = true;
252         else if (ISALLTRUE(a))
253                 *result = false;
254         else if (ISALLTRUE(b))
255                 *result = false;
256         else
257         {
258                 int32           i;
259                 BITVECP         sa = GETSIGN(a),
260                                         sb = GETSIGN(b);
261
262                 *result = true;
263                 LOOPBYTE
264                 {
265                         if (sa[i] != sb[i])
266                         {
267                                 *result = false;
268                                 break;
269                         }
270                 }
271         }
272         PG_RETURN_POINTER(result);
273 }
274
275 static int32
276 sizebitvec(BITVECP sign)
277 {
278         int32           size = 0,
279                                 i;
280
281         LOOPBYTE
282         {
283                 size += SUMBIT(sign);
284                 sign = (BITVECP) (((char *) sign) + 1);
285         }
286         return size;
287 }
288
289 static int
290 hemdistsign(BITVECP a, BITVECP b)
291 {
292         int                     i,
293                                 dist = 0;
294
295         LOOPBIT
296         {
297                 if (GETBIT(a, i) != GETBIT(b, i))
298                         dist++;
299         }
300         return dist;
301 }
302
303 static int
304 hemdist(GISTTYPE *a, GISTTYPE *b)
305 {
306         if (ISALLTRUE(a))
307         {
308                 if (ISALLTRUE(b))
309                         return 0;
310                 else
311                         return SIGLENBIT - sizebitvec(GETSIGN(b));
312         }
313         else if (ISALLTRUE(b))
314                 return SIGLENBIT - sizebitvec(GETSIGN(a));
315
316         return hemdistsign(GETSIGN(a), GETSIGN(b));
317 }
318
319 static int32
320 unionkey(BITVECP sbase, GISTTYPE *add)
321 {
322         int32           i;
323         BITVECP         sadd = GETSIGN(add);
324
325         if (ISALLTRUE(add))
326                 return 1;
327         LOOPBYTE
328                 sbase[i] |= sadd[i];
329         return 0;
330 }
331
332 Datum
333 ghstore_union(PG_FUNCTION_ARGS)
334 {
335         GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
336         int32           len = entryvec->n;
337
338         int                *size = (int *) PG_GETARG_POINTER(1);
339         BITVEC          base;
340         int32           i;
341         int32           flag = 0;
342         GISTTYPE   *result;
343
344         MemSet((void *) base, 0, sizeof(BITVEC));
345         for (i = 0; i < len; i++)
346         {
347                 if (unionkey(base, GETENTRY(entryvec, i)))
348                 {
349                         flag = ALLISTRUE;
350                         break;
351                 }
352         }
353
354         len = CALCGTSIZE(flag);
355         result = (GISTTYPE *) palloc(len);
356         SET_VARSIZE(result, len);
357         result->flag = flag;
358         if (!ISALLTRUE(result))
359                 memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
360         *size = len;
361
362         PG_RETURN_POINTER(result);
363 }
364
365 Datum
366 ghstore_penalty(PG_FUNCTION_ARGS)
367 {
368         GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
369         GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
370         float      *penalty = (float *) PG_GETARG_POINTER(2);
371         GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
372         GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
373
374         *penalty = hemdist(origval, newval);
375         PG_RETURN_POINTER(penalty);
376 }
377
378
379 typedef struct
380 {
381         OffsetNumber pos;
382         int32           cost;
383 } SPLITCOST;
384
385 static int
386 comparecost(const void *a, const void *b)
387 {
388         return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
389 }
390
391
392 Datum
393 ghstore_picksplit(PG_FUNCTION_ARGS)
394 {
395         GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
396         OffsetNumber maxoff = entryvec->n - 2;
397
398         GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
399         OffsetNumber k,
400                                 j;
401         GISTTYPE   *datum_l,
402                            *datum_r;
403         BITVECP         union_l,
404                                 union_r;
405         int32           size_alpha,
406                                 size_beta;
407         int32           size_waste,
408                                 waste = -1;
409         int32           nbytes;
410         OffsetNumber seed_1 = 0,
411                                 seed_2 = 0;
412         OffsetNumber *left,
413                            *right;
414         BITVECP         ptr;
415         int                     i;
416         SPLITCOST  *costvector;
417         GISTTYPE   *_k,
418                            *_j;
419
420         nbytes = (maxoff + 2) * sizeof(OffsetNumber);
421         v->spl_left = (OffsetNumber *) palloc(nbytes);
422         v->spl_right = (OffsetNumber *) palloc(nbytes);
423
424         for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
425         {
426                 _k = GETENTRY(entryvec, k);
427                 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
428                 {
429                         size_waste = hemdist(_k, GETENTRY(entryvec, j));
430                         if (size_waste > waste)
431                         {
432                                 waste = size_waste;
433                                 seed_1 = k;
434                                 seed_2 = j;
435                         }
436                 }
437         }
438
439         left = v->spl_left;
440         v->spl_nleft = 0;
441         right = v->spl_right;
442         v->spl_nright = 0;
443
444         if (seed_1 == 0 || seed_2 == 0)
445         {
446                 seed_1 = 1;
447                 seed_2 = 2;
448         }
449
450         /* form initial .. */
451         if (ISALLTRUE(GETENTRY(entryvec, seed_1)))
452         {
453                 datum_l = (GISTTYPE *) palloc(GTHDRSIZE);
454                 SET_VARSIZE(datum_l, GTHDRSIZE);
455                 datum_l->flag = ALLISTRUE;
456         }
457         else
458         {
459                 datum_l = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
460                 SET_VARSIZE(datum_l, GTHDRSIZE + SIGLEN);
461                 datum_l->flag = 0;
462                 memcpy((void *) GETSIGN(datum_l),
463                            (void *) GETSIGN(GETENTRY(entryvec, seed_1)), sizeof(BITVEC))
464                         ;
465         }
466         if (ISALLTRUE(GETENTRY(entryvec, seed_2)))
467         {
468                 datum_r = (GISTTYPE *) palloc(GTHDRSIZE);
469                 SET_VARSIZE(datum_r, GTHDRSIZE);
470                 datum_r->flag = ALLISTRUE;
471         }
472         else
473         {
474                 datum_r = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
475                 SET_VARSIZE(datum_r, GTHDRSIZE + SIGLEN);
476                 datum_r->flag = 0;
477                 memcpy((void *) GETSIGN(datum_r),
478                            (void *) GETSIGN(GETENTRY(entryvec, seed_2)), sizeof(BITVEC));
479         }
480
481         maxoff = OffsetNumberNext(maxoff);
482         /* sort before ... */
483         costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
484         for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
485         {
486                 costvector[j - 1].pos = j;
487                 _j = GETENTRY(entryvec, j);
488                 size_alpha = hemdist(datum_l, _j);
489                 size_beta = hemdist(datum_r, _j);
490                 costvector[j - 1].cost = abs(size_alpha - size_beta);
491         }
492         qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
493
494         union_l = GETSIGN(datum_l);
495         union_r = GETSIGN(datum_r);
496
497         for (k = 0; k < maxoff; k++)
498         {
499                 j = costvector[k].pos;
500                 if (j == seed_1)
501                 {
502                         *left++ = j;
503                         v->spl_nleft++;
504                         continue;
505                 }
506                 else if (j == seed_2)
507                 {
508                         *right++ = j;
509                         v->spl_nright++;
510                         continue;
511                 }
512                 _j = GETENTRY(entryvec, j);
513                 size_alpha = hemdist(datum_l, _j);
514                 size_beta = hemdist(datum_r, _j);
515
516                 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.0001))
517                 {
518                         if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
519                         {
520                                 if (!ISALLTRUE(datum_l))
521                                         MemSet((void *) union_l, 0xff, sizeof(BITVEC));
522                         }
523                         else
524                         {
525                                 ptr = GETSIGN(_j);
526                                 LOOPBYTE
527                                         union_l[i] |= ptr[i];
528                         }
529                         *left++ = j;
530                         v->spl_nleft++;
531                 }
532                 else
533                 {
534                         if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
535                         {
536                                 if (!ISALLTRUE(datum_r))
537                                         MemSet((void *) union_r, 0xff, sizeof(BITVEC));
538                         }
539                         else
540                         {
541                                 ptr = GETSIGN(_j);
542                                 LOOPBYTE
543                                         union_r[i] |= ptr[i];
544                         }
545                         *right++ = j;
546                         v->spl_nright++;
547                 }
548         }
549
550         *right = *left = FirstOffsetNumber;
551
552         v->spl_ldatum = PointerGetDatum(datum_l);
553         v->spl_rdatum = PointerGetDatum(datum_r);
554
555         PG_RETURN_POINTER(v);
556 }
557
558 Datum
559 ghstore_consistent(PG_FUNCTION_ARGS)
560 {
561         GISTTYPE   *entry = (GISTTYPE *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
562         StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
563
564         /* Oid          subtype = PG_GETARG_OID(3); */
565         bool       *recheck = (bool *) PG_GETARG_POINTER(4);
566         bool            res = true;
567         BITVECP         sign;
568
569         /* All cases served by this function are inexact */
570         *recheck = true;
571
572         if (ISALLTRUE(entry))
573                 PG_RETURN_BOOL(true);
574
575         sign = GETSIGN(entry);
576
577         if (strategy == HStoreContainsStrategyNumber ||
578                 strategy == HStoreOldContainsStrategyNumber)
579         {
580                 BITVECP         qe;
581                 int                     i;
582
583                 qe = fcinfo->flinfo->fn_extra;
584                 if (qe == NULL)
585                 {
586                         HStore                  *query = PG_GETARG_HS(1);
587
588                         qe = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, sizeof(BITVEC));
589                         memset(qe, 0, sizeof(BITVEC));
590
591                         if (!HS_ISEMPTY(query))
592                         {
593                                 int                             r;
594                                 HStoreIterator  *it = HStoreIteratorInit(VARDATA(query));
595                                 HStoreValue             v;
596
597                                 while((r = HStoreIteratorGet(&it, &v, false)) != 0)
598                                 {
599                                         if ((r == WHS_ELEM || r == WHS_KEY || r == WHS_VALUE) && v.type != hsvNull)
600                                         {
601                                                 int   crc = crc32_HStoreValue(&v, r);
602
603                                                 HASH(qe, crc);
604                                         }
605                                 }
606                         }
607
608                         fcinfo->flinfo->fn_extra = qe;
609                 }
610
611                 LOOPBYTE
612                 {
613                         if ((sign[i] & qe[i]) != qe[i])
614                         {
615                                 res = false;
616                                 break;
617                         }
618                 }
619         }
620         else if (strategy == HStoreExistsStrategyNumber)
621         {
622                 int     *qval;
623
624                 qval = fcinfo->flinfo->fn_extra;
625                 if (qval == NULL)
626                 {
627                         text       *query = PG_GETARG_TEXT_PP(1);
628                         int                     crc = crc32_Key(VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query));
629
630                         qval = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, sizeof(*qval));
631                         *qval = HASHVAL(crc);
632
633                         fcinfo->flinfo->fn_extra = qval;
634                 }
635
636                 res = (GETBIT(sign, *qval)) ? true : false;
637         }
638         else if (strategy == HStoreExistsAllStrategyNumber ||
639                          strategy == HStoreExistsAnyStrategyNumber)
640         {
641                 BITVECP arrentry;
642                 int             i;
643
644                 arrentry = fcinfo->flinfo->fn_extra;
645                 if (arrentry == NULL)
646                 {
647                         ArrayType  *query = PG_GETARG_ARRAYTYPE_P(1);
648                         Datum      *key_datums;
649                         bool       *key_nulls;
650                         int                     key_count;
651
652                         arrentry = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
653                                                                                   sizeof(BITVEC));
654                         memset(arrentry, 0, sizeof(BITVEC));
655
656                         deconstruct_array(query,
657                                                           TEXTOID, -1, false, 'i',
658                                                           &key_datums, &key_nulls, &key_count);
659
660                         for (i = 0; i < key_count; ++i)
661                         {
662                                 int                     crc;
663
664                                 if (key_nulls[i])
665                                         continue;
666                                 crc = crc32_Key(VARDATA(key_datums[i]),
667                                                                 VARSIZE(key_datums[i]) - VARHDRSZ);
668                                 HASH(arrentry, crc);
669                         }
670
671                         fcinfo->flinfo->fn_extra = arrentry;
672                 }
673
674                 if (strategy == HStoreExistsAllStrategyNumber)
675                 {
676                         LOOPBYTE
677                         {
678                                 if ((sign[i] & arrentry[i]) != arrentry[i])
679                                 {
680                                         res = false;
681                                         break;
682                                 }
683                         }
684                 }
685                 else /* HStoreExistsAnyStrategyNumber */
686                 {
687                         res = false;
688
689                         LOOPBYTE
690                         {
691                                 if (sign[i] & arrentry[i])
692                                 {
693                                         res = true;
694                                         break;
695                                 }
696                         }
697                 }
698         }
699         else
700                 elog(ERROR, "Unsupported strategy number: %d", strategy);
701
702         PG_RETURN_BOOL(res);
703 }