Add PG_MODULE_MAGIC for 8.2
[hstore.git] / hstore_gist.c
1 #include "hstore.h"
2
3 #include "access/gist.h"
4 #include "access/itup.h"
5 /*#include "access/rtree.h"*/
6 #include "crc32.h"
7
/*
 * Signature bit-vector definitions.
 *
 * A signature is a fixed-size bitmap of SIGLENINT ints, i.e. SIGLEN bytes
 * or SIGLENBIT bits.  BITVEC is the in-memory array type and BITVECP a
 * pointer to its first byte.
 */
#define BITBYTE 8
#define SIGLENINT  4               /* >122 => key will toast, so very slow!!! */
#define SIGLEN  ( sizeof(int)*SIGLENINT )
#define SIGLENBIT (SIGLEN*BITBYTE)

typedef char BITVEC[SIGLEN];
typedef char *BITVECP;

/* NOTE(review): ARR_DATA_PTR is not defined in the visible part of this file */
#define SIGPTR(x)  ( (BITVECP) ARR_DATA_PTR(x) )

/*
 * Run statement "a" once per signature byte (LOOPBYTE) or once per
 * signature bit (LOOPBIT).  Both expect a variable named "i" to be declared
 * by the caller; it is used as the loop counter.  They expand to a plain
 * "for" loop, so "break" and "return" inside "a" act on that loop/function.
 */
#define LOOPBYTE(a) \
	for(i=0;i<SIGLEN;i++) {\
		a;\
	}

#define LOOPBIT(a) \
	for(i=0;i<SIGLENBIT;i++) {\
		a;\
	}

/*
 * Bit accessors.  Beware of multiple evaluation of arguments to these macros!
 *
 * GETBYTE    - lvalue for the byte of signature x that holds bit i
 * GETBITBYTE - bit i (0..7) of the single byte that x points to
 * CLRBIT     - clear bit i of signature x
 * SETBIT     - set bit i of signature x
 * GETBIT     - value (0 or 1) of bit i of signature x
 *
 * Bytes are read as unsigned char before being shifted right, so the result
 * does not rely on how the compiler shifts negative values of plain char.
 */
#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
#define GETBITBYTE(x,i) ( ( *((unsigned char *)(x)) >> (i) ) & 0x01 )
#define CLRBIT(x,i)   ( GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) ) )
#define SETBIT(x,i)   ( GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) ) )
#define GETBIT(x,i) ( ( (unsigned char) GETBYTE(x,i) >> ( (i) % BITBYTE ) ) & 0x01 )

/* Map a hash value onto a bit position, and set that bit in a signature */
#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
38
39 typedef struct {
40         int4        len;
41         int4        flag;
42         char        data[1];
43 }       GISTTYPE;
44
45 #define ALLISTRUE       0x04
46
47 #define ISALLTRUE(x)    ( ((GISTTYPE*)x)->flag & ALLISTRUE )
48
49 #define GTHDRSIZE       ( sizeof(int4)*2  )
50 #define CALCGTSIZE(flag) ( GTHDRSIZE+(((flag) & ALLISTRUE) ? 0 : SIGLEN) )
51   
52 #define GETSIGN(x)      ( (BITVECP)( (char*)x+GTHDRSIZE ) )
53
54 #define SUMBIT(val) (       \
55         GETBITBYTE((val),0) + \
56         GETBITBYTE((val),1) + \
57         GETBITBYTE((val),2) + \
58         GETBITBYTE((val),3) + \
59         GETBITBYTE((val),4) + \
60         GETBITBYTE((val),5) + \
61         GETBITBYTE((val),6) + \
62         GETBITBYTE((val),7)   \
63 )
64
65 #ifdef GEVHDRSZ
66 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
67 #else
68 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
69 #endif
70
71 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
72
73 PG_FUNCTION_INFO_V1(ghstore_in);
74 Datum      ghstore_in(PG_FUNCTION_ARGS);
75
76 PG_FUNCTION_INFO_V1(ghstore_out);
77 Datum      ghstore_out(PG_FUNCTION_ARGS);
78   
79
80 Datum
81 ghstore_in(PG_FUNCTION_ARGS) {
82         elog(ERROR, "Not implemented");
83         PG_RETURN_DATUM(0);
84 }
85
86 Datum
87 ghstore_out(PG_FUNCTION_ARGS) {
88         elog(ERROR, "Not implemented");
89         PG_RETURN_DATUM(0);
90 }
91
92 PG_FUNCTION_INFO_V1(ghstore_consistent);
93 PG_FUNCTION_INFO_V1(ghstore_compress);
94 PG_FUNCTION_INFO_V1(ghstore_decompress);
95 PG_FUNCTION_INFO_V1(ghstore_penalty);
96 PG_FUNCTION_INFO_V1(ghstore_picksplit);
97 PG_FUNCTION_INFO_V1(ghstore_union);
98 PG_FUNCTION_INFO_V1(ghstore_same);
99
100 Datum      ghstore_consistent(PG_FUNCTION_ARGS);
101 Datum      ghstore_compress(PG_FUNCTION_ARGS);
102 Datum      ghstore_decompress(PG_FUNCTION_ARGS);
103 Datum      ghstore_penalty(PG_FUNCTION_ARGS);
104 Datum      ghstore_picksplit(PG_FUNCTION_ARGS);
105 Datum      ghstore_union(PG_FUNCTION_ARGS);
106 Datum      ghstore_same(PG_FUNCTION_ARGS);
107
108 Datum
109 ghstore_compress(PG_FUNCTION_ARGS) {
110         GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
111         GISTENTRY  *retval = entry;
112          
113         if (entry->leafkey) {                                                  
114                 GISTTYPE   *res = (GISTTYPE*)palloc(CALCGTSIZE(0));
115                 HStore     *toastedval = (HStore *) DatumGetPointer(entry->key);
116                 HStore     *val = (HStore *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
117                 HEntry     *ptr = ARRPTR(val);
118                 char       *words = STRPTR(val);
119
120                 memset(res,0,CALCGTSIZE(0));
121                 res->len=CALCGTSIZE(0);
122         
123                 while(ptr-ARRPTR(val) < val->size) {
124                         int h;
125                         h = crc32_sz((char*)(words+ptr->pos), ptr->keylen);
126                         HASH( GETSIGN(res), h);
127                         if ( !ptr->valisnull ) {
128                                 h = crc32_sz((char *)(words+ptr->pos+ptr->keylen), ptr->vallen);
129                                 HASH( GETSIGN(res), h);
130                         }
131                         ptr++;
132                 }
133
134                 if (val != toastedval)
135                         pfree(val);
136
137                 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
138                 gistentryinit(*retval, PointerGetDatum(res),
139                                           entry->rel, entry->page,
140                                           entry->offset, res->len, FALSE);
141         } else if ( !ISALLTRUE(DatumGetPointer(entry->key)) ) {
142                 int4        i;
143                 GISTTYPE   *res;
144                 BITVECP  sign = GETSIGN(DatumGetPointer(entry->key));
145                 
146                 LOOPBYTE(
147                         if ((sign[i] & 0xff) != 0xff)
148                                 PG_RETURN_POINTER(retval);
149                 );
150
151                 res = (GISTTYPE *) palloc(CALCGTSIZE(ALLISTRUE));
152                 res->len = CALCGTSIZE(ALLISTRUE);
153                 res->flag = ALLISTRUE;
154
155                 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
156                 gistentryinit(*retval, PointerGetDatum(res),
157                                           entry->rel, entry->page,
158                                           entry->offset, res->len, FALSE);
159         }
160
161         PG_RETURN_POINTER(retval);
162 }
163
164 Datum
165 ghstore_decompress(PG_FUNCTION_ARGS) {
166         PG_RETURN_DATUM(PG_GETARG_DATUM(0));
167 }
168
169 Datum
170 ghstore_same(PG_FUNCTION_ARGS) {
171         GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
172         GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
173         bool       *result = (bool *) PG_GETARG_POINTER(2);
174
175         if (ISALLTRUE(a) && ISALLTRUE(b))
176                 *result = true;
177         else if (ISALLTRUE(a))
178                 *result = false;
179         else if (ISALLTRUE(b))
180                 *result = false;
181         else {
182                 int4            i;
183                 BITVECP         sa = GETSIGN(a),
184                                 sb = GETSIGN(b);
185                 *result = true;
186                 LOOPBYTE(
187                         if (sa[i] != sb[i]) {
188                                 *result = false;
189                                 break;
190                         }
191                 );
192         }
193         PG_RETURN_POINTER(result);
194 }
195
196 static int4
197 sizebitvec(BITVECP sign) {
198         int4        size = 0, i;
199         LOOPBYTE(
200                 size += SUMBIT(sign);
201                 sign = (BITVECP) (((char *) sign) + 1);
202         );
203         return size;
204 }
205  
206 static int
207 hemdistsign(BITVECP  a, BITVECP b) {
208         int i,dist=0;
209
210         LOOPBIT(
211                 if ( GETBIT(a,i) != GETBIT(b,i) )
212                         dist++;
213         );
214         return dist;
215 }
216
217 static int
218 hemdist(GISTTYPE   *a, GISTTYPE   *b) {
219         if ( ISALLTRUE(a) ) {
220                 if (ISALLTRUE(b))
221                         return 0;
222                 else
223                         return SIGLENBIT-sizebitvec(GETSIGN(b));
224         } else if (ISALLTRUE(b))
225                 return SIGLENBIT-sizebitvec(GETSIGN(a));
226
227         return hemdistsign( GETSIGN(a), GETSIGN(b) );
228 }
229
230 static int4
231 unionkey(BITVECP sbase, GISTTYPE * add)
232 {
233         int4        i;
234         BITVECP  sadd = GETSIGN(add);
235
236         if (ISALLTRUE(add))
237                 return 1;
238         LOOPBYTE(
239                 sbase[i] |= sadd[i];
240         );
241         return 0;
242 }
243
244 Datum
245 ghstore_union(PG_FUNCTION_ARGS) {
246 #ifdef GEVHDRSZ
247         GistEntryVector      *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
248         int4        len = entryvec->n;
249 #else
250         bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
251         int4        len = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
252 #endif
253         int             *size = (int *) PG_GETARG_POINTER(1);
254         BITVEC    base;
255         int4        i;
256         int4        flag = 0;
257         GISTTYPE   *result;
258
259         MemSet((void *) base, 0, sizeof(BITVEC));
260         for (i = 0; i < len; i++) {
261                 if (unionkey(base, GETENTRY(entryvec, i))) {
262                         flag = ALLISTRUE;
263                         break;
264                 }
265         }
266
267         len = CALCGTSIZE(flag);
268         result = (GISTTYPE *) palloc(len);
269         *size = result->len = len;
270         result->flag = flag;
271         if (!ISALLTRUE(result))
272                 memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
273
274         PG_RETURN_POINTER(result);
275 }
276
277 Datum
278 ghstore_penalty(PG_FUNCTION_ARGS) {
279         GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
280         GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
281         float      *penalty = (float *) PG_GETARG_POINTER(2);
282         GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
283         GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
284
285         *penalty=hemdist(origval,newval);
286         PG_RETURN_POINTER(penalty);
287 }
288
289
290 typedef struct {
291         OffsetNumber pos;
292         int4        cost;
293 } SPLITCOST;
294
295 static int
296 comparecost(const void *a, const void *b) {
297         return ((SPLITCOST *) a)->cost - ((SPLITCOST *) b)->cost;
298 }
299
300
301 Datum
302 ghstore_picksplit(PG_FUNCTION_ARGS) {
303 #ifdef GEVHDRSZ
304         GistEntryVector      *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
305         OffsetNumber maxoff = entryvec->n - 2;
306 #else
307         bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
308         OffsetNumber maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
309 #endif
310         GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
311         OffsetNumber k,
312                                 j;
313         GISTTYPE *datum_l,
314                            *datum_r;
315         BITVECP  union_l,
316                                 union_r;
317         int4        size_alpha, size_beta;
318         int4        size_waste,
319                                 waste = -1;
320         int4        nbytes;
321         OffsetNumber seed_1 = 0,
322                                 seed_2 = 0;
323         OffsetNumber *left,
324                            *right;
325         BITVECP  ptr;
326         int                  i;
327         SPLITCOST  *costvector;
328         GISTTYPE *_k,
329                            *_j;
330
331         nbytes = (maxoff + 2) * sizeof(OffsetNumber);
332         v->spl_left = (OffsetNumber *) palloc(nbytes);
333         v->spl_right = (OffsetNumber *) palloc(nbytes);
334
335         for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
336                 _k = GETENTRY(entryvec, k);
337                 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
338                         size_waste=hemdist(_k, GETENTRY(entryvec, j));
339                         if (size_waste > waste ) {
340                                 waste = size_waste;
341                                 seed_1 = k;
342                                 seed_2 = j;
343                         }
344                 }
345         }
346
347         left = v->spl_left;
348         v->spl_nleft = 0;
349         right = v->spl_right;
350         v->spl_nright = 0;
351
352         if (seed_1 == 0 || seed_2 == 0)
353         {
354                 seed_1 = 1;
355                 seed_2 = 2;
356         }
357
358         /* form initial .. */
359         if (ISALLTRUE(GETENTRY(entryvec, seed_1))) {
360                 datum_l = (GISTTYPE *) palloc(GTHDRSIZE);
361                 datum_l->len = GTHDRSIZE;
362                 datum_l->flag = ALLISTRUE;
363         } else {
364                 datum_l = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
365                 datum_l->len = GTHDRSIZE + SIGLEN;
366                 datum_l->flag = 0;
367                 memcpy((void *) GETSIGN(datum_l), (void *) GETSIGN(GETENTRY(entryvec, seed_1)), sizeof(BITVEC))
368 ;
369         }
370         if (ISALLTRUE(GETENTRY(entryvec, seed_2))) {
371                 datum_r = (GISTTYPE *) palloc(GTHDRSIZE);
372                 datum_r->len = GTHDRSIZE;
373                 datum_r->flag = ALLISTRUE;
374         } else {
375                 datum_r = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
376                 datum_r->len = GTHDRSIZE + SIGLEN;
377                 datum_r->flag = 0;
378                 memcpy((void *) GETSIGN(datum_r), (void *) GETSIGN(GETENTRY(entryvec, seed_2)), sizeof(BITVEC)) ;
379         }
380
381         maxoff = OffsetNumberNext(maxoff);
382         /* sort before ... */
383         costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
384         for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
385         {
386                 costvector[j - 1].pos = j;
387                 _j = GETENTRY(entryvec, j);
388                 size_alpha = hemdist(datum_l,_j);
389                 size_beta  = hemdist(datum_r,_j);
390                 costvector[j - 1].cost = abs(size_alpha - size_beta);
391         }
392         qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
393
394         union_l=GETSIGN(datum_l);
395         union_r=GETSIGN(datum_r);
396
397         for (k = 0; k < maxoff; k++) {
398                 j = costvector[k].pos;
399                 if (j == seed_1) {
400                         *left++ = j;
401                         v->spl_nleft++;
402                         continue;
403                 } else if (j == seed_2) {
404                         *right++ = j;
405                         v->spl_nright++;
406                         continue;
407                 }
408                 _j = GETENTRY(entryvec, j);
409                 size_alpha = hemdist(datum_l,_j);
410                 size_beta  = hemdist(datum_r,_j);
411
412                 if (size_alpha < size_beta  + WISH_F(v->spl_nleft, v->spl_nright, 0.0001)) {
413                         if (ISALLTRUE(datum_l) || ISALLTRUE(_j) ) {
414                                 if (!ISALLTRUE(datum_l))
415                                         MemSet((void *) union_l, 0xff, sizeof(BITVEC));
416                         } else {
417                                 ptr=GETSIGN(_j);
418                                 LOOPBYTE(
419                                         union_l[i] |= ptr[i];
420                                 );
421                         }
422                         *left++ = j;
423                         v->spl_nleft++;
424                 } else {
425                         if (ISALLTRUE(datum_r) || ISALLTRUE(_j) ) {
426                                 if (!ISALLTRUE(datum_r))
427                                         MemSet((void *) union_r, 0xff, sizeof(BITVEC));
428                         } else {
429                                 ptr=GETSIGN(_j);
430                                 LOOPBYTE(
431                                         union_r[i] |= ptr[i];
432                                 );
433                         }
434                         *right++ = j;
435                         v->spl_nright++;
436                 }
437         }
438
439         *right = *left = FirstOffsetNumber;
440         pfree(costvector);
441
442         v->spl_ldatum = PointerGetDatum(datum_l);
443         v->spl_rdatum = PointerGetDatum(datum_r);
444
445         PG_RETURN_POINTER(v);
446 }
447
448
449 Datum      
450 ghstore_consistent(PG_FUNCTION_ARGS) {
451         GISTTYPE  *entry = (GISTTYPE*) DatumGetPointer( ((GISTENTRY *) PG_GETARG_POINTER(0))->key );
452         HStore *query=PG_GETARG_HS(1);
453         bool res=true;
454         HEntry  *qe = ARRPTR(query);
455         char    *qv = STRPTR(query);
456         BITVECP sign;
457
458         if ( ISALLTRUE(entry) ) {
459                 PG_FREE_IF_COPY(query,1);
460                 PG_RETURN_BOOL(true);
461         }
462
463         sign=GETSIGN(entry); 
464         while(res && qe-ARRPTR(query) < query->size) {
465                 int crc = crc32_sz((char *)(qv + qe->pos), qe->keylen);
466                 if (GETBIT(sign,HASHVAL(crc))) {
467                         if ( !qe->valisnull ) {
468                                 crc = crc32_sz((char *)(qv + qe->pos + qe->keylen), qe->vallen);
469                                 if ( !GETBIT(sign,HASHVAL(crc)) )
470                                         res=false;
471                         }
472                 } else
473                         res=false;
474                 qe++;
475         }
476
477         PG_FREE_IF_COPY(query,1);
478         PG_RETURN_BOOL(res);
479 }
480
481