fix gistentryinit() macro for 8.2
[hstore.git] / hstore_gist.c
1 #include "hstore.h"
2
3 #include "access/gist.h"
4 #include "access/itup.h"
5 /*#include "access/rtree.h"*/
6 #include "crc32.h"
7
/*
 * Signature ("bigint") definitions.
 *
 * A signature is a fixed-size bit array of SIGLENBIT bits stored in
 * SIGLEN bytes.
 */
#define BITBYTE 8
#define SIGLENINT  4               /* values above 122 make the key toast, which is very slow */
#define SIGLEN  ( sizeof(int)*SIGLENINT )
#define SIGLENBIT (SIGLEN*BITBYTE)

typedef char BITVEC[SIGLEN];
typedef char *BITVECP;

#define SIGPTR(x)  ( (BITVECP) ARR_DATA_PTR(x) )


/*
 * Run statement(s) `a` once per signature byte.  The caller must have an
 * integer variable named `i` in scope; it is used as the loop index.
 * The expansion is a plain for-loop, so `break` inside `a` leaves it.
 */
#define LOOPBYTE(a) \
                for(i=0;i<SIGLEN;i++) {\
                                a;\
                }

/* Same as LOOPBYTE, but iterates once per signature bit. */
#define LOOPBIT(a) \
                for(i=0;i<SIGLENBIT;i++) {\
                                a;\
                }

/* beware of multiple evaluation of arguments to these macros! */
/* Byte of bit array x that holds bit number i (an lvalue). */
#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
/* Bit i (0..7) of the single byte that x points to. */
#define GETBITBYTE(x,i) ( (*((char*)(x)) >> (i)) & 0x01 )
/*
 * CLRBIT and SETBIT are wrapped in parentheses so that they stay a single
 * expression when combined with other operators (e.g. `!SETBIT(...)`).
 */
#define CLRBIT(x,i)   ( GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) ) )
#define SETBIT(x,i)   ( GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) ) )
#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
/* Map a hash value onto a bit position inside the signature. */
#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
/* Set the signature bit selected by hash value val. */
#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
38
/*
 * In-index key representation.
 *
 * len  - total size of the key in bytes (set from CALCGTSIZE() by the
 *        functions in this file)
 * flag - flag bits; ALLISTRUE is the only flag this file tests
 * data - start of the signature bytes, present only when ALLISTRUE is
 *        not set
 */
typedef struct {
        int4        len;
        int4        flag;
        char        data[1];
}       GISTTYPE;
44
/* Flag bit: the key stands for an all-ones signature and stores no bit array. */
#define ALLISTRUE       0x04

/* Non-zero when key x has the ALLISTRUE flag set.  x may be any pointer expression. */
#define ISALLTRUE(x)    ( ((GISTTYPE*)(x))->flag & ALLISTRUE )

/* Size of the fixed key header (the len and flag fields). */
#define GTHDRSIZE       ( sizeof(int4)*2  )
/* Total key size for the given flag bits: header only when ALLISTRUE is set. */
#define CALCGTSIZE(flag) ( GTHDRSIZE+(((flag) & ALLISTRUE) ? 0 : SIGLEN) )

/* Pointer to the signature bytes that follow the header of key x. */
#define GETSIGN(x)      ( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
53
/* Number of set bits in the single byte that val points to. */
#define SUMBIT(val) (       \
        GETBITBYTE((val),0) + \
        GETBITBYTE((val),1) + \
        GETBITBYTE((val),2) + \
        GETBITBYTE((val),3) + \
        GETBITBYTE((val),4) + \
        GETBITBYTE((val),5) + \
        GETBITBYTE((val),6) + \
        GETBITBYTE((val),7)   \
)

/*
 * Key of entry number pos in an entry vector, as a GISTTYPE pointer.
 * When GEVHDRSZ is defined the vector is accessed through its `vector`
 * member; otherwise vec is treated as a varlena whose data area is an
 * array of GISTENTRY.
 */
#ifdef GEVHDRSZ
#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
#else
#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
#endif

/* Evaluates to -((a)-(b))^3 * (c) as a double. */
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
72
73 PG_FUNCTION_INFO_V1(ghstore_in);
74 Datum      ghstore_in(PG_FUNCTION_ARGS);
75
76 PG_FUNCTION_INFO_V1(ghstore_out);
77 Datum      ghstore_out(PG_FUNCTION_ARGS);
78   
79
80 Datum
81 ghstore_in(PG_FUNCTION_ARGS) {
82         elog(ERROR, "Not implemented");
83         PG_RETURN_DATUM(0);
84 }
85
86 Datum
87 ghstore_out(PG_FUNCTION_ARGS) {
88         elog(ERROR, "Not implemented");
89         PG_RETURN_DATUM(0);
90 }
91
92 PG_FUNCTION_INFO_V1(ghstore_consistent);
93 PG_FUNCTION_INFO_V1(ghstore_compress);
94 PG_FUNCTION_INFO_V1(ghstore_decompress);
95 PG_FUNCTION_INFO_V1(ghstore_penalty);
96 PG_FUNCTION_INFO_V1(ghstore_picksplit);
97 PG_FUNCTION_INFO_V1(ghstore_union);
98 PG_FUNCTION_INFO_V1(ghstore_same);
99
100 Datum      ghstore_consistent(PG_FUNCTION_ARGS);
101 Datum      ghstore_compress(PG_FUNCTION_ARGS);
102 Datum      ghstore_decompress(PG_FUNCTION_ARGS);
103 Datum      ghstore_penalty(PG_FUNCTION_ARGS);
104 Datum      ghstore_picksplit(PG_FUNCTION_ARGS);
105 Datum      ghstore_union(PG_FUNCTION_ARGS);
106 Datum      ghstore_same(PG_FUNCTION_ARGS);
107
108 Datum
109 ghstore_compress(PG_FUNCTION_ARGS) {
110         GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
111         GISTENTRY  *retval = entry;
112          
113         if (entry->leafkey) {                                                  
114                 GISTTYPE   *res = (GISTTYPE*)palloc(CALCGTSIZE(0));
115                 HStore     *toastedval = (HStore *) DatumGetPointer(entry->key);
116                 HStore     *val = (HStore *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
117                 HEntry     *ptr = ARRPTR(val);
118                 char       *words = STRPTR(val);
119
120                 memset(res,0,CALCGTSIZE(0));
121                 res->len=CALCGTSIZE(0);
122         
123                 while(ptr-ARRPTR(val) < val->size) {
124                         int h;
125                         h = crc32_sz((char*)(words+ptr->pos), ptr->keylen);
126                         HASH( GETSIGN(res), h);
127                         if ( !ptr->valisnull ) {
128                                 h = crc32_sz((char *)(words+ptr->pos+ptr->keylen), ptr->vallen);
129                                 HASH( GETSIGN(res), h);
130                         }
131                         ptr++;
132                 }
133
134                 if (val != toastedval)
135                         pfree(val);
136
137                 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
138                 gistentryinit(*retval, PointerGetDatum(res),
139                                           entry->rel, entry->page,
140                                           entry->offset,
141 #ifndef PG_MODULE_MAGIC
142                                           res->len,
143 #endif
144                                           FALSE);
145         } else if ( !ISALLTRUE(DatumGetPointer(entry->key)) ) {
146                 int4        i;
147                 GISTTYPE   *res;
148                 BITVECP  sign = GETSIGN(DatumGetPointer(entry->key));
149                 
150                 LOOPBYTE(
151                         if ((sign[i] & 0xff) != 0xff)
152                                 PG_RETURN_POINTER(retval);
153                 );
154
155                 res = (GISTTYPE *) palloc(CALCGTSIZE(ALLISTRUE));
156                 res->len = CALCGTSIZE(ALLISTRUE);
157                 res->flag = ALLISTRUE;
158
159                 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
160                 gistentryinit(*retval, PointerGetDatum(res),
161                                           entry->rel, entry->page,
162                                           entry->offset, 
163 #ifndef PG_MODULE_MAGIC
164                                           res->len,
165 #endif
166                                           FALSE);
167         }
168
169         PG_RETURN_POINTER(retval);
170 }
171
172 Datum
173 ghstore_decompress(PG_FUNCTION_ARGS) {
174         PG_RETURN_DATUM(PG_GETARG_DATUM(0));
175 }
176
177 Datum
178 ghstore_same(PG_FUNCTION_ARGS) {
179         GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
180         GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
181         bool       *result = (bool *) PG_GETARG_POINTER(2);
182
183         if (ISALLTRUE(a) && ISALLTRUE(b))
184                 *result = true;
185         else if (ISALLTRUE(a))
186                 *result = false;
187         else if (ISALLTRUE(b))
188                 *result = false;
189         else {
190                 int4            i;
191                 BITVECP         sa = GETSIGN(a),
192                                 sb = GETSIGN(b);
193                 *result = true;
194                 LOOPBYTE(
195                         if (sa[i] != sb[i]) {
196                                 *result = false;
197                                 break;
198                         }
199                 );
200         }
201         PG_RETURN_POINTER(result);
202 }
203
204 static int4
205 sizebitvec(BITVECP sign) {
206         int4        size = 0, i;
207         LOOPBYTE(
208                 size += SUMBIT(sign);
209                 sign = (BITVECP) (((char *) sign) + 1);
210         );
211         return size;
212 }
213  
214 static int
215 hemdistsign(BITVECP  a, BITVECP b) {
216         int i,dist=0;
217
218         LOOPBIT(
219                 if ( GETBIT(a,i) != GETBIT(b,i) )
220                         dist++;
221         );
222         return dist;
223 }
224
225 static int
226 hemdist(GISTTYPE   *a, GISTTYPE   *b) {
227         if ( ISALLTRUE(a) ) {
228                 if (ISALLTRUE(b))
229                         return 0;
230                 else
231                         return SIGLENBIT-sizebitvec(GETSIGN(b));
232         } else if (ISALLTRUE(b))
233                 return SIGLENBIT-sizebitvec(GETSIGN(a));
234
235         return hemdistsign( GETSIGN(a), GETSIGN(b) );
236 }
237
238 static int4
239 unionkey(BITVECP sbase, GISTTYPE * add)
240 {
241         int4        i;
242         BITVECP  sadd = GETSIGN(add);
243
244         if (ISALLTRUE(add))
245                 return 1;
246         LOOPBYTE(
247                 sbase[i] |= sadd[i];
248         );
249         return 0;
250 }
251
252 Datum
253 ghstore_union(PG_FUNCTION_ARGS) {
254 #ifdef GEVHDRSZ
255         GistEntryVector      *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
256         int4        len = entryvec->n;
257 #else
258         bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
259         int4        len = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
260 #endif
261         int             *size = (int *) PG_GETARG_POINTER(1);
262         BITVEC    base;
263         int4        i;
264         int4        flag = 0;
265         GISTTYPE   *result;
266
267         MemSet((void *) base, 0, sizeof(BITVEC));
268         for (i = 0; i < len; i++) {
269                 if (unionkey(base, GETENTRY(entryvec, i))) {
270                         flag = ALLISTRUE;
271                         break;
272                 }
273         }
274
275         len = CALCGTSIZE(flag);
276         result = (GISTTYPE *) palloc(len);
277         *size = result->len = len;
278         result->flag = flag;
279         if (!ISALLTRUE(result))
280                 memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
281
282         PG_RETURN_POINTER(result);
283 }
284
285 Datum
286 ghstore_penalty(PG_FUNCTION_ARGS) {
287         GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
288         GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
289         float      *penalty = (float *) PG_GETARG_POINTER(2);
290         GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
291         GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
292
293         *penalty=hemdist(origval,newval);
294         PG_RETURN_POINTER(penalty);
295 }
296
297
298 typedef struct {
299         OffsetNumber pos;
300         int4        cost;
301 } SPLITCOST;
302
303 static int
304 comparecost(const void *a, const void *b) {
305         return ((SPLITCOST *) a)->cost - ((SPLITCOST *) b)->cost;
306 }
307
308
309 Datum
310 ghstore_picksplit(PG_FUNCTION_ARGS) {
311 #ifdef GEVHDRSZ
312         GistEntryVector      *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
313         OffsetNumber maxoff = entryvec->n - 2;
314 #else
315         bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
316         OffsetNumber maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
317 #endif
318         GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
319         OffsetNumber k,
320                                 j;
321         GISTTYPE *datum_l,
322                            *datum_r;
323         BITVECP  union_l,
324                                 union_r;
325         int4        size_alpha, size_beta;
326         int4        size_waste,
327                                 waste = -1;
328         int4        nbytes;
329         OffsetNumber seed_1 = 0,
330                                 seed_2 = 0;
331         OffsetNumber *left,
332                            *right;
333         BITVECP  ptr;
334         int                  i;
335         SPLITCOST  *costvector;
336         GISTTYPE *_k,
337                            *_j;
338
339         nbytes = (maxoff + 2) * sizeof(OffsetNumber);
340         v->spl_left = (OffsetNumber *) palloc(nbytes);
341         v->spl_right = (OffsetNumber *) palloc(nbytes);
342
343         for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
344                 _k = GETENTRY(entryvec, k);
345                 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
346                         size_waste=hemdist(_k, GETENTRY(entryvec, j));
347                         if (size_waste > waste ) {
348                                 waste = size_waste;
349                                 seed_1 = k;
350                                 seed_2 = j;
351                         }
352                 }
353         }
354
355         left = v->spl_left;
356         v->spl_nleft = 0;
357         right = v->spl_right;
358         v->spl_nright = 0;
359
360         if (seed_1 == 0 || seed_2 == 0)
361         {
362                 seed_1 = 1;
363                 seed_2 = 2;
364         }
365
366         /* form initial .. */
367         if (ISALLTRUE(GETENTRY(entryvec, seed_1))) {
368                 datum_l = (GISTTYPE *) palloc(GTHDRSIZE);
369                 datum_l->len = GTHDRSIZE;
370                 datum_l->flag = ALLISTRUE;
371         } else {
372                 datum_l = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
373                 datum_l->len = GTHDRSIZE + SIGLEN;
374                 datum_l->flag = 0;
375                 memcpy((void *) GETSIGN(datum_l), (void *) GETSIGN(GETENTRY(entryvec, seed_1)), sizeof(BITVEC))
376 ;
377         }
378         if (ISALLTRUE(GETENTRY(entryvec, seed_2))) {
379                 datum_r = (GISTTYPE *) palloc(GTHDRSIZE);
380                 datum_r->len = GTHDRSIZE;
381                 datum_r->flag = ALLISTRUE;
382         } else {
383                 datum_r = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
384                 datum_r->len = GTHDRSIZE + SIGLEN;
385                 datum_r->flag = 0;
386                 memcpy((void *) GETSIGN(datum_r), (void *) GETSIGN(GETENTRY(entryvec, seed_2)), sizeof(BITVEC)) ;
387         }
388
389         maxoff = OffsetNumberNext(maxoff);
390         /* sort before ... */
391         costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
392         for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
393         {
394                 costvector[j - 1].pos = j;
395                 _j = GETENTRY(entryvec, j);
396                 size_alpha = hemdist(datum_l,_j);
397                 size_beta  = hemdist(datum_r,_j);
398                 costvector[j - 1].cost = abs(size_alpha - size_beta);
399         }
400         qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
401
402         union_l=GETSIGN(datum_l);
403         union_r=GETSIGN(datum_r);
404
405         for (k = 0; k < maxoff; k++) {
406                 j = costvector[k].pos;
407                 if (j == seed_1) {
408                         *left++ = j;
409                         v->spl_nleft++;
410                         continue;
411                 } else if (j == seed_2) {
412                         *right++ = j;
413                         v->spl_nright++;
414                         continue;
415                 }
416                 _j = GETENTRY(entryvec, j);
417                 size_alpha = hemdist(datum_l,_j);
418                 size_beta  = hemdist(datum_r,_j);
419
420                 if (size_alpha < size_beta  + WISH_F(v->spl_nleft, v->spl_nright, 0.0001)) {
421                         if (ISALLTRUE(datum_l) || ISALLTRUE(_j) ) {
422                                 if (!ISALLTRUE(datum_l))
423                                         MemSet((void *) union_l, 0xff, sizeof(BITVEC));
424                         } else {
425                                 ptr=GETSIGN(_j);
426                                 LOOPBYTE(
427                                         union_l[i] |= ptr[i];
428                                 );
429                         }
430                         *left++ = j;
431                         v->spl_nleft++;
432                 } else {
433                         if (ISALLTRUE(datum_r) || ISALLTRUE(_j) ) {
434                                 if (!ISALLTRUE(datum_r))
435                                         MemSet((void *) union_r, 0xff, sizeof(BITVEC));
436                         } else {
437                                 ptr=GETSIGN(_j);
438                                 LOOPBYTE(
439                                         union_r[i] |= ptr[i];
440                                 );
441                         }
442                         *right++ = j;
443                         v->spl_nright++;
444                 }
445         }
446
447         *right = *left = FirstOffsetNumber;
448         pfree(costvector);
449
450         v->spl_ldatum = PointerGetDatum(datum_l);
451         v->spl_rdatum = PointerGetDatum(datum_r);
452
453         PG_RETURN_POINTER(v);
454 }
455
456
457 Datum      
458 ghstore_consistent(PG_FUNCTION_ARGS) {
459         GISTTYPE  *entry = (GISTTYPE*) DatumGetPointer( ((GISTENTRY *) PG_GETARG_POINTER(0))->key );
460         HStore *query=PG_GETARG_HS(1);
461         bool res=true;
462         HEntry  *qe = ARRPTR(query);
463         char    *qv = STRPTR(query);
464         BITVECP sign;
465
466         if ( ISALLTRUE(entry) ) {
467                 PG_FREE_IF_COPY(query,1);
468                 PG_RETURN_BOOL(true);
469         }
470
471         sign=GETSIGN(entry); 
472         while(res && qe-ARRPTR(query) < query->size) {
473                 int crc = crc32_sz((char *)(qv + qe->pos), qe->keylen);
474                 if (GETBIT(sign,HASHVAL(crc))) {
475                         if ( !qe->valisnull ) {
476                                 crc = crc32_sz((char *)(qv + qe->pos + qe->keylen), qe->vallen);
477                                 if ( !GETBIT(sign,HASHVAL(crc)) )
478                                         res=false;
479                         }
480                 } else
481                         res=false;
482                 qe++;
483         }
484
485         PG_FREE_IF_COPY(query,1);
486         PG_RETURN_BOOL(res);
487 }
488
489