README
[hstore.git] / hstore_gin.c
1 /*
2  * contrib/hstore/hstore_gin.c
3  */
4 #include "postgres.h"
5
6 #include "access/gin.h"
7 #include "access/skey.h"
8 #include "catalog/pg_type.h"
9 #include "utils/builtins.h"
10
11 #include "hstore.h"
12
13
/*
 * Version-1 calling-convention declaration and prototype for
 * gin_extract_hstore(), which is defined further down in this file.
 */
PG_FUNCTION_INFO_V1(gin_extract_hstore);
Datum           gin_extract_hstore(PG_FUNCTION_ARGS);
16
17 /* Build an indexable text value */
18 static text *
19 makeitem(char *str, int len, char flag)
20 {
21         text       *item;
22
23         item = (text *) palloc(VARHDRSZ + len + 1);
24         SET_VARSIZE(item, VARHDRSZ + len + 1);
25
26         *VARDATA(item) = flag;
27
28         if (str && len > 0)
29                 memcpy(VARDATA(item) + 1, str, len);
30
31         return item;
32 }
33
34 static text *
35 makeitemFromValue(HStoreValue *v, char flag)
36 {
37         text            *item;
38         char            *cstr;
39
40         switch(v->type)
41         {
42                 case hsvNull:
43                         item = makeitem(NULL, 0, NULLFLAG);
44                         break;
45                 case hsvBool:
46                         item = makeitem((v->boolean) ? " t" : " f", 2, flag);
47                         break;
48                 case hsvNumeric:
49                         /*
50                          * It's needed to get some text representaion of
51                          * numeric independed from locale setting and 
52                          * preciosion. We use hashed value - it's safe
53                          * because recheck flag will be set anyway
54                          */
55                         cstr = palloc(8 /* hex numbers */ + 1 /* \0 */);
56                         snprintf(cstr, 9, "%08x",  DatumGetInt32(DirectFunctionCall1(hash_numeric,
57                                                                                                         NumericGetDatum(v->numeric))));
58                         item = makeitem(cstr, 8, flag);
59                         pfree(cstr);
60                         break;
61                 case hsvString:
62                         item = makeitem(v->string.val, v->string.len, flag);
63                         break;
64                 default:
65                         elog(ERROR, "Wrong hstore type");
66         }
67
68         return item;
69 }
70
71
72 Datum
73 gin_extract_hstore(PG_FUNCTION_ARGS)
74 {
75         HStore                  *hs = PG_GETARG_HS(0);
76         int32                   *nentries = (int32 *) PG_GETARG_POINTER(1);
77         Datum                   *entries = NULL;
78         int                             total = 2 * HS_ROOT_COUNT(hs);
79         int                             i = 0, r;
80         HStoreIterator  *it;
81         HStoreValue             v;
82
83         if (total == 0)
84         {
85                 *nentries = 0;
86                 PG_RETURN_POINTER(NULL);
87         }
88
89         entries = (Datum *) palloc(sizeof(Datum) * total);
90
91         it = HStoreIteratorInit(VARDATA(hs));
92
93         while((r = HStoreIteratorGet(&it, &v, false)) != 0)
94         {
95                 if (i >= total)
96                 {
97                         total *= 2;
98                         entries = (Datum *) repalloc(entries, sizeof(Datum) * total);
99                 }
100
101                 switch(r)
102                 {
103                         case WHS_KEY:
104                                 entries[i++] = PointerGetDatum(makeitemFromValue(&v, KEYFLAG));
105                                 break;
106                         case WHS_VALUE:
107                                 entries[i++] = PointerGetDatum(makeitemFromValue(&v, VALFLAG));
108                                 break;
109                         case WHS_ELEM:
110                                 entries[i++] = PointerGetDatum(makeitemFromValue(&v, ELEMFLAG));
111                                 break;
112                         default:
113                                 break;
114                 }
115         }
116
117         *nentries = i;
118
119         PG_RETURN_POINTER(entries);
120 }
121
122 PG_FUNCTION_INFO_V1(gin_extract_hstore_query);
123 Datum           gin_extract_hstore_query(PG_FUNCTION_ARGS);
124
125 Datum
126 gin_extract_hstore_query(PG_FUNCTION_ARGS)
127 {
128         int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
129         StrategyNumber strategy = PG_GETARG_UINT16(2);
130         int32      *searchMode = (int32 *) PG_GETARG_POINTER(6);
131         Datum      *entries;
132
133         if (strategy == HStoreContainsStrategyNumber)
134         {
135                 /* Query is an hstore, so just apply gin_extract_hstore... */
136                 entries = (Datum *)
137                         DatumGetPointer(DirectFunctionCall2(gin_extract_hstore,
138                                                                                                 PG_GETARG_DATUM(0),
139                                                                                                 PointerGetDatum(nentries)));
140                 /* ... except that "contains {}" requires a full index scan */
141                 if (entries == NULL)
142                         *searchMode = GIN_SEARCH_MODE_ALL;
143         }
144         else if (strategy == HStoreExistsStrategyNumber)
145         {
146                 text       *query = PG_GETARG_TEXT_PP(0);
147                 text       *item;
148
149                 *nentries = 1;
150                 entries = (Datum *) palloc(sizeof(Datum));
151                 item = makeitem(VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query), KEYFLAG);
152                 entries[0] = PointerGetDatum(item);
153         }
154         else if (strategy == HStoreExistsAnyStrategyNumber ||
155                          strategy == HStoreExistsAllStrategyNumber)
156         {
157                 ArrayType  *query = PG_GETARG_ARRAYTYPE_P(0);
158                 Datum      *key_datums;
159                 bool       *key_nulls;
160                 int                     key_count;
161                 int                     i,
162                                         j;
163                 text       *item;
164
165                 deconstruct_array(query,
166                                                   TEXTOID, -1, false, 'i',
167                                                   &key_datums, &key_nulls, &key_count);
168
169                 entries = (Datum *) palloc(sizeof(Datum) * key_count);
170
171                 for (i = 0, j = 0; i < key_count; ++i)
172                 {
173                         /* Nulls in the array are ignored, cf hstoreArrayToPairs */
174                         if (key_nulls[i])
175                                 continue;
176                         item = makeitem(VARDATA(key_datums[i]),
177                                                         VARSIZE(key_datums[i]) - VARHDRSZ, KEYFLAG);
178                         entries[j++] = PointerGetDatum(item);
179                 }
180
181                 *nentries = j;
182                 /* ExistsAll with no keys should match everything */
183                 if (j == 0 && strategy == HStoreExistsAllStrategyNumber)
184                         *searchMode = GIN_SEARCH_MODE_ALL;
185         }
186         else
187         {
188                 elog(ERROR, "unrecognized strategy number: %d", strategy);
189                 entries = NULL;                 /* keep compiler quiet */
190         }
191
192         PG_RETURN_POINTER(entries);
193 }
194
195 PG_FUNCTION_INFO_V1(gin_consistent_hstore);
196 Datum           gin_consistent_hstore(PG_FUNCTION_ARGS);
197
198 Datum
199 gin_consistent_hstore(PG_FUNCTION_ARGS)
200 {
201         bool       *check = (bool *) PG_GETARG_POINTER(0);
202         StrategyNumber strategy = PG_GETARG_UINT16(1);
203
204         /* HStore          *query = PG_GETARG_HS(2); */
205         int32           nkeys = PG_GETARG_INT32(3);
206
207         /* Pointer         *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
208         bool       *recheck = (bool *) PG_GETARG_POINTER(5);
209         bool            res = true;
210         int32           i;
211
212         if (strategy == HStoreContainsStrategyNumber)
213         {
214                 /*
215                  * Index doesn't have information about correspondence of keys and
216                  * values, so we need recheck.  However, if not all the keys are
217                  * present, we can fail at once.
218                  */
219                 *recheck = true;
220                 for (i = 0; i < nkeys; i++)
221                 {
222                         if (!check[i])
223                         {
224                                 res = false;
225                                 break;
226                         }
227                 }
228         }
229         else if (strategy == HStoreExistsStrategyNumber)
230         {
231                 /* Existence of key is guaranteed in default search mode */
232                 *recheck = false;
233                 res = true;
234         }
235         else if (strategy == HStoreExistsAnyStrategyNumber)
236         {
237                 /* Existence of key is guaranteed in default search mode */
238                 *recheck = false;
239                 res = true;
240         }
241         else if (strategy == HStoreExistsAllStrategyNumber)
242         {
243                 /* Testing for all the keys being present gives an exact result */
244                 *recheck = false;
245                 for (i = 0; i < nkeys; i++)
246                 {
247                         if (!check[i])
248                         {
249                                 res = false;
250                                 break;
251                         }
252                 }
253         }
254         else
255                 elog(ERROR, "unrecognized strategy number: %d", strategy);
256
257         PG_RETURN_BOOL(res);
258 }
259
260 PG_FUNCTION_INFO_V1(gin_consistent_hstore_hash);
261 Datum           gin_consistent_hstore_hash(PG_FUNCTION_ARGS);
262
263 Datum
264 gin_consistent_hstore_hash(PG_FUNCTION_ARGS)
265 {
266         bool       *check = (bool *) PG_GETARG_POINTER(0);
267         StrategyNumber strategy = PG_GETARG_UINT16(1);
268
269         /* HStore          *query = PG_GETARG_HS(2); */
270         int32           nkeys = PG_GETARG_INT32(3);
271
272         /* Pointer         *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
273         bool       *recheck = (bool *) PG_GETARG_POINTER(5);
274         bool            res = true;
275         int32           i;
276
277         if (strategy == HStoreContainsStrategyNumber)
278         {
279                 /*
280                  * Index doesn't have information about correspondence of keys and
281                  * values, so we need recheck.  However, if not all the keys are
282                  * present, we can fail at once.
283                  */
284                 *recheck = true;
285                 for (i = 0; i < nkeys; i++)
286                 {
287                         if (!check[i])
288                         {
289                                 res = false;
290                                 break;
291                         }
292                 }
293         }
294         else
295                 elog(ERROR, "unrecognized strategy number: %d", strategy);
296
297         PG_RETURN_BOOL(res);
298 }
299
/*
 * Version-1 calling-convention declaration and prototype for
 * gin_extract_hstore_hash(), which is defined further down in this file.
 */
PG_FUNCTION_INFO_V1(gin_extract_hstore_hash);
Datum           gin_extract_hstore_hash(PG_FUNCTION_ARGS);

/*
 * One item of the stack gin_extract_hstore_hash keeps while walking an
 * hstore: the running CRC-32 state for a path prefix, linked to the item
 * of the enclosing level.
 */
typedef struct PathHashStack
{
        pg_crc32                          hash_state;   /* running CRC-32 state */
        struct PathHashStack *next;     /* enclosing level; NULL at the bottom */
} PathHashStack;

/* One NUL byte, hashed after each path component (always used with length 1) */
#define PATH_SEPARATOR ("\0")
310
311 static void
312 hash_value(HStoreValue *v, PathHashStack *stack)
313 {
314         switch(v->type)
315         {
316                 case hsvNull:
317                         COMP_CRC32(stack->hash_state, "NULL", 5 /* include trailing \0 */);
318                         break;
319                 case hsvBool:
320                         COMP_CRC32(stack->hash_state, (v->boolean) ? " t" : " f", 2 /* include trailing \0 */);
321                         break;
322                 case hsvNumeric:
323                         stack->hash_state ^= DatumGetInt32(DirectFunctionCall1(hash_numeric,
324                                                                                                 NumericGetDatum(v->numeric)));
325                         break;
326                 case hsvString:
327                         COMP_CRC32(stack->hash_state, v->string.val, v->string.len);
328                         break;
329                 default:
330                         elog(ERROR, "Shouldn't take hash of array");
331                         break;
332         }
333 }
334
335 Datum
336 gin_extract_hstore_hash(PG_FUNCTION_ARGS)
337 {
338         HStore                  *hs = PG_GETARG_HS(0);
339         int32                   *nentries = (int32 *) PG_GETARG_POINTER(1);
340         Datum                   *entries = NULL;
341         int                             total = 2 * HS_ROOT_COUNT(hs);
342         int                             i = 0, r;
343         HStoreIterator  *it;
344         HStoreValue             v;
345         PathHashStack   tail;
346         PathHashStack   *stack, *tmp;
347         pg_crc32                path_crc32;
348
349         if (total == 0)
350         {
351                 *nentries = 0;
352                 PG_RETURN_POINTER(NULL);
353         }
354
355         entries = (Datum *) palloc(sizeof(Datum) * total);
356
357         it = HStoreIteratorInit(VARDATA(hs));
358
359         tail.next = NULL;
360         INIT_CRC32(tail.hash_state);
361         stack = &tail;
362
363         /*
364          * Calculate hashes of all key_1.key_2. ... .key_n.value paths as entries.
365          * Order of array elements doesn't matter so array keys are empty in path.
366          * For faster calculation of hashes use stack for precalculated hashes
367          * of prefixes.
368          */
369         while((r = HStoreIteratorGet(&it, &v, false)) != 0)
370         {
371                 if (i >= total)
372                 {
373                         total *= 2;
374                         entries = (Datum *) repalloc(entries, sizeof(Datum) * total);
375                 }
376
377                 switch(r)
378                 {
379                         case WHS_BEGIN_ARRAY:
380                                 tmp = stack;
381                                 stack = (PathHashStack *)palloc(sizeof(PathHashStack));
382                                 stack->next = tmp;
383                                 stack->hash_state = tmp->hash_state;
384                                 COMP_CRC32(stack->hash_state, PATH_SEPARATOR, 1);
385                                 break;
386                         case WHS_BEGIN_HASH:
387                                 /* Preserve stack item for key */
388                                 tmp = stack;
389                                 stack = (PathHashStack *)palloc(sizeof(PathHashStack));
390                                 stack->next = tmp;
391                                 break;
392                         case WHS_KEY:
393                                 /* Calc hash of key and separated into preserved stack item */
394                                 stack->hash_state = stack->next->hash_state;
395                                 hash_value(&v, stack);
396                                 COMP_CRC32(stack->hash_state, PATH_SEPARATOR, 1);
397                                 break;
398                         case WHS_VALUE:
399                         case WHS_ELEM:
400                                 path_crc32 = stack->hash_state;
401                                 hash_value(&v, stack);
402                                 FIN_CRC32(path_crc32);
403                                 entries[i++] = path_crc32;
404                                 break;
405                         case WHS_END_ARRAY:
406                         case WHS_END_HASH:
407                                 /* Pop stack item */
408                                 tmp = stack->next;
409                                 pfree(stack);
410                                 stack = tmp;
411                                 break;
412                         default:
413                                 break;
414                 }
415         }
416
417         *nentries = i;
418
419         PG_RETURN_POINTER(entries);
420 }
421
422 PG_FUNCTION_INFO_V1(gin_extract_hstore_hash_query);
423 Datum           gin_extract_hstore_hash_query(PG_FUNCTION_ARGS);
424
425 Datum
426 gin_extract_hstore_hash_query(PG_FUNCTION_ARGS)
427 {
428         int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
429         StrategyNumber strategy = PG_GETARG_UINT16(2);
430         int32      *searchMode = (int32 *) PG_GETARG_POINTER(6);
431         Datum      *entries;
432
433         if (strategy == HStoreContainsStrategyNumber)
434         {
435                 /* Query is an hstore, so just apply gin_extract_hstore... */
436                 entries = (Datum *)
437                         DatumGetPointer(DirectFunctionCall2(gin_extract_hstore_hash,
438                                                                                                 PG_GETARG_DATUM(0),
439                                                                                                 PointerGetDatum(nentries)));
440                 /* ... except that "contains {}" requires a full index scan */
441                 if (entries == NULL)
442                         *searchMode = GIN_SEARCH_MODE_ALL;
443         }
444         else
445         {
446                 elog(ERROR, "unrecognized strategy number: %d", strategy);
447                 entries = NULL;                 /* keep compiler quiet */
448         }
449
450         PG_RETURN_POINTER(entries);
451 }