Add PG_MODULE_MAGIC for 8.2
[hstore.git] / hstore_io.c
1 #include "hstore.h"
2 #include <ctype.h>
3
4 #ifdef PG_MODULE_MAGIC
5 PG_MODULE_MAGIC;
6 #endif
7
8 typedef struct {
9         char *begin;
10         char *ptr;
11         char *cur;
12         char *word;
13         int wordlen;
14
15         Pairs   *pairs;
16         int     pcur;
17         int     plen;
18 } HSParser;
19
/*
 * Make room for one more byte in the token buffer of the HSParser named
 * "state" (which must be in scope at the point of use).
 *
 * The buffer is doubled as soon as only one free byte would remain, so after
 * this macro there is always at least one spare byte behind state->cur.
 * state->cur is re-based because repalloc may move the buffer.
 */
#define RESIZEPRSBUF \
do { \
        int4 used = state->cur - state->word; \
        if ( used + 1 >= state->wordlen ) \
        { \
                state->wordlen *= 2; \
                state->word = (char*)repalloc( (void*)state->word, state->wordlen ); \
                state->cur = state->word + used; \
        } \
} while (0)
30
31
/*
 * States of the tokenizer in get_val().  The numeric values are kept
 * identical to the former #define constants.
 */
enum {
        GV_WAITVAL      = 0,    /* skipping leading whitespace, token not started */
        GV_INVAL        = 1,    /* inside an unquoted token */
        GV_INESCVAL     = 2,    /* inside a double-quoted token */
        GV_WAITESCIN    = 3,    /* saw a backslash in an unquoted token */
        GV_WAITESCESCIN = 4     /* saw a backslash in a double-quoted token */
};
37
38 static bool
39 get_val( HSParser *state, bool ignoreeq, bool *escaped ) {
40         int st = GV_WAITVAL;
41         state->wordlen=32;
42         state->cur = state->word = palloc( state->wordlen );
43         *escaped=false;
44
45         while(1) {
46                 if ( st == GV_WAITVAL ) {
47                         if ( *(state->ptr) == '"' ) {
48                                 *escaped=true;
49                                 st = GV_INESCVAL;
50                         } else if ( *(state->ptr) == '\0' ) {
51                                 return false;
52                         } else if (  *(state->ptr) == '=' && !ignoreeq ) {
53                                 elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), state->ptr-state->begin);
54                         } else if ( *(state->ptr) == '\\' ) {
55                                 st = GV_WAITESCIN;
56                         } else if ( !isspace(*(state->ptr)) ) {
57                                 *(state->cur) = *(state->ptr);
58                                 state->cur++;
59                                 st = GV_INVAL;
60                         }
61                 } else if ( st == GV_INVAL ) {
62                         if ( *(state->ptr) == '\\' ) {
63                                 st = GV_WAITESCIN;
64                         } else if ( *(state->ptr) == '=' && !ignoreeq ) {
65                                 state->ptr--;
66                                 return true;
67                         } else if ( *(state->ptr) == ',' && ignoreeq ) {
68                                 state->ptr--;
69                                 return true;
70                         } else if ( isspace(*(state->ptr)) ) {
71                                 return true;
72                         } else if ( *(state->ptr) == '\0' ) {
73                                 state->ptr--;
74                                 return true;
75                         } else {
76                                 RESIZEPRSBUF;
77                                 *(state->cur) = *(state->ptr);
78                                 state->cur++;
79                         }
80                 } else if ( st == GV_INESCVAL ) {
81                         if ( *(state->ptr) == '\\' ) {
82                                 st = GV_WAITESCESCIN;
83                         } else if ( *(state->ptr) == '"' ) {
84                                 return true;
85                         } else if ( *(state->ptr) == '\0' ) {
86                                 elog(ERROR,"Unexpected end of string");
87                         } else {
88                                 RESIZEPRSBUF;
89                                 *(state->cur) = *(state->ptr);
90                                 state->cur++;
91                         }
92                 } else if ( st == GV_WAITESCIN ) {
93                         if ( *(state->ptr) == '\0' )
94                                 elog(ERROR,"Unexpected end of string");
95                         RESIZEPRSBUF;
96                         *(state->cur) = *(state->ptr);
97                         state->cur++;
98                         st = GV_INVAL; 
99                 } else if ( st == GV_WAITESCESCIN ) {
100                         if ( *(state->ptr) == '\0' )
101                                 elog(ERROR,"Unexpected end of string");
102                         RESIZEPRSBUF;
103                         *(state->cur) = *(state->ptr);
104                         state->cur++;
105                         st = GV_INESCVAL;
106                 } else
107                         elog(ERROR,"Unknown state %d at postion line %d in file '%s'", st, __LINE__, __FILE__); 
108
109                 state->ptr++;
110         } 
111
112         return false;
113 }
114
/*
 * States of the pair-level parser in parse_hstore(), listed in the order
 * they are visited for one "key => value" pair.  The numeric values are kept
 * identical to the former #define constants.
 */
enum {
        WKEY    = 0,    /* expecting a key */
        WEQ     = 2,    /* expecting '=' */
        WGT     = 3,    /* expecting '>' */
        WVAL    = 1,    /* expecting a value */
        WDEL    = 4     /* expecting ',' or end of input */
};
120
121
122 static void
123 parse_hstore( HSParser *state ) {
124         int st = WKEY;
125         bool escaped=false;
126
127         state->plen=16;
128         state->pairs = (Pairs*)palloc( sizeof(Pairs) * state->plen );
129         state->pcur=0;
130         state->ptr = state->begin;
131         state->word=NULL;
132
133         while(1) {
134                 if (st == WKEY) {
135                         if ( !get_val(state, false, &escaped) )
136                                 return;
137                         if ( state->pcur >= state->plen ) {
138                                 state->plen *= 2;
139                                 state->pairs = (Pairs*)repalloc( state->pairs, sizeof(Pairs) * state->plen );
140                         }
141                         state->pairs[ state->pcur ].key = state->word; 
142                         state->pairs[ state->pcur ].keylen = state->cur - state->word;
143                         state->pairs[ state->pcur ].val=NULL;
144                         state->word=NULL;
145                         st = WEQ;
146                 } else if ( st == WEQ ) {
147                         if ( *(state->ptr) == '=' ) {
148                                 st = WGT;
149                         } else if ( *(state->ptr) == '\0' ) {
150                                 elog(ERROR,"Unexpectd end of string");
151                         } else if (!isspace(*(state->ptr))) {
152                                 elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), state->ptr-state->begin);
153                         }
154                 } else if ( st == WGT ) {
155                         if ( *(state->ptr) == '>' ) {
156                                 st = WVAL;
157                         } else if ( *(state->ptr) == '\0' ) {
158                                 elog(ERROR,"Unexpectd end of string");
159                         } else { 
160                                 elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), state->ptr-state->begin);
161                         }
162                 } else if ( st == WVAL ) {
163                         if ( !get_val(state, true, &escaped) )
164                                 elog(ERROR,"Unexpected end of string");
165                         state->pairs[ state->pcur ].val = state->word;
166                         state->pairs[ state->pcur ].vallen = state->cur - state->word;
167                         state->pairs[ state->pcur ].isnull = false;
168                         state->pairs[ state->pcur ].needfree = true;
169                         if ( state->cur - state->word == 4 && !escaped) {
170                                 state->word[4] = '\0';
171                                 if ( 0==strcasecmp(state->word, "null") ) 
172                                         state->pairs[ state->pcur ].isnull=true;
173                         } 
174                         state->word=NULL;
175                         state->pcur++;
176                         st = WDEL;
177                 } else if ( st == WDEL ) {
178                         if (  *(state->ptr) == ',' ) {
179                                 st = WKEY;
180                         } else if ( *(state->ptr) == '\0' ) {
181                                 return;
182                         } else if (!isspace(*(state->ptr))) {
183                                 elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), state->ptr-state->begin);
184                         }
185                 } else 
186                         elog(ERROR,"Unknown state %d at line %d in file '%s'", st, __LINE__, __FILE__);
187
188                 state->ptr++;
189         }
190
191
192 int
193 comparePairs(const void *a, const void *b) {
194         if ( ((Pairs*)a)->keylen == ((Pairs*)b)->keylen ) { 
195                 int res =  strncmp(
196                                 ((Pairs*)a)->key,
197                                 ((Pairs*)b)->key,
198                                 ((Pairs*)a)->keylen
199                         );
200                 if ( res )
201                         return res;
202
203                 /* guarantee that neddfree willl be later */
204                 if ( ((Pairs*)b)->needfree == ((Pairs*)a)->needfree )
205                         return 0;
206                 else if ( ((Pairs*)a)->needfree )
207                         return 1;
208                 else
209                         return -1;  
210         }
211         return ( ((Pairs*)a)->keylen > ((Pairs*)b)->keylen ) ? 1 : -1;
212 }
213
214 int
215 uniquePairs(Pairs * a, int4 l, int4 *buflen) {
216         Pairs *ptr, *res;
217
218         *buflen=0;
219         if ( l < 2 ) {
220                 if ( l==1 )
221                         *buflen = a->keylen + ((a->isnull) ? 0 : a->vallen) ;
222                 return l;
223         }
224
225         qsort((void *) a, l, sizeof(Pairs), comparePairs);
226         ptr=a+1;
227         res=a;  
228         while( ptr - a < l ) {
229                 if ( ptr->keylen == res->keylen && strncmp( ptr->key, res->key, res->keylen )==0 ) {
230                         if ( ptr->needfree ) {
231                                 pfree(ptr->key);
232                                 pfree(ptr->val);
233                         }
234                 } else {
235                         *buflen += res->keylen + (( res->isnull ) ? 0 : res->vallen);
236                         res++;
237                         memcpy(res,ptr,sizeof(Pairs));
238                 }
239
240                 ptr++;
241         }
242
243         *buflen += res->keylen + (( res->isnull ) ? 0 : res->vallen);
244         return res + 1 - a;
245 }
246
247 static void
248 freeHSParse(HSParser *state) {
249         int i;
250
251         if ( state->word ) pfree( state->word );
252         for (i=0;i<state->pcur;i++)
253                 if ( state->pairs[i].needfree ) {
254                         if (state->pairs[i].key) pfree(state->pairs[i].key);
255                         if (state->pairs[i].val) pfree(state->pairs[i].val);
256                 }
257         pfree( state->pairs );
258 }
259
260 PG_FUNCTION_INFO_V1(hstore_in);
261 Datum           hstore_in(PG_FUNCTION_ARGS);
262 Datum
263 hstore_in(PG_FUNCTION_ARGS) {
264         HSParser   state;
265         int4 len,buflen,i;
266         HStore  *out;
267         HEntry  *entries;
268         char *ptr;
269
270         state.begin =  PG_GETARG_CSTRING(0);
271
272         parse_hstore(&state);
273
274         if ( state.pcur == 0 ) {
275                 freeHSParse(&state);
276                 len = CALCDATASIZE(0,0);
277                 out = palloc(len);
278                 out->len=len;
279                 out->size=0;
280                 PG_RETURN_POINTER(out);
281         }
282
283         state.pcur = uniquePairs(state.pairs, state.pcur, &buflen);
284
285         len=CALCDATASIZE(state.pcur, buflen);
286         out = palloc(len);
287         out->len=len;
288         out->size=state.pcur;
289
290         entries=ARRPTR(out);
291         ptr = STRPTR(out);
292
293         for(i=0;i<out->size;i++) {
294                 entries[i].keylen = state.pairs[i].keylen;
295                 entries[i].pos = ptr - STRPTR(out);
296                 memcpy(ptr, state.pairs[i].key, state.pairs[i].keylen);
297                 ptr+=entries[i].keylen;
298
299                 entries[i].valisnull = state.pairs[i].isnull;
300                 if ( entries[i].valisnull )
301                         entries[i].vallen=4; /* null */
302                 else {
303                         entries[i].vallen = state.pairs[i].vallen;
304                         memcpy(ptr, state.pairs[i].val,state.pairs[i].vallen);
305                         ptr+=entries[i].vallen;
306                 }
307         }
308
309         freeHSParse(&state);
310         PG_RETURN_POINTER(out);
311 }
312
/*
 * Copy len bytes from src to dst, putting a backslash in front of every
 * double quote and backslash.
 *
 * dst must have room for up to 2 * len bytes (every byte escaped).  Nothing
 * is NUL-terminated.  Returns the position in dst just past the last byte
 * written.
 */
static char*
cpw(char *dst, const char *src, int len) {
        const char *end = src + len;

        for (; src < end; src++) {
                if ( *src == '"' || *src == '\\' )
                        *dst++ = '\\';
                *dst++ = *src;
        }
        return dst;
}
324
325 PG_FUNCTION_INFO_V1(hstore_out);
326 Datum           hstore_out(PG_FUNCTION_ARGS);
327 Datum
328 hstore_out(PG_FUNCTION_ARGS) {
329         HStore *in = PG_GETARG_HS(0);
330         int buflen,i;
331         char *out,*ptr;
332         char *base = STRPTR(in);
333         HEntry  *entries = ARRPTR(in);
334
335         if ( in->size==0 ) {
336                 out=palloc(1);
337                 *out='\0';
338                 PG_FREE_IF_COPY(in,0);
339                 PG_RETURN_CSTRING(out);
340         }
341
342         buflen = ( 4 /* " */ + 2 /* => */ + 2 /*, */ )*in->size + 
343                 2 /* esc */ * ( in->len - CALCDATASIZE(in->size,0) );
344
345         out=ptr=palloc(buflen);
346         for(i=0;i<in->size;i++) {
347                 *ptr++='"';
348                 ptr = cpw( ptr, base + entries[i].pos, entries[i].keylen );
349                 *ptr++='"';
350                 *ptr++='=';
351                 *ptr++='>';
352                 if ( entries[i].valisnull ) {
353                         *ptr++='N';
354                         *ptr++='U';
355                         *ptr++='L';
356                         *ptr++='L';
357                 } else {
358                         *ptr++='"';
359                         ptr = cpw( ptr, base + entries[i].pos + entries[i].keylen, entries[i].vallen );
360                         *ptr++='"';
361                 }
362
363                 if ( i+1 != in->size ) {
364                         *ptr++=',';
365                         *ptr++=' ';
366                 }
367         }
368         *ptr='\0';
369
370         PG_FREE_IF_COPY(in,0);
371         PG_RETURN_CSTRING(out);
372 }