Initial revision
[hstore.git] / hstore_io.c
1 #include "hstore.h"
2 #include <ctype.h>
3
4 typedef struct {
5         char *begin;
6         char *ptr;
7         char *cur;
8         char *word;
9         int wordlen;
10
11         Pairs   *pairs;
12         int     pcur;
13         int     plen;
14 } HSParser;
15
/*
 * Double the token buffer state->word when storing one more byte would
 * reach its allocated size, keeping state->cur at the same offset in the
 * (possibly moved) buffer.  Expects a variable named "state" in scope.
 */
#define RESIZEPRSBUF \
do { \
    if ( state->wordlen <= state->cur - state->word + 1 ) \
    { \
        int4 used_ = state->cur - state->word; \
        state->wordlen *= 2; \
        state->word = (char*)repalloc( (void*)state->word, state->wordlen ); \
        state->cur = state->word + used_; \
    } \
} while (0)
26
27
/* States of the token scanner in get_val() */
enum
{
    GV_WAITVAL = 0,         /* skipping whitespace before the token */
    GV_INVAL = 1,           /* inside an unquoted token */
    GV_INESCVAL = 2,        /* inside a double-quoted token */
    GV_WAITESCIN = 3,       /* just saw a backslash outside quotes */
    GV_WAITESCESCIN = 4     /* just saw a backslash inside quotes */
};
33
34 static bool
35 get_val( HSParser *state, bool ignoreeq, bool *escaped ) {
36         int st = GV_WAITVAL;
37         state->wordlen=32;
38         state->cur = state->word = palloc( state->wordlen );
39         *escaped=false;
40
41         while(1) {
42                 if ( st == GV_WAITVAL ) {
43                         if ( *(state->ptr) == '"' ) {
44                                 *escaped=true;
45                                 st = GV_INESCVAL;
46                         } else if ( *(state->ptr) == '\0' ) {
47                                 return false;
48                         } else if (  *(state->ptr) == '=' && !ignoreeq ) {
49                                 elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), state->ptr-state->begin);
50                         } else if ( *(state->ptr) == '\\' ) {
51                                 st = GV_WAITESCIN;
52                         } else if ( !isspace(*(state->ptr)) ) {
53                                 *(state->cur) = *(state->ptr);
54                                 state->cur++;
55                                 st = GV_INVAL;
56                         }
57                 } else if ( st == GV_INVAL ) {
58                         if ( *(state->ptr) == '\\' ) {
59                                 st = GV_WAITESCIN;
60                         } else if ( *(state->ptr) == '=' && !ignoreeq ) {
61                                 state->ptr--;
62                                 return true;
63                         } else if ( *(state->ptr) == ',' && ignoreeq ) {
64                                 state->ptr--;
65                                 return true;
66                         } else if ( isspace(*(state->ptr)) ) {
67                                 return true;
68                         } else if ( *(state->ptr) == '\0' ) {
69                                 state->ptr--;
70                                 return true;
71                         } else {
72                                 RESIZEPRSBUF;
73                                 *(state->cur) = *(state->ptr);
74                                 state->cur++;
75                         }
76                 } else if ( st == GV_INESCVAL ) {
77                         if ( *(state->ptr) == '\\' ) {
78                                 st = GV_WAITESCESCIN;
79                         } else if ( *(state->ptr) == '"' ) {
80                                 return true;
81                         } else if ( *(state->ptr) == '\0' ) {
82                                 elog(ERROR,"Unexpected end of string");
83                         } else {
84                                 RESIZEPRSBUF;
85                                 *(state->cur) = *(state->ptr);
86                                 state->cur++;
87                         }
88                 } else if ( st == GV_WAITESCIN ) {
89                         if ( *(state->ptr) == '\0' )
90                                 elog(ERROR,"Unexpected end of string");
91                         RESIZEPRSBUF;
92                         *(state->cur) = *(state->ptr);
93                         state->cur++;
94                         st = GV_INVAL; 
95                 } else if ( st == GV_WAITESCESCIN ) {
96                         if ( *(state->ptr) == '\0' )
97                                 elog(ERROR,"Unexpected end of string");
98                         RESIZEPRSBUF;
99                         *(state->cur) = *(state->ptr);
100                         state->cur++;
101                         st = GV_INESCVAL;
102                 } else
103                         elog(ERROR,"Unknown state %d at postion line %d in file '%s'", st, __LINE__, __FILE__); 
104
105                 state->ptr++;
106         } 
107
108         return false;
109 }
110
/* States of the pair-level parser in parse_hstore() */
enum
{
    WKEY = 0,   /* expecting a key */
    WVAL = 1,   /* expecting a value */
    WEQ = 2,    /* expecting '=' */
    WGT = 3,    /* expecting '>' */
    WDEL = 4    /* expecting ',' or end of input */
};
116
117
118 static void
119 parse_hstore( HSParser *state ) {
120         int st = WKEY;
121         bool escaped=false;
122
123         state->plen=16;
124         state->pairs = (Pairs*)palloc( sizeof(Pairs) * state->plen );
125         state->pcur=0;
126         state->ptr = state->begin;
127         state->word=NULL;
128
129         while(1) {
130                 if (st == WKEY) {
131                         if ( !get_val(state, false, &escaped) )
132                                 return;
133                         if ( state->pcur >= state->plen ) {
134                                 state->plen *= 2;
135                                 state->pairs = (Pairs*)repalloc( state->pairs, sizeof(Pairs) * state->plen );
136                         }
137                         state->pairs[ state->pcur ].key = state->word; 
138                         state->pairs[ state->pcur ].keylen = state->cur - state->word;
139                         state->pairs[ state->pcur ].val=NULL;
140                         state->word=NULL;
141                         st = WEQ;
142                 } else if ( st == WEQ ) {
143                         if ( *(state->ptr) == '=' ) {
144                                 st = WGT;
145                         } else if ( *(state->ptr) == '\0' ) {
146                                 elog(ERROR,"Unexpectd end of string");
147                         } else if (!isspace(*(state->ptr))) {
148                                 elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), state->ptr-state->begin);
149                         }
150                 } else if ( st == WGT ) {
151                         if ( *(state->ptr) == '>' ) {
152                                 st = WVAL;
153                         } else if ( *(state->ptr) == '\0' ) {
154                                 elog(ERROR,"Unexpectd end of string");
155                         } else { 
156                                 elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), state->ptr-state->begin);
157                         }
158                 } else if ( st == WVAL ) {
159                         if ( !get_val(state, true, &escaped) )
160                                 elog(ERROR,"Unexpected end of string");
161                         state->pairs[ state->pcur ].val = state->word;
162                         state->pairs[ state->pcur ].vallen = state->cur - state->word;
163                         state->pairs[ state->pcur ].isnull = false;
164                         state->pairs[ state->pcur ].needfree = true;
165                         if ( state->cur - state->word == 4 && !escaped) {
166                                 state->word[4] = '\0';
167                                 if ( 0==strcasecmp(state->word, "null") ) 
168                                         state->pairs[ state->pcur ].isnull=true;
169                         } 
170                         state->word=NULL;
171                         state->pcur++;
172                         st = WDEL;
173                 } else if ( st == WDEL ) {
174                         if (  *(state->ptr) == ',' ) {
175                                 st = WKEY;
176                         } else if ( *(state->ptr) == '\0' ) {
177                                 return;
178                         } else if (!isspace(*(state->ptr))) {
179                                 elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), state->ptr-state->begin);
180                         }
181                 } else 
182                         elog(ERROR,"Unknown state %d at line %d in file '%s'", st, __LINE__, __FILE__);
183
184                 state->ptr++;
185         }
186
187
188 int
189 comparePairs(const void *a, const void *b) {
190         if ( ((Pairs*)a)->keylen == ((Pairs*)b)->keylen ) { 
191                 int res =  strncmp(
192                                 ((Pairs*)a)->key,
193                                 ((Pairs*)b)->key,
194                                 ((Pairs*)a)->keylen
195                         );
196                 if ( res )
197                         return res;
198
199                 /* guarantee that neddfree willl be later */
200                 if ( ((Pairs*)b)->needfree == ((Pairs*)a)->needfree )
201                         return 0;
202                 else if ( ((Pairs*)a)->needfree )
203                         return 1;
204                 else
205                         return -1;  
206         }
207         return ( ((Pairs*)a)->keylen > ((Pairs*)b)->keylen ) ? 1 : -1;
208 }
209
210 int
211 uniquePairs(Pairs * a, int4 l, int4 *buflen) {
212         Pairs *ptr, *res;
213
214         *buflen=0;
215         if ( l < 2 ) {
216                 if ( l==1 )
217                         *buflen = a->keylen + ((a->isnull) ? 0 : a->vallen) ;
218                 return l;
219         }
220
221         qsort((void *) a, l, sizeof(Pairs), comparePairs);
222         ptr=a+1;
223         res=a;  
224         while( ptr - a < l ) {
225                 if ( ptr->keylen == res->keylen && strncmp( ptr->key, res->key, res->keylen )==0 ) {
226                         if ( ptr->needfree ) {
227                                 pfree(ptr->key);
228                                 pfree(ptr->val);
229                         }
230                 } else {
231                         *buflen += res->keylen + (( res->isnull ) ? 0 : res->vallen);
232                         res++;
233                         memcpy(res,ptr,sizeof(Pairs));
234                 }
235
236                 ptr++;
237         }
238
239         *buflen += res->keylen + (( res->isnull ) ? 0 : res->vallen);
240         return res + 1 - a;
241 }
242
243 static void
244 freeHSParse(HSParser *state) {
245         int i;
246
247         if ( state->word ) pfree( state->word );
248         for (i=0;i<state->pcur;i++)
249                 if ( state->pairs[i].needfree ) {
250                         if (state->pairs[i].key) pfree(state->pairs[i].key);
251                         if (state->pairs[i].val) pfree(state->pairs[i].val);
252                 }
253         pfree( state->pairs );
254 }
255
256 PG_FUNCTION_INFO_V1(hstore_in);
257 Datum           hstore_in(PG_FUNCTION_ARGS);
258 Datum
259 hstore_in(PG_FUNCTION_ARGS) {
260         HSParser   state;
261         int4 len,buflen,i;
262         HStore  *out;
263         HEntry  *entries;
264         char *ptr;
265
266         state.begin =  PG_GETARG_CSTRING(0);
267
268         parse_hstore(&state);
269
270         if ( state.pcur == 0 ) {
271                 freeHSParse(&state);
272                 len = CALCDATASIZE(0,0);
273                 out = palloc(len);
274                 out->len=len;
275                 out->size=0;
276                 PG_RETURN_POINTER(out);
277         }
278
279         state.pcur = uniquePairs(state.pairs, state.pcur, &buflen);
280
281         len=CALCDATASIZE(state.pcur, buflen);
282         out = palloc(len);
283         out->len=len;
284         out->size=state.pcur;
285
286         entries=ARRPTR(out);
287         ptr = STRPTR(out);
288
289         for(i=0;i<out->size;i++) {
290                 entries[i].keylen = state.pairs[i].keylen;
291                 entries[i].pos = ptr - STRPTR(out);
292                 memcpy(ptr, state.pairs[i].key, state.pairs[i].keylen);
293                 ptr+=entries[i].keylen;
294
295                 entries[i].valisnull = state.pairs[i].isnull;
296                 if ( entries[i].valisnull )
297                         entries[i].vallen=4; /* null */
298                 else {
299                         entries[i].vallen = state.pairs[i].vallen;
300                         memcpy(ptr, state.pairs[i].val,state.pairs[i].vallen);
301                         ptr+=entries[i].vallen;
302                 }
303         }
304
305         freeHSParse(&state);
306         PG_RETURN_POINTER(out);
307 }
308
/*
 * Copy len bytes from src to dst, writing a backslash in front of every
 * double quote and backslash.
 *
 * dst must have room for up to 2 * len bytes.  No terminating NUL is
 * written.  Returns the position in dst just past the last byte written.
 */
static char*
cpw(char *dst, const char *src, int len) {
    int i;

    for ( i = 0; i < len; i++ ) {
        if ( src[i] == '"' || src[i] == '\\' )
            *dst++ = '\\';
        *dst++ = src[i];
    }
    return dst;
}
320
321 PG_FUNCTION_INFO_V1(hstore_out);
322 Datum           hstore_out(PG_FUNCTION_ARGS);
323 Datum
324 hstore_out(PG_FUNCTION_ARGS) {
325         HStore *in = PG_GETARG_HS(0);
326         int buflen,i;
327         char *out,*ptr;
328         char *base = STRPTR(in);
329         HEntry  *entries = ARRPTR(in);
330
331         if ( in->size==0 ) {
332                 out=palloc(1);
333                 *out='\0';
334                 PG_FREE_IF_COPY(in,0);
335                 PG_RETURN_CSTRING(out);
336         }
337
338         buflen = ( 4 /* " */ + 2 /* => */ + 2 /*, */ )*in->size + 
339                 2 /* esc */ * ( in->len - CALCDATASIZE(in->size,0) );
340
341         out=ptr=palloc(buflen);
342         for(i=0;i<in->size;i++) {
343                 *ptr++='"';
344                 ptr = cpw( ptr, base + entries[i].pos, entries[i].keylen );
345                 *ptr++='"';
346                 *ptr++='=';
347                 *ptr++='>';
348                 if ( entries[i].valisnull ) {
349                         *ptr++='N';
350                         *ptr++='U';
351                         *ptr++='L';
352                         *ptr++='L';
353                 } else {
354                         *ptr++='"';
355                         ptr = cpw( ptr, base + entries[i].pos + entries[i].keylen, entries[i].vallen );
356                         *ptr++='"';
357                 }
358
359                 if ( i+1 != in->size ) {
360                         *ptr++=',';
361                         *ptr++=' ';
362                 }
363         }
364         *ptr='\0';
365
366         PG_FREE_IF_COPY(in,0);
367         PG_RETURN_CSTRING(out);
368 }