#include "hstore.h" #include #ifdef PG_MODULE_MAGIC PG_MODULE_MAGIC; #endif typedef struct { char *begin; char *ptr; char *cur; char *word; int wordlen; Pairs *pairs; int pcur; int plen; } HSParser; #define RESIZEPRSBUF \ do { \ if ( state->cur - state->word + 1 >= state->wordlen ) \ { \ int4 clen = state->cur - state->word; \ state->wordlen *= 2; \ state->word = (char*)repalloc( (void*)state->word, state->wordlen ); \ state->cur = state->word + clen; \ } \ } while (0) #define GV_WAITVAL 0 #define GV_INVAL 1 #define GV_INESCVAL 2 #define GV_WAITESCIN 3 #define GV_WAITESCESCIN 4 static bool get_val( HSParser *state, bool ignoreeq, bool *escaped ) { int st = GV_WAITVAL; state->wordlen=32; state->cur = state->word = palloc( state->wordlen ); *escaped=false; while(1) { if ( st == GV_WAITVAL ) { if ( *(state->ptr) == '"' ) { *escaped=true; st = GV_INESCVAL; } else if ( *(state->ptr) == '\0' ) { return false; } else if ( *(state->ptr) == '=' && !ignoreeq ) { elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), state->ptr-state->begin); } else if ( *(state->ptr) == '\\' ) { st = GV_WAITESCIN; } else if ( !isspace(*(state->ptr)) ) { *(state->cur) = *(state->ptr); state->cur++; st = GV_INVAL; } } else if ( st == GV_INVAL ) { if ( *(state->ptr) == '\\' ) { st = GV_WAITESCIN; } else if ( *(state->ptr) == '=' && !ignoreeq ) { state->ptr--; return true; } else if ( *(state->ptr) == ',' && ignoreeq ) { state->ptr--; return true; } else if ( isspace(*(state->ptr)) ) { return true; } else if ( *(state->ptr) == '\0' ) { state->ptr--; return true; } else { RESIZEPRSBUF; *(state->cur) = *(state->ptr); state->cur++; } } else if ( st == GV_INESCVAL ) { if ( *(state->ptr) == '\\' ) { st = GV_WAITESCESCIN; } else if ( *(state->ptr) == '"' ) { return true; } else if ( *(state->ptr) == '\0' ) { elog(ERROR,"Unexpected end of string"); } else { RESIZEPRSBUF; *(state->cur) = *(state->ptr); state->cur++; } } else if ( st == GV_WAITESCIN ) { if ( *(state->ptr) == '\0' ) elog(ERROR,"Unexpected end of string"); RESIZEPRSBUF; *(state->cur) = *(state->ptr); state->cur++; st = GV_INVAL; } else if ( st == GV_WAITESCESCIN ) { if ( *(state->ptr) == '\0' ) elog(ERROR,"Unexpected end of string"); RESIZEPRSBUF; *(state->cur) = *(state->ptr); state->cur++; st = GV_INESCVAL; } else elog(ERROR,"Unknown state %d at postion line %d in file '%s'", st, __LINE__, __FILE__); state->ptr++; } return false; } #define WKEY 0 #define WVAL 1 #define WEQ 2 #define WGT 3 #define WDEL 4 static void parse_hstore( HSParser *state ) { int st = WKEY; bool escaped=false; state->plen=16; state->pairs = (Pairs*)palloc( sizeof(Pairs) * state->plen ); state->pcur=0; state->ptr = state->begin; state->word=NULL; while(1) { if (st == WKEY) { if ( !get_val(state, false, &escaped) ) return; if ( state->pcur >= state->plen ) { state->plen *= 2; state->pairs = (Pairs*)repalloc( state->pairs, sizeof(Pairs) * state->plen ); } state->pairs[ state->pcur ].key = state->word; state->pairs[ state->pcur ].keylen = state->cur - state->word; state->pairs[ state->pcur ].val=NULL; state->word=NULL; st = WEQ; } else if ( st == WEQ ) { if ( *(state->ptr) == '=' ) { st = WGT; } else if ( *(state->ptr) == '\0' ) { elog(ERROR,"Unexpectd end of string"); } else if (!isspace(*(state->ptr))) { elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), state->ptr-state->begin); } } else if ( st == WGT ) { if ( *(state->ptr) == '>' ) { st = WVAL; } else if ( *(state->ptr) == '\0' ) { elog(ERROR,"Unexpectd end of string"); } else { elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), state->ptr-state->begin); } } else if ( st == WVAL ) { if ( !get_val(state, true, &escaped) ) elog(ERROR,"Unexpected end of string"); state->pairs[ state->pcur ].val = state->word; state->pairs[ state->pcur ].vallen = state->cur - state->word; state->pairs[ state->pcur ].isnull = false; state->pairs[ state->pcur ].needfree = true; if ( state->cur - state->word == 4 && !escaped) { state->word[4] = '\0'; if ( 0==strcasecmp(state->word, "null") ) state->pairs[ state->pcur ].isnull=true; } state->word=NULL; state->pcur++; st = WDEL; } else if ( st == WDEL ) { if ( *(state->ptr) == ',' ) { st = WKEY; } else if ( *(state->ptr) == '\0' ) { return; } else if (!isspace(*(state->ptr))) { elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), state->ptr-state->begin); } } else elog(ERROR,"Unknown state %d at line %d in file '%s'", st, __LINE__, __FILE__); state->ptr++; } } int comparePairs(const void *a, const void *b) { if ( ((Pairs*)a)->keylen == ((Pairs*)b)->keylen ) { int res = strncmp( ((Pairs*)a)->key, ((Pairs*)b)->key, ((Pairs*)a)->keylen ); if ( res ) return res; /* guarantee that neddfree willl be later */ if ( ((Pairs*)b)->needfree == ((Pairs*)a)->needfree ) return 0; else if ( ((Pairs*)a)->needfree ) return 1; else return -1; } return ( ((Pairs*)a)->keylen > ((Pairs*)b)->keylen ) ? 1 : -1; } int uniquePairs(Pairs * a, int4 l, int4 *buflen) { Pairs *ptr, *res; *buflen=0; if ( l < 2 ) { if ( l==1 ) *buflen = a->keylen + ((a->isnull) ? 0 : a->vallen) ; return l; } qsort((void *) a, l, sizeof(Pairs), comparePairs); ptr=a+1; res=a; while( ptr - a < l ) { if ( ptr->keylen == res->keylen && strncmp( ptr->key, res->key, res->keylen )==0 ) { if ( ptr->needfree ) { pfree(ptr->key); pfree(ptr->val); } } else { *buflen += res->keylen + (( res->isnull ) ? 0 : res->vallen); res++; memcpy(res,ptr,sizeof(Pairs)); } ptr++; } *buflen += res->keylen + (( res->isnull ) ? 0 : res->vallen); return res + 1 - a; } static void freeHSParse(HSParser *state) { int i; if ( state->word ) pfree( state->word ); for (i=0;ipcur;i++) if ( state->pairs[i].needfree ) { if (state->pairs[i].key) pfree(state->pairs[i].key); if (state->pairs[i].val) pfree(state->pairs[i].val); } pfree( state->pairs ); } PG_FUNCTION_INFO_V1(hstore_in); Datum hstore_in(PG_FUNCTION_ARGS); Datum hstore_in(PG_FUNCTION_ARGS) { HSParser state; int4 len,buflen,i; HStore *out; HEntry *entries; char *ptr; state.begin = PG_GETARG_CSTRING(0); parse_hstore(&state); if ( state.pcur == 0 ) { freeHSParse(&state); len = CALCDATASIZE(0,0); out = palloc(len); out->len=len; out->size=0; PG_RETURN_POINTER(out); } state.pcur = uniquePairs(state.pairs, state.pcur, &buflen); len=CALCDATASIZE(state.pcur, buflen); out = palloc(len); out->len=len; out->size=state.pcur; entries=ARRPTR(out); ptr = STRPTR(out); for(i=0;isize;i++) { entries[i].keylen = state.pairs[i].keylen; entries[i].pos = ptr - STRPTR(out); memcpy(ptr, state.pairs[i].key, state.pairs[i].keylen); ptr+=entries[i].keylen; entries[i].valisnull = state.pairs[i].isnull; if ( entries[i].valisnull ) entries[i].vallen=4; /* null */ else { entries[i].vallen = state.pairs[i].vallen; memcpy(ptr, state.pairs[i].val,state.pairs[i].vallen); ptr+=entries[i].vallen; } } freeHSParse(&state); PG_RETURN_POINTER(out); } static char* cpw(char *dst, char *src, int len) { char *ptr = src; while(ptr-srcsize==0 ) { out=palloc(1); *out='\0'; PG_FREE_IF_COPY(in,0); PG_RETURN_CSTRING(out); } buflen = ( 4 /* " */ + 2 /* => */ + 2 /*, */ )*in->size + 2 /* esc */ * ( in->len - CALCDATASIZE(in->size,0) ); out=ptr=palloc(buflen); for(i=0;isize;i++) { *ptr++='"'; ptr = cpw( ptr, base + entries[i].pos, entries[i].keylen ); *ptr++='"'; *ptr++='='; *ptr++='>'; if ( entries[i].valisnull ) { *ptr++='N'; *ptr++='U'; *ptr++='L'; *ptr++='L'; } else { *ptr++='"'; ptr = cpw( ptr, base + entries[i].pos + entries[i].keylen, entries[i].vallen ); *ptr++='"'; } if ( i+1 != in->size ) { *ptr++=','; *ptr++=' '; } } *ptr='\0'; PG_FREE_IF_COPY(in,0); PG_RETURN_CSTRING(out); }