make suffix dumpable and some minors cleanup
[tedtools.git] / sfxstr.c
1 /*
2  * Copyright (c) 2004 Teodor Sigaev <teodor@sigaev.ru>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *      notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *      notice, this list of conditions and the following disclaimer in the
12  *      documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the author nor the names of any co-contributors
14  *      may be used to endorse or promote products derived from this software
15  *      without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
18  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
25  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 #include <errno.h>
31 #include <string.h>
32 #include <sys/types.h>
33 #include <sys/uio.h>
34 #include <unistd.h>
35 #include <fcntl.h>
36
37 #include "tlog.h"
38 #include "tmalloc.h"
39 #include "sfxstr.h"
40 #include "tools.h"
41
42 #define EN_VAL  0x01
43 #define EN_CHLD 0x02
44 #define EN_DATA 0x04
45
46 #define SFSTREE_VERSION         0x0100
47
48 typedef unsigned long Opaque;  /* XXX sizeof(Opaque) == sizeof(void *) */
49
50 #define CHECK_MEMORY(tree)      ( ( (tree)->plainmemory ) ? \
51         tlog(TL_CRIT|TL_EXIT, "Tree in plain memory - read only access") : (void)0 ) 
52
53 static __inline__ int
54 getNodeSize(SFSTree *info, SFSNode *node) {
55         int size =      SFSNHRDSZ;
56
57         if ( node->isskip ) {
58                 if ( node->isword )
59                         size += info->datasize;
60                 if ( node->haschild )
61                         size += sizeof(SFSNode*);
62                 size += node->nchar;
63         } else {
64                 u_int32_t       i;
65                 u_int32_t       nfound = 0;
66                 SFSNodeData *data = node->data;
67
68                 size += sizeof(SFSNodeData) * node->nchar;
69                 size += sizeof(SFSNode*) * node->nchild;
70
71                 for(i=0;i<node->nchar;i++) 
72                         nfound += data[i].isword;
73
74                 size += nfound*info->datasize;
75         }
76
77         return PTRALIGN(size);
78 }
79
80 static __inline__ SFSNode*
81 getChildPointer(SFSTree *info, SFSNodeData *nodedata) {
82         char *p = ((char*)nodedata) + nodedata->child; 
83
84         if ( info->plainmemory ) 
85                 return (SFSNode*) ( ((char*)(info->node)) + *(Opaque*)p );
86
87         return *(SFSNode**)p;
88 }
89
90 static __inline__ SFSNode*
91 getSkipChildPointer(SFSTree *info, SFSNode *node) {
92         if ( info->plainmemory )
93                 return (SFSNode*) ( ((char*)(info->node)) + *(Opaque*)node->data );
94
95         return *(SFSNode**)( (char*)(node->data) );
96 }
97
98 static SFSNode* enlargeNode(SFSTree *info, SFSNode* node, u_int8_t val, int flag, SFSNodeData **nd);
99 static SFSNode* addRecord(SFSTree *info, SFSNode* node, SFSDataIO *in, int level);
100
101 #define STRNCMP(p1,p2,n) ( ((n)==1) ? ( *((char*)(p1))==*((char*)(p2)) ) : (strncmp((char*)(p1), (char*)(p2), (n))==0) )
102
103 SFSTree* 
104 SFSInit_dp(SFSTree *info, u_int32_t datasize, SFSDataIO *in) {
105         if ( datasize % sizeof(u_int32_t) )
106                 tlog(TL_ALARM|TL_EXIT,"SFSInit_dp: datasize(%d) should be divided by sizeof(u_int32_t)", datasize);
107  
108         if (!info) 
109                 info=(SFSTree*)tmalloc(sizeof(SFSTree));
110         memset(info,0,sizeof(SFSTree));
111
112         info->datasize = datasize;
113
114         while(in && in->key) {
115                 SFSAdd(info, in);
116                 in++;
117         }
118
119         return info;    
120 }
121
122 SFSTree* 
123 SFSInit_c(SFSTree *info, char **in) {
124         char **ptr=in;
125         SFSDataIO  d;
126
127         if (!info) 
128                 info=(SFSTree*)tmalloc(sizeof(SFSTree));
129         memset(info,0,sizeof(SFSTree));
130
131         while(ptr && *ptr) {
132                 d.key=*ptr;
133                 d.keylen=0;
134                 SFSAdd(info, &d);
135                 ptr++;
136         }
137
138         return info;
139 }
140
141 void*
142 SFSFindData(SFSTree *info, char *word) {
143         SFSNode *node = info->node;
144         SFSNodeData *StopLow, *StopHigh, *StopMiddle;
145         u_int8_t *ptr =(u_int8_t*)word;
146
147         while( node && *ptr ) {
148                 if ( node->isskip ) {
149                         if ( STRNCMP(ptr, ((char*)node)+node->dataptr, node->nchar) ) {
150                                 ptr+=node->nchar;
151                                 if ( *ptr=='\0' && node->isword) {
152                                         return (void*) ( ((char*)(node->data)) + ((node->haschild) ? sizeof(SFSNode*) : 0) );
153                                 } else if ( node->haschild ) {
154                                         node = getSkipChildPointer(info, node);
155                                 } else {
156                                         return NULL;
157                                 }
158                         } else
159                                 return NULL;
160                 } else {
161                         StopLow = node->data;
162                         StopHigh = StopLow + node->nchar;
163                         while (StopLow < StopHigh) {
164                                 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
165                                 if ( StopMiddle->val == *ptr ) {
166                                         ptr++;
167                                         if ( *ptr=='\0' && StopMiddle->isword ) {
168                                                 return (void*)( ((char*)node) + node->dataptr + info->datasize * StopMiddle->data );
169                                         } else if ( StopMiddle->haschild ) {
170                                                 node = getChildPointer(info, StopMiddle);
171                                         } else {
172                                                 return NULL;
173                                         }
174                                         break;
175                                 } else if ( StopMiddle->val < *ptr ) {
176                                         StopLow = StopMiddle + 1;
177                                 } else {
178                                         StopHigh = StopMiddle;
179                                 }
180                         }
181                         if ( StopLow >= StopHigh )
182                                 return NULL;
183                 }
184         }
185         return NULL;
186 }
187
188 static void
189 freeFSFNode(SFSTree *info, SFSNode *node, void (*freefunc)(void*)) {
190         u_int32_t i;
191  
192         if ( node->isskip ) {
193                 if (node->haschild)
194                         freeFSFNode(info, *(SFSNode**)(node->data), freefunc);
195                 if (freefunc && node->isword && info->datasize)
196                         (*freefunc)( (void*)( ((char*)(node->data))+ (node->haschild) ? sizeof(SFSNode*) : 0 ) );
197         } else {
198                 SFSNodeData *nd = node->data;
199                 char *data= ((char*)node) + node->dataptr;
200         
201                 for(i=0;i<node->nchar;i++) {
202                         if (nd->haschild)
203                                 freeFSFNode(info, *(SFSNode**)( ((char*)nd) + nd->child ), freefunc);
204                         if (freefunc && nd->isword && info->datasize) {
205                                 (*freefunc)( (void*)data );
206                                 data+=info->datasize;
207                         }
208                         nd++;
209                 }
210         }
211
212         tfree(node);
213 }
214
215 void 
216 SFSFree(SFSTree *info, void (*freefunc)(void*)) {
217         SFSNodeStack *s=info->stack;
218
219         if (info->node && !info->plainmemory) freeFSFNode(info, info->node, freefunc);
220         if (info->buf) tfree(info->buf);
221         info->buf = NULL;
222         info->tlen=0;
223         info->node = NULL;
224
225         while(s) {
226                 info->stack=s->next;
227                 tfree(s);
228                 s=info->stack;
229         }
230 }
231
232 static SFSNode*
233 makeSkipNode(SFSTree *info, SFSDataIO *io, int level) {
234         u_int32_t len;
235         int size;
236         SFSNode *res;
237
238         io->key+=level;
239         io->keylen-=level;
240
241         len = (io->keylen > 255) ? 255 : io->keylen;
242         size = SFSNHRDSZ + ((io->keylen > 255) ? sizeof(SFSNode*) : info->datasize) + len;
243         size = PTRALIGN(size);
244
245         res = (SFSNode*)tmalloc(size);
246         info->nnodes++;
247         info->totalen+=size;
248
249         res->isskip=1;
250         res->nchar=len;
251         res->dataptr = SFSNHRDSZ + ((io->keylen > 255) ? sizeof(SFSNode*) : info->datasize);
252         memcpy(((char*)res) + res->dataptr, io->key, len);
253
254         if ( io->keylen > 255 ) {
255                 res->haschild=1;
256                 res->isword=0;
257                 io->key = io->key+len;
258                 io->keylen = io->keylen - len;
259                 *(SFSNode**)(res->data) = makeSkipNode(info, io, 0);
260                 io->key = io->key-len;
261                 io->keylen = io->keylen + len;
262         } else {
263                 res->haschild=0;
264                 res->isword=1;
265                 if (info->datasize) {
266                         memcpy( res->data, io->data, info->datasize );
267                 }
268         }
269
270         io->key-=level;
271         io->keylen+=level;
272
273         return res;
274 }
275
276 static SFSNode*
277 splitSkipNode(SFSTree *info, SFSNode* node, SFSDataIO *io, int level) {
278         SFSNode *res;
279         int prefixlen=0;
280         char *s1,*s2;
281         SFSNodeData     *ndata;
282
283         tassert(node->isskip);
284         io->key+=level;
285         io->keylen-=level;
286
287         s1=((char*)node) + node->dataptr;
288         s2=io->key;
289
290         while( s1 - (((char*)node) + node->dataptr) < node->nchar && s2 - io->key < io->keylen && *s1==*s2) {
291                 s1++;
292                 s2++;
293         }
294
295         prefixlen = s2 - io->key;
296
297         if ( prefixlen==0 ) {
298                 if ( node->nchar == 1 ) {
299                         int flag = EN_VAL | ((node->isword) ? EN_DATA : 0) | ((node->haschild) ? EN_CHLD : 0);
300                         res = enlargeNode(info, NULL, *(u_int8_t*)(((char*)node) + node->dataptr), flag, &ndata);
301                         if ( node->isword ) {
302                                 if ( info->datasize ) 
303                                         memcpy(
304                                                 ((char*)res) + res->dataptr + info->datasize * ndata->data,
305                                                 ((char*)(node->data)) + ((node->haschild) ? sizeof(SFSNode*) : 0),
306                                                 info->datasize
307                                         );
308                         }
309                         if ( node->haschild ) 
310                                 *(SFSNode**)( ((char*)ndata) + ndata->child ) = *(SFSNode**)( node->data );
311                         info->totalen -= getNodeSize(info, node);
312                         info->nnodes--;
313                         tfree(node);
314                 } else {
315                         int size;
316
317                         res = enlargeNode(info, NULL, *(u_int8_t*)(((char*)node) + node->dataptr), EN_VAL|EN_CHLD, &ndata);
318
319                         size = getNodeSize(info,node);
320                         info->totalen-=size;
321
322                         node->nchar--;
323                         memmove( ((char*)node) + node->dataptr, ((char*)node) + node->dataptr + 1, node->nchar);
324
325                         size = getNodeSize(info,node);
326                         info->totalen+=size;
327
328                         *(SFSNode**)( ((char*)ndata) + ndata->child ) = (SFSNode*)trealloc(node, size);
329                 }
330                 res = addRecord(info, res, io, 0);
331         } else if (prefixlen==io->keylen) {
332                 if (prefixlen==node->nchar) {
333                         if ( node->isword || info->datasize==0) {
334                                 if (info->datasize)
335                                         memcpy( ((char*)(node->data)) + ((node->haschild) ? sizeof(SFSNodeData*) : 0),
336                                                 io->data,
337                                                 info->datasize);
338                                 node->isword=1;
339                                 res=node;
340                         } else {
341                                 int osize = PTRALIGN(SFSNHRDSZ + ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar);
342                                 int nsize = PTRALIGN(SFSNHRDSZ + info->datasize + ((node->haschild) ? sizeof(SFSNodeData*) : 0) + node->nchar);
343
344                                 info->totalen += nsize - osize;
345
346                                 res=(SFSNode*)trealloc(node,nsize);
347                                 res->dataptr=SFSNHRDSZ + info->datasize + ((res->haschild) ? sizeof(SFSNodeData*) : 0);
348                                 res->isword=1;
349                                 memmove(((char*)res) + res->dataptr,
350                                         ((char*)(res->data))  + ((res->haschild) ? sizeof(SFSNodeData*) : 0), res->nchar);
351                                 memcpy(((char*)(res->data))  + ((res->haschild) ? sizeof(SFSNodeData*) : 0), io->data, info->datasize);
352                         }
353                 } else {
354                         int size = SFSNHRDSZ + info->datasize + sizeof(SFSNodeData*) + prefixlen;
355                         size = PTRALIGN(size);
356
357                         info->totalen+=size;
358                         info->nnodes++;
359                         res = (SFSNode*)tmalloc(size);
360                         res->isskip=1;
361                         res->isword=1;
362                         res->haschild=1;
363                         res->nchar = prefixlen;
364                         res->dataptr = SFSNHRDSZ + info->datasize + sizeof(SFSNodeData*);
365                         memcpy(((char*)res)+res->dataptr, io->key, prefixlen);
366                         if (info->datasize)
367                                 memcpy(((char*)(res->data)) + sizeof(SFSNodeData*), io->data, info->datasize);
368                         info->totalen-=getNodeSize(info,node);
369                         node->nchar-=prefixlen;
370                         memmove( ((char*)node) + node->dataptr, ((char*)node) + node->dataptr + prefixlen, node->nchar);
371                         size = getNodeSize(info,node);
372                         info->totalen+=size;
373                         *(SFSNode**)(res->data) = (SFSNode*)trealloc(node, size);
374                 }
375         } else if ( prefixlen==node->nchar ) {
376                 int size = SFSNHRDSZ + info->datasize +  sizeof(SFSNode*) + node->nchar;
377                 info->totalen+=sizeof(SFSNode*);
378                 res=(SFSNode*)trealloc(node,size);
379                 memmove( ((char*)(res->data)) + sizeof(SFSNode*), res->data, info->datasize + res->nchar);
380                 res->haschild=1;
381                 res->dataptr+=sizeof(SFSNode*);
382                 *(SFSNode**)(res->data) = makeSkipNode(info, io, prefixlen); 
383         } else { 
384                 int size = SFSNHRDSZ + sizeof(SFSNodeData*) + prefixlen;
385                 size = PTRALIGN(size);
386                 info->totalen+=size;
387                 info->nnodes++;
388                 res = (SFSNode*)tmalloc(size);
389                 res->isskip=1;
390                 res->isword=0;
391                 res->haschild=1;
392                 res->nchar = prefixlen;
393                 res->dataptr = SFSNHRDSZ + sizeof(SFSNodeData*);
394                 memcpy(((char*)res)+res->dataptr, io->key, prefixlen);
395
396                 info->totalen-= getNodeSize(info,node);
397                 node->nchar-=prefixlen;
398
399                 memmove( ((char*)node) + node->dataptr, ((char*)node) + node->dataptr + prefixlen, node->nchar);
400
401                 size = getNodeSize(info,node);
402                 info->totalen+= size;
403
404                 *(SFSNode**)(res->data) = (SFSNode*)trealloc(node, size);
405                 *(SFSNode**)(res->data) = splitSkipNode(info, *(SFSNode**)(res->data), io, prefixlen); 
406         }
407
408         io->key-=level;
409         io->keylen+=level;
410         return res;
411 }
412
413
414 static SFSNode*
415 enlargeNode(SFSTree *info, SFSNode* node, u_int8_t val, int flag, SFSNodeData **nd) {
416         u_int32_t nchild=0, nchar=0, nfound=0, i;
417         SFSNode *rs,**chld;
418         SFSNodeData *data;
419         int sizenode;
420         char *dataptr;
421
422
423         if ( node ) {
424                 nchar=node->nchar;
425                 nchild=node->nchild;
426                 data=node->data;
427
428                 for(i=0;i<nchar;i++) {
429                         nfound += data->isword;
430                         data++;
431                 }
432         }
433
434         if ( node )
435                 /*info->totalen -= PTRALIGN(SFSNHRDSZ+nchar*sizeof(SFSNodeData)+nchild*sizeof(SFSNode*)+nfound*info->datasize);*/
436                 info->totalen -= getNodeSize(info, node);
437
438         if ( flag & EN_VAL )   nchar++; 
439         if ( flag & EN_CHLD ) nchild++; 
440         if ( flag & EN_DATA )  nfound++;        
441
442
443         sizenode = SFSNHRDSZ+nchar*sizeof(SFSNodeData)+nchild*sizeof(SFSNode*)+nfound*info->datasize;
444         sizenode = PTRALIGN(sizenode);
445
446         info->totalen+=sizenode;
447         if ( !node ) info->nnodes++;
448         rs=(SFSNode*)tmalloc(sizenode);
449
450         rs->isskip = 0;
451         rs->nchar = nchar;
452         rs->nchild = nchild;
453         rs->dataptr=SFSNHRDSZ+nchar*sizeof(SFSNodeData)+nchild*sizeof(SFSNode*);
454         dataptr=((char*)rs) + rs->dataptr;
455         chld = (SFSNode**)( ((char*)rs)+SFSNHRDSZ+nchar*sizeof(SFSNodeData) );
456         data=rs->data;
457
458         if (node) {
459                 SFSNode **ochld=(SFSNode**)( ((char*)node)+SFSNHRDSZ+node->nchar*sizeof(SFSNodeData) );
460                 SFSNodeData *odata = node->data;
461                 char *odataptr=((char*)node) + node->dataptr;
462                 int wasins=0;
463
464                 for(i=0;i<node->nchar;i++) {
465                         if ( val > odata->val ) {
466                                 *(u_int32_t*)data = *(u_int32_t*)odata;
467                                 if ( odata->haschild ) {
468                                         *chld=*ochld;
469                                         data->child = ((char*)chld) - ((char*)data);
470                                         chld++; ochld++;
471                                 }
472                                 if ( odata->isword && info->datasize ) {
473                                         memcpy(dataptr, odataptr, info->datasize);
474                                         data->data = ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize;
475                                         dataptr += info->datasize; odataptr += info->datasize;
476                                 }
477                                 data++; odata++; 
478                         } else if ( val == odata->val ) {
479                                 tassert ( (flag&EN_VAL)==0 );
480                                 *(u_int32_t*)data = *(u_int32_t*)odata;
481                                 wasins=1;
482                                 if ( odata->haschild || flag & EN_CHLD ) {
483                                         if (odata->haschild) *chld=*ochld;
484                                         data->child = ((char*)chld) - ((char*)data);
485                                         data->haschild=1;
486                                         chld++; if (odata->haschild) ochld++;
487                                 } else
488                                         data->haschild=0;
489                                 if ( odata->isword || flag & EN_DATA ) {
490                                         data->isword=1;
491                                         if ( info->datasize && odata->isword ) {
492                                                 memcpy(dataptr, odataptr, info->datasize);
493                                                 odataptr += info->datasize;
494                                         }
495                                         data->data = ( info->datasize ) ? ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize : 0;
496                                         dataptr += info->datasize;
497                                 } else
498                                         data->isword=0;
499                                 *nd = data;
500                                 data++; odata++; 
501                         } else {
502                                 if ( wasins==0 ) {
503                                         tassert ( flag&EN_VAL );
504                                         data->val = val;
505                                         if ( flag & EN_CHLD ) {
506                                                 data->haschild=1;
507                                                 data->child = ((char*)chld) - ((char*)data);
508                                                 chld++;
509                                         } else
510                                                 data->haschild=0;
511                                         if ( flag & EN_DATA ) {
512                                                 data->isword=1;
513                                                 data->data = ( info->datasize ) ? ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize : 0;
514                                                 dataptr += info->datasize;
515                                         } else
516                                                 data->isword=0;
517                                         *nd = data;
518                                         data++;
519                                         wasins=1;
520                                         i--;
521                                 } else {
522                                         *(u_int32_t*)data = *(u_int32_t*)odata;
523                                         if ( odata->haschild ) {
524                                                 *chld=*ochld;
525                                                 data->child = ((char*)chld) - ((char*)data);
526                                                 chld++; ochld++;
527                                         }
528                                         if ( odata->isword && info->datasize ) {
529                                                 memcpy(dataptr, odataptr, info->datasize);
530                                                 data->data = ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize;
531                                                 dataptr += info->datasize; odataptr += info->datasize;
532                                         }
533                                         data++; odata++; 
534                                 }
535                         } 
536                 }
537                 if ( wasins==0 ) {
538                         tassert ( flag&EN_VAL );
539                         data->val = val;
540                         if ( flag & EN_CHLD ) {
541                                 data->haschild=1;
542                                 data->child = ((char*)chld) - ((char*)data);
543                                 chld++;
544                         } else
545                                 data->haschild=0;
546                         if ( flag & EN_DATA ) {
547                                 data->isword=1;
548                                 data->data = ( info->datasize ) ? ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize : 0;
549                                 dataptr += info->datasize;
550                         } else 
551                                 data->isword=0;
552                         *nd = data;
553                 }
554         } else {
555                 tassert ( flag & EN_VAL );
556                 data->val = val;
557                 if ( flag & EN_CHLD ) {
558                         data->haschild=1;
559                         data->child = ((char*)chld) - ((char*)data);
560                 } else
561                         data->haschild=0;
562                 if ( flag & EN_DATA ) {
563                         data->isword=1;
564                         data->data = ( info->datasize ) ? ((dataptr - ((char*)rs)) - rs->dataptr)/info->datasize : 0;
565                 } else
566                         data->isword=0;
567                 *nd = data;
568         }
569         if (node) tfree(node);
570         return rs;
571 }
572
573 static SFSNode*
574 addRecord(SFSTree *info, SFSNode* node, SFSDataIO *in, int level) {
575         SFSNodeData *StopLow, *StopHigh, *StopMiddle;
576         u_int8_t *ptr = ((u_int8_t*)in->key) + level;
577
578         if ( node ) {
579                 if ( node->isskip ) {
580                         if ( node->haschild && in->keylen-level > node->nchar && 
581                                         strncmp(in->key+level, ((char*)node)+node->dataptr, node->nchar)==0 ) 
582                                 *(SFSNode**)( node->data ) = addRecord(info, *(SFSNode**)( node->data ), in, level+node->nchar); 
583                         else 
584                                 node = splitSkipNode(info, node, in, level); 
585                 } else {
586                         StopLow = node->data;   
587                         StopHigh = StopLow + node->nchar;
588                         while (StopLow < StopHigh) {
589                                 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
590                                 if ( StopMiddle->val == *ptr ) {
591                                         if ( *(ptr+1)=='\0' ) {
592                                                 if ( StopMiddle->isword ) {
593                                                         /* already exists */
594                                                         if ( info->datasize ) 
595                                                                 memcpy(((char*)node) + node->dataptr + info->datasize * StopMiddle->data,
596                                                                         in->data, info->datasize );
597                                                 } else {
598                                                         /* not exist word */
599                                                         if (info->datasize) {
600                                                                 node = enlargeNode(info, node, *ptr, EN_DATA, &StopMiddle);
601                                                                 memcpy(((char*)node) + node->dataptr + info->datasize * StopMiddle->data,
602                                                                         in->data, info->datasize );
603                                                         }
604                                                         StopMiddle->isword = 1;
605                                                 }
606                                         } else if ( StopMiddle->haschild ) {
607                                                 *(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child ) = 
608                                                         addRecord(info, *(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child ), in, level+1);
609                                         } else {
610                                                 node = enlargeNode(info, node, *ptr, EN_CHLD, &StopMiddle);
611                                                 *(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child ) =
612                                                         makeSkipNode(info, in, level+1);
613                                         }
614                                         return node;
615                                 } else if ( StopMiddle->val < *ptr ) {
616                                         StopLow = StopMiddle + 1;
617                                 } else {
618                                         StopHigh = StopMiddle;
619                                 }
620                         }
621                         if ( *(ptr+1)=='\0' ) {
622                                 node = enlargeNode(info, node, *ptr, EN_VAL|EN_DATA, &StopMiddle);
623                                 if ( info->datasize )
624                                         memcpy(((char*)node) + node->dataptr + info->datasize * StopMiddle->data,
625                                         in->data, info->datasize );
626                         } else {
627                                 node = enlargeNode(info, node, *ptr, EN_VAL|EN_CHLD, &StopMiddle);
628                                 *(SFSNode**)( ((char*)StopMiddle) + StopMiddle->child ) = 
629                                         makeSkipNode(info, in, level+1);
630                         }
631                 }
632         } else 
633                 node = makeSkipNode(info, in, level);
634
635         return node;
636
637 }
638
639 void
640 SFSAdd(SFSTree *info, SFSDataIO *in) {
641         CHECK_MEMORY(info);
642         if (in->keylen<=0)
643                 in->keylen=strlen(in->key);
644         info->node = addRecord(info, info->node, in, 0);
645 }
646
647 static SFSNodeStack*
648 pushStack(SFSNodeStack *top, SFSNode *node, int level ) {
649         SFSNodeStack *r=(SFSNodeStack*)tmalloc(sizeof(SFSNodeStack));
650
651         r->next = top;
652         r->node=node;
653         r->data=node->data;
654         r->level=level;
655         r->checkedchild = 0;
656         r->checkedval = 0;
657
658         return r;
659 }
660
661 void 
662 SFSIteratorStart(SFSTree *info) {
663         if ( !info->buf ) {
664                 info->tlen = 32;
665                 info->buf = (char*)tmalloc(info->tlen);
666         } 
667         info->stack = pushStack(NULL, info->node, 0);
668         info->hasword=0; 
669 }
670
671 void
672 SFSPrefixIteratorStart(SFSTree *info, char *word) {
673         SFSNode *node = info->node;
674         SFSNodeData *StopLow, *StopHigh, *StopMiddle;
675         u_int8_t *ptr =(u_int8_t*)word;
676         int len,wlen=strlen(word);
677
678         if ( wlen+1>=info->tlen ) {
679                 info->tlen = 2*wlen;
680                 info->buf = (info->buf) ?
681                                 (char*)trealloc(info->buf,info->tlen)
682                         :
683                                 (char*)tmalloc(info->tlen);
684         }
685
686         info->hasword=0;
687         while( node && *ptr) {
688                 len = wlen - (((char*)ptr)-word);
689                 if ( node->isskip ) {
690                         if ( STRNCMP(ptr, ((char*)node)+node->dataptr, (len<node->nchar) ? len : node->nchar) ) {
691                                 if ( len<=node->nchar ) {
692                                         strcpy(info->buf,word);
693                                         info->stack = pushStack(NULL, node, ((char*)ptr) - word);
694                                         return;
695                                 } else if ( node->haschild ) {
696                                         ptr+=node->nchar;
697                                         node = getSkipChildPointer(info, node);
698                                 } else {
699                                         return;
700                                 }
701                         } else
702                                 return;
703                 } else {
704                         StopLow = node->data;
705                         StopHigh = StopLow + node->nchar;
706                         while (StopLow < StopHigh) {
707                                 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
708                                 if ( StopMiddle->val == *ptr ) {
709                                         if ( *(ptr+1)=='\0' ) {
710                                                 len =((char*)ptr)-word+1;
711                                                 strcpy(info->buf,word);
712                                                 if ( StopMiddle->isword ) {
713                                                         info->hasword=1;
714                                                         info->wdata = (void*)( ((char*)node) + node->dataptr + info->datasize * StopMiddle->data );
715                                                 }
716                                                 if ( StopMiddle->haschild )
717                                                         info->stack = pushStack(NULL, getChildPointer( info, StopMiddle ), len);
718                                                 return;
719                                         } else if ( StopMiddle->haschild ) {
720                                                 node = getChildPointer( info, StopMiddle ); 
721                                         } else {
722                                                 return;
723                                         }
724                                         ptr++;
725                                         break;
726                                 } else if ( StopMiddle->val < *ptr ) {
727                                         StopLow = StopMiddle + 1;
728                                 } else {
729                                         StopHigh = StopMiddle;
730                                 }
731                         }
732                         if ( StopLow >= StopHigh )
733                                 break;
734                 }
735         }
736         return;
737 }
738
739 int
740 SFSIterate(SFSTree *info, SFSDataIO *out) {
741         SFSNodeStack *s=info->stack;
742
743         if ( info->hasword ) {
744                 out->key = info->buf;
745                 out->keylen = strlen(out->key);
746                 out->data = info->wdata;
747                 info->hasword = 0;
748                 return 1;
749         }
750
751         if ( s == NULL || s->node == NULL)
752                 return 0;
753                                                          
754         while ( s->level + s->node->nchar + 1 >= info->tlen ) {
755                 info->tlen *= 2;
756                 info->buf = (char*)trealloc(info->buf, info->tlen);
757         }
758
759         if ( s->node->isskip ) {
760                 memcpy( info->buf + s->level, ((char*)s->node) + s->node->dataptr, s->node->nchar );
761                 if ( s->node->isword && !s->checkedval) {
762                         info->buf[ s->level+s->node->nchar ] = '\0';
763                         out->key = info->buf;
764                         out->keylen = s->level+s->node->nchar;
765                         out->data =((char*)(s->node->data)) + ((s->node->haschild) ? sizeof(SFSNode*) : 0);
766                         s->checkedval=1;
767                         return 1;
768                 }
769                 if ( s->node->haschild && !s->checkedchild) {
770                         info->stack = pushStack(s, getSkipChildPointer(info, s->node), s->level+s->node->nchar);
771                         s->checkedchild=1;
772                         if ( SFSIterate(info, out) )
773                                 return 1;
774                 }
775         } else {
776                 while( s->data - s->node->data < s->node->nchar ) {
777                         info->buf[ s->level ] = (char)s->data->val;
778                         if ( s->checkedval==0 && s->data->isword ) {
779                                 info->buf[ s->level+1 ] = '\0';  
780                                 out->key = info->buf;
781                                 out->keylen = s->level+1;
782                                 out->data =((char*)s->node) + s->node->dataptr + info->datasize * s->data->data;
783                                 s->checkedval = 1;
784                                 return 1;
785                         }
786                         if ( s->checkedchild==0 && s->data->haschild ) {
787                                 info->stack = pushStack(s,
788                                         getChildPointer( info, s->data ), s->level+1);
789                                 s->checkedchild=1;
790                                 if ( SFSIterate(info, out) )
791                                         return 1;
792                         }
793                         s->checkedval = s->checkedchild = 0;
794                         s->data++;
795                 }
796         }
797         info->stack = s->next;
798         tfree(s);
799
800         return SFSIterate(info, out);
801 }
802
803 int
804 SFSRange(SFSTree *info, char *word, SFSDataIO *f, SFSDataIO *l) {
805         SFSNodeStack *s=info->stack;
806
807         SFSPrefixIteratorStart(info, word);
808         s=info->stack;
809
810         if ( SFSIterate(info, f) ) {
811                 SFSNodeStack *sptr = info->stack, *stmp;
812                 while( sptr && sptr!=s ) {
813                         stmp=sptr->next;
814                         tfree(sptr);
815                         sptr=stmp;
816                 }
817          
818                 if ( s == NULL ) {
819                         memcpy(l,f,sizeof(SFSDataIO));
820                         return 1;
821                 }
822         } else
823                 return 0;
824                                                          
825         info->stack=NULL;
826
827         while( f->keylen +  s->level + 2 >= info->tlen ) {
828                 info->tlen *= 2;
829                 info->buf = (char*)trealloc(info->buf, info->tlen);
830         }
831
832         f->key = info->buf;
833         l->key = info->buf + f->keylen + 1;
834         memcpy(l->key, f->key, f->keylen + 1);
835                         
836         while(s->node) {
837                 while( f->keylen + 1 + s->level + s->node->nchar + 1 >= info->tlen ) {
838                         info->tlen *= 2;
839                         info->buf = (char*)trealloc(info->buf, info->tlen);
840                 }
841                 if ( s->node->isskip ) {
842                         memcpy(info->buf + f->keylen + 1 + s->level,
843                                 ((char*)(s->node))+s->node->dataptr, s->node->nchar);
844                         s->level+=s->node->nchar;
845                         if (s->node->haschild) {
846                                 s->node=getSkipChildPointer(info, s->node);
847                         } else { /* if (s->node->isword) */
848                                 info->buf[ f->keylen + 1 + s->level  ] = '\0';
849                                 l->data = (void*)(s->node->data);
850                                 l->keylen = s->level+1;
851                                 break;
852                         }
853                 } else {
854                         s->data = s->node->data + s->node->nchar - 1;
855                         while( s->data - s->node->data >= 0 ) {
856                                 info->buf[ f->keylen + 1 + s->level ] = (char)s->data->val;
857                                 if ( s->data->haschild ) {
858                                         s->node = getChildPointer( info, s->data ); 
859                                         s->level++;
860                                         break;
861                                 }
862                                 if ( s->data->isword ) {
863                                         info->buf[ f->keylen + 1 + s->level ] = '\0';
864                                         l->keylen = s->level+1;
865                                         l->data =((char*)s->node) + s->node->dataptr + info->datasize * s->data->data;
866                                         s->node=NULL;
867                                         break;
868                                 }
869                                 s->data--;
870                         }
871                 }
872         }               
873         f->key = info->buf;
874         l->key = info->buf + f->keylen + 1;
875         tfree(s); 
876
877         return 1;
878 }
879
880 typedef struct WorkPlain {
881         SFSTree                 *info;
882         char                    *node;
883         off_t                   offset;
884 } WorkPlain;
885
886 static Opaque
887 plainNode(WorkPlain *wp, SFSNode *node) {
888         int     size = getNodeSize(wp->info, node);
889         off_t   myoffset = wp->offset;
890
891         memcpy( wp->node + wp->offset, node, size );
892         wp->offset += size;
893
894         tassert( wp->offset <= wp->info->totalen );
895
896         if ( node->isskip ) {
897                 if (node->haschild) 
898                         *(Opaque*)(wp->node + myoffset + SFSNHRDSZ) = 
899                                         plainNode(wp, getSkipChildPointer(wp->info, node));
900         } else {
901                 SFSNodeData *nd = node->data;
902                 u_int32_t       i;
903
904                 for(i=0;i<node->nchar;i++) {
905                         if (nd->haschild)
906                                 *(Opaque*)(wp->node + myoffset + ( ((char*)nd) - ((char*)node) ) + nd->child) =
907                                                 plainNode(wp, getChildPointer( wp->info, nd ) );        
908                         nd++;
909                 }
910         }
911  
912         tfree(node);
913
914         return myoffset;
915 }
916
917 void
918 SFSMakePlain(SFSTree *info, void *pointer) {
919         WorkPlain       wp;
920
921         if ( info->plainmemory )
922                 return;
923
924         wp.info = info;
925         wp.offset = 0;
926         if ( pointer )
927                 wp.node = (char*)pointer;
928         else
929                 wp.node = (char*)tmalloc(sizeof(char*) * info->totalen);
930
931         plainNode(&wp, info->node);
932         tassert( wp.offset == info->totalen );
933
934         info->node = (SFSNode*)wp.node; 
935         info->plainmemory = 1;
936 }
937
938 static SFSNode*
939 revertNode(SFSTree *info, SFSNode* node) {
940         int size = getNodeSize(info, node);
941         SFSNode *newnode;
942
943         newnode = (SFSNode*)tmalloc( size );
944         memcpy(newnode, node, size);
945
946         if ( node->isskip ) {
947                 if (node->haschild)
948                         *(SFSNode**)( (char*)(newnode->data) ) = 
949                                 revertNode(info, getSkipChildPointer(info, node));
950         } else {
951                 SFSNodeData *nd = node->data;
952                 SFSNodeData *nnd = newnode->data;
953                 u_int32_t   i;
954
955                 for(i=0;i<node->nchar;i++) {
956                         if (nd->haschild)
957                                 *(SFSNode**) (((char*)nnd) + nnd->child ) = 
958                                         revertNode(info, getChildPointer( info, nd ));
959                         nd++; nnd++;
960                 }
961         }
962
963         return newnode;
964 }
965
966 void *
967 SFSRevertPlain(SFSTree *info) {
968         void *pointer = info->node;
969
970         if (! info->plainmemory )
971                 return NULL;
972
973         info->node = revertNode(info, info->node);
974         info->plainmemory = 0;
975
976         return pointer;
977 }
978
979 static Opaque
980 writeNode(WorkPlain *wp, int fd, SFSNode *node) {
981         int     size = getNodeSize(wp->info, node);
982         SFSNode  *wnode = (SFSNode*)tmalloc(size);
983         off_t   myoffset = wp->offset;
984
985         memcpy( wnode, node, size );
986         wp->offset += size;
987
988         tassert( wp->offset <= wp->info->totalen );
989
990         if ( node->isskip ) {
991                 if (node->haschild) 
992                         *(Opaque*)( ((char*)wnode) + SFSNHRDSZ) = 
993                                         writeNode(wp, fd, getSkipChildPointer(wp->info, node));
994         } else {
995                 SFSNodeData *nd = node->data;
996                 u_int32_t       i;
997
998                 for(i=0;i<node->nchar;i++) {
999                         if (nd->haschild)
1000                                 *(Opaque*)(((char*)wnode) + ( ((char*)nd) - ((char*)node) ) + nd->child) =
1001                                                 writeNode(wp, fd, getChildPointer( wp->info, nd ) );    
1002                         nd++;
1003                 }
1004         }
1005
1006         if ( lseek(fd, myoffset + SFSTDHSZ, SEEK_SET) < 0 )
1007                 tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno));
1008         if ( write(fd, wnode, size) != size )
1009                 tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno));
1010         
1011         tfree(wnode);
1012
1013         return myoffset;
1014 }
1015
1016 void 
1017 SFSWriteDump(SFSTree *info, char *filename) {
1018         int                             fd;
1019         off_t                           size = info->totalen + SFSTDHSZ;
1020         SFSTreeDumpHeader       dh;
1021
1022         if ( (fd = open(filename, O_RDWR|O_CREAT|O_TRUNC|O_EXLOCK, 0666)) < 0 )
1023                 tlog(TL_CRIT|TL_EXIT, "Can not open file '%s': %s", strerror(errno));
1024
1025         if ( lseek(fd, size, SEEK_SET) < 0 )
1026                 tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno));
1027
1028         dh.version = SFSTREE_VERSION;
1029         dh.opaquesize = sizeof(Opaque);
1030         dh.headersize = SFSTDHSZ;
1031         dh.datasize = info->datasize;
1032         dh.totalen = info->totalen;
1033         dh.nnodes = info->nnodes;
1034
1035         if ( lseek(fd, 0, SEEK_SET) < 0 )
1036                 tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno));
1037         if ( write(fd, &dh, SFSTDHSZ) != SFSTDHSZ )
1038                 tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno));
1039
1040         if ( info->plainmemory ) {
1041                 if ( write(fd, info->node, info->totalen) != info->totalen )
1042                         tlog(TL_CRIT|TL_EXIT, "write failed: %s", strerror(errno));
1043         } else {
1044                 WorkPlain       wp;
1045
1046                 wp.info = info;
1047                 wp.offset = 0;
1048                 
1049                 writeNode(&wp, fd, info->node);
1050         }
1051         
1052         close(fd);
1053 }
1054
1055 static SFSNode*
1056 readNode(SFSTree *info, int fd, char *buf, int bufsize) {
1057         SFSNode *node;
1058         int             size;
1059
1060         size = read(fd, buf, bufsize );
1061         if ( size < SFSNHRDSZ + sizeof(void*) )
1062                 tlog(TL_CRIT|TL_EXIT, "read failed: %s", strerror(errno));
1063
1064         size = getNodeSize(info, (SFSNode*)buf);
1065         tassert( size <= bufsize );
1066         node = (SFSNode*)tmalloc( size );
1067         memcpy(node, buf, size);
1068
1069         if ( node->isskip ) {
1070                 if (node->haschild) {
1071                         if ( lseek(fd, *(Opaque*)node->data + SFSTDHSZ, SEEK_SET) < 0 )
1072                                 tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno));
1073                         *(SFSNode**)( (char*)(node->data) ) = 
1074                                 readNode(info, fd, buf, bufsize);
1075                 }
1076         } else {
1077                 SFSNodeData *nd = node->data;
1078                 u_int32_t   i;
1079
1080                 for(i=0;i<node->nchar;i++) {
1081                         if (nd->haschild) {
1082                                 if ( lseek(fd, *(Opaque*)(((char*)nd) + nd->child ) + SFSTDHSZ, SEEK_SET) < 0 )
1083                                         tlog(TL_CRIT|TL_EXIT, "lseek failed: %s", strerror(errno));
1084                                 *(SFSNode**) (((char*)nd) + nd->child ) = 
1085                                         readNode(info, fd, buf, bufsize);
1086                         }
1087                         nd++;
1088                 }
1089         }
1090
1091         return node;
1092 }
1093
1094 void
1095 SFSReadDump(SFSTree *info, char *filename) {
1096         int                 fd;
1097         SFSTreeDumpHeader   dh;
1098         char                            *buf;
1099         int                                     bufsize; 
1100         
1101         memset(info,0,sizeof(SFSTree));
1102
1103         if ( (fd = open(filename, O_RDONLY|O_SHLOCK)) < 0 )
1104                 tlog(TL_CRIT|TL_EXIT, "Can not open file '%s': %s", strerror(errno));
1105
1106         if ( read(fd, &dh, SFSTDHSZ) != SFSTDHSZ )
1107                 tlog(TL_CRIT|TL_EXIT, "read failed: %s", strerror(errno));
1108
1109         if ( dh.version != SFSTREE_VERSION )
1110                 tlog(TL_CRIT|TL_EXIT, "Tree version mismatch (should be 0x%04x but 0x%04x)", SFSTREE_VERSION, dh.version);
1111         if ( dh.opaquesize != sizeof(Opaque) )
1112                 tlog(TL_CRIT|TL_EXIT, "sizeof(Opaque) mismatch");
1113         if ( dh.headersize != SFSTDHSZ )
1114                 tlog(TL_CRIT|TL_EXIT, "Tree's header size mismatch (should be %d but %d bytes)", SFSTDHSZ, dh.headersize);
1115
1116         info->totalen = dh.totalen;
1117         info->nnodes = dh.nnodes;
1118         info->datasize = dh.datasize;
1119
1120         /* allocate buffer with max allowed size */
1121         bufsize = SFSNHRDSZ + 256*(sizeof(SFSNodeData) + sizeof(void*) + info->datasize);
1122         buf = tmalloc( bufsize );
1123         info->node = readNode(info, fd, buf, bufsize);
1124         tfree(buf);
1125
1126         close(fd);
1127 }
1128
1129 void
1130 SFSInitFromDump(SFSTree *info, void *pointer, u_int64_t size) {
1131         SFSTreeDumpHeader       *dh;
1132
1133         memset(info,0,sizeof(SFSTree));
1134
1135         if ( size && size < SFSTDHSZ )
1136                 tlog(TL_CRIT|TL_EXIT, "Memsize too small");
1137
1138         dh = (SFSTreeDumpHeader*)pointer;
1139         
1140         if ( dh->version != SFSTREE_VERSION )
1141                 tlog(TL_CRIT|TL_EXIT, "Tree version mismatch (should be 0x%04x but 0x%04x)", SFSTREE_VERSION, dh->version);
1142         if ( dh->opaquesize != sizeof(Opaque) )
1143                 tlog(TL_CRIT|TL_EXIT, "sizeof(Opaque) mismatch");
1144         if ( dh->headersize != SFSTDHSZ )
1145                 tlog(TL_CRIT|TL_EXIT, "Tree's header size mismatch (should be %d but %d bytes)", SFSTDHSZ, dh->headersize);
1146         if ( size && size != dh->totalen + SFSTDHSZ )
1147                 tlog(TL_CRIT|TL_EXIT, "Memory size mismatch (should be %d but %d bytes)", dh->totalen + SFSTDHSZ, size);
1148
1149         info->totalen = dh->totalen;
1150         info->nnodes = dh->nnodes;
1151         info->datasize = dh->datasize;
1152         info->plainmemory = 1;
1153
1154         if ( info->totalen && info->nnodes ) 
1155                 info->node = (SFSNode*)( ((char*)pointer) + SFSTDHSZ );
1156 }