add .gitignore
[tedtools.git] / tbtree.c
index 1351cc8..416cabc 100644 (file)
--- a/tbtree.c
+++ b/tbtree.c
 
 #include "tbtree.h"
 
+#define        NOTFOUND        0
+#define        FOUND           1
+
 static int TBTReadPage(TBTree *db, u_int32_t pagenumber, TBTMemPage **page) ;
 static int TBTWritePage(TBTree *db, TBTMemPage *page) ;
 static int TBTNewPage(TBTree *db, TBTMemPage **page) ;
 
+
+
 int 
 TBTOpen(TBTree *db, char *file) {
        off_t offset;
@@ -74,8 +79,7 @@ TBTOpen(TBTree *db, char *file) {
        }
 
        if ( db->npage ) {
-               db->Cache=(TBTMemPage**)t0malloc( sizeof(TBTMemPage*)*db->npage );
-               db->TimeCache=(TBTMemPage**)t0malloc( sizeof(TBTMemPage*)*db->npage );
+               db->Cache=(TBTMemPage**)t0malloc( sizeof(TBTMemPage*)*HASHSIZE(db->npage) );
                db->curpage=0;
        }
 
@@ -110,6 +114,7 @@ TBTOpen(TBTree *db, char *file) {
                }
        } 
 
+       db->pointersize = TBTPOINTERSIZE(db);
        db->lastpagenumber=0;                            
        return TBT_OK; 
 }
@@ -123,12 +128,18 @@ TBTClose(TBTree *db) {
 
        if ( db->npage ) {
                u_int32_t i;
-
-               for(i=0; i<db->curpage; i++)
-                       tfree( db->Cache[i] );
+               TBTMemPage *ptr,*tmp;
+
+               for(i=0; i<HASHSIZE(db->npage); i++) {
+                       ptr = db->Cache[i];
+                       while(ptr) {
+                               tmp = ptr->link;
+                               tfree(ptr);
+                               ptr=tmp;
+                       }
+               }
 
                tfree( db->Cache );
-               tfree( db->TimeCache );
        }
        
        flock( db->fd, LOCK_UN );
@@ -137,133 +148,173 @@ TBTClose(TBTree *db) {
        return (rc) ? TBT_ERROR : TBT_OK;
 }
 
+static int
+cmpNPage(const void *a, const void *b) {
+       tassert( (*(TBTMemPage**)a)->pagenumber != (*(TBTMemPage**)b)->pagenumber );
+       return ( (*(TBTMemPage**)a)->pagenumber > (*(TBTMemPage**)b)->pagenumber ) ? 1 : -1;
+}
+
+
 int
 TBTSync(TBTree *db) {
-       u_int32_t i;
        int rc=0;
 
        if ( db->readonly || db->npage==0 )
                return TBT_OK;
-       for(i=0; i<db->curpage; i++)
-               if ( !db->Cache[i]->issynced )
-                       rc |= TBTWritePage(db, db->Cache[i]);
+
+       if ( db->npage ) {
+               u_int32_t i, total=0;
+               TBTMemPage *ptr, **data = (TBTMemPage**)tmalloc(db->npage*sizeof(TBTMemPage*)), **pptr;
+               pptr=data;
+               for(i=0; i<HASHSIZE(db->npage); i++) {
+                       ptr = db->Cache[i];
+                       while(ptr) {
+                               if ( !ptr->issynced ) {
+                                       *pptr = ptr;
+                                       pptr++;
+                               } 
+                               ptr = ptr->link;
+                       }
+               }
+               
+               total=pptr-data;
+               if ( total > 0 ) {
+                       if ( total>1 )
+                               qsort( data, total, sizeof(TBTMemPage*), cmpNPage);
+
+                       pptr=data;
+                       while( pptr-data<total) {
+                               rc |= TBTWritePage(db, *pptr);
+                               pptr++;
+                       }
+               }
+               tfree(data);
+       }
        
        if ( fsync(db->fd) ) {
                tlog(TL_CRIT,"TBTSync: fsync failed: %s",strerror(errno));
-               rc=1;
+               rc=TBT_ERROR;
        }
 
        return (rc) ? TBT_ERROR : TBT_OK;
 }
 
-static int
-cmpNPage(const void *a, const void *b) {
-       if (  (*(TBTMemPage**)a)->pagenumber == (*(TBTMemPage**)b)->pagenumber ) 
-               return 0;
-       return ( (*(TBTMemPage**)a)->pagenumber > (*(TBTMemPage**)b)->pagenumber ) ? 1 : -1;
+static TBTMemPage *
+findInCache(TBTree *db, u_int32_t pagenumber) {
+       TBTMemPage *ptr;
+
+       ptr = db->Cache[ pagenumber % HASHSIZE(db->npage) ];
+
+       while(ptr && ptr->pagenumber != pagenumber)
+               ptr = ptr->link; 
+
+       return ptr;
+}
+
+static void
+removeFromCache(TBTree *db, u_int32_t pagenumber) {
+       TBTMemPage *ptr, *prev=NULL;
+       int pos = pagenumber % HASHSIZE(db->npage);
+
+       ptr = db->Cache[ pos ];
+
+       while(ptr) {
+               if ( ptr->pagenumber == pagenumber ) {
+                       if ( prev )
+                               prev->link = ptr->link;
+                       else
+                               db->Cache[ pos ] = ptr->link;
+                       break;
+               }
+               prev=ptr;
+               ptr = ptr->link;
+       }
+}
+
+static void
+insertInCache(TBTree *db, TBTMemPage *page) {
+       int pos = page->pagenumber % HASHSIZE(db->npage);
+
+       page->link = db->Cache[ pos ];
+       db->Cache[ pos ] = page;
 }
 
 static TBTMemPage*
-PageAlloc(TBTree *db, int *pos) {
+PageAlloc(TBTree *db) {
        TBTMemPage *page;
 
        if ( db->curpage < db->npage ) {
-               page = db->Cache[db->curpage] = db->TimeCache[db->curpage] = (TBTMemPage*)t0malloc(sizeof(TBTMemPage));
-               *pos = db->curpage;
+               page = (TBTMemPage*)tmalloc(sizeof(TBTMemPage));
+               memset( page, 0, TBTMEMPAGEHDRSZ );
                page->iscached=1;
+               if ( db->curpage == 0 ) {
+                       db->TimeCache = db->TimeCacheLast = page;       
+               } else {
+                       db->TimeCacheLast->next = page;
+                       page->prev = db->TimeCacheLast;
+                       db->TimeCacheLast = page;
+               }
                db->curpage++;
        } else if ( db->npage == 0 ) {
-               page = (TBTMemPage*)t0malloc(sizeof(TBTMemPage));
+               page = (TBTMemPage*)tmalloc(sizeof(TBTMemPage));
+               memset( page, 0, TBTMEMPAGEHDRSZ );
        } else {
-               u_int32_t i;
-               for( i=0; i<db->curpage && db->TimeCache[i]->islocked; i++ );
-               if ( i == db->curpage ) { /*all pages locked*/
-                       /*tlog(TL_WARN,"TBTReadPage: all pages in cache locked, increase number of cached pages");*/
-                       page = (TBTMemPage*)t0malloc(sizeof(TBTMemPage));
+               page = db->TimeCache;
+               while( page->next && page->islocked ) 
+                       page = page->next;
+               if ( page==db->TimeCacheLast && page->islocked ) { /*all pages locked*/
+                       /* tlog(TL_WARN,"TBTReadPage: all pages in cache locked, increase number of cached pages"); */ 
+                       page = (TBTMemPage*)tmalloc(sizeof(TBTMemPage));
+                       memset( page, 0, TBTMEMPAGEHDRSZ );
                } else {
-                       page = db->TimeCache[i];
                        if ( !page->issynced )
                                if ( TBTWritePage(db, page) )
                                        return NULL;
-                       memset(page,0,sizeof(TBTMemPage));
+                       removeFromCache(db, page->pagenumber);
+                       if ( page != db->TimeCacheLast ) {
+                               if ( page == db->TimeCache ) {
+                                       db->TimeCache = page->next;
+                                       db->TimeCache->prev = NULL;
+                               } else { 
+                                       page->prev->next = page->next;
+                                       page->next->prev = page->prev;
+                               }
+                               memset( page, 0, TBTMEMPAGEHDRSZ );
+                               db->TimeCacheLast->next = page;
+                               page->prev = db->TimeCacheLast;
+                               db->TimeCacheLast = page;
+                       } else 
+                               memset( page, 0, TBTMEMPAGEHDRSZ );
                        page->iscached=1;
-                       *pos=i;
                }
        }
 
        return page;
 }
 
-static void
-moveFirstTime(TBTree *db, int pos) {
-       TBTMemPage *ptr;
-       
-       if ( pos < 0 ) 
-               return;
-       if (pos+1==db->curpage )
-               return;
-
-       ptr = db->TimeCache[pos];
-       memmove( db->TimeCache + pos , db->TimeCache + pos + 1, sizeof(TBTMemPage*)*(db->curpage-pos-1) );
-       db->TimeCache[db->curpage-1] = ptr;
-}
-
-static void
-findAndMoveFirstTime(TBTree *db, TBTMemPage *page) {
-       int pos;
-       for(pos=0;pos<db->curpage;pos++)
-               if ( page==db->TimeCache[pos] ) {
-                       moveFirstTime(db,pos);
-                       break;
-               }
-       tassert( pos<db->curpage );
-}
-
-static void
-findAndMovePN(TBTree *db, TBTMemPage *page) {
-       int pos;
-
-       if ( db->curpage < 2 )
-               return;
-
-       for(pos=0;pos<db->curpage;pos++)
-               if ( page==db->Cache[pos] ) 
-                       break;
-       tassert( pos<db->curpage );
-
-       while(1) {
-               if ( pos+1 != db->curpage && db->Cache[pos+1]->pagenumber < page->pagenumber ) {
-                       db->Cache[pos] = db->Cache[pos+1];
-                       db->Cache[pos+1] = page;
-                       pos++; 
-               } else if ( pos!=0 && db->Cache[pos-1]->pagenumber > page->pagenumber ) {
-                       db->Cache[pos] = db->Cache[pos-1];
-                       db->Cache[pos-1] = page;
-                       pos--;
-               } else
-                       break; 
-       }       
-}
-
 static int
 TBTReadPage(TBTree *db, u_int32_t pagenumber, TBTMemPage **page) {
-       TBTMemPage      *ptr, key, **res;
        int rc;
-       int pos;
-
-       ptr=&key;
-       key.pagenumber = pagenumber;
 
-       res = (TBTMemPage**)bsearch(&ptr, db->Cache, db->curpage, sizeof(TBTMemPage*), cmpNPage);
-       if ( res ) {
+       if ( db->npage && (*page=findInCache(db, pagenumber))!=NULL ) {
                db->cachehits++;
-               *page=*res;
-               findAndMoveFirstTime(db, *page);
+               if ( *page != db->TimeCacheLast ) {
+                       if ( (*page) == db->TimeCache ) {
+                               db->TimeCache = (*page)->next;
+                               db->TimeCache->prev=NULL;
+                       } else { 
+                               (*page)->prev->next = (*page)->next;
+                               (*page)->next->prev = (*page)->prev;
+                       }
+                       db->TimeCacheLast->next = (*page);
+                       (*page)->prev = db->TimeCacheLast;
+                       db->TimeCacheLast = *page;
+                       (*page)->next = NULL;
+               }
        } else {
                off_t  offset = (off_t)pagenumber * (off_t)TBTREEPAGESIZE;
-               
-               if( (*page = PageAlloc(db,&pos))==NULL )
+       
+               if( (*page = PageAlloc(db))==NULL )
                        return TBT_ERROR;
 
                if ( lseek(db->fd, offset, SEEK_SET) != offset ) {
@@ -273,18 +324,14 @@ TBTReadPage(TBTree *db, u_int32_t pagenumber, TBTMemPage **page) {
                (*page)->issynced=1;
                (*page)->pagenumber=pagenumber;
                if ( (rc=read(db->fd, &( (*page)->page ), TBTREEPAGESIZE)) != TBTREEPAGESIZE ) {
-                       tlog(TL_CRIT,"TBTReadPage: read failed: %s %d %d",strerror(errno), pagenumber, offset);
+                       tlog(TL_CRIT,"TBTReadPage: read failed: %s %d %d",strerror(errno), pagenumber, offset); 
                        return TBT_ERROR;
                }
-               if ( (*page)->iscached ) {
-                       moveFirstTime(db, pos);
-                       findAndMovePN(db, *page);
-               } 
+               if ( (*page)->iscached ) 
+                       insertInCache(db, *page);
        }
 
        db->pageread++;
-       gettimeofday( &( (*page)->last ), NULL );
-       
        return TBT_OK;   
 }
 
@@ -314,7 +361,6 @@ TBTWritePage(TBTree *db, TBTMemPage *page) {
 
 static int
 TBTNewPage(TBTree *db, TBTMemPage **page) {
-       int pos;
        if ( db->lastpagenumber==0 ) {
                off_t  offset;
                if ( (offset=lseek(db->fd, 0, SEEK_END)) < 0 ) {
@@ -330,29 +376,24 @@ TBTNewPage(TBTree *db, TBTMemPage **page) {
                db->lastpagenumber = offset/TBTREEPAGESIZE;
        }
 
-       if( (*page = PageAlloc(db,&pos))==NULL )
+       if( (*page = PageAlloc(db))==NULL )
                return TBT_ERROR;
+
        (*page)->pagenumber = db->lastpagenumber;
        (*page)->issynced=0;
        (*page)->islocked=1;
 
        db->lastpagenumber++;
 
-       memset( &((*page)->page), 0, sizeof(TBTPAGEHDRSZ));
+       memset( &((*page)->page), 0, TBTMEMPAGEHDRSZ);
        (*page)->page.freespace = TBTREEPAGESIZE-TBTPAGEHDRSZ;
 
-       gettimeofday( &( (*page)->last ), NULL );
-       if ( (*page)->iscached ) {
-               moveFirstTime(db, pos);
-               findAndMovePN(db, *page);
-       }
+       if ( (*page)->iscached ) 
+               insertInCache(db, *page);
        
        return TBT_OK;
 }
 
-#define        NOTFOUND        0
-#define        FOUND           1
-
 static void
 printValue(u_int32_t len, char *ptr) {
        putchar('\'');
@@ -376,11 +417,11 @@ dumpPage(TBTree *db, TBTPage *page, int follow) {
                page->npointer
        );      
        for(i=0;i<page->npointer;i++) {
-               TBTPointer *ptr= (TBTPointer*)(page->data+TBTPOINTERSIZE(db)*i);
+               TBTPointer *ptr= (TBTPointer*)(page->data+db->pointersize*i);
                for(j=0;j<follow+2;j++)
                        putchar(' ');
-               printf("i:%d(%d) kl:%d ko:%d ",
-                       i, (int)ptr, 
+               printf("i:%d(%p) kl:%d ko:%d ",
+                       i, (void*)ptr, 
                        (db->keylen) ? db->keylen : ptr->key.varlen.length,
                        (db->keylen) ? 0 : ptr->key.varlen.offset
                );
@@ -408,7 +449,7 @@ dumpTree(TBTree *db, u_int32_t pagenumber, int follow) {
        TBTMemPage  *page;
 
        if ( TBTReadPage(db, pagenumber, &page) ) {
-               fprintf(stderr,"BEDA %d\n", pagenumber);
+               tlog(TL_CRIT,"BEDA %d\n", pagenumber);
                return;
        }
 
@@ -422,21 +463,21 @@ dumpTree(TBTree *db, u_int32_t pagenumber, int follow) {
 static int
 findInPage(TBTree *db, TBTPage *page, TBTValue *key, TBTPointer **res) {
        TBTPointer      *StopLow = (TBTPointer*)(page->data);
-       TBTPointer      *StopHigh = (TBTPointer*)(page->data + TBTPOINTERSIZE(db)*page->npointer);
+       TBTPointer      *StopHigh = (TBTPointer*)(page->data + db->pointersize*page->npointer);
        TBTPointer      *StopMiddle;
        int diff ;
        TBTValue  A;
 
+       A.length =  db->keylen;
        /* Loop invariant: StopLow <= StopMiddle < StopHigh */
        while (StopLow < StopHigh) {
-               StopMiddle = (TBTPointer*)(((char*)StopLow) + TBTPOINTERSIZE(db)*((((char*)StopHigh - (char*)StopLow)/TBTPOINTERSIZE(db)) >> 1));
+               StopMiddle = (TBTPointer*)(((char*)StopLow) + db->pointersize*((((char*)StopHigh - (char*)StopLow)/db->pointersize) >> 1));
                if ( db->keylen == 0 ) {
                        A.length = StopMiddle->key.varlen.length;
                        A.value = page->data + StopMiddle->key.varlen.offset;
-               } else {
-                       A.length =  db->keylen;
+               } else 
                        A.value = StopMiddle->key.fixed.key;
-               }
+               
        
                if ( ISINFPOINTER(db, page, StopMiddle) )
                        diff = 1;
@@ -446,7 +487,7 @@ findInPage(TBTree *db, TBTPage *page, TBTValue *key, TBTPointer **res) {
                        *res = StopMiddle;
                        return FOUND;
                } else if ( diff < 0 )
-                       StopLow = (TBTPointer*)((char*)StopMiddle + TBTPOINTERSIZE(db));
+                       StopLow = (TBTPointer*)((char*)StopMiddle + db->pointersize);
                else
                        StopHigh = StopMiddle;
        }
@@ -512,22 +553,22 @@ TBTFind(TBTree *db, TBTValue *key, TBTValue *value) {
 static void
 packLeafKV(TBTree *db, TBTPage *page, TBTPointer *ptr, TBTValue *key, TBTValue *value) {
        if ( ptr ) {
-               if ( (char*)ptr - page->data  != page->npointer * TBTPOINTERSIZE(db) ) 
-                       memmove((char*)ptr+TBTPOINTERSIZE(db), ptr, page->npointer * TBTPOINTERSIZE(db) - ( (char*)ptr - page->data ));
+               if ( (char*)ptr - page->data  != page->npointer * db->pointersize ) 
+                       memmove((char*)ptr+db->pointersize, ptr, page->npointer * db->pointersize - ( (char*)ptr - page->data ));
        } else
-               ptr = (TBTPointer*)(page->data + page->npointer * TBTPOINTERSIZE(db));
+               ptr = (TBTPointer*)(page->data + page->npointer * db->pointersize);
 
        page->npointer++;
-       page->freespace -= TBTPOINTERSIZE(db) + PTRALIGN(value->length);
+       page->freespace -= db->pointersize + PTRALIGN(value->length);
        if ( db->keylen ) {
                memcpy( ptr->key.fixed.key, key->value, db->keylen );
-               ptr->pointer.leaf.offset = page->npointer * TBTPOINTERSIZE(db) + page->freespace;
+               ptr->pointer.leaf.offset = page->npointer * db->pointersize + page->freespace;
        } else {
                page->freespace -= PTRALIGN(key->length);
                ptr->key.varlen.length = key->length;
-               ptr->key.varlen.offset = page->npointer * TBTPOINTERSIZE(db) + page->freespace;
+               ptr->key.varlen.offset = page->npointer * db->pointersize + page->freespace;
                memcpy( page->data + ptr->key.varlen.offset, key->value, key->length);
-               ptr->pointer.leaf.offset = page->npointer * TBTPOINTERSIZE(db) + page->freespace + PTRALIGN(key->length);
+               ptr->pointer.leaf.offset = page->npointer * db->pointersize + page->freespace + PTRALIGN(key->length);
        } 
        ptr->pointer.leaf.length = value->length;
        memcpy( page->data + ptr->pointer.leaf.offset, value->value, value->length );
@@ -536,80 +577,81 @@ packLeafKV(TBTree *db, TBTPage *page, TBTPointer *ptr, TBTValue *key, TBTValue *
 static void
 packInternalKV(TBTree *db, TBTPage *page, TBTPointer *ptr, TBTValue *key, u_int32_t pagenumber) {
        if ( ptr ) {
-               if ( (char*)ptr - page->data  != page->npointer * TBTPOINTERSIZE(db) ) 
-                       memmove((char*)ptr+TBTPOINTERSIZE(db), ptr, page->npointer * TBTPOINTERSIZE(db) - ( (char*)ptr - page->data ));
+               if ( (char*)ptr - page->data  != page->npointer * db->pointersize ) 
+                       memmove((char*)ptr+db->pointersize, ptr, page->npointer * db->pointersize - ( (char*)ptr - page->data ));
        } else
-               ptr = (TBTPointer*)(page->data + page->npointer * TBTPOINTERSIZE(db));
+               ptr = (TBTPointer*)(page->data + page->npointer * db->pointersize);
 
        page->npointer++;
-       page->freespace -= TBTPOINTERSIZE(db);
+       page->freespace -= db->pointersize;
 
        if ( db->keylen ) {
                if ( key->value )
                        memcpy( ptr->key.fixed.key, key->value, db->keylen );
-               else
-                       memset( ptr->key.fixed.key, 0, db->keylen );
        } else {
                page->freespace -= PTRALIGN(key->length);
                ptr->key.varlen.length = key->length;
-               ptr->key.varlen.offset = page->npointer * TBTPOINTERSIZE(db) + page->freespace;
+               ptr->key.varlen.offset = page->npointer * db->pointersize + page->freespace;
                if ( key->length ) 
                        memcpy( page->data + ptr->key.varlen.offset, key->value, key->length);
        }
        ptr->pointer.internal.link = pagenumber;
 }
 
-
 static void
 deleteKV(TBTree *db, TBTPage *page, TBTPointer **ptrs, u_int32_t num) {
-       static char buf[TBTREEPAGESIZE];
-       char  *bufptr = buf;
        TBTPointer *ptr = (TBTPointer*)page->data, *tmp, **ptrptr = ptrs;
-       u_int32_t i, start;
+       u_int32_t i;
+
        /* suppose ptrs are ordered! */
        tmp = ptr;
-       while( ptrptr - ptrs < num && (char*)ptr - page->data < TBTPOINTERSIZE(db)*page->npointer ) {
+       while( ptrptr - ptrs < num && (char*)ptr - page->data < db->pointersize*page->npointer ) {
                if ( *ptrptr < ptr ) {
                        tlog(TL_CRIT,"deleteKV: Something wrong: unexistent *ptrs"); 
                        ptrptr++;
                } else if ( *ptrptr > ptr ) {
                        if ( tmp != ptr )
-                               memcpy(tmp, ptr, TBTPOINTERSIZE(db));
-                       ptr = (TBTPointer*)( (char*)ptr + TBTPOINTERSIZE(db) );
-                       tmp = (TBTPointer*)( (char*)tmp + TBTPOINTERSIZE(db) );
+                               memcpy(tmp, ptr, db->pointersize);
+                       ptr = (TBTPointer*)( (char*)ptr + db->pointersize );
+                       tmp = (TBTPointer*)( (char*)tmp + db->pointersize );
                } else {
-                       page->freespace += TBTPOINTERSIZE(db) + 
+                       page->freespace += db->pointersize + 
                                ( ( page->isleaf ) ? PTRALIGN(ptr->pointer.leaf.length) : 0 ) +
                                ( ( db->keylen ) ? 0 : PTRALIGN(ptr->key.varlen.length) );
                        ptrptr++;
-                       ptr = (TBTPointer*)( (char*)ptr + TBTPOINTERSIZE(db) );
+                       ptr = (TBTPointer*)( (char*)ptr + db->pointersize );
                }
        }
 
-       if ( (char*)ptr - page->data < TBTPOINTERSIZE(db)*page->npointer && tmp!=ptr )
-               memmove( tmp, ptr, page->npointer * TBTPOINTERSIZE(db) - ((char*)ptr - page->data) );
+       if ( (char*)ptr - page->data < db->pointersize*page->npointer && tmp!=ptr )
+               memmove( tmp, ptr, page->npointer * db->pointersize - ((char*)ptr - page->data) );
 
        page->npointer-=num;
 
-       tmp = (TBTPointer*)page->data;
-       start = page->npointer * TBTPOINTERSIZE(db) + page->freespace;
-       for(i=0;i<page->npointer;i++) {
-               if ( page->isleaf ) {
-                       memcpy( bufptr, page->data + tmp->pointer.leaf.offset, tmp->pointer.leaf.length );
-                       tmp->pointer.leaf.offset = start + (bufptr-buf);
-                       bufptr+=PTRALIGN(tmp->pointer.leaf.length);
+       if ( page->isleaf || db->keylen == 0 ) {
+               static char buf[TBTREEPAGESIZE];
+               char  *bufptr = buf;
+               u_int32_t start = page->npointer * db->pointersize + page->freespace;
+
+               tmp = (TBTPointer*)page->data;
+               for(i=0;i<page->npointer;i++) {
+                       if ( page->isleaf ) {
+                               memcpy( bufptr, page->data + tmp->pointer.leaf.offset, tmp->pointer.leaf.length );
+                               tmp->pointer.leaf.offset = start + (bufptr-buf);
+                               bufptr+=PTRALIGN(tmp->pointer.leaf.length);
+                       }
+       
+                       if ( db->keylen == 0 ) {
+                               if ( tmp->key.varlen.length )
+                                       memcpy( bufptr, page->data + tmp->key.varlen.offset, tmp->key.varlen.length );
+                               tmp->key.varlen.offset = start + (bufptr-buf);
+                               bufptr+=PTRALIGN(tmp->key.varlen.length);
+                       }
+                       tmp = (TBTPointer*)( (char*)tmp + db->pointersize );
                }
 
-               if ( db->keylen == 0 ) {
-                       if ( tmp->key.varlen.length )
-                               memcpy( bufptr, page->data + tmp->key.varlen.offset, tmp->key.varlen.length );
-                       tmp->key.varlen.offset = start + (bufptr-buf);
-                       bufptr+=PTRALIGN(tmp->key.varlen.length);
-               }
-               tmp = (TBTPointer*)( (char*)tmp + TBTPOINTERSIZE(db) );
+               memcpy( page->data + start, buf, bufptr-buf );
        }
-
-       memcpy( page->data + start, buf, bufptr-buf );
 }
 
 static u_int32_t
@@ -618,36 +660,36 @@ findDelimiter(TBTree *db, TBTPage *page, TBTPointer *ptr, int size, u_int32_t st
        int i;
        TBTPointer *tomove;
        int lfree=TBTREEPAGESIZE-TBTPAGEHDRSZ, rfree=TBTREEPAGESIZE-TBTPAGEHDRSZ;
-       int nptr = ( (char*)ptr - page->data ) / TBTPOINTERSIZE(db)
+       int nptr = ( (char*)ptr - page->data ) / db->pointersize
        int was=0;
 
        for(i=0; i<page->npointer;i++) {
-               tomove = (TBTPointer*)(page->data + i*TBTPOINTERSIZE(db));
+               tomove = (TBTPointer*)(page->data + i*db->pointersize);
                if ( was==0 && i==nptr ) {
                        sizes[i] = size;
                        was=1;
                } 
-               sizes[i+was] = TBTPOINTERSIZE(db) +
+               sizes[i+was] = db->pointersize +
                        ( (page->isleaf) ? PTRALIGN(tomove->pointer.leaf.length) : 0 ) +
                        ( ( db->keylen ) ? 0 : PTRALIGN(tomove->key.varlen.length) );
        
        }
        
-       if ( was==0 )
+       if ( was==0 ) {
                sizes[page->npointer]=size;
+       }
 
-       for(i=0;i<page->npointer+1;i++) {
+       for(i=0;i<page->npointer+1;i++) 
                if ( i< start )
                        lfree-=sizes[i];
                else 
                        rfree-=sizes[i];
-       }
 
        while( 1 ) {
                if ( lfree<0 ) {
+                       start--;
                        lfree+=sizes[start];
                        rfree-=sizes[start];
-                       start--;
                } else if ( rfree < 0 ) { 
                        lfree-=sizes[start];
                        rfree+=sizes[start];
@@ -672,7 +714,7 @@ populatePages(TBTree *db, TBTPage *srcpage, TBTPage* newpage, u_int32_t start) {
        TBTValue k, v;
 
        for(i=start;i<srcpage->npointer;i++) {
-               tomove = (TBTPointer*)(srcpage->data + i*TBTPOINTERSIZE(db));
+               tomove = (TBTPointer*)(srcpage->data + i*db->pointersize);
 
                todelete[numtodelete] = tomove;
                numtodelete++;
@@ -700,7 +742,7 @@ static int
 splitPage(TBTree *db, TBTMemPage *srcpage, TBTMemPage** newpage, TBTPointer **ptr, int size, TBTValue *key, TBTValue *value, u_int32_t pagenumber) {
        TBTMemPage *tmp;
        int start=0, where=0;
-       int nptr = ( (char*)(*ptr) - srcpage->page.data ) / TBTPOINTERSIZE(db);
+       int nptr = ( (char*)(*ptr) - srcpage->page.data ) / db->pointersize;
 
        if ( TBTNewPage(db, newpage) )
                return TBT_ERROR;
@@ -732,13 +774,12 @@ splitPage(TBTree *db, TBTMemPage *srcpage, TBTMemPage** newpage, TBTPointer **pt
                tmp=srcpage;
        else
                nptr-=start;
-       *ptr = (TBTPointer*)(tmp->page.data + nptr*TBTPOINTERSIZE(db));
+       *ptr = (TBTPointer*)(tmp->page.data + nptr*db->pointersize);
 
        if ( tmp->page.isleaf ) 
                packLeafKV(db, &(tmp->page), *ptr, key, value);
        else
                packInternalKV(db, &(tmp->page), *ptr, key, pagenumber);
-
        return TBT_OK;
 }
 
@@ -770,7 +811,7 @@ layerInsert(TBTree *db, u_int32_t pagenumber, TBTMemPage **left, TBTMemPage **ri
 
        *left=*right=NULL;
        if ( page->page.isleaf ) {
-               u_int32_t size = TBTPOINTERSIZE(db) +
+               u_int32_t size = db->pointersize +
                        PTRALIGN(value->length) +
                        ( ( db->keylen ) ? 0 : PTRALIGN(key->length) );
                
@@ -785,7 +826,9 @@ layerInsert(TBTree *db, u_int32_t pagenumber, TBTMemPage **left, TBTMemPage **ri
                }
 
                if ( size <= page->page.freespace ) {
+                       u_int32_t       oldsize=page->page.freespace;
                        packLeafKV(db, &(page->page), ptr, key, value);
+                       tassert( oldsize == page->page.freespace + size );
                        page->issynced=0;
                } else {
                        rc = splitPage(db, page, &newright, &ptr, size, key, value, 0);
@@ -802,7 +845,7 @@ layerInsert(TBTree *db, u_int32_t pagenumber, TBTMemPage **left, TBTMemPage **ri
 
                if ( *left ) { /* child page was splited */
                        u_int32_t size;
-                       TBTPointer *rightptr = (TBTPointer*)( (*left)->page.data + ((*left)->page.npointer-1) * TBTPOINTERSIZE(db) );
+                       TBTPointer *rightptr = (TBTPointer*)( (*left)->page.data + ((*left)->page.npointer-1) * db->pointersize );
                        TBTValue K;
 
                        if ( ISINFPOINTER(db, &(page->page), ptr) ) {
@@ -815,7 +858,7 @@ layerInsert(TBTree *db, u_int32_t pagenumber, TBTMemPage **left, TBTMemPage **ri
                        K.length = ( db->keylen ) ? db->keylen : rightptr->key.varlen.length;
                        K.value  = ( db->keylen ) ? rightptr->key.fixed.key : (*left)->page.data + rightptr->key.varlen.offset;
 
-                       size = TBTPOINTERSIZE(db) + ( ( db->keylen ) ? 0 : PTRALIGN(K.length) );
+                       size = db->pointersize + ( ( db->keylen ) ? 0 : PTRALIGN(K.length) );
 
                        if ( size <= page->page.freespace ) {
                                packInternalKV(db, &(page->page), ptr, &K, (*left)->pagenumber);
@@ -850,12 +893,12 @@ layerInsert(TBTree *db, u_int32_t pagenumber, TBTMemPage **left, TBTMemPage **ri
                        memset( &(page->page), 0, TBTREEPAGESIZE);
                        page->page.freespace=TBTREEPAGESIZE-TBTPAGEHDRSZ;
        
-                       ptr = (TBTPointer*)( newleft->page.data + (newleft->page.npointer-1) * TBTPOINTERSIZE(db) ); 
+                       ptr = (TBTPointer*)( newleft->page.data + (newleft->page.npointer-1) * db->pointersize ); 
                        K.length = ( db->keylen ) ? db->keylen : ptr->key.varlen.length;
                        K.value  = ( db->keylen ) ? ptr->key.fixed.key : newleft->page.data + ptr->key.varlen.offset;
                        packInternalKV(db, &(page->page), NULL, &K, newleft->pagenumber);
                
-                       ptr = (TBTPointer*)( newright->page.data + (newright->page.npointer-1) * TBTPOINTERSIZE(db) ); 
+                       ptr = (TBTPointer*)( newright->page.data + (newright->page.npointer-1) * db->pointersize ); 
                        K.length = 0;
                        K.value  = NULL;
                        packInternalKV(db, &(page->page), NULL, &K, newright->pagenumber);
@@ -963,7 +1006,7 @@ TBTIterate(TBTree *db, TBTIterator *iterator, TBTValue *key, TBTValue *value ) {
                return TBT_OK;
        }
 
-       if ( (char*)(iterator->ptr) - iterator->page->page.data >= TBTPOINTERSIZE(db) * iterator->page->page.npointer ) {
+       if ( (char*)(iterator->ptr) - iterator->page->page.data >= db->pointersize * iterator->page->page.npointer ) {
                do {
                        u_int32_t pagenumber = iterator->page->page.rightlink;
                        iterator->page->islocked=0;
@@ -995,9 +1038,84 @@ TBTIterate(TBTree *db, TBTIterator *iterator, TBTValue *key, TBTValue *value ) {
        value->length = iterator->ptr->pointer.leaf.length; 
        value->value = iterator->page->page.data + iterator->ptr->pointer.leaf.offset;
 
-       iterator->ptr = ( TBTPointer* ) (  (char*)(iterator->ptr) + TBTPOINTERSIZE(db) );
+       iterator->ptr = ( TBTPointer* ) (  (char*)(iterator->ptr) + db->pointersize );
 
        return TBT_OK;
 }
 
+int 
+TBTGetFirst(TBTree *db, TBTValue *key, TBTValue *value) {
+       TBTIterator     iterator;
+       int rc;
+
+       if ( (rc=TBTInitIterator(db, &iterator))!=TBT_OK )
+               return rc;
+
+       rc=TBTIterate(db, &iterator, key, value);
+
+       if ( key->value ) {
+               char * ptr;
+
+               ptr = tmalloc( key->length );
+               memcpy( ptr, key->value, key->length );
+               key->value = ptr;
+               ptr = tmalloc( value->length );
+               memcpy( ptr, value->value, value->length );
+               value->value = ptr; 
+       }
+
+       TBTFreeIterator(db, &iterator);
+
+       return rc;
+}
+
+static int
+findLast(TBTree *db, TBTValue *key, TBTValue *value, u_int32_t pagenumber) {
+       TBTMemPage      *page;
+       TBTPointer      *ptr;
+       int rc=TBT_OK,i;
+
+       if ( (rc=TBTReadPage(db, pagenumber, &page)) != TBT_OK )
+               return rc;
+
+       if ( page->page.npointer==0 ) {
+               if ( !page->iscached )
+                       tfree(page);
+               return TBT_OK;
+       }
+
+       page->islocked=1;
+       if ( page->page.isleaf ) {
+               ptr = (TBTPointer*)(page->page.data + db->pointersize * (page->page.npointer-1));
+
+               key->length = ( db->keylen ) ? db->keylen : ptr->key.varlen.length;
+               key->value = tmalloc( key->length );
+               memcpy( key->value, ( db->keylen ) ? ptr->key.fixed.key : page->page.data + ptr->key.varlen.offset, key->length ); 
+
+               value->length = ptr->pointer.leaf.length;
+               value->value = tmalloc( value->length );
+               memcpy( value->value, page->page.data + ptr->pointer.leaf.offset, value->length ); 
+       } else {
+               for(i=page->page.npointer-1; i>=0; i--) {
+                       ptr = (TBTPointer*)( page->page.data + db->pointersize * i );
+                       rc = findLast(db, key, value, ptr->pointer.internal.link);
+                       if ( rc!=TBT_OK || key->value )
+                               break;
+               } 
+       }
+
+       page->islocked=0;
+       if ( !page->iscached )
+               tfree(page);
+
+       return rc;
+}
+
+int 
+TBTGetLast(TBTree *db, TBTValue *key, TBTValue *value) {
+       memset(key, 0, sizeof(TBTValue));
+       memset(value, 0, sizeof(TBTValue));
+       return  findLast(db, key, value, TBTPAGEROOT);
+}