#include "tbtree.h"
+#define NOTFOUND 0
+#define FOUND 1
+
static int TBTReadPage(TBTree *db, u_int32_t pagenumber, TBTMemPage **page) ;
static int TBTWritePage(TBTree *db, TBTMemPage *page) ;
static int TBTNewPage(TBTree *db, TBTMemPage **page) ;
+
+
int
TBTOpen(TBTree *db, char *file) {
off_t offset;
}
if ( db->npage ) {
- db->Cache=(TBTMemPage**)t0malloc( sizeof(TBTMemPage*)*db->npage );
- db->TimeCache=(TBTMemPage**)t0malloc( sizeof(TBTMemPage*)*db->npage );
+ db->Cache=(TBTMemPage**)t0malloc( sizeof(TBTMemPage*)*HASHSIZE(db->npage) );
db->curpage=0;
}
if ( db->npage ) {
u_int32_t i;
-
- for(i=0; i<db->curpage; i++)
- tfree( db->Cache[i] );
+ TBTMemPage *ptr,*tmp;
+
+ for(i=0; i<HASHSIZE(db->npage); i++) {
+ ptr = db->Cache[i];
+ while(ptr) {
+ tmp = ptr->link;
+ tfree(ptr);
+ ptr=tmp;
+ }
+ }
tfree( db->Cache );
- tfree( db->TimeCache );
}
flock( db->fd, LOCK_UN );
return (rc) ? TBT_ERROR : TBT_OK;
}
+static int
+cmpNPage(const void *a, const void *b) {
+ tassert( (*(TBTMemPage**)a)->pagenumber != (*(TBTMemPage**)b)->pagenumber );
+ return ( (*(TBTMemPage**)a)->pagenumber > (*(TBTMemPage**)b)->pagenumber ) ? 1 : -1;
+}
+
+
int
TBTSync(TBTree *db) {
- u_int32_t i;
int rc=0;
if ( db->readonly || db->npage==0 )
return TBT_OK;
-
- for(i=0; i<db->curpage; i++)
- if ( !db->Cache[i]->issynced )
- rc |= TBTWritePage(db, db->Cache[i]);
+
+ if ( db->npage ) {
+ u_int32_t i, total=0;
+ TBTMemPage *ptr, **data = (TBTMemPage**)tmalloc(db->npage*sizeof(TBTMemPage*)), **pptr;
+ pptr=data;
+ for(i=0; i<HASHSIZE(db->npage); i++) {
+ ptr = db->Cache[i];
+ while(ptr) {
+ if ( !ptr->issynced ) {
+ *pptr = ptr;
+ pptr++;
+ }
+ ptr = ptr->link;
+ }
+ }
+
+ total=pptr-data;
+ if ( total > 0 ) {
+ if ( total>1 )
+ qsort( data, total, sizeof(TBTMemPage*), cmpNPage);
+
+ pptr=data;
+ while( pptr-data<total) {
+ rc |= TBTWritePage(db, *pptr);
+ pptr++;
+ }
+ }
+ tfree(data);
+ }
if ( fsync(db->fd) ) {
tlog(TL_CRIT,"TBTSync: fsync failed: %s",strerror(errno));
- rc=1;
+ rc=TBT_ERROR;
}
return (rc) ? TBT_ERROR : TBT_OK;
}
-static int
-cmpNPage(const void *a, const void *b) {
- if ( (*(TBTMemPage**)a)->pagenumber == (*(TBTMemPage**)b)->pagenumber )
- return 0;
- return ( (*(TBTMemPage**)a)->pagenumber > (*(TBTMemPage**)b)->pagenumber ) ? 1 : -1;
+static TBTMemPage *
+findInCache(TBTree *db, u_int32_t pagenumber) {
+ TBTMemPage *ptr;
+
+ ptr = db->Cache[ pagenumber % HASHSIZE(db->npage) ];
+
+ while(ptr && ptr->pagenumber != pagenumber)
+ ptr = ptr->link;
+
+ return ptr;;
+}
+
+static void
+removeFromCache(TBTree *db, u_int32_t pagenumber) {
+ TBTMemPage *ptr, *prev=NULL;
+ int pos = pagenumber % HASHSIZE(db->npage);
+
+ ptr = db->Cache[ pos ];
+
+ while(ptr) {
+ if ( ptr->pagenumber == pagenumber ) {
+ if ( prev )
+ prev->link = ptr->link;
+ else
+ db->Cache[ pos ] = ptr->link;
+ break;
+ }
+ prev=ptr;
+ ptr = ptr->link;
+ }
+}
+
+static void
+insertInCache(TBTree *db, TBTMemPage *page) {
+ int pos = page->pagenumber % HASHSIZE(db->npage);
+
+ page->link = db->Cache[ pos ];
+ db->Cache[ pos ] = page;
}
static TBTMemPage*
-PageAlloc(TBTree *db, int *pos) {
+PageAlloc(TBTree *db) {
TBTMemPage *page;
if ( db->curpage < db->npage ) {
- page = db->Cache[db->curpage] = db->TimeCache[db->curpage] = (TBTMemPage*)t0malloc(sizeof(TBTMemPage));
- *pos = db->curpage;
+ page = (TBTMemPage*)tmalloc(sizeof(TBTMemPage));
+ memset( page, 0, TBTMEMPAGEHDRSZ );
page->iscached=1;
+ if ( db->curpage == 0 ) {
+ db->TimeCache = db->TimeCacheLast = page;
+ } else {
+ db->TimeCacheLast->next = page;
+ page->prev = db->TimeCacheLast;
+ db->TimeCacheLast = page;
+ }
db->curpage++;
} else if ( db->npage == 0 ) {
- page = (TBTMemPage*)t0malloc(sizeof(TBTMemPage));
+ page = (TBTMemPage*)tmalloc(sizeof(TBTMemPage));
+ memset( page, 0, TBTMEMPAGEHDRSZ );
} else {
- u_int32_t i;
- for( i=0; i<db->curpage && db->TimeCache[i]->islocked; i++ );
- if ( i == db->curpage ) { /*all pages locked*/
- /*tlog(TL_WARN,"TBTReadPage: all pages in cache locked, increase number of cached pages");*/
- page = (TBTMemPage*)t0malloc(sizeof(TBTMemPage));
+ page = db->TimeCache;
+ while( page->next && page->islocked )
+ page = page->next;
+ if ( page==db->TimeCacheLast && page->islocked ) { /*all pages locked*/
+ /* tlog(TL_WARN,"TBTReadPage: all pages in cache locked, increase number of cached pages"); */
+ page = (TBTMemPage*)tmalloc(sizeof(TBTMemPage));
+ memset( page, 0, TBTMEMPAGEHDRSZ );
} else {
- page = db->TimeCache[i];
if ( !page->issynced )
if ( TBTWritePage(db, page) )
return NULL;
- memset(page,0,sizeof(TBTMemPage));
+ removeFromCache(db, page->pagenumber);
+ if ( page != db->TimeCacheLast ) {
+ if ( page == db->TimeCache ) {
+ db->TimeCache = page->next;
+ db->TimeCache->prev = NULL;
+ } else {
+ page->prev->next = page->next;
+ page->next->prev = page->prev;
+ }
+ memset( page, 0, TBTMEMPAGEHDRSZ );
+ db->TimeCacheLast->next = page;
+ page->prev = db->TimeCacheLast;
+ db->TimeCacheLast = page;
+ } else
+ memset( page, 0, TBTMEMPAGEHDRSZ );
page->iscached=1;
- *pos=i;
}
}
return page;
}
-static void
-moveFirstTime(TBTree *db, int pos) {
- TBTMemPage *ptr;
-
- if ( pos < 0 )
- return;
- if (pos+1==db->curpage )
- return;
-
- ptr = db->TimeCache[pos];
- memmove( db->TimeCache + pos , db->TimeCache + pos + 1, sizeof(TBTMemPage*)*(db->curpage-pos-1) );
- db->TimeCache[db->curpage-1] = ptr;
-}
-
-static void
-findAndMoveFirstTime(TBTree *db, TBTMemPage *page) {
- int pos;
- for(pos=0;pos<db->curpage;pos++)
- if ( page==db->TimeCache[pos] ) {
- moveFirstTime(db,pos);
- break;
- }
- tassert( pos<db->curpage );
-}
-
-static void
-findAndMovePN(TBTree *db, TBTMemPage *page) {
- int pos;
-
- if ( db->curpage < 2 )
- return;
-
- for(pos=0;pos<db->curpage;pos++)
- if ( page==db->Cache[pos] )
- break;
- tassert( pos<db->curpage );
-
- while(1) {
- if ( pos+1 != db->curpage && db->Cache[pos+1]->pagenumber < page->pagenumber ) {
- db->Cache[pos] = db->Cache[pos+1];
- db->Cache[pos+1] = page;
- pos++;
- } else if ( pos!=0 && db->Cache[pos-1]->pagenumber > page->pagenumber ) {
- db->Cache[pos] = db->Cache[pos-1];
- db->Cache[pos-1] = page;
- pos--;
- } else
- break;
- }
-}
-
static int
TBTReadPage(TBTree *db, u_int32_t pagenumber, TBTMemPage **page) {
- TBTMemPage *ptr, key, **res;
int rc;
- int pos;
-
- ptr=&key;
- key.pagenumber = pagenumber;
- res = (TBTMemPage**)bsearch(&ptr, db->Cache, db->curpage, sizeof(TBTMemPage*), cmpNPage);
- if ( res ) {
+ if ( db->npage && (*page=findInCache(db, pagenumber))!=NULL ) {
db->cachehits++;
- *page=*res;
- findAndMoveFirstTime(db, *page);
+ if ( *page != db->TimeCacheLast ) {
+ if ( (*page) == db->TimeCache ) {
+ db->TimeCache = (*page)->next;
+ db->TimeCache->prev=NULL;
+ } else {
+ (*page)->prev->next = (*page)->next;
+ (*page)->next->prev = (*page)->prev;
+ }
+ db->TimeCacheLast->next = (*page);
+ (*page)->prev = db->TimeCacheLast;
+ db->TimeCacheLast = *page;
+ (*page)->next = NULL;
+ }
} else {
off_t offset = (off_t)pagenumber * (off_t)TBTREEPAGESIZE;
-
- if( (*page = PageAlloc(db,&pos))==NULL )
+
+ if( (*page = PageAlloc(db))==NULL )
return TBT_ERROR;
if ( lseek(db->fd, offset, SEEK_SET) != offset ) {
(*page)->issynced=1;
(*page)->pagenumber=pagenumber;
if ( (rc=read(db->fd, &( (*page)->page ), TBTREEPAGESIZE)) != TBTREEPAGESIZE ) {
- tlog(TL_CRIT,"TBTReadPage: read failed: %s %d %d",strerror(errno), pagenumber, offset);
+ tlog(TL_CRIT,"TBTReadPage: read failed: %s %d %d",strerror(errno), pagenumber, offset);
return TBT_ERROR;
}
- if ( (*page)->iscached ) {
- moveFirstTime(db, pos);
- findAndMovePN(db, *page);
- }
+ if ( (*page)->iscached )
+ insertInCache(db, *page);
}
db->pageread++;
- gettimeofday( &( (*page)->last ), NULL );
-
return TBT_OK;
}
static int
TBTNewPage(TBTree *db, TBTMemPage **page) {
- int pos;
if ( db->lastpagenumber==0 ) {
off_t offset;
if ( (offset=lseek(db->fd, 0, SEEK_END)) < 0 ) {
db->lastpagenumber = offset/TBTREEPAGESIZE;
}
- if( (*page = PageAlloc(db,&pos))==NULL )
+ if( (*page = PageAlloc(db))==NULL )
return TBT_ERROR;
+
(*page)->pagenumber = db->lastpagenumber;
(*page)->issynced=0;
(*page)->islocked=1;
db->lastpagenumber++;
- memset( &((*page)->page), 0, sizeof(TBTPAGEHDRSZ));
+ memset( &((*page)->page), 0, TBTMEMPAGEHDRSZ);
(*page)->page.freespace = TBTREEPAGESIZE-TBTPAGEHDRSZ;
- gettimeofday( &( (*page)->last ), NULL );
- if ( (*page)->iscached ) {
- moveFirstTime(db, pos);
- findAndMovePN(db, *page);
- }
+ if ( (*page)->iscached )
+ insertInCache(db, *page);
return TBT_OK;
}
-#define NOTFOUND 0
-#define FOUND 1
-
static void
printValue(u_int32_t len, char *ptr) {
putchar('\'');
TBTMemPage *page;
if ( TBTReadPage(db, pagenumber, &page) ) {
- fprintf(stderr,"BEDA %d\n", pagenumber);
+ tlog(TL_CRIT,"BEDA %d\n", pagenumber);
return;
}
}
- if ( was==0 )
+ if ( was==0 ) {
sizes[page->npointer]=size;
+ }
- for(i=0;i<page->npointer+1;i++) {
+ for(i=0;i<page->npointer+1;i++)
if ( i< start )
lfree-=sizes[i];
else
rfree-=sizes[i];
- }
while( 1 ) {
if ( lfree<0 ) {
+ start--;
lfree+=sizes[start];
rfree-=sizes[start];
- start--;
} else if ( rfree < 0 ) {
lfree-=sizes[start];
rfree+=sizes[start];
packLeafKV(db, &(tmp->page), *ptr, key, value);
else
packInternalKV(db, &(tmp->page), *ptr, key, pagenumber);
-
return TBT_OK;
}
}
if ( size <= page->page.freespace ) {
+ u_int32_t oldsize=page->page.freespace;
packLeafKV(db, &(page->page), ptr, key, value);
+ tassert( oldsize == page->page.freespace + size );
page->issynced=0;
} else {
rc = splitPage(db, page, &newright, &ptr, size, key, value, 0);