X-Git-Url: http://www.sigaev.ru/git/gitweb.cgi?a=blobdiff_plain;f=ftsbench.c;h=3c8b7f63e550f0db10f5745e4c826e9a18bcb1d7;hb=HEAD;hp=3d69a1865ed14e2e8f89cca179d160ada5cf1ad1;hpb=6c66b088981f2b70f8356e0f981ad98dc1c3a253;p=ftsbench.git diff --git a/ftsbench.c b/ftsbench.c index 3d69a18..3c8b7f6 100644 --- a/ftsbench.c +++ b/ftsbench.c @@ -1,9 +1,39 @@ +/* + * Copyright (c) 2006 Teodor Sigaev + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + #include #include #include #include #include #include +#include #include "ftsbench.h" @@ -44,31 +74,59 @@ usage() { } fputs( - "ftsbench - full text search benchmark ofr RDBMS\n" + "Copyright (c) 2006 Teodor Sigaev . All rights reserved.\n" + "ftsbench - full text search benchmark for RDBMS\n" "Initialization of DB:\n" - "\tftsbench -i [-b RDBMS] [-n NUMROW] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] -d DBNAME\n" - "FLAGS are comma-separate list of:\n" - " gin - use GIN index\n" - " gist - use GiST index\n" - " func - use functional index\n", + "ftsbench -i [-b RDBMS] [-n NUMROW] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] [-q | -s ID] -d DBNAME\n" + " -b RDBMS\t- type of DB: ", + stdout + ); + fputs( buf, stdout ); + fputs( + "\n" + " -n NUMROW - number of row in table\n" + " -l LEXFILE - file with words and its frequents (default gendata/lex)\n" + " -g GAMMAFILE - file with doc's length distribution (default \n" + " gendata/gamma-lens)\n" + " -l FLAGS - options for db's schema (see below)\n" + " -s ID - SQL mode: output is a SQL queries, ID is an identifier for insert\n" + " statement\n" + " -q - do not print progress message\n", stdout ); fputs( "Run tests:\n" - "\tftsbench [-b RDBMS] [-c NCLIENTS] [-n NUMQUERY] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] -d DBNAME\n" - "FLAGS are comma-separate list of:\n" - " and - AND'ing lexemes in query (default)\n" - " or - OR'ing lexemes in query\n" - " sort - sort result of query\n" - "Options are:\n" - " -b RDBMS\t- type of DB: ", + "ftsbench [-b RDBMS] [-c NCLIENTS] [-n NUMQUERY] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] [-q | -s ID [-r]] -d DBNAME\n" + " -b RDBMS\t- type of DB: ", stdout ); fputs( buf, stdout ); fputs( "\n" - " -l LEXFILE\t- file with words and its frequents\n" - " -g GAMMAFILE\t- file with doc's length distribution\n", + " -c NCLIENTS - number of clients in parallel\n" + " -n NUMQUERY - number of queries per client\n" + " -l LEXFILE - file with words and its frequents (default gendata/query-lex)\n" + " -g GAMMAFILE - file with doc's length distribution (default \n" + " gendata/query-lens)\n" + " -l FLAGS - options for db's schema (see below)\n" + " -s ID - SQL mode: output is a SQL queries, ID is an identifier for insert\n" + " statement\n" + " -r - row mode: timing every query\n" + " -q - do not print progress message\n", + stdout + ); + fputs( + "FLAGS are comma-separate list of:\n" + " gin - use GIN index\n" + " gist - use GiST index\n" + " func - use functional index\n" + " and - AND'ing lexemes in query (default)\n" + " or - OR'ing lexemes in query\n", + stdout + ); + fputs( + "Print SQL-scheme for statistics:\n" + "ftsbench -S\n", stdout ); exit(1); @@ -83,16 +141,14 @@ getRDBMS(char *name) { if ( DBDesc[i].init ) return DBDesc[i].rdbms; } else if ( strcasecmp(name,DBDesc[i].shortname) == 0 ) { - if ( DBDesc[i].init == NULL ) { - fprintf(stderr,"Support of '%s' isn't compiled-in\n", DBDesc[i].longname); - exit(1); - } + if ( DBDesc[i].init == NULL ) + fatal("Support of '%s' isn't compiled-in\n", DBDesc[i].longname); + return DBDesc[i].rdbms; } } - fprintf(stderr,"Can't find a RDBMS\n"); - exit(1); + fatal("Can't find a RDBMS\n"); return NULLSQL; } @@ -111,17 +167,13 @@ getFLAGS(char *flg) { flags |= FLG_AND; if ( strcasestr(flg,"or") ) flags |= FLG_OR; - if ( strcasestr(flg,"sort") ) - flags |= FLG_SORT; - if ( (flags & FLG_GIST) && (flags & FLG_GIN) ) { - fprintf(stderr,"GIN and GiST flags are mutually exclusive\n"); - exit(1); - } - if ( (flags & FLG_AND) && (flags & FLG_OR) ) { - fprintf(stderr,"AND and OR flags are mutually exclusive\n"); - exit(1); - } + if ( (flags & FLG_GIST) && (flags & FLG_GIN) ) + fatal("GIN and GiST flags are mutually exclusive\n"); + if ( (flags & FLG_AND) && (flags & FLG_OR) ) + fatal("AND and OR flags are mutually exclusive\n"); + else if ( ( flags & ( FLG_AND | FLG_OR ) ) == 0 ) + flags |= FLG_AND; return flags; } @@ -131,10 +183,8 @@ initConnections(RDBMS rdbms, int n, char *connstr) { ftsDB **dbs = (ftsDB**)malloc(sizeof(ftsDB*) * n); int i; - if (!dbs) { - fprintf(stderr,"Not enough mwmory\n"); - exit(1); - } + if (!dbs) + fatal("Not enough mwmory\n"); for(i=0;istrlen = 0; + while(*wptr) { + if ( wptr != words ) + sb_add(b, " ", 1); + + ptr = *wptr; + while( *ptr ) { + if ( *ptr == '\'' ) + sb_add( b, "'", 1 ); + sb_add( b, ptr, 1 ); + ptr++; + } + + wptr++; + } +} + +/* + * main test function, executed in thread + */ static void* execBench(void *in) { ftsDB *db = (ftsDB*)in; - int i; + int i, nres=0; char **words; + struct timeval begin; + double elapsed; + StringBuf b = {NULL,0,0}; for(i=0;iexecQuery(db, words, benchFlags); + + if ( rowMode ) { + elapsed = elapsedtime(&begin); + printQueryWords(&b, words); + + printf("INSERT INTO fb_row (id, f_and, f_or, nclients, query, nres, elapsed) VALUES (%d, '%c', '%c', %d, '%s', %d, %g);\n", + Id, + ( benchFlags & FLG_AND ) ? 't' : 'f', + ( benchFlags & FLG_OR ) ? 't' : 'f', + nClients, + b.str, + db->nres - nres, + elapsed + ); + nres = db->nres; + } free(words); } @@ -190,43 +290,91 @@ execBench(void *in) { return NULL; } +void +report(const char *format, ...) { + va_list args; + + if (sqlMode) + return; + + va_start(args, format); + vfprintf(stdout, format, args); + va_end(args); + + fflush(stdout); +} + +void +fatal(const char *format, ...) { + va_list args; + + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + + fflush(stderr); + + exit(1); +} + + extern char *optarg; int main(int argn, char *argv[]) { int initMode = 0; - int n = 0, nclients = 1; + int n = 0; char *lex = NULL; char *doc = NULL; char *dbname = NULL; RDBMS rdbms = NULLSQL; int flags = 0; - int i; + int i; + int quiet = 0, scheme=0; StringBuf b = {NULL,0,0}; + struct timeval begin; + double elapsed; - while((i=getopt(argn,argv,"ib:n:l:g:d:c:hf:")) != EOF) { + while((i=getopt(argn,argv,"ib:n:l:g:d:c:hf:qSs:r")) != EOF) { switch(i) { case 'i': initMode = 1; break; case 'b': rdbms = getRDBMS(optarg); break; case 'n': n=atoi(optarg); break; - case 'c': nclients=atoi(optarg); break; + case 'c': nClients=atoi(optarg); break; case 'l': lex = strdup(optarg); break; case 'g': doc = strdup(optarg); break; case 'd': dbname = strdup(optarg); break; case 'f': flags = getFLAGS(optarg); break; + case 'q': quiet = 1; break; + case 'S': scheme = 1; break; + case 's': sqlMode = 1; Id = atoi(optarg); break; + case 'r': rowMode = 1; break; case 'h': default: usage(); } } + if ( scheme ) { + printScheme(); + return 0; + } + if (rdbms == NULLSQL) rdbms = getRDBMS(NULL); - if ( dbname == NULL || n<0 || nclients<1 ) + if ( dbname == NULL || n<0 || (initMode == 0 && nClients<1) ) usage(); - printf("Running with '%s' RDBMS\n", DBDesc[ rdbms ].longname); + if ( sqlMode ) + quiet = 1; + else + rowMode = 0; + + benchFlags = flags; + benchCount = n; + + report("Running with '%s' RDBMS\n", DBDesc[ rdbms ].longname); if ( initMode ) { ftsDB *db = *initConnections(rdbms, 1, dbname); @@ -234,29 +382,44 @@ main(int argn, char *argv[]) { if (!lex) lex = "gendata/lex"; if (!doc) doc = "gendata/gamma-lens"; - finnegan_init(lex, doc); + finnegan_init(lex, doc, sqlMode); + + gettimeofday(&begin,NULL); db->startCreateScheme(db, flags); prev = time(NULL); for(i=0;iInsertRow(db, i+1, b.str); - if ( prev!=time(NULL) ) { - printf("\r%d(%.02f%%) rows inserted", i, (100.0*i)/n); - fflush(stdout); + if ( !quiet && prev!=time(NULL) ) { + report("\r%d(%.02f%%) rows inserted", i, (100.0*i)/n); prev = time(NULL); } } - printf("\r%d(100.00%%) rows inserted. Finalyze insertion... ", i); - fflush(stdout); + + report("%s%d(100.00%%) rows inserted. Finalyze insertion... ", + (quiet) ? "" : "\r", i); db->finishCreateScheme(db); - printf("done\n"); + elapsed = elapsedtime(&begin); + + report("done\nTime: %.02f secs\n", elapsed); + if (sqlMode) { + printf("INSERT INTO fb_create (id, rdbms, f_gin, f_gist, f_func, rows, elapsed) VALUES (%d, '%s', '%c', '%c', '%c', %d, %g);\n", + Id, + DBDesc[ rdbms ].shortname, + ( flags & FLG_GIN ) ? 't' : 'f', + ( flags & FLG_GIST ) ? 't' : 'f', + ( flags & FLG_FUNC ) ? 't' : 'f', + n, + elapsed + ); + } + db->Close(db); } else { - ftsDB **dbs = initConnections(rdbms, nclients, dbname); - pthread_t *tid = (pthread_t*)malloc( sizeof(pthread_t) * nclients); + ftsDB **dbs = initConnections(rdbms, nClients, dbname); + pthread_t *tid = (pthread_t*)malloc( sizeof(pthread_t) * nClients); struct timeval begin; - double elapsed; - int total=0; + int total=0, nres=0; struct timespec sleepTo = { 0, 0 }; /* @@ -264,34 +427,27 @@ main(int argn, char *argv[]) { */ if (!lex) lex = "gendata/query-lex"; if (!doc) doc = "gendata/query-lens"; - finnegan_init(lex, doc); + finnegan_init(lex, doc, sqlMode); /* * Initial query */ - printf("\r0(0.00%%) queries proceed"); - fflush(stdout); - benchFlags = flags; - benchCount = n; + if ( !quiet ) + report("\r0(0.00%%) queries proceed"); gettimeofday(&begin,NULL); pthread_mutex_lock( &mutexFinish ); - for(i=0;inqueryMutex); total +=dbs[i]->nquery; if ( dbs[i]->nquery < n ) @@ -302,26 +458,39 @@ main(int argn, char *argv[]) { if ( ntogo == 0 ) break; - printf("\r%d(%.02f%%) queries proceed", total, (100.0*(float)total)/(nclients * n)); - fflush(stdout); + if ( !quiet ) + report("\r%d(%.02f%%) queries proceed", total, (100.0*(float)total)/(nClients * n)); sleepTo.tv_sec = time(NULL) + 1; res = pthread_cond_timedwait( &condFinish, &mutexFinish, &sleepTo ); - if ( !(res == ETIMEDOUT || res == 0) ) { - fprintf(stderr,"pthread_cond_timedwait failed: %s", strerror(errno)); - exit(1); - } + if ( !(res == ETIMEDOUT || res == 0) ) + fatal("pthread_cond_timedwait failed: %s\n", strerror(errno)); } elapsed = elapsedtime(&begin); pthread_mutex_unlock( &mutexFinish ); - for(i=0;inres; + dbs[i]->Close(dbs[i]); + } - printf("\r%d(%.02f%%) queries proceed\n", total, (100.0*(float)total)/(nclients * n)); - printf("Total time: %.02f sec, Queries per second: %.02f\n", elapsed, total/elapsed); - fflush(stdout); + report("%s%d(%.02f%%) queries proceed\n", + (quiet) ? "" : "\r", total, (100.0*(float)total)/(nClients * n)); + report("Total number of result: %d\n", nres); + report("Total time: %.02f sec, Queries per second: %.02f\n", elapsed, total/elapsed); + if (sqlMode && !rowMode) { + printf("INSERT INTO fb_search (id, f_and, f_or, nclients, nqueries, nres, elapsed) VALUES (%d, '%c', '%c', %d, %d, %d, %g);\n", + Id, + ( flags & FLG_AND ) ? 't' : 'f', + ( flags & FLG_OR ) ? 't' : 'f', + nClients, + n, + nres, + elapsed + ); + } } return 0;