/* * Copyright (c) 2006 Teodor Sigaev * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include "ftsbench.h"
/*
 * NOTE(review): the first seven #include directives above carry no header
 * name in this copy of the file; the names have to be restored from the
 * upstream source before this can be compiled.
 */

/*
 * Identifiers of the database back-ends. main() also uses the value as an
 * index into DBDesc[], so the numbering has to follow the order of that
 * table. NULLSQL means "no back-end" and marks the end of DBDesc[].
 */
typedef enum RDBMS { PostgreSQL = 0, MySQL = 1, NULLSQL } RDBMS;

/*
 * Description of one back-end:
 *   rdbms     - identifier from the RDBMS enum
 *   shortname - name compared (case-insensitively) with the -b argument
 *   longname  - name printed in messages
 *   init      - returns an ftsDB object for a string argument; getRDBMS()
 *               treats a NULL pointer here as "support not compiled in"
 */
typedef struct RDBMSDesc { RDBMS rdbms; char *shortname; char *longname; ftsDB* (*init)(char *); } RDBMSDesc;

/*
 * Table of known back-ends, terminated by the NULLSQL entry. PGInit and
 * MYInit are not defined in this file (presumably declared in ftsbench.h).
 */
static RDBMSDesc DBDesc[] = { { PostgreSQL, "pgsql", "PostgreSQL", PGInit }, { MySQL, "mysql", "MySQL", MYInit }, { NULLSQL, NULL, NULL, NULL } };

/*
 * Print the help text to stdout and terminate the process with exit
 * status 1; this function does not return.
 *
 * The list printed after "-b RDBMS" is built in buf from the shortnames of
 * the DBDesc[] entries whose init pointer is not NULL, separated by ", ";
 * the first such entry gets the suffix "(default)".
 *
 * NOTE(review): buf is a fixed 1024-byte array filled with unchecked
 * strcat() calls - sufficient for the two entries of DBDesc[], but nothing
 * bounds it if the table grows.
 * NOTE(review): both option lists print "-l FLAGS" although the synopsis
 * lines say "[-f FLAGS]" and -l is already used for LEXFILE; this looks
 * like a typo for "-f FLAGS" in the help text.
 * NOTE(review): one string literal below contains a raw line break (after
 * "ID is an "), and the author's e-mail address seems to be missing after
 * "Teodor Sigaev" - both look like damage to this copy of the file.
 */
static void usage() { char buf[1024]; int i, first=0; *buf = '\0';
/* collect the shortnames of the usable back-ends */
for(i=0; DBDesc[i].rdbms != NULLSQL; i++) { if ( DBDesc[i].init == NULL ) continue; if ( first != 0 ) strcat(buf, ", "); strcat(buf, DBDesc[i].shortname); if ( first == 0 ) strcat(buf, "(default)"); first++; }
/* help for the initialization mode (-i) */
fputs( "Copyright (c) 2006 Teodor Sigaev . All rights reserved.\n" "ftsbench - full text search benchmark for RDBMS\n" "Initialization of DB:\n" "ftsbench -i [-b RDBMS] [-n NUMROW] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] [-q | -s ID] -d DBNAME\n" " -b RDBMS\t- type of DB: ", stdout ); fputs( buf, stdout ); fputs( "\n" " -n NUMROW - number of row in table\n" " -l LEXFILE - file with words and its frequents (default gendata/lex)\n" " -g GAMMAFILE - file with doc's length distribution (default \n" " gendata/gamma-lens)\n" " -l FLAGS - options for db's schema (see below)\n" " -s ID - SQL mode: output is a SQL queries, ID is an identifier for insert\n" " statement\n" " -q - do not print progress message\n", stdout );
/* help for the benchmark mode */
fputs( "Run tests:\n" "ftsbench [-b RDBMS] [-c NCLIENTS] [-n NUMQUERY] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] [-q | -s ID [-r]] -d DBNAME\n" " -b RDBMS\t- type of DB: ", stdout ); fputs( buf, stdout ); fputs( "\n" " -c NCLIENTS - number of clients in parallel\n" " -n NUMQUERY - number of queries per client\n" " -l LEXFILE - file with words and its frequents (default gendata/query-lex)\n" " -g GAMMAFILE - file with doc's length distribution (default \n" " gendata/query-lens)\n" " -l FLAGS - options for db's schema (see below)\n" " -s ID - SQL mode: output is a SQL queries, ID is an 
identifier for insert\n" " statement\n" " -r - row mode: timing every query\n" " -q - do not print progress message\n", stdout );
/* description of the FLAGS keywords and of the -S mode */
fputs( "FLAGS are comma-separate list of:\n" " gin - use GIN index\n" " gist - use GiST index\n" " func - use functional index\n" " and - AND'ing lexemes in query (default)\n" " or - OR'ing lexemes in query\n", stdout ); fputs( "Print SQL-scheme for statistics:\n" "ftsbench -S\n", stdout ); exit(1); }

/*
 * Map a back-end name to its RDBMS identifier.
 *
 * name == NULL: return the identifier of the first DBDesc[] entry that has
 * an init function (the default back-end).
 * Otherwise name is compared case-insensitively with every shortname; on a
 * match the identifier is returned, or fatal() is called when that entry
 * has no init function.
 * When the table is exhausted without a result fatal() is called. fatal()
 * as defined in this file exits the process, so the final return statement
 * is not reached.
 */
static RDBMS getRDBMS(char *name) { int i; for(i=0; DBDesc[i].rdbms != NULLSQL; i++) { if ( name == NULL ) { if ( DBDesc[i].init ) return DBDesc[i].rdbms; } else if ( strcasecmp(name,DBDesc[i].shortname) == 0 ) { if ( DBDesc[i].init == NULL ) fatal("Support of '%s' isn't compiled-in\n", DBDesc[i].longname); return DBDesc[i].rdbms; } } fatal("Can't find a RDBMS\n"); return NULLSQL; }

/*
 * Translate the argument of -f into a bit mask of FLG_* values.
 *
 * Every keyword ("gist", "gin", "func", "and", "or") is searched with
 * strcasestr(), i.e. as a case-insensitive substring anywhere in flg. The
 * string is not split on commas and text that matches no keyword is
 * silently ignored.
 * fatal() is called when both FLG_GIST and FLG_GIN, or both FLG_AND and
 * FLG_OR, end up set. When neither FLG_AND nor FLG_OR is set, FLG_AND is
 * added as the default.
 *
 * NOTE(review): strcasestr() is a non-standard extension (glibc/BSD), not
 * part of ISO C or POSIX.
 */
static int getFLAGS(char *flg) { int flags = 0; if ( strcasestr(flg,"gist") ) flags |= FLG_GIST; if ( strcasestr(flg,"gin") ) flags |= FLG_GIN; if ( strcasestr(flg,"func") ) flags |= FLG_FUNC; if ( strcasestr(flg,"and") ) flags |= FLG_AND; if ( strcasestr(flg,"or") ) flags |= FLG_OR; if ( (flags & FLG_GIST) && (flags & FLG_GIN) ) fatal("GIN and GiST flags are mutually exclusive\n"); if ( (flags & FLG_AND) && (flags & FLG_OR) ) fatal("AND and OR flags are mutually exclusive\n"); else if ( ( flags & ( FLG_AND | FLG_OR ) ) == 0 ) flags |= FLG_AND; return flags; }

/*
 * Allocate an array of n ftsDB pointers and return it to the caller;
 * fatal() is called when the allocation fails.
 *
 * NOTE(review): the loop is damaged in this copy - the text between
 * "for(i=0;i" and "nqueryMutex, NULL);" is missing. Presumably each element
 * is created with DBDesc[rdbms].init(connstr) and its nqueryMutex is set up
 * with pthread_mutex_init(); confirm against the upstream source.
 * NOTE(review): "mwmory" in the error message is a typo for "memory".
 */
static ftsDB ** initConnections(RDBMS rdbms, int n, char *connstr) { ftsDB **dbs = (ftsDB**)malloc(sizeof(ftsDB*) * n); int i; if (!dbs) fatal("Not enough mwmory\n"); for(i=0;inqueryMutex, NULL); } return dbs; }

/*
 * Return end - begin in seconds as a double, combining the whole-second
 * and the microsecond fields of the two timevals.
 */
static double timediff(struct timeval *begin, struct timeval *end) { return ((double)( end->tv_sec - begin->tv_sec )) + ( (double)( end->tv_usec-begin->tv_usec ) ) / 1.0e+6; }

/*
 * Return the number of seconds between *begin and the current time as
 * reported by gettimeofday().
 */
static double elapsedtime(struct timeval *begin) { struct timeval end; gettimeofday(&end,NULL); return timediff(begin,&end); }

/* value given with -s; written as the id column of the generated INSERT statements */
static int Id = 0;
/* non-zero after -s: report() prints nothing and the results are emitted as SQL */
static int sqlMode = 0;
/* non-zero after -r (main() clears it again unless sqlMode is set): execBench() prints one fb_row INSERT per query */
static int rowMode = 0;
/* copy of the -f flags made in main(); execBench() passes it to execQuery() */
static int benchFlags = 0;
/*
 * Copy of the -n value made in main().
 * NOTE(review): no use of it is visible in this copy; presumably it is the
 * bound of the damaged loop header in execBench() - confirm.
 */
static int benchCount = 0;
/* value of -c; main() opens this many connections and execBench() prints it in the fb_row rows */
static int nClients = 0;
/* execBench() broadcasts condFinish (holding mutexFinish) when it is done; main() waits on the pair with a timeout */
static pthread_cond_t condFinish = PTHREAD_COND_INITIALIZER; static pthread_mutex_t mutexFinish = PTHREAD_MUTEX_INITIALIZER;
/* NOTE(review): no use of mutexWordGen is visible in this copy; presumably it serializes the word generator in execBench() - confirm */
static pthread_mutex_t mutexWordGen = PTHREAD_MUTEX_INITIALIZER;

/*
 * Rebuild b as the text of a query: b->strlen is reset to 0, then the
 * strings of the NULL-terminated array words are appended with sb_add(),
 * separated by one space. Every single quote is preceded by an additional
 * single quote (i.e. doubled), which suits the '%s' SQL literal that
 * execBench() prints; no other character is escaped.
 *
 * NOTE(review): when words is empty sb_add() is never called, so whether
 * b->str is a valid empty string afterwards depends on the StringBuf
 * implementation, which is not visible here.
 */
static void printQueryWords(StringBuf *b, char **words) { char **wptr = words, *ptr; b->strlen = 0; while(*wptr) { if ( wptr != words ) sb_add(b, " ", 1); ptr = *wptr; while( *ptr ) { if ( *ptr == '\'' ) sb_add( b, "'", 1 ); sb_add( b, ptr, 1 ); ptr++; } wptr++; } }

/*
 * Main test function, executed in a thread of its own. in is the ftsDB
 * connection the thread works on; the return value is always NULL.
 *
 * In row mode every query is timed and printed as an INSERT INTO fb_row
 * statement whose nres column is the growth of db->nres since the previous
 * query. words is released with free() at the end of each iteration.
 * Before returning, the thread broadcasts condFinish while holding
 * mutexFinish.
 *
 * NOTE(review): the loop header is damaged in this copy - the text between
 * "for(i=0;i" and "execQuery(db, words, benchFlags);" is missing, so the
 * loop bound, the way words is produced and the place where begin is set
 * cannot be confirmed here.
 * NOTE(review): b is not released before returning in the visible code.
 */
static void* execBench(void *in) { ftsDB *db = (ftsDB*)in; int i, nres=0; char **words; struct timeval begin; double elapsed; StringBuf b = {NULL,0,0}; for(i=0;iexecQuery(db, words, benchFlags); if ( rowMode ) { elapsed = elapsedtime(&begin); printQueryWords(&b, words); printf("INSERT INTO fb_row (id, f_and, f_or, nclients, query, nres, elapsed) VALUES (%d, '%c', '%c', %d, '%s', %d, %g);\n", Id, ( benchFlags & FLG_AND ) ? 't' : 'f', ( benchFlags & FLG_OR ) ? 't' : 'f', nClients, b.str, db->nres - nres, elapsed ); nres = db->nres; } free(words); }
/*
 * announce that this thread is exiting
 */
pthread_mutex_lock( &mutexFinish ); pthread_cond_broadcast( &condFinish ); pthread_mutex_unlock( &mutexFinish ); return NULL; }

/*
 * printf-style progress output: formats to stdout and flushes it.
 * Prints nothing when sqlMode is set.
 */
void report(const char *format, ...) { va_list args; if (sqlMode) return; va_start(args, format); vfprintf(stdout, format, args); va_end(args); fflush(stdout); }

/*
 * printf-style error exit: formats the message to stderr, flushes it and
 * terminates the process with exit status 1. Does not return.
 */
void fatal(const char *format, ...) 
{ va_list args; va_start(args, format); vfprintf(stderr, format, args); va_end(args); fflush(stderr); exit(1); }

/* argument of the option currently being processed, set by getopt() */
extern char *optarg;

/*
 * Entry point. Depending on the options the program
 *   -S        prints the statistics schema via printScheme() and returns 0,
 *   -i        fills the database DBNAME with generated rows, or
 *   (default) runs the benchmark with NCLIENTS connections.
 * -h, an unknown option or invalid arguments lead to usage(), which exits
 * with status 1.
 *
 * NOTE(review): -n, -c and -s are converted with atoi(), so non-numeric
 * input is silently read as 0.
 * NOTE(review): n == 0 passes the argument check; the percentage in the
 * benchmark progress output then divides by nClients * n == 0.
 */
int main(int argn, char *argv[]) { int initMode = 0; int n = 0; char *lex = NULL; char *doc = NULL; char *dbname = NULL; RDBMS rdbms = NULLSQL; int flags = 0; int i; int quiet = 0, scheme=0; StringBuf b = {NULL,0,0}; struct timeval begin; double elapsed;
/* parse the command line */
while((i=getopt(argn,argv,"ib:n:l:g:d:c:hf:qSs:r")) != EOF) { switch(i) { case 'i': initMode = 1; break; case 'b': rdbms = getRDBMS(optarg); break; case 'n': n=atoi(optarg); break; case 'c': nClients=atoi(optarg); break; case 'l': lex = strdup(optarg); break; case 'g': doc = strdup(optarg); break; case 'd': dbname = strdup(optarg); break; case 'f': flags = getFLAGS(optarg); break; case 'q': quiet = 1; break; case 'S': scheme = 1; break; case 's': sqlMode = 1; Id = atoi(optarg); break; case 'r': rowMode = 1; break; case 'h': default: usage(); } }
/* -S is handled before any other argument is validated */
if ( scheme ) { printScheme(); return 0; }
/* without -b use the first back-end that has an init function */
if (rdbms == NULLSQL) rdbms = getRDBMS(NULL);
/* -d is mandatory, -n must not be negative and the benchmark needs at least one client */
if ( dbname == NULL || n<0 || (initMode == 0 && nClients<1) ) usage();
/* SQL mode implies quiet; row mode is honoured in SQL mode only */
if ( sqlMode ) quiet = 1; else rowMode = 0;
/* publish the settings read by execBench(); rdbms doubles as the index into DBDesc[] */
benchFlags = flags; benchCount = n; report("Running with '%s' RDBMS\n", DBDesc[ rdbms ].longname);
/*
 * Initialization mode: one connection, default generator files
 * gendata/lex and gendata/gamma-lens, progress printed when the value of
 * time(NULL) has changed since the last progress line, then
 * finishCreateScheme() and - in SQL mode - an fb_create row with the
 * timing.
 *
 * NOTE(review): only the first element of the array returned by
 * initConnections() is kept; the array itself is never freed.
 * NOTE(review): the insert loop is damaged in this copy - the text between
 * "for(i=0;i" and "InsertRow(db, i+1, b.str);" is missing, so the loop
 * bound and the way b is filled cannot be confirmed here.
 */
if ( initMode ) { ftsDB *db = *initConnections(rdbms, 1, dbname); time_t prev; if (!lex) lex = "gendata/lex"; if (!doc) doc = "gendata/gamma-lens"; finnegan_init(lex, doc, sqlMode); gettimeofday(&begin,NULL); db->startCreateScheme(db, flags); prev = time(NULL); for(i=0;iInsertRow(db, i+1, b.str); if ( !quiet && prev!=time(NULL) ) { report("\r%d(%.02f%%) rows inserted", i, (100.0*i)/n); prev = time(NULL); } } report("%s%d(100.00%%) rows inserted. Finalyze insertion... ", (quiet) ? 
"" : "\r", i); db->finishCreateScheme(db); elapsed = elapsedtime(&begin); report("done\nTime: %.02f secs\n", elapsed); if (sqlMode) { printf("INSERT INTO fb_create (id, rdbms, f_gin, f_gist, f_func, rows, elapsed) VALUES (%d, '%s', '%c', '%c', '%c', %d, %g);\n", Id, DBDesc[ rdbms ].shortname, ( flags & FLG_GIN ) ? 't' : 'f', ( flags & FLG_GIST ) ? 't' : 'f', ( flags & FLG_FUNC ) ? 't' : 'f', n, elapsed ); } db->Close(db); }
/*
 * Benchmark mode: nClients connections, default generator files
 * gendata/query-lex and gendata/query-lens.
 *
 * NOTE(review): the begin declared in this branch shadows the one declared
 * at the top of main().
 * NOTE(review): the result of the malloc() for tid is not checked, and
 * neither tid nor dbs is freed in the visible code.
 */
else { ftsDB **dbs = initConnections(rdbms, nClients, dbname); pthread_t *tid = (pthread_t*)malloc( sizeof(pthread_t) * nClients); struct timeval begin; int total=0, nres=0; struct timespec sleepTo = { 0, 0 };
/*
 * start up the generator
 */
if (!lex) lex = "gendata/query-lex"; if (!doc) doc = "gendata/query-lens"; finnegan_init(lex, doc, sqlMode);
/*
 * Initial progress line, start of the measurement. mutexFinish is taken
 * here and held, apart from the time spent inside
 * pthread_cond_timedwait(), until the measurement ends.
 */
if ( !quiet ) report("\r0(0.00%%) queries proceed"); gettimeofday(&begin,NULL); pthread_mutex_lock( &mutexFinish );
/*
 * Progress loop: the per-connection nquery counters are read under their
 * nqueryMutex and summed into total; the loop is left when no connection
 * has fewer than n queries. Otherwise the progress is printed and the
 * thread waits on condFinish with an absolute timeout of time(NULL) + 1
 * seconds (tv_nsec stays 0).
 *
 * NOTE(review): the text between "for(i=0;i" and "nqueryMutex);" is
 * missing in this copy. It presumably contained the thread creation, the
 * header of the enclosing loop (there is a break and a closing brace for
 * it below) and the declarations of res and ntogo - confirm against the
 * upstream source.
 * NOTE(review): pthread_cond_timedwait() reports failure through its
 * return value and does not set errno, so the message should probably use
 * strerror(res) instead of strerror(errno).
 */
for(i=0;inqueryMutex); total +=dbs[i]->nquery; if ( dbs[i]->nquery < n ) ntogo++; pthread_mutex_unlock(&dbs[i]->nqueryMutex); } if ( ntogo == 0 ) break; if ( !quiet ) report("\r%d(%.02f%%) queries proceed", total, (100.0*(float)total)/(nClients * n)); sleepTo.tv_sec = time(NULL) + 1; res = pthread_cond_timedwait( &condFinish, &mutexFinish, &sleepTo ); if ( !(res == ETIMEDOUT || res == 0) ) fatal("pthread_cond_timedwait failed: %s\n", strerror(errno)); } elapsed = elapsedtime(&begin); pthread_mutex_unlock( &mutexFinish );
/*
 * Close every connection, then print the totals; in SQL mode without row
 * mode the run is summarized as one fb_search row.
 *
 * NOTE(review): the text between "for(i=0;i" and "nres;" is missing in
 * this copy; presumably the loop adds up dbs[i]->nres (and may join the
 * threads) - confirm against the upstream source.
 */
for(i=0;inres; dbs[i]->Close(dbs[i]); } report("%s%d(%.02f%%) queries proceed\n", (quiet) ? "" : "\r", total, (100.0*(float)total)/(nClients * n)); report("Total number of result: %d\n", nres); report("Total time: %.02f sec, Queries per second: %.02f\n", elapsed, total/elapsed); if (sqlMode && !rowMode) { printf("INSERT INTO fb_search (id, f_and, f_or, nclients, nqueries, nres, elapsed) VALUES (%d, '%c', '%c', %d, %d, %d, %g);\n", Id, ( flags & FLG_AND ) ? 't' : 'f', ( flags & FLG_OR ) ? 't' : 'f', nClients, n, nres, elapsed ); } } return 0; }