add .gitignore
[ftsbench.git] / ftsbench.c
index 4eb5c4b..3c8b7f6 100644 (file)
@@ -33,6 +33,7 @@
 #include <string.h>
 #include <errno.h>
 #include <sys/time.h>
+#include <stdarg.h>
 
 #include "ftsbench.h"
 
@@ -76,7 +77,7 @@ usage() {
                "Copyright (c) 2006 Teodor Sigaev <teodor@sigaev.ru>. All rights reserved.\n"
                "ftsbench - full text search benchmark for RDBMS\n"
                "Initialization of DB:\n"
-               "ftsbench -i [-b RDBMS] [-n NUMROW] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] [-q] -d DBNAME\n"
+               "ftsbench -i [-b RDBMS] [-n NUMROW] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] [-q | -s ID] -d DBNAME\n"
                "  -b RDBMS\t- type of DB: ",
                stdout
        );
@@ -85,14 +86,17 @@ usage() {
                "\n"
                "  -n NUMROW - number of row in table\n"
                "  -l LEXFILE - file with words and its frequents (default gendata/lex)\n"
-               "  -g GAMMAFILE - file with doc's length distribution (default gendata/gamma-lens)\n"
-               "  -l FLGAS - options for db's schema (see below)\n"
+               "  -g GAMMAFILE - file with doc's length distribution (default \n"
+               "                 gendata/gamma-lens)\n"
+               "  -l FLAGS - options for db's schema (see below)\n"
+               "  -s ID - SQL mode: output is a SQL queries, ID is an identifier for insert\n"
+               "          statement\n"
                "  -q - do not print progress message\n",
                stdout
        );
        fputs(
                "Run tests:\n"
-               "ftsbench [-b RDBMS] [-c NCLIENTS] [-n NUMQUERY] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] [-q] -d DBNAME\n"
+               "ftsbench [-b RDBMS] [-c NCLIENTS] [-n NUMQUERY] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] [-q | -s ID [-r]] -d DBNAME\n"
                "  -b RDBMS\t- type of DB: ",
                stdout
        );
@@ -102,8 +106,12 @@ usage() {
                "  -c NCLIENTS - number of clients in parallel\n"
                "  -n NUMQUERY - number of queries per client\n"
                "  -l LEXFILE - file with words and its frequents (default gendata/query-lex)\n"
-               "  -g GAMMAFILE - file with doc's length distribution (default gendata/query-lens)\n"
-               "  -l FLGAS - options for db's schema (see below)\n"
+               "  -g GAMMAFILE - file with doc's length distribution (default \n"
+               "                 gendata/query-lens)\n"
+               "  -l FLAGS - options for db's schema (see below)\n"
+               "  -s ID - SQL mode: output is a SQL queries, ID is an identifier for insert\n"
+               "          statement\n"
+               "  -r - row mode: timing every query\n"
                "  -q - do not print progress message\n",
                stdout
        );
@@ -133,16 +141,14 @@ getRDBMS(char *name) {
                        if ( DBDesc[i].init )
                                return DBDesc[i].rdbms; 
                } else if ( strcasecmp(name,DBDesc[i].shortname) == 0 ) {
-                       if ( DBDesc[i].init == NULL ) {
-                               fprintf(stderr,"Support of '%s' isn't compiled-in\n", DBDesc[i].longname);
-                               exit(1);
-                       }
+                       if ( DBDesc[i].init == NULL ) 
+                               fatal("Support of '%s' isn't compiled-in\n", DBDesc[i].longname);
+
                        return DBDesc[i].rdbms;
                }
        }
 
-       fprintf(stderr,"Can't find a RDBMS\n");
-       exit(1);
+       fatal("Can't find a RDBMS\n");
        
        return NULLSQL;
 }
@@ -162,14 +168,12 @@ getFLAGS(char *flg) {
        if ( strcasestr(flg,"or") )
                flags |= FLG_OR;
 
-       if ( (flags & FLG_GIST) && (flags & FLG_GIN) ) {
-               fprintf(stderr,"GIN and GiST flags are mutually exclusive\n");
-               exit(1);
-       }
-       if ( (flags & FLG_AND) && (flags & FLG_OR) ) {
-               fprintf(stderr,"AND and OR flags are mutually exclusive\n");
-               exit(1);
-       }
+       if ( (flags & FLG_GIST) && (flags & FLG_GIN) ) 
+               fatal("GIN and GiST flags are mutually exclusive\n");
+       if ( (flags & FLG_AND) && (flags & FLG_OR) ) 
+               fatal("AND and OR flags are mutually exclusive\n");
+       else if ( ( flags & ( FLG_AND | FLG_OR ) ) == 0 )
+               flags |= FLG_AND;
 
        return flags;
 }
@@ -179,10 +183,8 @@ initConnections(RDBMS rdbms, int n, char *connstr) {
        ftsDB   **dbs = (ftsDB**)malloc(sizeof(ftsDB*) * n);
        int i;
 
-       if (!dbs) {
-               fprintf(stderr,"Not enough mwmory\n");
-               exit(1);
-       }
+       if (!dbs) 
+               fatal("Not enough mwmory\n");
 
        for(i=0;i<n;i++) { 
                dbs[i] = DBDesc[rdbms].init(connstr);
@@ -204,17 +206,48 @@ elapsedtime(struct timeval *begin) {
        return timediff(begin,&end);
 }
 
+static int Id = 0;             /* identifier passed with -s; written into the generated INSERT statements */
+static int sqlMode = 0;        /* set by -s; report() prints nothing while it is on */
+static int rowMode = 0;        /* set by -r; main() clears it again unless sqlMode is on */
 static int benchFlags  = 0;
 static int benchCount  = 0;
+static int nClients  = 1;      /* -c is optional: default to one client, as the former local nclients did, so main() does not reject runs without -c */
 static pthread_cond_t condFinish = PTHREAD_COND_INITIALIZER;
 static pthread_mutex_t mutexFinish = PTHREAD_MUTEX_INITIALIZER;
 static pthread_mutex_t mutexWordGen = PTHREAD_MUTEX_INITIALIZER;
 
+static void    /* Overwrite b with the words of a NULL-terminated list, separated by single spaces */
+printQueryWords(StringBuf *b, char **words) {
+       char **wptr = words, *ptr;
+       /* drop the previous contents of b; presumably sb_add() appends at b->strlen - TODO confirm */
+       b->strlen = 0;
+       while(*wptr) {
+               if ( wptr != words )    /* separator before every word except the first */
+                       sb_add(b, " ", 1);
+
+               ptr = *wptr;
+               while( *ptr ) {
+                       if ( *ptr == '\'' )     /* emit an extra quote first, so each single quote comes out doubled */
+                               sb_add( b, "'", 1 );
+                       sb_add( b, ptr, 1 );
+                       ptr++;
+               }
+
+               wptr++;
+       }
+}
+
+/*
+ * main test function, executed in thread
+ */
 static void*
 execBench(void *in) {
        ftsDB *db = (ftsDB*)in;
-       int i;
+       int i, nres=0;
        char **words;
+       struct  timeval begin;
+       double  elapsed;
+       StringBuf       b = {NULL,0,0};
 
        for(i=0;i<benchCount;i++) {
                /*
@@ -223,8 +256,27 @@ execBench(void *in) {
                pthread_mutex_lock( &mutexWordGen );
                words = generate_querywords();
                pthread_mutex_unlock( &mutexWordGen );
+               
+               if ( rowMode ) 
+                       gettimeofday(&begin,NULL);
 
                db->execQuery(db, words, benchFlags);
+
+               if ( rowMode ) {
+                       elapsed = elapsedtime(&begin);
+                       printQueryWords(&b, words);
+
+                       printf("INSERT INTO fb_row (id, f_and, f_or, nclients, query, nres, elapsed) VALUES (%d, '%c', '%c', %d, '%s', %d, %g);\n",
+                                       Id,
+                                       ( benchFlags & FLG_AND ) ? 't' : 'f',
+                                       ( benchFlags & FLG_OR ) ? 't' : 'f',
+                                       nClients,
+                                       b.str,
+                                       db->nres - nres,
+                                       elapsed
+                       );
+                       nres = db->nres;
+               }
                free(words);
        }
 
@@ -238,33 +290,65 @@ execBench(void *in) {
        return NULL;    
 }
 
+void   /* printf-style status output to stdout; does nothing when sqlMode is set */
+report(const char *format, ...) {
+       va_list args;
+       /* SQL mode: keep human-readable messages out of the output */
+       if (sqlMode)
+               return;
+
+       va_start(args, format);
+       vfprintf(stdout, format, args);
+       va_end(args);
+       /* flush on every call: several callers print progress text with no trailing newline */
+       fflush(stdout);
+}
+
+void   /* printf-style error message to stderr, then exit(1); not silenced by sqlMode */
+fatal(const char *format, ...) {
+       va_list args;
+
+       va_start(args, format);
+       vfprintf(stderr, format, args);
+       va_end(args);
+
+       fflush(stderr);
+
+       exit(1);        /* call sites in this patch drop their own exit(1) and rely on this one */
+}
+
+
 extern char *optarg;
 
 int
 main(int argn, char *argv[]) {
        int             initMode = 0;
-       int             n = 0, nclients = 1;
+       int             n = 0;
        char    *lex = NULL;
        char    *doc = NULL;
        char    *dbname = NULL;
        RDBMS   rdbms = NULLSQL;
        int             flags = 0;
-       int i;
+       int     i;
        int             quiet = 0, scheme=0;
        StringBuf       b = {NULL,0,0};
+       struct  timeval begin;
+       double  elapsed;
 
-       while((i=getopt(argn,argv,"ib:n:l:g:d:c:hf:qS")) != EOF) {
+       while((i=getopt(argn,argv,"ib:n:l:g:d:c:hf:qSs:r")) != EOF) {
                switch(i) {
                        case 'i': initMode = 1; break;
                        case 'b': rdbms = getRDBMS(optarg); break;
                        case 'n': n=atoi(optarg); break;
-                       case 'c': nclients=atoi(optarg); break;
+                       case 'c': nClients=atoi(optarg); break;
                        case 'l': lex = strdup(optarg); break;
                        case 'g': doc = strdup(optarg); break;
                        case 'd': dbname = strdup(optarg); break;
                        case 'f': flags = getFLAGS(optarg); break;
                        case 'q': quiet = 1; break;
                        case 'S': scheme = 1; break;
+                       case 's': sqlMode = 1; Id = atoi(optarg); break;
+                       case 'r': rowMode = 1; break;
                        case 'h':
                        default:
                                usage();
@@ -279,10 +363,18 @@ main(int argn, char *argv[]) {
        if (rdbms == NULLSQL)
                rdbms = getRDBMS(NULL);
 
-       if ( dbname == NULL || n<0 || nclients<1 )
+       if ( dbname == NULL || n<0 || (initMode == 0 && nClients<1) ) 
                usage();
 
-       printf("Running with '%s' RDBMS\n", DBDesc[ rdbms ].longname); 
+       if ( sqlMode ) 
+               quiet = 1;
+       else
+               rowMode = 0;
+
+       benchFlags = flags;
+       benchCount = n;
+
+       report("Running with '%s' RDBMS\n", DBDesc[ rdbms ].longname); 
 
        if ( initMode ) {
                ftsDB   *db = *initConnections(rdbms, 1, dbname);
@@ -290,7 +382,9 @@ main(int argn, char *argv[]) {
 
                if (!lex)  lex = "gendata/lex";
                if (!doc)  doc = "gendata/gamma-lens";
-               finnegan_init(lex, doc);
+               finnegan_init(lex, doc, sqlMode);
+
+               gettimeofday(&begin,NULL);
 
                db->startCreateScheme(db, flags);
                prev = time(NULL);
@@ -298,22 +392,33 @@ main(int argn, char *argv[]) {
                        generate_doc(&b);
                        db->InsertRow(db, i+1, b.str);
                        if ( !quiet && prev!=time(NULL) ) {
-                               printf("\r%d(%.02f%%) rows inserted", i, (100.0*i)/n);
-                               fflush(stdout);
+                               report("\r%d(%.02f%%) rows inserted", i, (100.0*i)/n);
                                prev = time(NULL);
                        }
                }
-               printf("%s%d(100.00%%) rows inserted. Finalyze insertion... ", 
+
+               report("%s%d(100.00%%) rows inserted. Finalyze insertion... ", 
                        (quiet) ? "" : "\r", i);
-               fflush(stdout);
                db->finishCreateScheme(db);
-               printf("done\n");
+               elapsed = elapsedtime(&begin);
+
+               report("done\nTime: %.02f secs\n", elapsed);
+               if (sqlMode) {
+                       printf("INSERT INTO fb_create (id, rdbms, f_gin, f_gist, f_func, rows, elapsed) VALUES (%d, '%s', '%c', '%c', '%c', %d, %g);\n",
+                                       Id,
+                                       DBDesc[ rdbms ].shortname,
+                                       ( flags & FLG_GIN ) ? 't' : 'f',
+                                       ( flags & FLG_GIST ) ? 't' : 'f',
+                                       ( flags & FLG_FUNC ) ? 't' : 'f',
+                                       n,
+                                       elapsed
+                       );
+               }
                db->Close(db);
        } else {
-               ftsDB   **dbs = initConnections(rdbms, nclients, dbname);
-               pthread_t       *tid = (pthread_t*)malloc( sizeof(pthread_t) * nclients);
+               ftsDB   **dbs = initConnections(rdbms, nClients, dbname);
+               pthread_t       *tid = (pthread_t*)malloc( sizeof(pthread_t) * nClients);
                struct  timeval begin;
-               double  elapsed;
                int     total=0, nres=0;
                struct      timespec  sleepTo = { 0, 0 };
 
@@ -322,33 +427,27 @@ main(int argn, char *argv[]) {
                 */
                if (!lex)  lex = "gendata/query-lex";
                if (!doc)  doc = "gendata/query-lens";
-               finnegan_init(lex, doc);
+               finnegan_init(lex, doc, sqlMode);
 
                /*
                 * Initial query
                 */
-               if ( !quiet ) {
-                       printf("\r0(0.00%%) queries proceed");
-                       fflush(stdout);
-               }
-               benchFlags = flags;
-               benchCount = n;
+               if ( !quiet ) 
+                       report("\r0(0.00%%) queries proceed");
 
                gettimeofday(&begin,NULL);
 
        pthread_mutex_lock( &mutexFinish );
-               for(i=0;i<nclients;i++) {
-                       if ( pthread_create(tid+i, NULL, execBench, (void*)dbs[i]) != 0 ) {
-                               fprintf(stderr,"pthread_create failed: %s\n", strerror(errno));
-                               exit(1);
-                       }
+               for(i=0;i<nClients;i++) {
+                       if ( pthread_create(tid+i, NULL, execBench, (void*)dbs[i]) != 0 ) 
+                               fatal("pthread_create failed: %s\n", strerror(errno));
                }
 
                for(;;) {
                        int res, ntogo = 0;
 
                        total = 0;
-                       for(i=0;i<nclients;i++) {
+                       for(i=0;i<nClients;i++) {
                                pthread_mutex_lock(&dbs[i]->nqueryMutex);
                                total +=dbs[i]->nquery;
                                if ( dbs[i]->nquery < n )
@@ -359,33 +458,39 @@ main(int argn, char *argv[]) {
                        if ( ntogo == 0 ) 
                                break;
 
-                       if ( !quiet ) {
-                               printf("\r%d(%.02f%%) queries proceed", total, (100.0*(float)total)/(nclients * n));
-                               fflush(stdout);
-                       }
+                       if ( !quiet ) 
+                               report("\r%d(%.02f%%) queries proceed", total, (100.0*(float)total)/(nClients * n));
                        
                        sleepTo.tv_sec = time(NULL) + 1;
                        res = pthread_cond_timedwait( &condFinish, &mutexFinish, &sleepTo );
 
-                       if ( !(res == ETIMEDOUT || res == 0) ) {
-                               fprintf(stderr,"pthread_cond_timedwait failed: %s\n", strerror(errno));
-                               exit(1);
-                       }
+                       if ( !(res == ETIMEDOUT || res == 0) ) 
+                               fatal("pthread_cond_timedwait failed: %s\n", strerror(errno));
                }
                elapsed = elapsedtime(&begin);
                pthread_mutex_unlock( &mutexFinish );
 
-               for(i=0;i<nclients;i++) {
+               for(i=0;i<nClients;i++) {
                        pthread_join(tid[i], NULL);
                        nres += dbs[i]->nres;
                        dbs[i]->Close(dbs[i]);
                }
 
-               printf("%s%d(%.02f%%) queries proceed\n", 
-                       (quiet) ? "" : "\r", total, (100.0*(float)total)/(nclients * n));
-               printf("Total number of result: %d\n", nres);
-               printf("Total time: %.02f sec, Queries per second: %.02f\n", elapsed, total/elapsed);
-               fflush(stdout);
+               report("%s%d(%.02f%%) queries proceed\n", 
+                       (quiet) ? "" : "\r", total, (100.0*(float)total)/(nClients * n));
+               report("Total number of result: %d\n", nres);
+               report("Total time: %.02f sec, Queries per second: %.02f\n", elapsed, total/elapsed);
+               if (sqlMode && !rowMode) {
+                       printf("INSERT INTO fb_search (id, f_and, f_or, nclients, nqueries, nres, elapsed) VALUES (%d, '%c', '%c', %d, %d, %d, %g);\n",
+                                       Id,
+                                       ( flags & FLG_AND ) ? 't' : 'f',
+                                       ( flags & FLG_OR ) ? 't' : 'f',
+                                       nClients,
+                                       n,
+                                       nres,
+                                       elapsed
+                       );
+               }
        }
 
        return 0;