2 * Copyright (c) 2006 Teodor Sigaev <teodor@sigaev.ru>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the author nor the names of any co-contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
25 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*
 * Descriptor of one RDBMS backend known to ftsbench.
 * NOTE(review): only the init member is visible in this excerpt; the other
 * members (rdbms, shortname, longname) are known only from their uses.
 */
46 typedef struct RDBMSDesc {
/* Backend entry point: takes a char* (initConnections passes the connection
 * string) and returns an ftsDB pointer. */
50 ftsDB* (*init)(char *);
/*
 * Table of known backends: { id, short name, long name, init function }.
 * The NULLSQL row is a sentinel; every loop over the table stops there.
 * getRDBMS() reports an entry whose init pointer is NULL as not compiled in.
 * NOTE(review): initConnections() indexes this table directly with the RDBMS
 * value, which presumably relies on each id being equal to its row position -
 * confirm against the RDBMS enum.
 */
53 static RDBMSDesc DBDesc[] = {
54 { PostgreSQL, "pgsql", "PostgreSQL", PGInit },
55 { MySQL, "mysql", "MySQL", MYInit },
56 { NULLSQL, NULL, NULL, NULL }
/*
 * Help output: first collect the short names of the backends from DBDesc into
 * buf (one of them gets the "(default)" tag), then print the option summary
 * for initialization mode, benchmark mode, the FLAGS keywords and the
 * statistics scheme.
 * The FLAGS option is spelled "-f FLAGS" here to agree with the synopsis
 * lines and with the option string handed to getopt() in main(); "-l" is the
 * LEXFILE option.
 * NOTE(review): strcat() appends to buf without a bound and the declaration
 * of buf is not visible in this excerpt - confirm it is large enough for all
 * short names.
 */
65 for(i=0; DBDesc[i].rdbms != NULLSQL; i++) {
66 if ( DBDesc[i].init == NULL )
70 strcat(buf, DBDesc[i].shortname);
72 strcat(buf, "(default)");
77 "Copyright (c) 2006 Teodor Sigaev <teodor@sigaev.ru>. All rights reserved.\n"
78 "ftsbench - full text search benchmark for RDBMS\n"
79 "Initialization of DB:\n"
80 "ftsbench -i [-b RDBMS] [-n NUMROW] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] [-q | -s ID] -d DBNAME\n"
81 " -b RDBMS\t- type of DB: ",
87 " -n NUMROW - number of row in table\n"
88 " -l LEXFILE - file with words and its frequents (default gendata/lex)\n"
89 " -g GAMMAFILE - file with doc's length distribution (default \n"
90 " gendata/gamma-lens)\n"
91 " -f FLAGS - options for db's schema (see below)\n"
92 " -s ID - SQL mode: output is a SQL queries, ID is an identifier for insert\n"
94 " -q - do not print progress message\n",
99 "ftsbench [-b RDBMS] [-c NCLIENTS] [-n NUMQUERY] [-l LEXFILE] [-g GAMMAFILE] [-f FLAGS] [-q | -s ID [-r]] -d DBNAME\n"
100 " -b RDBMS\t- type of DB: ",
103 fputs( buf, stdout );
106 " -c NCLIENTS - number of clients in parallel\n"
107 " -n NUMQUERY - number of queries per client\n"
108 " -l LEXFILE - file with words and its frequents (default gendata/query-lex)\n"
109 " -g GAMMAFILE - file with doc's length distribution (default \n"
110 " gendata/query-lens)\n"
111 " -f FLAGS - options for db's schema (see below)\n"
112 " -s ID - SQL mode: output is a SQL queries, ID is an identifier for insert\n"
114 " -r - row mode: timing every query\n"
115 " -q - do not print progress message\n",
119 "FLAGS are comma-separated list of:\n"
120 " gin - use GIN index\n"
121 " gist - use GiST index\n"
122 " func - use functional index\n"
123 " and - AND'ing lexemes in query (default)\n"
124 " or - OR'ing lexemes in query\n",
128 "Print SQL-scheme for statistics:\n"
/*
 * Resolve an RDBMS identifier from its short name.
 *
 * name - short name, compared case-insensitively with DBDesc[].shortname,
 *        or NULL to take the first table entry that has an init function.
 *
 * A name that matches an entry without an init function is reported on
 * stderr as not compiled in; "Can't find a RDBMS" is printed when, apparently,
 * no entry was selected by the loop.
 * NOTE(review): what follows each error message (exit or a return value) is
 * not visible in this excerpt - confirm.
 */
136 getRDBMS(char *name) {
139 for(i=0; DBDesc[i].rdbms != NULLSQL; i++) {
140 if ( name == NULL ) {
/* no name requested: the first usable backend wins */
141 if ( DBDesc[i].init )
142 return DBDesc[i].rdbms;
143 } else if ( strcasecmp(name,DBDesc[i].shortname) == 0 ) {
144 if ( DBDesc[i].init == NULL ) {
145 fprintf(stderr,"Support of '%s' isn't compiled-in\n", DBDesc[i].longname);
148 return DBDesc[i].rdbms;
152 fprintf(stderr,"Can't find a RDBMS\n");
/*
 * Translate the argument of the -f option into FLG_* bits.
 *
 * flg - flags string as given on the command line.
 *
 * Keywords are detected by case-insensitive substring search (strcasestr),
 * not by splitting the string on commas, so a keyword embedded in a longer
 * word matches as well. The combinations GIN+GiST and AND+OR are reported on
 * stderr as mutually exclusive.
 * NOTE(review): the statements that set the individual bits, and the action
 * taken when neither AND nor OR was given, are not visible in this excerpt -
 * presumably AND becomes the default, as the help text says; confirm.
 * NOTE(review): strcasestr() is a GNU/BSD extension rather than ISO C.
 */
159 getFLAGS(char *flg) {
162 if ( strcasestr(flg,"gist") )
164 if ( strcasestr(flg,"gin") )
166 if ( strcasestr(flg,"func") )
168 if ( strcasestr(flg,"and") )
170 if ( strcasestr(flg,"or") )
173 if ( (flags & FLG_GIST) && (flags & FLG_GIN) ) {
174 fprintf(stderr,"GIN and GiST flags are mutually exclusive\n");
177 if ( (flags & FLG_AND) && (flags & FLG_OR) ) {
178 fprintf(stderr,"AND and OR flags are mutually exclusive\n");
180 } else if ( ( flags & ( FLG_AND | FLG_OR ) ) == 0 )
/*
 * Open n connections to the selected backend.
 *
 * rdbms   - backend id, used directly as an index into DBDesc[].
 * n       - number of connections, i.e. number of slots in the array.
 * connstr - handed unchanged to the backend's init function.
 *
 * Allocates an array of n ftsDB pointers, fills every slot with the result of
 * DBDesc[rdbms].init(connstr) and initializes that handle's nqueryMutex.
 * Callers in main() use the result as an ftsDB** array.
 * NOTE(review): no check of dbs[i] against NULL is visible between init() and
 * pthread_mutex_init() - confirm that init() never returns NULL.
 */
187 initConnections(RDBMS rdbms, int n, char *connstr) {
188 ftsDB **dbs = (ftsDB**)malloc(sizeof(ftsDB*) * n);
192 fprintf(stderr,"Not enough memory\n");
197 dbs[i] = DBDesc[rdbms].init(connstr);
198 pthread_mutex_init(&dbs[i]->nqueryMutex, NULL);
/*
 * Difference end - begin expressed in seconds.
 *
 * begin, end - timestamps to compare.
 *
 * The whole-second and microsecond parts are subtracted separately; the
 * microsecond difference is scaled by 1e-6 and added to the seconds.
 */
205 timediff(struct timeval *begin, struct timeval *end) {
206 return ((double)( end->tv_sec - begin->tv_sec )) + ( (double)( end->tv_usec-begin->tv_usec ) ) / 1.0e+6;
/*
 * Seconds passed between *begin and the moment of the call.
 *
 * begin - earlier timestamp, as filled in by gettimeofday().
 *
 * Takes the current time with gettimeofday() and returns timediff(begin, now).
 */
210 elapsedtime(struct timeval *begin) {
212 gettimeofday(&end,NULL);
213 return timediff(begin,&end);
/* set by the -s option: results are emitted as SQL INSERT statements */
217 static int sqlMode = 0;
/* set by the -r option: every single query is timed */
218 static int rowMode = 0;
/* flags handed to db->execQuery() by the worker threads */
219 static int benchFlags = 0;
/* number of loop iterations (queries) each worker thread performs */
220 static int benchCount = 0;
/* number of parallel clients, set by the -c option */
221 static int nClients = 0;
/* workers broadcast on condFinish (under mutexFinish) when they are done;
 * main() waits on it with a timeout */
222 static pthread_cond_t condFinish = PTHREAD_COND_INITIALIZER;
223 static pthread_mutex_t mutexFinish = PTHREAD_MUTEX_INITIALIZER;
/* serializes calls to generate_querywords() made by the worker threads */
224 static pthread_mutex_t mutexWordGen = PTHREAD_MUTEX_INITIALIZER;
/*
 * Helper called by execBench() with a StringBuf and the generated word list
 * before the per-query INSERT statement is printed.
 *
 * b     - string buffer supplied by the caller.
 * words - array of query words; wptr starts at its first element.
 *
 * NOTE(review): the body is not visible in this excerpt; presumably it writes
 * the words into b - confirm.
 */
227 printQueryWords(StringBuf *b, char **words) {
228 char **wptr = words, *ptr;
248 * main test function, executed in thread
/*
 * Thread start routine of one benchmark client.
 *
 * in - the client's ftsDB handle, passed through pthread_create() as void*.
 *
 * Runs benchCount iterations. Each iteration obtains a word list from
 * generate_querywords() while holding mutexWordGen and executes it through
 * db->execQuery() with benchFlags. The visible timing code takes a timestamp
 * before the query, measures the elapsed time after it and prints an
 * "INSERT INTO fb_row" statement.
 * When the loop is over the thread broadcasts condFinish while holding
 * mutexFinish, which wakes the waiting main thread.
 * NOTE(review): the conditions guarding the timing and the INSERT output
 * (presumably rowMode / sqlMode) and the return value are not visible in this
 * excerpt - confirm.
 */
251 execBench(void *in) {
252 ftsDB *db = (ftsDB*)in;
255 struct timeval begin;
257 StringBuf b = {NULL,0,0};
259 for(i=0;i<benchCount;i++) {
261 * generate_querywords() isn't a thread safe
263 pthread_mutex_lock( &mutexWordGen );
264 words = generate_querywords();
265 pthread_mutex_unlock( &mutexWordGen );
268 gettimeofday(&begin,NULL);
270 db->execQuery(db, words, benchFlags);
273 elapsed = elapsedtime(&begin);
274 printQueryWords(&b, words);
276 printf("INSERT INTO fb_row (id, f_and, f_or, nclients, query, nres, elapsed) VALUES (%d, '%c', '%c', %d, '%s', %d, %g);\n",
278 ( benchFlags & FLG_AND ) ? 't' : 'f',
279 ( benchFlags & FLG_OR ) ? 't' : 'f',
291 * send message about exitting
293 pthread_mutex_lock( &mutexFinish );
294 pthread_cond_broadcast( &condFinish );
295 pthread_mutex_unlock( &mutexFinish );
/*
 * printf-style message output to stdout.
 *
 * format - printf format string, followed by its arguments.
 *
 * The variadic arguments are forwarded to vfprintf().
 * NOTE(review): the statements between the function header and va_start(),
 * and the matching va_end(), are not visible in this excerpt; any condition
 * that suppresses the output (for example in SQL mode) cannot be confirmed
 * from here.
 */
301 report(const char *format, ...) {
307 va_start(args, format);
308 vfprintf(stdout, format, args);
/*
 * Program entry point.
 *
 * argn, argv - command line.
 *
 * Options accepted by getopt(): -i (initialization mode), -b RDBMS, -n count,
 * -c clients, -l lexfile, -g gammafile, -d dbname, -f flags, -q (quiet),
 * -S (print statistics scheme), -s ID (SQL mode), -r (row mode), -h.
 * Without -b the default backend from getRDBMS(NULL) is used.
 *
 * Two modes are visible below:
 *  - initialization: one connection, schema creation, row insertion with a
 *    progress message about once per second, then an optional
 *    "INSERT INTO fb_create" statistics line;
 *  - benchmark: nClients connections and threads running execBench(), a
 *    progress loop waiting on condFinish, then totals and an optional
 *    "INSERT INTO fb_search" statistics line.
 *
 * pthread_create() and pthread_cond_timedwait() return the error number and
 * do not set errno, so their failure messages are built from the returned
 * value.
 * NOTE(review): several statements (usage/exit paths, loop headers, the
 * if/else selecting the mode) are not visible in this excerpt.
 */
317 main(int argn, char *argv[]) {
323 RDBMS rdbms = NULLSQL;
326 int quiet = 0, scheme=0;
327 StringBuf b = {NULL,0,0};
328 struct timeval begin;
331 while((i=getopt(argn,argv,"ib:n:l:g:d:c:hf:qSs:r")) != EOF) {
333 case 'i': initMode = 1; break;
334 case 'b': rdbms = getRDBMS(optarg); break;
335 case 'n': n=atoi(optarg); break;
336 case 'c': nClients=atoi(optarg); break;
337 case 'l': lex = strdup(optarg); break;
338 case 'g': doc = strdup(optarg); break;
339 case 'd': dbname = strdup(optarg); break;
340 case 'f': flags = getFLAGS(optarg); break;
341 case 'q': quiet = 1; break;
342 case 'S': scheme = 1; break;
343 case 's': sqlMode = 1; Id = atoi(optarg); break;
344 case 'r': rowMode = 1; break;
/* no -b given: fall back to the first compiled-in backend */
356 if (rdbms == NULLSQL)
357 rdbms = getRDBMS(NULL);
/* a database name is mandatory; benchmark mode also needs at least one client */
359 if ( dbname == NULL || n<0 || (initMode == 0 && nClients<1) )
370 report("Running with '%s' RDBMS\n", DBDesc[ rdbms ].longname);
/* initialization mode: a single connection is enough */
373 ftsDB *db = *initConnections(rdbms, 1, dbname);
376 if (!lex) lex = "gendata/lex";
377 if (!doc) doc = "gendata/gamma-lens";
378 finnegan_init(lex, doc, sqlMode);
380 gettimeofday(&begin,NULL);
382 db->startCreateScheme(db, flags);
386 db->InsertRow(db, i+1, b.str);
/* refresh the progress line only when the wall-clock second has changed */
387 if ( !quiet && prev!=time(NULL) ) {
388 report("\r%d(%.02f%%) rows inserted", i, (100.0*i)/n);
393 report("%s%d(100.00%%) rows inserted. Finalize insertion... ",
394 (quiet) ? "" : "\r", i);
395 db->finishCreateScheme(db);
396 elapsed = elapsedtime(&begin);
398 report("done\nTime: %.02f secs\n", elapsed);
400 printf("INSERT INTO fb_create (id, rdbms, f_gin, f_gist, f_func, rows, elapsed) VALUES (%d, '%s', '%c', '%c', '%c', %d, %g);\n",
402 DBDesc[ rdbms ].shortname,
403 ( flags & FLG_GIN ) ? 't' : 'f',
404 ( flags & FLG_GIST ) ? 't' : 'f',
405 ( flags & FLG_FUNC ) ? 't' : 'f',
/* benchmark mode: one connection and one thread per client */
412 ftsDB **dbs = initConnections(rdbms, nClients, dbname);
413 pthread_t *tid = (pthread_t*)malloc( sizeof(pthread_t) * nClients);
/* this begin hides the function-level variable of the same name */
414 struct timeval begin;
416 struct timespec sleepTo = { 0, 0 };
421 if (!lex) lex = "gendata/query-lex";
422 if (!doc) doc = "gendata/query-lens";
423 finnegan_init(lex, doc, sqlMode);
429 report("\r0(0.00%%) queries proceed");
431 gettimeofday(&begin,NULL);
/* mutexFinish is taken before any worker starts; workers need it to
 * broadcast condFinish, the timed wait below releases it while sleeping */
433 pthread_mutex_lock( &mutexFinish );
434 for(i=0;i<nClients;i++) {
/* pthread_create() reports failure through its return value; it is stored
 * in errno so that the message below describes the real error */
435 if ( (errno = pthread_create(tid+i, NULL, execBench, (void*)dbs[i])) != 0 ) {
436 fprintf(stderr,"pthread_create failed: %s\n", strerror(errno));
/* sum the per-client query counters, each read under its own mutex */
445 for(i=0;i<nClients;i++) {
446 pthread_mutex_lock(&dbs[i]->nqueryMutex);
447 total +=dbs[i]->nquery;
448 if ( dbs[i]->nquery < n )
450 pthread_mutex_unlock(&dbs[i]->nqueryMutex);
457 report("\r%d(%.02f%%) queries proceed", total, (100.0*(float)total)/(nClients * n));
/* absolute deadline one second from now; tv_nsec keeps its initial 0 */
459 sleepTo.tv_sec = time(NULL) + 1;
460 res = pthread_cond_timedwait( &condFinish, &mutexFinish, &sleepTo );
462 if ( !(res == ETIMEDOUT || res == 0) ) {
/* res is the error number itself; errno is not set by this call */
463 fprintf(stderr,"pthread_cond_timedwait failed: %s\n", strerror(res));
467 elapsed = elapsedtime(&begin);
468 pthread_mutex_unlock( &mutexFinish );
/* collect the workers, add up their result counts and close the connections */
470 for(i=0;i<nClients;i++) {
471 pthread_join(tid[i], NULL);
472 nres += dbs[i]->nres;
473 dbs[i]->Close(dbs[i]);
476 report("%s%d(%.02f%%) queries proceed\n",
477 (quiet) ? "" : "\r", total, (100.0*(float)total)/(nClients * n));
478 report("Total number of result: %d\n", nres);
479 report("Total time: %.02f sec, Queries per second: %.02f\n", elapsed, total/elapsed);
480 if (sqlMode && !rowMode) {
481 printf("INSERT INTO fb_search (id, f_and, f_or, nclients, nqueries, nres, elapsed) VALUES (%d, '%c', '%c', %d, %d, %d, %g);\n",
483 ( flags & FLG_AND ) ? 't' : 'f',
484 ( flags & FLG_OR ) ? 't' : 'f',