7 * Utility for filtering stop words out of a lex file
/* Size passed to fgets() for the line buffers used when reading the stop list and stdin. */
11 #define TXTBUFLEN 4096
/*
 * Comparator used with qsort() and bsearch() over an array of char *.
 *
 * a and b point at array elements (i.e. they are really char **); the
 * strings those elements refer to are compared case-insensitively with
 * strcasecmp(), whose result is returned unchanged.
 */
20 comparestr(const void *a, const void *b)
22 return strcasecmp(*(char **) a, *(char **) b);
/*
 * Load the stop-word list stored in the text file `filename`.
 *
 * The file is read with fgets() in pieces of at most TXTBUFLEN - 1
 * characters; a strdup()'ed copy of each piece is appended to a growing
 * array of char *, and the array is finally sorted with comparestr() so
 * that searchstoplist() can bsearch() it.
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (local declarations, the statements following each error
 * message, the stores into *s, any fclose()), so the return value and
 * the exact failure behavior are not described here.
 */
26 readstoplist(char *filename, StopList * s)
/* Open the list for reading; on failure print the reason taken from errno. */
34 if ((hin = fopen(filename, "r")) == NULL) {
35 fprintf(stderr,"Can't open %s: %s\n", filename, strerror(errno));
39 while (fgets(buf, TXTBUFLEN, hin))
/*
 * Overwrite the last character of what fgets() returned, which is the
 * '\n' for a normally terminated line.
 * NOTE(review): the overwrite is unconditional, so a final line without
 * a trailing newline, or a line longer than the buffer, loses a real
 * character here - confirm whether the input format rules that out.
 */
41 buf[strlen(buf) - 1] = '\0';
/* Array is full: start with 16 slots, double the capacity afterwards. */
45 if (s->len >= reallen)
49 reallen = (reallen) ? reallen * 2 : 16;
/* The result goes into tmp first, so `stop` is not overwritten directly. */
50 tmp = (char **) realloc((void *) stop, sizeof(char *) * reallen);
/* NOTE(review): this message has no trailing newline. */
53 fprintf(stderr,"Not enough memory");
/* Store a private heap copy of the line, since buf is reused every iteration. */
59 stop[s->len] = strdup(buf);
/* NOTE(review): this message has no trailing newline. */
62 fprintf(stderr,"Not enough memory");
/* Sorting is only done when the list holds at least two entries. */
71 if (s->stop && s->len > 1)
72 qsort(s->stop, s->len, sizeof(char *), comparestr);
/*
 * Test whether `key` is present in the stop list `s`.
 *
 * The visible return statement yields 1 when s->stop is non-NULL,
 * s->len > 0 and bsearch() finds `key` in s->stop using comparestr()
 * (a case-insensitive comparison); otherwise it yields 0.
 */
76 searchstoplist(StopList * s, char *key)
/*
 * NOTE(review): keys of at most 4 characters are special-cased, but the
 * statement controlled by this test is not visible in this excerpt -
 * confirm what is returned for short keys.
 */
78 if ( strlen(key) <=4 )
/* &key is passed because comparestr() dereferences its arguments as char **. */
80 return (s->stop && s->len > 0 && bsearch(&key, s->stop, s->len, sizeof(char *), comparestr)) ? 1 : 0;
/*
 * Program entry point.
 *
 * Loads the stop list named by argv[1], then reads stdin line by line.
 * Each line is parsed as a word followed by an integer occurrence count,
 * and the pair is written to stdout as "word count".
 *
 * NOTE(review): the statements controlled by the if tests below are not
 * visible in this excerpt; presumably they skip the current line, so that
 * unparsable lines, stop words and non-positive counts are not printed -
 * confirm against the full source.
 */
84 main(int argn, char *argv[]) {
/* Usage text: the stop file is a command-line argument, the lex data comes from stdin. */
89 fprintf(stderr,"Usage: %s stopfile < lex\n", argv[0]);
93 readstoplist(argv[1], &sl);
95 while( fgets(buf, TXTBUFLEN, stdin) ) {
/*
 * Both fields must convert for the line to count as parsed.
 * NOTE(review): %s has no field width, so wrd must be at least as large
 * as buf to be safe; its declaration is not visible here - confirm.
 */
99 if ( sscanf( buf, "%s %d", wrd, &occur )!= 2)
/* Tests for a stop-list hit or a count that is zero or negative. */
102 if ( searchstoplist(&sl, wrd) || occur <=0 )
105 printf("%s %d\n", wrd, occur);