/*
 * Copyright (c) 2004 Teodor Sigaev
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * clrlibru: filter that strips HTML markup from its input.
 *
 * Tags are replaced by spaces, the elements named in RemoveTag are dropped
 * together with their content, and a line break is kept only when the next
 * line is empty or starts with at least NUMSPACE spaces; any other line
 * break is turned into a single space.
 */

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Print the banner and the command-line synopsis, then exit with status 0. */
static void
usage(void)
{
	printf("Clear text from lib.ru for Pocket PC, Version 0.2.\n");
	printf("Copyright (c) 2004 Teodor Sigaev \n");
	printf(" All rights reserved.\n");
	printf("Usage:\n clrlibru [-i INPUTFILE] [-o OUTPUT] [-l NUMSPACE]\n");
	exit(0);
}

/* Lower-case names of the elements whose whole content is discarded. */
char *RemoveTag[] = {
	"select",
	"head",
	"div",
	"a",
	"form",
	NULL
};

/*
 * Return 1 when the first `len` characters of `tag` spell one of the names
 * in RemoveTag, 0 otherwise.
 *
 * The comparison is length-based, so `tag` does not have to be
 * NUL-terminated and is never written to (a terminator write at tag[len]
 * would run past a completely filled buffer).
 */
static int
is_rtag(const char *tag, int len)
{
	char **name;

	if (len < 0)
		return 0;

	for (name = RemoveTag; *name != NULL; name++) {
		if (strlen(*name) == (size_t)len &&
		    memcmp(tag, *name, (size_t)len) == 0)
			return 1;
	}
	return 0;
}

/* Value of the option most recently returned by mgetopt(), or NULL. */
static char *optarg = NULL;
/* Index of the next argv element mgetopt() will examine. */
static int current = 1;

/*
 * Minimal getopt() replacement.
 *
 * argn, argv - the arguments received by main().
 * option     - accepted option letters; a letter followed by ':' takes a
 *              value, which is read from the next argv element into optarg.
 *
 * Options may be introduced by '-' or '/'. Returns the option letter, or -1
 * once all arguments are consumed. An unknown option, an argument that is
 * not an option, or a missing value prints a message and exits with
 * status 1.
 */
int
mgetopt(int argn, char *argv[], char *option)
{
	const char *arg;
	const char *spec;
	char key;

	if (current >= argn)
		return -1;

	arg = argv[current];

	/* Check the prefix first so an empty argument is never read past. */
	if (*arg != '-' && *arg != '/') {
		printf("Unknown option: %s\n", arg);
		exit(1);
	}

	key = arg[1];

	/*
	 * A bare "-" gives key == '\0', which strchr() would "find" at the
	 * terminator of `option`; ':' is the value marker, not an option.
	 */
	spec = (key != '\0' && key != ':') ? strchr(option, key) : NULL;
	if (spec == NULL) {
		printf("Unknown option: %s\n", arg);
		exit(1);
	}

	current++;
	if (spec[1] == ':') {
		if (current >= argn) {
			printf("No value for -%c\n", key);
			exit(1);
		}
		optarg = argv[current];
		current++;
	} else {
		optarg = NULL;
	}

	return (int)key;
}

/* States of the filter in main(). */
enum {
	INTXT = 0,		/* plain text outside any tag */
	INTAG = 1,		/* inside a tag, skipping up to '>' */
	FINDEND = 2,		/* after '\n', counting leading spaces */
	INDROPTAG = 3,		/* inside a dropped element, looking for '<' */
	INDROPINTAG = 4,	/* just after '<' inside a dropped element */
	INHEADTAG = 5,		/* just after '<', collecting the tag name */
	INDROPINCLSTAG = 6	/* after "</", matching the dropped tag name */
};

/*
 * Parse the value of -l as a non-negative decimal int.
 * Prints a message and exits with status 1 on malformed or out-of-range
 * input.
 */
static int
parse_spacelen(const char *text)
{
	char *end;
	long value;

	errno = 0;
	value = strtol(text, &end, 10);
	if (end == text || *end != '\0' || errno == ERANGE || value > INT_MAX) {
		printf("Invalid value for -l: %s\n", text);
		exit(1);
	}
	if (value < 0) {
		printf("-l should be >= 0\n");
		exit(1);
	}
	return (int)value;
}

/* Write a line break followed by `nspaces` spaces to `out`. */
static void
emit_break(FILE *out, int nspaces)
{
	fputc('\n', out);
	while (nspaces-- > 0)
		fputc(' ', out);
}

/*
 * Entry point: parse -i/-o/-l, then run the input through the filter.
 * Returns 0 on success and 1 when the output could not be written.
 */
int
main(int argn, char *argv[])
{
	int ch;
	FILE *in = stdin, *out = stdout;
	int state = INTXT;
	char buf[8192];		/* lower-cased name of the current tag */
	int lenbuf = 0;		/* number of characters stored in buf */
	int closelen = 0;	/* characters of a closing tag matched so far */
	int pending = 0;	/* spaces seen since the held-back '\n' */
	int spacelen = 4;
	int status = 0;

	while ((ch = mgetopt(argn, argv, "l:i:o:h?")) != -1) {
		switch (ch) {
		case 'i':
			if ((in = fopen(optarg, "r")) == NULL) {
				printf("Can't open file %s\n", optarg);
				exit(1);
			}
			break;
		case 'o':
			if ((out = fopen(optarg, "w")) == NULL) {
				printf("Can't open file %s\n", optarg);
				exit(1);
			}
			break;
		case 'l':
			spacelen = parse_spacelen(optarg);
			break;
		case 'h':
		case '?':
		default:
			usage();
		}
	}

	while ((ch = getc(in)) != EOF) {
		switch (state) {
		case INTXT:
			if (ch == '<') {
				state = INHEADTAG;
				lenbuf = 0;
			} else if (ch == '\n') {
				/* Hold the break back until the next line is seen. */
				state = FINDEND;
				pending = 0;
			} else if (ch != '\r') {
				fputc(ch, out);
			}
			break;

		case INHEADTAG:
			if (isalpha(ch)) {
				/*
				 * Letters beyond the buffer are ignored; a name
				 * that long cannot match any entry of RemoveTag.
				 */
				if (lenbuf < (int)sizeof buf)
					buf[lenbuf++] = (char)tolower(ch);
			} else if (ch == '>') {
				if (is_rtag(buf, lenbuf)) {
					state = INDROPTAG;
					closelen = 0;
				} else {
					state = INTXT;
					fputc(' ', out);
				}
			} else if (lenbuf == 0 && ch != '/') {
				/* '<' not followed by a name: literal text. */
				fputc('<', out);
				fputc(ch, out);
				state = INTXT;
			} else if (is_rtag(buf, lenbuf)) {
				state = INDROPTAG;
				closelen = 0;
			} else {
				state = INTAG;
				fputc(' ', out);
			}
			break;

		case INTAG:
			if (ch == '>') {
				state = INTXT;
				fputc(' ', out);
			}
			break;

		case INDROPTAG:
			if (ch == '<') {
				state = INDROPINTAG;
				closelen = 0;
			}
			break;

		case INDROPINTAG:
			state = (ch == '/') ? INDROPINCLSTAG : INDROPTAG;
			break;

		case INDROPINCLSTAG:
			if (isalpha(ch) && closelen < lenbuf &&
			    tolower(ch) == (unsigned char)buf[closelen]) {
				closelen++;
				/* Whole name matched: skip the rest of the tag. */
				if (closelen == lenbuf)
					state = INTAG;
			} else {
				state = INDROPTAG;
			}
			break;

		case FINDEND:
			if (ch == ' ') {
				pending++;
				/* Enough indentation: keep the line break. */
				if (pending >= spacelen) {
					emit_break(out, pending);
					state = INTXT;
				}
			} else if (ch == '\n') {
				/* Blank line: keep both line breaks. */
				emit_break(out, pending);
				fputc('\n', out);
				state = INTXT;
			} else if (ch != '\r') {
				/* Continuation line: join with one space. */
				state = INTXT;
				fputc(' ', out);
				ungetc(ch, in);
			}
			break;

		default:
			printf("Unknown state: %d\n", state);
			exit(1);
		}
	}
	/* A line break still held back at end of input is not written. */

	if (in != stdin)
		fclose(in);

	/* Reported on stderr because stdout may be the stream that failed. */
	if (ferror(out))
		status = 1;
	if (out != stdout) {
		if (fclose(out) != 0)
			status = 1;
	} else if (fflush(out) != 0) {
		status = 1;
	}
	if (status != 0)
		fprintf(stderr, "Can't write output\n");

	return status;
}