2 * Copyright (c) 2004 Teodor Sigaev <teodor@sigaev.ru>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the author nor the names of any co-contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
25 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* Forward declaration: pushoutstr() is called from pushoutquot() before its definition appears. */
35 static void pushoutstr(FILE *out, char *buf, int len);
/*
 * Program banner and command-line synopsis, printed to stdout.
 * NOTE(review): the header of the function that owns these calls is not
 * visible in this excerpt.
 */
39 printf("Clear text from lib.ru for Pocket PC, Version 0.2.\n");
40 printf("Copyright (c) 2004 Teodor Sigaev <teodor@sigaev.ru>\n");
41 printf(" All rights reserved.\n");
42 printf("Usage:\n clrlibru [-i INPUTFILE] [-o OUTPUT] [-l NUMSPACE]\n");
/*
 * Table of strings; presumably the list of tag names that is_rtag() walks.
 * The entries themselves are outside this excerpt.
 */
46 static char* RemoveTag[]={
/*
 * Checks a tag name against table entries.
 * tag: name to look up; main() passes its lower-cased tag buffer.
 * len: length of tag as tracked by the caller; how it is used is not visible
 *      here.
 * The match is an exact strcmp(), so tag must be NUL-terminated at that point.
 * Callers use the result as a boolean.
 */
59 is_rtag(char *tag, int len) {
64 if ( strcmp(tag, *ptr) == 0 )
/*
 * Replacement table. Each entry has a `quote` key that is compared with the
 * incoming name, a `str` replacement text and a `len` field holding the
 * replacement's length.
 */
77 static Quote quoteChange[] = {
/*
 * Looks buf up in quoteChange and, on an exact strcmp() match, writes the
 * replacement text through pushoutstr().
 * out:    destination stream, forwarded to pushoutstr().
 * buf:    name to look up; main() passes its buffer starting one character in,
 *         presumably skipping the leading '&' — confirm against the caller.
 * buflen: length supplied by the caller; its use is not visible here.
 * The caller treats the return value as "replacement was emitted".
 */
92 pushoutquot(FILE *out, char *buf, int buflen) {
93 Quote *ptr = quoteChange;
97 if ( strcmp( ptr->quote, buf ) == 0 ) {
/* Store the replacement's length in the table entry, then emit the replacement. */
99 ptr->len = strlen( ptr->str );
100 pushoutstr( out, ptr->str, ptr->len );
/* Value of the most recently parsed option that takes an argument; set by mgetopt(). */
109 static char *optarg = NULL;
/* Index of the argv element mgetopt() examines next; starts at 1. */
110 static int current=1;
/*
 * Minimal getopt-style parser.
 * argn, argv: argument count and vector as received by main().
 * option:     string of accepted option letters; a letter followed by ':'
 *             takes a value, which is delivered through optarg.
 * Returns -1 once every argument has been consumed. The other return values
 * are not visible in this excerpt; main() compares the result against -1.
 */
113 mgetopt(int argn, char* argv[], char *option) {
116 if ( current >= argn ) return -1;
/*
 * NOTE(review): the option letter is read from the second character before
 * the first character is checked below. For an empty-string argument this
 * reads one byte past the terminator.
 */
118 key = *(argv[current]+1);
/* Both '-' and '/' are accepted as the option prefix. */
119 if ( *(argv[current]) == '-' || *(argv[current]) == '/' ) {
120 char *ptr = strchr( option, key );
122 printf( "Unknown option: %s\n", argv[current]);
/* A ':' after the letter in the option string means the option needs a value. */
126 if ( *(ptr+1) == ':' ) {
/* Presumably current was advanced just above (not visible), so this takes the following argv element as the value. */
127 if (current < argn) {
128 optarg=argv[current];
131 printf("No value for -%c\n" ,key);
/* Presumably reached for arguments without a '-' or '/' prefix — confirm against the full source. */
137 printf("Unknown option: %s\n", argv[current]);
/*
 * Numeric codes for the parser state that main() keeps in `state` and tests
 * once per input character. Other state codes used by main() are defined on
 * lines outside this excerpt.
 */
147 #define INDROPINTAG 4
149 #define INDROPINCLSTAG 6
150 #define WAITAFTERRED 7
151 #define COMMENTBEGIN1 8
152 #define COMMENTBEGIN2 9
154 #define COMMENTEND1 11
155 #define COMMENTEND2 12
/* Size of main()'s working buffer; also the exclusive upper bound accepted for -l. */
158 #define BUFFERLENGTH 8192
/* Text that pushout() writes verbatim right after it emits a NewLine event. */
159 #define REDSTRING " "
/*
 * Kinds of output event accepted by pushout(). The code in this file uses
 * None, Char, Tag, NewLine and Paragraph; the enumerator list itself is not
 * visible in this excerpt.
 */
161 typedef enum TypeOut {
/*
 * Central output filter: every character and structural event goes through
 * here on its way to `out`.
 * out:   destination stream.
 * type:  kind of event (see TypeOut).
 * value: the character for Char events; callers pass 0 for the other kinds.
 * The previous event type, the previous value and a newline counter live in
 * function-local statics, so the result of a call depends on the calls that
 * came before it.
 */
171 pushout( FILE *out, TypeOut type, int value ) {
172 static TypeOut PrevType=None;
173 static int prevvalue=0;
174 static int newlinecount=0;
176 if ( type == Char ) {
/* The guarded statements (not visible here) are skipped for a space or tab that directly follows a Tag, a NewLine, or the initial None state. */
177 if ( !(( PrevType==Tag || PrevType==None || PrevType==NewLine ) && ( value == ' ' || value == '\t' )) ) {
182 } else if ( type == NewLine ) {
/* newlinecount gates the statement below at two; presumably this caps runs of consecutive newlines — confirm against the full source. */
183 if ( newlinecount < 2 )
186 } else if ( type != PrevType ) {
/* The guarded statements (not visible here) run only when the previous output was neither None, NewLine, nor a space/tab/newline character. */
189 if ( !(PrevType==None || PrevType==NewLine || ( PrevType==Char && ( prevvalue == ' ' || prevvalue == '\t' || prevvalue == '\n' ) )) ) {
/* Re-enter as a NewLine event, then write REDSTRING directly with fwrite(), bypassing the Char filtering above. */
195 pushout(out, NewLine, 0);
196 fwrite(REDSTRING, sizeof(char), strlen(REDSTRING), out);
/* Diagnostic goes to stdout; note that this message has no trailing newline. */
200 printf("Unknown type: %d", type);
/*
 * Sends a run of bytes through pushout(), one Char event per byte, so the
 * text is subject to the same filtering as ordinary characters.
 * out: destination stream, forwarded to pushout().
 * buf: bytes to emit; no terminator is required because len bounds the loop.
 * len: number of bytes of buf to emit.
 */
208 pushoutstr(FILE *out, char *buf, int len) {
/* ptr is presumably initialised to buf and advanced inside the loop (lines not visible here). */
210 while( ptr-buf<len ) {
211 pushout(out, Char, (int)(*ptr));
/*
 * Entry point. Parses the command line with mgetopt() using the option string
 * "l:i:o:h?", then reads the input one character at a time and runs a state
 * machine on `state` that forwards text to pushout() / pushoutstr().
 * Input defaults to stdin and output to stdout; either can be replaced by a
 * file named on the command line.
 * argn, argv: argument count and vector.
 */
217 main(int argn, char *argv[]) {
219 FILE *in=stdin, *out=stdout;
/* Working buffer shared by the tag-name and '&' sequence states; lenbuf is its fill level. */
221 char buf[BUFFERLENGTH];
222 int lenbuf=0,closelen=0;
225 while((ch = mgetopt(argn, argv, "l:i:o:h?"))!=-1) {
228 if ( (in=fopen(optarg, "r"))==NULL) {
229 printf("Can't open file %s\n", optarg);
234 if ( (out=fopen(optarg, "w"))==NULL) {
235 printf("Can't open file %s\n",optarg);
/* spacelen is later compared with the whitespace count gathered in the FINDEND state. */
240 spacelen = atoi(optarg);
241 if ( spacelen < 0 || spacelen >= BUFFERLENGTH ) {
242 printf("-l should be >= 0 and < %d\n", BUFFERLENGTH);
/* Main loop: one state-machine step per input character. */
253 while( (ch=getc(in)) != EOF ) {
257 if ( state==INTXT ) {
261 } else if ( ch == '\n' ) {
264 } else if ( ch=='&' ) {
269 pushout(out, Char, ch);
271 } else if ( state==INHEADTAG ) {
/* Tag-name characters are stored lower-cased; the bound keeps one byte of buf in reserve. */
273 if ( lenbuf < BUFFERLENGTH-1 ) {
274 buf[ lenbuf ] = tolower(ch);
277 } else if ( ch == '!' ) {
278 state = COMMENTBEGIN1;
279 } else if ( ch == '>' ) {
280 if ( is_rtag(buf,lenbuf) ) {
286 pushoutstr(out, "<>", 2);
288 pushout(out, Tag, 0);
/* '<' followed by a character that does not start a tag name: emit both characters as plain text. */
290 } else if ( lenbuf == 0 && ch != '/' ) {
291 pushout(out, Char, '<');
292 pushout(out, Char, ch);
295 if ( is_rtag(buf,lenbuf) ) {
302 } else if ( state==INTAG ) {
305 pushout(out, Tag, 0);
307 } else if ( state == INDROPTAG ) {
312 } else if ( state == INDROPINTAG ) {
314 state=INDROPINCLSTAG;
317 } else if ( state == INDROPINCLSTAG ) {
/* Match the input, case-insensitively, against the tag name held in buf; closelen counts matched characters. */
319 if ( closelen < lenbuf && tolower(ch) == buf[closelen] ) {
321 if ( closelen==lenbuf )
327 } else if ( state==FINDEND ) {
328 if ( ch == ' ' || ch == '\t' ) {
/* More whitespace than the -l threshold is reported as a Paragraph event. */
330 if ( lenbuf > spacelen ) {
331 pushout( out, Paragraph, 0 );
334 } else if ( ch=='\n' ) {
335 pushout( out, NewLine, 0 );
336 pushout( out, NewLine, 0 );
340 pushout(out, Char, ' ');
343 } else if ( state==WAITAFTERRED ) {
344 if ( !isspace(ch) ) {
348 } else if ( state==COMMENTBEGIN1 ) {
350 state = COMMENTBEGIN2;
/* Not a comment after all: replay the "<!" that had been held back. */
352 pushoutstr(out, "<!", 2);
356 } else if ( state==COMMENTBEGIN2 ) {
/* Replay all three held-back characters; a length of 2 here would drop the '-'. */
360 pushoutstr(out, "<!-", 3);
364 } else if ( state==COMMENTIN ) {
367 } else if ( state==COMMENTEND1 ) {
/* A second '-' moves on to waiting for '>'; anything else returns to the comment body. */
368 state = ( ch == '-' ) ? COMMENTEND2 : COMMENTIN;
369 } else if ( state==COMMENTEND2 ) {
372 else if ( ch != '-' )
374 } else if ( state==INQUOTE ) {
/* Only alphabetic characters are collected for the sequence that follows '&'. */
375 if ( isalpha( ch ) && lenbuf < BUFFERLENGTH-2 ) {
/* On ';' try the replacement table, skipping the first byte of buf. */
378 } else if ( ch == ';' && lenbuf>1 && pushoutquot( out, buf+1, lenbuf-1 ) ) {
/* Emit the collected characters unchanged. */
381 pushoutstr(out, buf, lenbuf);
386 printf("Unknown state: %d\n", state);