Add copyrights info
[clrlibru.git] / clrlibru.c
1 /*
2  * Copyright (c) 2004 Teodor Sigaev <teodor@sigaev.ru>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *        notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *        notice, this list of conditions and the following disclaimer in the
12  *        documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the author nor the names of any co-contributors
14  *        may be used to endorse or promote products derived from this software
15  *        without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
18  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
25  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <ctype.h>
34
/*
 * Print the program banner and the command-line synopsis to stdout,
 * then terminate the process with exit status 0.  Never returns.
 */
static void
usage() {
        static const char *const help[] = {
                "Clear text from lib.ru for Pocket PC, Version 0.2.\n",
                "Copyright (c) 2004 Teodor Sigaev <teodor@sigaev.ru>\n",
                "       All rights reserved.\n",
                "Usage:\n   clrlibru [-i INPUTFILE] [-o OUTPUT] [-l NUMSPACE]\n"
        };
        size_t idx;

        /* None of the lines contains a conversion, so they go out verbatim. */
        for (idx = 0; idx < sizeof(help) / sizeof(help[0]); idx++)
                fputs(help[idx], stdout);

        exit(0);
}
43
/*
 * Lower-case names of the tags that is_rtag() recognizes.
 * The list is terminated by a NULL sentinel.
 */
char *RemoveTag[] = {
        "select", "head", "div", "a", "form",
        NULL    /* end-of-list marker */
};
52
53 static int
54 is_rtag(char *tag, int len) {
55         char **ptr=RemoveTag;
56
57         tag[len]='\0';
58         while( *ptr ) {
59                 if ( strcmp(tag, *ptr) == 0 ) 
60                         return 1;
61                 ptr++;
62         } 
63         return 0;
64 }
65
66
/* Value of the option most recently returned by mgetopt(), or NULL when
 * that option takes no value. */
static char *optarg = NULL;
/* Index in argv of the next argument mgetopt() will examine. */
static int current=1;

/*
 * Minimal getopt-like command-line scanner.
 *
 * argn   - number of entries in argv.
 * argv   - argument vector; scanning starts at index 1.
 * option - accepted option letters; a letter followed by ':' takes a
 *          value, which is read from the next argv entry.
 *
 * Every argument must start with '-' or '/'; only the character right
 * after that prefix is looked at.  On success the option letter is
 * returned and optarg points to its value (or is NULL for options
 * without one).  Returns -1 once all arguments are consumed.
 *
 * An argument without the prefix, an unknown letter, or a missing value
 * prints a message to stdout and terminates the process with status 1.
 */
int
mgetopt(int argn, char* argv[], char *option) {
        char *arg;
        char *ptr;
        char key;

        if ( current >= argn ) return -1;

        arg = argv[current];

        /* Check the prefix first: for an empty argument arg[1] would lie
         * outside the string and must not be read. */
        if ( *arg != '-' && *arg != '/' ) {
                printf("Unknown option: %s\n", arg);
                exit(1);
        }

        key = arg[1];

        /*
         * A bare "-" or "/" gives key == '\0', which strchr() would match
         * at the terminator of option (and ptr[1] would then read past the
         * end of that string).  ':' is the value marker, not an option
         * letter.  Neither may be accepted.
         */
        if ( key == '\0' || key == ':' )
                ptr = NULL;
        else
                ptr = strchr( option, key );

        if ( ptr == NULL ) {
                printf( "Unknown option: %s\n", arg);
                exit(1);
        }

        current++;
        if ( ptr[1] == ':' ) {
                /* The option takes a value: consume the next argument. */
                if ( current >= argn ) {
                        printf("No value for -%c\n" ,key);
                        exit(1);
                }
                optarg=argv[current];
                current++;
        } else
                optarg=NULL;

        return (int)key;
}
100
101 #define INTXT   0
102 #define INTAG   1
103 #define FINDEND 2
104 #define INDROPTAG       3
105 #define INDROPINTAG     4
106 #define INHEADTAG       5
107 #define INDROPINCLSTAG  6
108
109
110 int 
111 main(int argn, char *argv[]) {
112         int ch;
113         FILE    *in=stdin, *out=stdout;
114         int state=INTXT;
115         char    buf[8192];
116         int lenbuf=0,closelen=0;
117         int spacelen=4;
118
119         while((ch = mgetopt(argn, argv, "l:i:o:h?"))!=-1) {
120                 switch (ch) {
121                         case 'i':
122                                 if ( (in=fopen(optarg, "r"))==NULL) {
123                                         printf("Can't open file %s\n", optarg);
124                                         exit(1);
125                                 }
126                                 break;
127                         case 'o':
128                                 if ( (out=fopen(optarg, "w"))==NULL) {
129                                         printf("Can't open file %s\n",optarg);
130                                         exit(1);
131                                 }
132                                 break;
133                         case 'l':
134                                 spacelen = atoi(optarg);
135                                 if ( spacelen < 0 ) {
136                                         printf("-l should be >= 0\n");
137                                         exit(1);
138                                 }
139                                 break;
140                         case 'h':
141                         case '?':
142                         default:
143                                 usage();
144                 }
145         }
146         
147         while( (ch=getc(in)) != EOF ) {
148                 if ( state==INTXT ) {
149                         if ( ch == '<' ) {
150                                 state=INHEADTAG;
151                                 lenbuf=0;
152                         } else if ( ch == '\n' ) {
153                                 state=FINDEND;
154                                 lenbuf=1;
155                                 *buf = ch;
156                         } else if ( ch != '\r' )
157                                 fputc(ch,out);
158                 } else if ( state==INHEADTAG ) {
159                         if ( isalpha(ch) ) {
160                                 buf[ lenbuf ] = tolower(ch);
161                                 lenbuf++;
162                         } else if ( ch == '>' ) {
163                                 if ( is_rtag(buf,lenbuf) ) {
164                                         state = INDROPTAG;
165                                         closelen=0;
166                                 } else {
167                                         state=INTXT;
168                                         fputc(' ',out);
169                                 }
170                         } else if ( lenbuf == 0 && ch != '/' ) {
171                                 fputc('<',out); fputc(ch,out); 
172                                 state=INTXT;
173                         } else { 
174                                 if ( is_rtag(buf,lenbuf) ) {
175                                         state = INDROPTAG;
176                                         closelen=0;
177                                 } else {
178                                         state=INTAG;
179                                         fputc(' ',out);
180                                 }
181                         }
182                 } else if ( state==INTAG ) {
183                         if ( ch == '>' ) {
184                                 state=INTXT;
185                                 fputc(' ',out);
186                         }       
187                 } else if ( state == INDROPTAG ) {
188                         if ( ch == '<' ) {
189                                 state=INDROPINTAG;
190                                 closelen=0;
191                         }
192                 } else if ( state == INDROPINTAG ) {
193                         if ( ch == '/' )
194                                 state=INDROPINCLSTAG;
195                         else
196                                 state=INDROPTAG;
197                 } else if ( state == INDROPINCLSTAG ) {
198                         if ( isalpha(ch) ) {
199                                 if ( closelen < lenbuf && tolower(ch) == buf[closelen] ) {
200                                         closelen++;
201                                         if ( closelen==lenbuf )
202                                                 state=INTAG;
203                                 } else 
204                                         state=INDROPTAG;
205                         } else
206                                 state=INDROPTAG;
207                 } else if ( state==FINDEND ) {
208                         if ( ch == ' ' ) {
209                                 buf[ lenbuf ] = ch;
210                                 lenbuf++;
211                                 if ( lenbuf > spacelen ) {
212                                         fwrite(buf, sizeof(char), lenbuf, out);
213                                         state=INTXT;
214                                 }
215                         } else if ( ch=='\n' ) {
216                                 buf[ lenbuf ] = ch;
217                                 lenbuf++;
218                                 fwrite(buf, sizeof(char), lenbuf, out);
219                                 state=INTXT;
220                         } else if ( ch !='\r' ) {
221                                 state=INTXT;
222                                 fputc(' ',out);
223                                 ungetc(ch,in);
224                         } 
225                 } else {
226                         printf("Unknown state: %d\n", state);
227                         exit(1);
228                 }
229         } 
230                         
231         if ( in!=stdin )
232                 fclose(in);
233         if ( out!=stdout )
234                 fclose(out);
235
236         return 0;       
237 }
238