Classdesc  3.D29
tokeninput.h
1 /*
2  @copyright Russell Standish 2000-2013
3  @author Russell Standish
4  This file is part of Classdesc
5 
6  Open source licensed under the MIT license. See LICENSE for details.
7 */
8 
#include <ctype.h>
#include <stdio.h>
#include <string.h>

#include <iostream>
#include <string>
14 
15 class mark_t;
16 
17 #ifdef _MSC_VER
18 #define USE_FSEEK
19 #endif
20 
21 
/*
  Character-at-a-time tokeniser for C++ source text.

  Characters are read from a stdio stream with fgetc().  When an output
  stream is supplied, the characters consumed are echoed to it (discarded
  [[...]] attributes are not echoed).  nexttok() assembles the next token
  into the public member `token`, keeping the previous one in `lasttoken`.
  End of input is reported by throwing tokeninput::eof.
*/
class tokeninput
{
  FILE* inputstream;   /* stream the characters are read from */
  FILE* outputstream;  /* optional echo stream, NULL for none */
  int c;               /* current look-ahead character */
  int lineno;          /* line number of the look-ahead position, first line is 1 */

  /*
    Load the next character into c and return it.

    - increments lineno for each newline consumed
    - removes backslash-newline line continuations
    - discards [[...]] attribute blocks
    - throws eof when the input is exhausted
  */
  char getnextc()
  {
    c=fgetc(inputstream);
    switch (c)
      {
      case '\n':
        lineno++;
        break;

      case '\\': /* check for and remove any line continuations */
        c=fgetc(inputstream);
        if (c=='\n')
          {
            lineno++; /* the continuation newline is still a physical line */
            if (outputstream) fprintf(outputstream,"\\\n");
            /* hand the following character to the normal processing path,
               which also takes care of echoing it exactly once */
            return getnextc();
          }
        ungetc(c,inputstream);
        c='\\';
        break;

      case '[':
        c=fgetc(inputstream);
        if (c=='[') /* attribute leadin - discard attribute */
          {
            /* simple scan: consume everything up to and including the
               first "]]" pair */
            int prev=0;
            for (;;)
              {
                c=fgetc(inputstream);
                if (c==EOF) throw eof(); /* unterminated attribute */
                if (c=='\n') lineno++;
                if (c==']' && prev==']') break;
                prev=c;
              }
            /* returning directly avoids echoing the next character twice */
            return getnextc();
          }
        ungetc(c,inputstream);
        c='[';
        break;

      case EOF:
        throw eof();
      }
    if (outputstream && !feof(inputstream))
      fputc(c,outputstream);
    return c;
  }

public:
  struct eof {}; /* signal end of input */
  std::string token, lasttoken;

  /*
    Attach to the stream `in`, optionally echoing to `out`, and prime the
    look-ahead character.  Throws eof if no character can be read.
  */
  tokeninput(FILE* in, FILE* out=NULL):
    inputstream(in), outputstream(out), c(0), lineno(1)
  {getnextc();}

  int line() const {return lineno;}

  /* support for marking and resetting stream */
  class mark_t;
  void operator=(mark_t& x);
  int operator>(mark_t& x); /* returns whether stream is beyond mark */
  friend class tokeninput::mark_t ;

  /*
    Read the next token into `token`; the previous token moves to
    `lasttoken`.  Comments, white space, preprocessor lines other than
    #pragma, and characters that cannot start a token are skipped.
    A line starting with #pragma is returned whole as a single token.
    Throws eof at end of input.
  */
  void nexttok()
  {
    token.swap(lasttoken);

  nexttok_again:
    token.erase();
    if (feof(inputstream)) throw eof();

    /* skip white space and # control lines */
    while (isspace(c)) getnextc();
    while (c=='#')
      {
        /* accumulate the directive line in token */
        while (c!='\n' && !feof(inputstream))
          {
            if (c!='\r') token+=c; /* ignore '\r' (DOS files) */
            if (c=='/')
              {
                if (getnextc()=='*') /* strip comments */
                  {
                    char prev=getnextc();
                    while (prev!='*' || c!='/')
                      {
                        prev=c;
                        getnextc();
                      }
                    getnextc(); /* step past the closing '/' */
                  }
                /* otherwise c already holds the character following the
                   '/'; leave it for the next iteration so that it is
                   neither dropped nor allowed to hide the end of line */
              }
            else
              getnextc();
          }
        if (token.find("#pragma")==0)
          return; /* parse any pragma omits appearing on stdin */
        token.erase();
        while (isspace(c)) getnextc();
      }
    if (feof(inputstream)) throw eof();

    if (c=='"' || c=='\'') /* handle strings and chars */
      {
        int escape=0;
        char terminal=c;
        do
          {
            token+=c;
            /* a backslash escapes the next character unless it is itself escaped */
            escape=c=='\\' && !escape;
            getnextc();
          }
        while (c!=terminal || escape);
        token+=c; /* include terminal " */
        getnextc();
      }
    else if (strchr("#\\!@$~(){}[]:;,.?%*|-+=<>^&/",c))
      /* operator tokens -
         treat # and \ as operators, although they're stripped from
         preprocessed code */
      {
        char lc;
        token+=lc=c;
        getnextc();
        /* operators in the second list never absorb a following '=' */
        if (c=='=' && !strchr("#\\!@$~(){}[]:;,.?",lc))
          { /* compound assignment operator */
            token+=c;
            getnextc();
          }
        else
          switch (lc)
            {
            case '<': case '>': case '|': case '&':
            case '+': case '-': case ':':
              if (c==lc) /* double symbol */
                {
                  token+=c;
                  getnextc();
                  if ((lc=='<' || lc=='>') && c=='=') /* <<= & >>= */
                    {
                      token+=c;
                      getnextc();
                    }
                }
              else if (lc=='-' && c=='>') /* -> and ->* operators */
                {
                  token+=c;
                  getnextc();
                  if (c=='*')
                    {
                      token+=c;
                      getnextc();
                    }
                }
              break;

              /* consider the pair [] and () as single token,
                 for use with operator */

            case '[':
              if (c==']')
                {
                  token+=c;
                  getnextc();
                }
              break;

            case '(':
              if (c==')')
                {
                  token+=c;
                  getnextc();
                }
              break;

            case '/': /* check for comments, and remove from input stream */
              if (c=='*')
                {
                  lc=getnextc();
                  while (lc!='*' || c!='/')
                    {
                      lc=c;
                      getnextc();
                    }
                  getnextc(); /* load up next character from input stream */
                  goto nexttok_again; /* get next token */
                }
              else if (c=='/')
                {
                  while (c!='\n') getnextc();
                  goto nexttok_again; /* get next token */
                }
              break;
            }
      }
    else if (!isalnum(c) && c!='_')
      {
        getnextc();
        goto nexttok_again; /* skip non 'C' characters */
      }
    else
      {
        /* identifier, keyword or number */
        while (isalnum(c) || c=='_')
          {
            token+=c;
            getnextc();
          }
      }
  }
};
217 
218 /* support for marking and resetting stream */
220 {
221  friend void tokeninput::operator=(mark_t& x);
222  friend int tokeninput::operator>(mark_t& x);
223  fpos_t fp;
224  long offset;
225  tokeninput tokinp;
226 public:
227  mark_t(tokeninput& x): tokinp(x)
228  {
229  // if (fgetpos(x.inputstream,&fp)) throw eof();
230  // Some systems (eg CYGWIN) have trouble with fgetpos/fsetpos ?
231  fgetpos(x.inputstream,&fp);
232 
233  offset=ftell(x.inputstream);
234  }
235 };
236 
237 void tokeninput::operator=(mark_t& x)
238 {
239  *this=x.tokinp;
240 #ifdef USE_FSEEK /* some OSes have buggy fsetpos routines */
241  if (fseek(inputstream,x.offset,SEEK_SET)) throw eof();
242 #else
243  if (fsetpos(inputstream,&(x.fp))) throw eof();
244 #endif
245 }
246 
247 int tokeninput::operator>(mark_t& x)
248 {return ftell(inputstream) > x.offset;}
249 
250 /* redefine isalpha so that '_' is considered an alphabetic char */
251 #undef isalpha
/* returns 1 for '_' and for the letters a-z and A-Z, 0 for anything else */
inline int isalpha(char x)
{
  if (x=='_') return 1;
  const bool lower = 'a'<=x && x<='z';
  const bool upper = 'A'<=x && x<='Z';
  return lower || upper;
}
254 
255 #include <map>
256 #include <set>
/* hash_map is provided here as a thin wrapper around the ordered std::map;
   the base class is qualified so that no using-directive is required */
template <class K, class T> class hash_map: public std::map<K,T> {};
/* hash_set is provided here as a thin wrapper around the ordered std::set;
   the base class is qualified so that no using-directive is required */
template <class K> class hash_set: public std::set<K> {};
259 
260 
261 string gobble_delimited(tokeninput& input, const char *left,
262  const char *right)
263 { string argList = input.lasttoken;
264  argList += " " + input.token + " ";
265  int delim_count=0;
266  string tmp=(string)left+right;
267  if (input.token==tmp) return argList; /* catch the () and [] token cases */
268  for (input.nexttok(); input.token!=right || delim_count>0;
269  input.nexttok())
270  {
271  argList += input.token + " ";
272  if (input.token==left) delim_count++;
273  if (input.token==right) delim_count--;
274  if (input.token==">>" && string(left)=="<")
275  {
276  delim_count-=2;
277  if (delim_count<=0) break;
278  }
279  }
280  return argList;
281 }
282 
283 /* grab arguments to template */
284 string get_template_args(tokeninput& input)
285 {
286  string targs;
287  size_t angle_count=0;
288  do
289  {
290  /* strip out default arguments */
291  if (input.token[0]=='=')
292  while (!strchr(">,",input.token[0]))
293  {
294  if (input.token[0]=='<') gobble_delimited(input,"<",">");
295  input.nexttok();
296  }
297  targs += input.token;
298  if (input.token!=".") targs+=" ";
299  if (input.token[0]=='<') angle_count+=input.token.length();
300  else if (input.token[0]=='>')
301  angle_count-=input.token.length();
302  input.nexttok();
303  }
304  while (angle_count>0);
305  return targs;
306 }
307 
308 
Definition: tokeninput.h:257
Definition: tokeninput.h:219
Definition: tokeninput.h:68
Definition: tokeninput.h:258
Definition: tokeninput.h:22