/*
 * Source: Linux kernel mirror (for testing)
 * git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
 * Topics: kernel, os, linux
 */
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Lexical analysis for genksyms.
 * Copyright 1996, 1997 Linux International.
 *
 * New implementation contributed by Richard Henderson <rth@tamu.edu>
 * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
 *
 * Taken from Linux modutils 2.4.22.
 */

%{

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.tab.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.

   YY_DECL gives the flex-generated scanner the name yylex1() and makes
   it static, so that the hand-written yylex() in the user code section
   can call it and post-process the tokens it returns.  */
#define YY_DECL		static int yylex1(void)

%}

/* An identifier.  A dollar sign is accepted alongside letters, digits
   and the underscore.  */
IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants: octal, decimal or hexadecimal, optionally followed
   by one of the suffixes U, L, UL or LU (either case).
   NOTE(review): I_SUF has no alternative for a doubled L (LL, ULL).  */
O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating constants: either a fraction with an optional exponent, or a
   digit string with a mandatory exponent.  An F or L suffix may follow.  */
FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals, optionally with an L prefix.  A
   backslash escapes whatever single character follows it.  */
STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

/* Two-character operators: one of the listed operator characters
   followed by an equals sign, or one of && || -> << >>.  */
MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

/* Nothing in this file calls input(), so ask flex not to generate it.  */
%option noinput

%%


 /* Keep track of our location in the original source files.  A line of
    the form  # NUMBER "FILE" ...  is returned as FILENAME; yylex() then
    records the file name and sets cur_line from NUMBER itself, which is
    why that rule does not bump cur_line.  Any other line starting with
    a hash sign, and every bare newline, just advances cur_line.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}				return OTHER;
{INT}					return INT;
{REAL}					return REAL;

"..."					return DOTS;

 /* All other tokens are single characters.  */
.					return yytext[0];


%%

83/* Bring in the keyword recognizer. */
84
85#include "keywords.c"
86
87
/* Macros to append to our phrase collection list. */

/*
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later; the only
 * problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case the only effect will be that the ABI checksums become
 * more volatile, which is acceptable.  Also, such collisions are quite
 * rare; so far one was only observed in include/linux/telephony.h.
 */

/*
 * _APP(T, L) - record the token text T, which is L characters long and
 * NUL-terminated (L + 1 bytes are copied), as a new list node.
 *
 * The macro is only usable where `cur_node', `next_node' and
 * `in_source_file' are in scope.  `next_node' is a pre-allocated, still
 * empty node: it becomes `cur_node' and is filled in, and a fresh empty
 * node is allocated whose `next' link points back at it.  The list is
 * therefore linked from the newest token towards the oldest.
 *
 * T and L are parenthesized so that an argument containing a
 * lower-precedence operator (for instance a conditional expression)
 * still yields the intended L + 1 byte count.  Both are evaluated more
 * than once, so they must be free of side effects.
 */
#define _APP(T,L)	do {						   \
		cur_node = next_node;					   \
		next_node = xmalloc(sizeof(*next_node));		   \
		next_node->next = cur_node;				   \
		cur_node->string = memcpy(xmalloc((L)+1), (T), (L)+1);	   \
		cur_node->tag =						   \
		    find_symbol(cur_node->string, SYM_ENUM_CONST, 1) ?	   \
		    SYM_ENUM_CONST : SYM_NORMAL;			   \
		cur_node->in_source_file = in_source_file;		   \
	} while (0)

/* Append the text of the token flex has just matched. */
#define APP		_APP(yytext, yyleng)


113/* The second stage lexer. Here we incorporate knowledge of the state
114 of the parser to tailor the tokens that are returned. */
115
116int
117yylex(void)
118{
119 static enum {
120 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
121 ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
122 ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
123 ST_TABLE_5, ST_TABLE_6
124 } lexstate = ST_NOTSTARTED;
125
126 static int suppress_type_lookup, dont_want_brace_phrase;
127 static struct string_list *next_node;
128
129 int token, count = 0;
130 struct string_list *cur_node;
131
132 if (lexstate == ST_NOTSTARTED)
133 {
134 next_node = xmalloc(sizeof(*next_node));
135 next_node->next = NULL;
136 lexstate = ST_NORMAL;
137 }
138
139repeat:
140 token = yylex1();
141
142 if (token == 0)
143 return 0;
144 else if (token == FILENAME)
145 {
146 char *file, *e;
147
148 /* Save the filename and line number for later error messages. */
149
150 if (cur_filename)
151 free(cur_filename);
152
153 file = strchr(yytext, '\"')+1;
154 e = strchr(file, '\"');
155 *e = '\0';
156 cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
157 cur_line = atoi(yytext+2);
158
159 if (!source_file) {
160 source_file = xstrdup(cur_filename);
161 in_source_file = 1;
162 } else {
163 in_source_file = (strcmp(cur_filename, source_file) == 0);
164 }
165
166 goto repeat;
167 }
168
169 switch (lexstate)
170 {
171 case ST_NORMAL:
172 switch (token)
173 {
174 case IDENT:
175 APP;
176 {
177 int r = is_reserved_word(yytext, yyleng);
178 if (r >= 0)
179 {
180 switch (token = r)
181 {
182 case ATTRIBUTE_KEYW:
183 lexstate = ST_ATTRIBUTE;
184 count = 0;
185 goto repeat;
186 case ASM_KEYW:
187 lexstate = ST_ASM;
188 count = 0;
189 goto repeat;
190 case TYPEOF_KEYW:
191 lexstate = ST_TYPEOF;
192 count = 0;
193 goto repeat;
194
195 case STRUCT_KEYW:
196 case UNION_KEYW:
197 case ENUM_KEYW:
198 dont_want_brace_phrase = 3;
199 suppress_type_lookup = 2;
200 goto fini;
201
202 case EXPORT_SYMBOL_KEYW:
203 goto fini;
204
205 case STATIC_ASSERT_KEYW:
206 lexstate = ST_STATIC_ASSERT;
207 count = 0;
208 goto repeat;
209 }
210 }
211 if (!suppress_type_lookup)
212 {
213 if (find_symbol(yytext, SYM_TYPEDEF, 1))
214 token = TYPE;
215 }
216 }
217 break;
218
219 case '[':
220 APP;
221 lexstate = ST_BRACKET;
222 count = 1;
223 goto repeat;
224
225 case '{':
226 APP;
227 if (dont_want_brace_phrase)
228 break;
229 lexstate = ST_BRACE;
230 count = 1;
231 goto repeat;
232
233 case '=': case ':':
234 APP;
235 lexstate = ST_EXPRESSION;
236 break;
237
238 case DOTS:
239 default:
240 APP;
241 break;
242 }
243 break;
244
245 case ST_ATTRIBUTE:
246 APP;
247 switch (token)
248 {
249 case '(':
250 ++count;
251 goto repeat;
252 case ')':
253 if (--count == 0)
254 {
255 lexstate = ST_NORMAL;
256 token = ATTRIBUTE_PHRASE;
257 break;
258 }
259 goto repeat;
260 default:
261 goto repeat;
262 }
263 break;
264
265 case ST_ASM:
266 APP;
267 switch (token)
268 {
269 case '(':
270 ++count;
271 goto repeat;
272 case ')':
273 if (--count == 0)
274 {
275 lexstate = ST_NORMAL;
276 token = ASM_PHRASE;
277 break;
278 }
279 goto repeat;
280 default:
281 goto repeat;
282 }
283 break;
284
285 case ST_TYPEOF_1:
286 if (token == IDENT)
287 {
288 if (is_reserved_word(yytext, yyleng) >= 0
289 || find_symbol(yytext, SYM_TYPEDEF, 1))
290 {
291 yyless(0);
292 unput('(');
293 lexstate = ST_NORMAL;
294 token = TYPEOF_KEYW;
295 break;
296 }
297 _APP("(", 1);
298 }
299 lexstate = ST_TYPEOF;
300 /* FALLTHRU */
301
302 case ST_TYPEOF:
303 switch (token)
304 {
305 case '(':
306 if ( ++count == 1 )
307 lexstate = ST_TYPEOF_1;
308 else
309 APP;
310 goto repeat;
311 case ')':
312 APP;
313 if (--count == 0)
314 {
315 lexstate = ST_NORMAL;
316 token = TYPEOF_PHRASE;
317 break;
318 }
319 goto repeat;
320 default:
321 APP;
322 goto repeat;
323 }
324 break;
325
326 case ST_BRACKET:
327 APP;
328 switch (token)
329 {
330 case '[':
331 ++count;
332 goto repeat;
333 case ']':
334 if (--count == 0)
335 {
336 lexstate = ST_NORMAL;
337 token = BRACKET_PHRASE;
338 break;
339 }
340 goto repeat;
341 default:
342 goto repeat;
343 }
344 break;
345
346 case ST_BRACE:
347 APP;
348 switch (token)
349 {
350 case '{':
351 ++count;
352 goto repeat;
353 case '}':
354 if (--count == 0)
355 {
356 lexstate = ST_NORMAL;
357 token = BRACE_PHRASE;
358 break;
359 }
360 goto repeat;
361 default:
362 goto repeat;
363 }
364 break;
365
366 case ST_EXPRESSION:
367 switch (token)
368 {
369 case '(': case '[': case '{':
370 ++count;
371 APP;
372 goto repeat;
373 case '}':
374 /* is this the last line of an enum declaration? */
375 if (count == 0)
376 {
377 /* Put back the token we just read so's we can find it again
378 after registering the expression. */
379 unput(token);
380
381 lexstate = ST_NORMAL;
382 token = EXPRESSION_PHRASE;
383 break;
384 }
385 /* FALLTHRU */
386 case ')': case ']':
387 --count;
388 APP;
389 goto repeat;
390 case ',': case ';':
391 if (count == 0)
392 {
393 /* Put back the token we just read so's we can find it again
394 after registering the expression. */
395 unput(token);
396
397 lexstate = ST_NORMAL;
398 token = EXPRESSION_PHRASE;
399 break;
400 }
401 APP;
402 goto repeat;
403 default:
404 APP;
405 goto repeat;
406 }
407 break;
408
409 case ST_STATIC_ASSERT:
410 APP;
411 switch (token)
412 {
413 case '(':
414 ++count;
415 goto repeat;
416 case ')':
417 if (--count == 0)
418 {
419 lexstate = ST_NORMAL;
420 token = STATIC_ASSERT_PHRASE;
421 break;
422 }
423 goto repeat;
424 default:
425 goto repeat;
426 }
427 break;
428
429 case ST_TABLE_1:
430 goto repeat;
431
432 case ST_TABLE_2:
433 if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
434 {
435 token = EXPORT_SYMBOL_KEYW;
436 lexstate = ST_TABLE_5;
437 APP;
438 break;
439 }
440 lexstate = ST_TABLE_6;
441 /* FALLTHRU */
442
443 case ST_TABLE_6:
444 switch (token)
445 {
446 case '{': case '[': case '(':
447 ++count;
448 break;
449 case '}': case ']': case ')':
450 --count;
451 break;
452 case ',':
453 if (count == 0)
454 lexstate = ST_TABLE_2;
455 break;
456 };
457 goto repeat;
458
459 case ST_TABLE_3:
460 goto repeat;
461
462 case ST_TABLE_4:
463 if (token == ';')
464 lexstate = ST_NORMAL;
465 goto repeat;
466
467 case ST_TABLE_5:
468 switch (token)
469 {
470 case ',':
471 token = ';';
472 lexstate = ST_TABLE_2;
473 APP;
474 break;
475 default:
476 APP;
477 break;
478 }
479 break;
480
481 default:
482 exit(1);
483 }
484fini:
485
486 if (suppress_type_lookup > 0)
487 --suppress_type_lookup;
488 if (dont_want_brace_phrase > 0)
489 --dont_want_brace_phrase;
490
491 yylval = &next_node->next;
492
493 return token;
494}