Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Lexical analysis for genksyms.
 * Copyright 1996, 1997 Linux International.
 *
 * New implementation contributed by Richard Henderson <rth@tamu.edu>
 * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
 *
 * Taken from Linux modutils 2.4.22.
 */
11
%{

#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.tab.h"

/*
 * We've got a two-level lexer here.  We let flex do basic tokenization
 * and then we categorize those basic tokens in the second stage.
 *
 * YY_DECL renames the flex-generated scanner to yylex1() and gives it
 * internal linkage; the yylex() defined in the user-code section of this
 * file is the second stage and calls yylex1() for its raw tokens.
 */
#define YY_DECL		static int yylex1(void)

%}
28
/* Identifiers; '$' is accepted as an identifier character. */
IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

/*
 * Integer constants: octal, decimal or hexadecimal digits followed by an
 * optional U, L, UL or LU suffix (either case).
 * NOTE(review): I_SUF has no LL/ULL alternative, so "1ULL" scans as the
 * INT "1UL" followed by the IDENT "L" -- confirm whether that matters.
 */
O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/*
 * Floating constants: either a fraction with an optional exponent, or
 * bare digits with a mandatory exponent; both take an optional F/L suffix.
 */
FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/*
 * String and character literals with an optional L prefix.  A backslash
 * always consumes the character after it, so an escaped quote does not
 * end the literal.
 */
STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

/* Two-character operators: <op>= forms, &&, ||, ->, << and >>. */
MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

%option noinput
51
%%

 /* Identifiers of the form [u]int<N>x<M>_t (e.g. uint8x16_t).  This rule
    is listed before {IDENT} so that it wins the equal-length match.  */
u?int(8|16|32|64)x(1|2|4|8|16)_t	return BUILTIN_INT_KEYW;

 /* Keep track of our location in the original source files.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}				return OTHER;
{INT}					return INT;
{REAL}					return REAL;

"..."					return DOTS;

 /* All other tokens are single characters.  */
.					return yytext[0];


%%
84
85/* Bring in the keyword recognizer. */
86
87#include "keywords.c"
88
89
/* Macros to append to our phrase collection list. */

/*
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later; the only
 * problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case the only effect is that the ABI checksums become more
 * volatile, which is acceptable.  Also, such collisions are quite rare;
 * so far this was only observed in include/linux/telephony.h.
 */

/*
 * _APP(T, L) - append the token text T of length L to the phrase list.
 *
 * The node that was pre-allocated as next_node becomes cur_node and
 * receives a private copy of the text; a fresh next_node is allocated
 * whose ->next points back at cur_node.  L + 1 bytes are copied from T,
 * so T[L] must be the terminating NUL.
 *
 * Expects cur_node, next_node and in_source_file to be in scope at the
 * point of use.  T and L are each expanded exactly once.
 */
#define _APP(T, L)	do {						\
	const size_t app_size_ = (size_t)(L) + 1;			\
	cur_node = next_node;						\
	next_node = xmalloc(sizeof(*next_node));			\
	next_node->next = cur_node;					\
	cur_node->string = memcpy(xmalloc(app_size_), (T), app_size_);	\
	cur_node->tag =							\
		find_symbol(cur_node->string, SYM_ENUM_CONST, 1) ?	\
		SYM_ENUM_CONST : SYM_NORMAL;				\
	cur_node->in_source_file = in_source_file;			\
} while (0)

/* Append the token that the first-stage scanner has just matched. */
#define APP		_APP(yytext, yyleng)
113
114
115/* The second stage lexer. Here we incorporate knowledge of the state
116 of the parser to tailor the tokens that are returned. */
117
118/*
119 * The lexer cannot distinguish whether a typedef'ed string is a TYPE or an
120 * IDENT. We need a hint from the parser to handle this accurately.
121 */
122bool dont_want_type_specifier;
123
124int
125yylex(void)
126{
127 static enum {
128 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
129 ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
130 } lexstate = ST_NOTSTARTED;
131
132 static int suppress_type_lookup, dont_want_brace_phrase;
133 static struct string_list *next_node;
134 static char *source_file;
135
136 int token, count = 0;
137 struct string_list *cur_node;
138
139 if (lexstate == ST_NOTSTARTED)
140 {
141 next_node = xmalloc(sizeof(*next_node));
142 next_node->next = NULL;
143 lexstate = ST_NORMAL;
144 }
145
146repeat:
147 token = yylex1();
148
149 if (token == 0)
150 return 0;
151 else if (token == FILENAME)
152 {
153 char *file, *e;
154
155 /* Save the filename and line number for later error messages. */
156
157 if (cur_filename)
158 free(cur_filename);
159
160 file = strchr(yytext, '\"')+1;
161 e = strchr(file, '\"');
162 *e = '\0';
163 cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
164 cur_line = atoi(yytext+2);
165
166 if (!source_file) {
167 source_file = xstrdup(cur_filename);
168 in_source_file = 1;
169 } else {
170 in_source_file = (strcmp(cur_filename, source_file) == 0);
171 }
172
173 goto repeat;
174 }
175
176 switch (lexstate)
177 {
178 case ST_NORMAL:
179 APP;
180 switch (token)
181 {
182 case IDENT:
183 {
184 int r = is_reserved_word(yytext, yyleng);
185 if (r >= 0)
186 {
187 switch (token = r)
188 {
189 case ATTRIBUTE_KEYW:
190 lexstate = ST_ATTRIBUTE;
191 count = 0;
192 goto repeat;
193 case ASM_KEYW:
194 lexstate = ST_ASM;
195 count = 0;
196 goto repeat;
197 case TYPEOF_KEYW:
198 lexstate = ST_TYPEOF;
199 count = 0;
200 goto repeat;
201
202 case STRUCT_KEYW:
203 case UNION_KEYW:
204 case ENUM_KEYW:
205 dont_want_brace_phrase = 3;
206 suppress_type_lookup = 2;
207 goto fini;
208
209 case EXPORT_SYMBOL_KEYW:
210 goto fini;
211
212 case STATIC_ASSERT_KEYW:
213 lexstate = ST_STATIC_ASSERT;
214 count = 0;
215 goto repeat;
216 }
217 }
218 if (!suppress_type_lookup && !dont_want_type_specifier)
219 {
220 if (find_symbol(yytext, SYM_TYPEDEF, 1))
221 token = TYPE;
222 }
223 }
224 break;
225
226 case '[':
227 lexstate = ST_BRACKET;
228 count = 1;
229 goto repeat;
230
231 case '{':
232 if (dont_want_brace_phrase)
233 break;
234 lexstate = ST_BRACE;
235 count = 1;
236 goto repeat;
237
238 case '=': case ':':
239 lexstate = ST_EXPRESSION;
240 break;
241
242 default:
243 break;
244 }
245 break;
246
247 case ST_ATTRIBUTE:
248 APP;
249 switch (token)
250 {
251 case '(':
252 ++count;
253 goto repeat;
254 case ')':
255 if (--count == 0)
256 {
257 lexstate = ST_NORMAL;
258 token = ATTRIBUTE_PHRASE;
259 break;
260 }
261 goto repeat;
262 default:
263 goto repeat;
264 }
265 break;
266
267 case ST_ASM:
268 APP;
269 switch (token)
270 {
271 case '(':
272 ++count;
273 goto repeat;
274 case ')':
275 if (--count == 0)
276 {
277 lexstate = ST_NORMAL;
278 token = ASM_PHRASE;
279 break;
280 }
281 goto repeat;
282 default:
283 goto repeat;
284 }
285 break;
286
287 case ST_TYPEOF_1:
288 if (token == IDENT)
289 {
290 if (is_reserved_word(yytext, yyleng) >= 0
291 || find_symbol(yytext, SYM_TYPEDEF, 1))
292 {
293 yyless(0);
294 unput('(');
295 lexstate = ST_NORMAL;
296 token = TYPEOF_KEYW;
297 break;
298 }
299 _APP("(", 1);
300 }
301 lexstate = ST_TYPEOF;
302 /* FALLTHRU */
303
304 case ST_TYPEOF:
305 switch (token)
306 {
307 case '(':
308 if ( ++count == 1 )
309 lexstate = ST_TYPEOF_1;
310 else
311 APP;
312 goto repeat;
313 case ')':
314 APP;
315 if (--count == 0)
316 {
317 lexstate = ST_NORMAL;
318 token = TYPEOF_PHRASE;
319 break;
320 }
321 goto repeat;
322 default:
323 APP;
324 goto repeat;
325 }
326 break;
327
328 case ST_BRACKET:
329 APP;
330 switch (token)
331 {
332 case '[':
333 ++count;
334 goto repeat;
335 case ']':
336 if (--count == 0)
337 {
338 lexstate = ST_NORMAL;
339 token = BRACKET_PHRASE;
340 break;
341 }
342 goto repeat;
343 default:
344 goto repeat;
345 }
346 break;
347
348 case ST_BRACE:
349 APP;
350 switch (token)
351 {
352 case '{':
353 ++count;
354 goto repeat;
355 case '}':
356 if (--count == 0)
357 {
358 lexstate = ST_NORMAL;
359 token = BRACE_PHRASE;
360 break;
361 }
362 goto repeat;
363 default:
364 goto repeat;
365 }
366 break;
367
368 case ST_EXPRESSION:
369 switch (token)
370 {
371 case '(': case '[': case '{':
372 ++count;
373 APP;
374 goto repeat;
375 case '}':
376 /* is this the last line of an enum declaration? */
377 if (count == 0)
378 {
379 /* Put back the token we just read so's we can find it again
380 after registering the expression. */
381 unput(token);
382
383 lexstate = ST_NORMAL;
384 token = EXPRESSION_PHRASE;
385 break;
386 }
387 /* FALLTHRU */
388 case ')': case ']':
389 --count;
390 APP;
391 goto repeat;
392 case ',': case ';':
393 if (count == 0)
394 {
395 /* Put back the token we just read so's we can find it again
396 after registering the expression. */
397 unput(token);
398
399 lexstate = ST_NORMAL;
400 token = EXPRESSION_PHRASE;
401 break;
402 }
403 APP;
404 goto repeat;
405 default:
406 APP;
407 goto repeat;
408 }
409 break;
410
411 case ST_STATIC_ASSERT:
412 APP;
413 switch (token)
414 {
415 case '(':
416 ++count;
417 goto repeat;
418 case ')':
419 if (--count == 0)
420 {
421 lexstate = ST_NORMAL;
422 token = STATIC_ASSERT_PHRASE;
423 break;
424 }
425 goto repeat;
426 default:
427 goto repeat;
428 }
429 break;
430
431 default:
432 exit(1);
433 }
434fini:
435
436 if (suppress_type_lookup > 0)
437 --suppress_type_lookup;
438
439 /*
440 * __attribute__() can be placed immediately after the 'struct' keyword.
441 * e.g.) struct __attribute__((__packed__)) foo { ... };
442 */
443 if (token != ATTRIBUTE_PHRASE && dont_want_brace_phrase > 0)
444 --dont_want_brace_phrase;
445
446 yylval = &next_node->next;
447
448 return token;
449}