jcs's openbsd hax
openbsd
1/* $OpenBSD: awkgram.y,v 1.16 2023/09/10 14:59:00 millert Exp $ */
2/****************************************************************
3Copyright (C) Lucent Technologies 1997
4All Rights Reserved
5
6Permission to use, copy, modify, and distribute this software and
7its documentation for any purpose and without fee is hereby
8granted, provided that the above copyright notice appear in all
9copies and that both that the copyright notice and this
10permission notice and warranty disclaimer appear in supporting
11documentation, and that the name Lucent Technologies or any of
12its entities not be used in advertising or publicity pertaining
13to distribution of the software without specific, written prior
14permission.
15
16LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23THIS SOFTWARE.
24****************************************************************/
25
26%{
27#include <stdio.h>
28#include <string.h>
29#include "awk.h"
30
void	checkdup(Node *list, Cell *item);

/* Always returns 1. */
int
yywrap(void)
{
	return 1;
}

/* Statement lists accumulated from the XBEGIN and XEND pa_stat alternatives. */
Node	*beginloc = NULL;
Node	*endloc = NULL;

/* true while the parameter list or body of a function is being parsed */
bool	infunc = false;

/*
 * Loop nesting depth: >= 1 inside while, for or do.
 * A counter rather than a bool because loops can nest.
 */
int	inloop = 0;

/* name of the function currently being defined */
char	*curfname = NULL;

/* parameter list of the function currently being defined */
Node	*arglist = NULL;
40%}
41
/*
 * Semantic value attached to grammar symbols.  The <tag> written on each
 * %token / %type declaration selects which member a symbol uses.
 */
42%union {
/* p: parse-tree nodes (patterns, statements, vars, ...) */
43 Node *p;
/* cp: cells delivered with VAR, IVAR, VARNF, CALL, NUMBER, STRING */
44 Cell *cp;
/* i: integer token values (operators, keywords, punctuation) */
45 int i;
/* s: regular-expression text (REGEXPR, reg_expr) */
46 char *s;
47}
48
/*
 * Token declarations.  Each line's <tag> names the %union member that
 * carries the token's value.  FIRSTTOKEN has to stay the first declared
 * token (LASTTOKEN, declared after the precedence table, is its partner).
 */
49%token <i> FIRSTTOKEN /* must be first */
50%token <p> PROGRAM PASTAT PASTAT2 XBEGIN XEND
51%token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
52%token <i> ARRAY
53%token <i> MATCH NOTMATCH MATCHOP
54%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
55%token <i> AND BOR APPEND EQ GE GT LE LT NE IN
56%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
57%token <i> GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
58%token <i> ADD MINUS MULT DIVIDE MOD
59%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
60%token <i> PRINT PRINTF SPRINTF
61%token <p> ELSE INTEST CONDEXPR
62%token <i> POSTINCR PREINCR POSTDECR PREDECR
63%token <cp> VAR IVAR VARNF CALL NUMBER STRING
64%token <s> REGEXPR
65
/*
 * Value types of the nonterminals: tree nodes (<p>), integers (<i>),
 * regular-expression text (<s>) or cells (<cp>).
 */
66%type <p> pas pattern ppattern plist pplist patlist prarg term re
67%type <p> pa_pat pa_stat pa_stats
68%type <s> reg_expr
69%type <p> simple_stmt opt_simple_stmt stmt stmtlist
70%type <p> var varname funcname varlist
71%type <p> for if else while
72%type <i> do st
73%type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
74%type <i> subop print
75%type <cp> string
76
/*
 * Operator precedence and associativity, lowest binding first.
 *
 * The keyword/operand tokens in the middle of the table (ARG ... '(') are
 * every token that can begin a term.  Giving them a level above the binary
 * pattern operators and below CAT makes yacc shift them after a complete
 * pattern, which is how juxtaposition (`pattern term %prec CAT`) parses as
 * concatenation.
 *
 * GENSUB sits next to GSUB and SUB for that reason: without a precedence
 * every conflict on a GENSUB lookahead was left unresolved (reported by
 * yacc, then shifted by default).  All rules involved have a lower
 * precedence than this level, so the explicit entry selects the same
 * shift and the accepted language is unchanged.
 */
%right	ASGNOP
%right	'?'
%right	':'
%left	BOR
%left	AND
%left	GETLINE
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
%left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%left	GENSUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
%left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
%left	REGEXPR VAR VARNF IVAR WHILE '('
%left	CAT
%left	'+' '-'
%left	'*' '/' '%'
%left	NOT UMINUS UPLUS
%right	POWER
%right	DECR INCR
%left	INDIRECT
%token	LASTTOKEN	/* must be last */
96
97%%
98
/*
 * program: first rule of the grammar (no %start is declared in this file's
 * declaration section, so yacc starts here).
 * On a parse with errorflag == 0 the PROGRAM node built from beginloc, the
 * pattern-action list and endloc is stored in `winner`.
 * On a syntax error the lookahead is discarded, bracecheck() is run and
 * "bailing out" is reported.
 */
99program:
100 pas { if (errorflag==0)
101 winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
102 | error { yyclearin; bracecheck(); SYNTAX("bailing out"); }
103 ;
104
/* and: the AND token followed by any number of NL tokens. */
105and:
106 AND | and NL
107 ;
108
/* bor: the BOR token followed by any number of NL tokens. */
109bor:
110 BOR | bor NL
111 ;
112
/* comma: a ',' followed by any number of NL tokens. */
113comma:
114 ',' | comma NL
115 ;
116
/* do: the DO keyword followed by any number of NL tokens. */
117do:
118 DO | do NL
119 ;
120
/* else: the ELSE keyword followed by any number of NL tokens. */
121else:
122 ELSE | else NL
123 ;
124
/*
 * for: the three loop forms
 *   for (init; cond; step) stmt   -> stat4(FOR, init, notnull(cond), step, body)
 *   for (init; ; step) stmt       -> stat4(FOR, init, NIL, step, body)
 *   for (name in array) stmt      -> stat3(IN, name, makearr(array), body)
 * The mid-rule action {inloop++;} occupies a symbol position of its own,
 * which is why the loop body is $12, $10 and $8 respectively.  inloop is
 * decremented again once the body has been parsed.
 */
125for:
126 FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
127 { --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
128 | FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
129 { --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
130 | FOR '(' varname IN varname rparen {inloop++;} stmt
131 { --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
132 ;
133
/*
 * funcname: the name in a function definition, seen as either a VAR or a
 * CALL token.  setfname() validates the cell and records it in curfname.
 * The actions do not assign $$, so the rule's value is yacc's default ($1).
 */
134funcname:
135 VAR { setfname($1); }
136 | CALL { setfname($1); }
137 ;
138
/* if: `if (pattern)` header; its value is the condition wrapped by notnull(). */
139if:
140 IF '(' pattern rparen { $$ = notnull($3); }
141 ;
142
/* lbrace: a '{' followed by any number of NL tokens. */
143lbrace:
144 '{' | lbrace NL
145 ;
146
/* nl: one or more NL tokens. */
147nl:
148 NL | nl NL
149 ;
150
/* opt_nl: zero or more NL tokens; the value is 0 when none are present. */
151opt_nl:
152 /* empty */ { $$ = 0; }
153 | nl
154 ;
155
/* opt_pst: an optional run of separators (see pst); 0 when absent. */
156opt_pst:
157 /* empty */ { $$ = 0; }
158 | pst
159 ;
160
161
/* opt_simple_stmt: an optional simple_stmt (used in for-loop headers); 0 when absent. */
162opt_simple_stmt:
163 /* empty */ { $$ = 0; }
164 | simple_stmt
165 ;
166
/*
 * pas: the whole program text: pattern-action statements with optional
 * separators before and after.  The value is the pa_stats list, or 0 when
 * the program contains only separators (or nothing).
 */
167pas:
168 opt_pst { $$ = 0; }
169 | opt_pst pa_stats opt_pst { $$ = $2; }
170 ;
171
/* pa_pat: the pattern part of a pattern-action statement, wrapped by notnull(). */
172pa_pat:
173 pattern { $$ = notnull($1); }
174 ;
175
/*
 * pa_stat: one top-level item.
 *   pat                      -> PASTAT with a PRINT of rectonode() as the action
 *   pat { stmts }            -> PASTAT with the given action
 *   pat, pat                 -> pa2stat() range with the PRINT action
 *   pat, pat { stmts }       -> pa2stat() range with the given action
 *   { stmts }                -> PASTAT with a NIL pattern
 *   XBEGIN { stmts }         -> appended to beginloc; the item itself yields 0
 *   XEND { stmts }           -> appended to endloc; the item itself yields 0
 *   FUNC name(args) { body } -> registered through defn(); yields 0
 * In the FUNC form the mid-rule action {infunc = true;} is symbol $6, so the
 * body is $8.  infunc and curfname are reset once the body has been parsed.
 */
176pa_stat:
177 pa_pat { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
178 | pa_pat lbrace stmtlist '}' { $$ = stat2(PASTAT, $1, $3); }
179 | pa_pat ',' opt_nl pa_pat { $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
180 | pa_pat ',' opt_nl pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $4, $6); }
181 | lbrace stmtlist '}' { $$ = stat2(PASTAT, NIL, $2); }
182 | XBEGIN lbrace stmtlist '}'
183 { beginloc = linkum(beginloc, $3); $$ = 0; }
184 | XEND lbrace stmtlist '}'
185 { endloc = linkum(endloc, $3); $$ = 0; }
186 | FUNC funcname '(' varlist rparen {infunc = true;} lbrace stmtlist '}'
187 { infunc = false; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
188 ;
189
/*
 * pa_stats: one or more pa_stat items, optionally separated by pst, chained
 * together with linkum().
 */
190pa_stats:
191 pa_stat
192 | pa_stats opt_pst pa_stat { $$ = linkum($1, $3); }
193 ;
194
/* patlist: one or more patterns separated by commas, chained with linkum(). */
195patlist:
196 pattern
197 | patlist comma pattern { $$ = linkum($1, $3); }
198 ;
199
/*
 * ppattern: the expression form used for unparenthesized print arguments
 * (reached through pplist and prarg).  It mirrors `pattern` but has no
 * relational alternatives (EQ GE GT LE LT NE) and no `| getline` forms.
 * NOTE(review): presumably this keeps `>` and `|` after a print argument
 * list available for the redirections parsed in simple_stmt -- confirm.
 *
 * For MATCHOP with a non-literal right operand: a constant operand
 * (constnode) is compiled with makedfa() right away, the first op3 operand
 * is NIL and the operand's node is freed; otherwise the first operand is
 * (Node *)1 and the expression node itself is kept.
 */
200ppattern:
201 var ASGNOP ppattern { $$ = op2($2, $1, $3); }
202 | ppattern '?' ppattern ':' ppattern %prec '?'
203 { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
204 | ppattern bor ppattern %prec BOR
205 { $$ = op2(BOR, notnull($1), notnull($3)); }
206 | ppattern and ppattern %prec AND
207 { $$ = op2(AND, notnull($1), notnull($3)); }
208 | ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); free($3); }
209 | ppattern MATCHOP ppattern
210 { if (constnode($3)) {
211 $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
212 free($3);
213 } else
214 $$ = op3($2, (Node *)1, $1, $3); }
215 | ppattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
216 | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
217 | ppattern term %prec CAT { $$ = op2(CAT, $1, $2); }
218 | re
219 | term
220 ;
221
/*
 * pattern: the general expression.
 *   - assignment, ?:, || (bor) and && (and); the logical operands and the
 *     ?: condition are wrapped by notnull()
 *   - the six relational operators, whose token value ($2) is the node type
 *   - MATCHOP against a literal /re/ or against an expression; a constant
 *     expression (constnode) is compiled with makedfa() at parse time and
 *     its node freed, anything else is kept as a node with (Node *)1 as the
 *     first op3 operand
 *   - `x in arr` and `(i, j) in arr` membership tests (INTEST)
 *   - `cmd | getline [var]`; when `safe` is set these report a syntax error
 *     and the action leaves $$ unassigned
 *   - juxtaposition of a pattern and a term, which is concatenation (CAT)
 */
222pattern:
223 var ASGNOP pattern { $$ = op2($2, $1, $3); }
224 | pattern '?' pattern ':' pattern %prec '?'
225 { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
226 | pattern bor pattern %prec BOR
227 { $$ = op2(BOR, notnull($1), notnull($3)); }
228 | pattern and pattern %prec AND
229 { $$ = op2(AND, notnull($1), notnull($3)); }
230 | pattern EQ pattern { $$ = op2($2, $1, $3); }
231 | pattern GE pattern { $$ = op2($2, $1, $3); }
232 | pattern GT pattern { $$ = op2($2, $1, $3); }
233 | pattern LE pattern { $$ = op2($2, $1, $3); }
234 | pattern LT pattern { $$ = op2($2, $1, $3); }
235 | pattern NE pattern { $$ = op2($2, $1, $3); }
236 | pattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); free($3); }
237 | pattern MATCHOP pattern
238 { if (constnode($3)) {
239 $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
240 free($3);
241 } else
242 $$ = op3($2, (Node *)1, $1, $3); }
243 | pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
244 | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
245 | pattern '|' GETLINE var {
246 if (safe) SYNTAX("cmd | getline is unsafe");
247 else $$ = op3(GETLINE, $4, itonp($2), $1); }
248 | pattern '|' GETLINE {
249 if (safe) SYNTAX("cmd | getline is unsafe");
250 else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
251 | pattern term %prec CAT { $$ = op2(CAT, $1, $2); }
252 | re
253 | term
254 ;
255
/*
 * plist: a comma-separated list of at least two patterns (the base case is
 * `pattern comma pattern`), chained with linkum().
 */
256plist:
257 pattern comma pattern { $$ = linkum($1, $3); }
258 | plist comma pattern { $$ = linkum($1, $3); }
259 ;
260
/* pplist: one or more ppatterns separated by commas, chained with linkum(). */
261pplist:
262 ppattern
263 | pplist comma ppattern { $$ = linkum($1, $3); }
264 ;
265
/*
 * prarg: the argument list of print/printf: nothing (the value is then
 * rectonode()), a pplist, or a parenthesized plist.
 */
266prarg:
267 /* empty */ { $$ = rectonode(); }
268 | pplist
269 | '(' plist ')' { $$ = $2; }
270 ;
271
/* print: either output keyword; the token value is used as the statement type in simple_stmt. */
272print:
273 PRINT | PRINTF
274 ;
275
/* pst: one or more separators, each an NL or a ';'. */
276pst:
277 NL | ';' | pst NL | pst ';'
278 ;
279
/* rbrace: a '}' followed by any number of NL tokens. */
280rbrace:
281 '}' | rbrace NL
282 ;
283
/*
 * re: a regular expression standing alone as an expression.  It becomes a
 * MATCH of rectonode() against the compiled expression; the pattern text is
 * freed after makedfa() has been called on it.  `NOT re` negates the result.
 */
284re:
285 reg_expr
286 { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); free($1); }
287 | NOT re { $$ = op1(NOT, notnull($2)); }
288 ;
289
/*
 * reg_expr: a /.../ literal.  startreg() is called as a mid-rule action
 * right after the opening '/', before the REGEXPR token is read.  That
 * action is symbol $2, so the pattern text returned is $3.
 */
290reg_expr:
291 '/' {startreg();} REGEXPR '/' { $$ = $3; }
292 ;
293
/* rparen: a ')' followed by any number of NL tokens. */
294rparen:
295 ')' | rparen NL
296 ;
297
/*
 * simple_stmt: statements that carry no terminator of their own.
 *   print/printf args | term    output through a pipe
 *   print/printf args APPEND term
 *   print/printf args GT term
 *   print/printf args           plain output (both redirection slots NIL)
 *   DELETE name[subscripts]     / DELETE name
 *   pattern                     an expression used as a statement
 * The three redirected forms report a syntax error when `safe` is set, and
 * in that case the action leaves $$ unassigned.
 * The `error` alternative discards the lookahead and reports
 * "illegal statement".
 */
298simple_stmt:
299 print prarg '|' term {
300 if (safe) SYNTAX("print | is unsafe");
301 else $$ = stat3($1, $2, itonp($3), $4); }
302 | print prarg APPEND term {
303 if (safe) SYNTAX("print >> is unsafe");
304 else $$ = stat3($1, $2, itonp($3), $4); }
305 | print prarg GT term {
306 if (safe) SYNTAX("print > is unsafe");
307 else $$ = stat3($1, $2, itonp($3), $4); }
308 | print prarg { $$ = stat3($1, $2, NIL, NIL); }
309 | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
310 | DELETE varname { $$ = stat2(DELETE, makearr($2), 0); }
311 | pattern { $$ = exptostat($1); }
312 | error { yyclearin; SYNTAX("illegal statement"); }
313 ;
314
/* st: statement terminator: one or more NL, or a ';' followed by optional NL. */
315st:
316 nl
317 | ';' opt_nl
318 ;
319
/*
 * stmt: one complete statement.
 *   - BREAK / CONTINUE report a syntax error when inloop is 0; the node is
 *     still built afterwards.
 *   - do ... while: inloop is raised and lowered by mid-rule actions around
 *     the body.  Those actions are symbols $2 and $4, so the body is $3 and
 *     the condition is $7.
 *   - NEXT / NEXTFILE report a syntax error when infunc is set; the node is
 *     still built afterwards.
 *   - while: inloop is raised by the mid-rule action ($2) and lowered in the
 *     final action; the body is $3.
 *   - `simple_stmt st` has no action, so its value is yacc's default ($1).
 *   - a lone ';' is an empty statement with value 0.
 */
320stmt:
321 BREAK st { if (!inloop) SYNTAX("break illegal outside of loops");
322 $$ = stat1(BREAK, NIL); }
323 | CONTINUE st { if (!inloop) SYNTAX("continue illegal outside of loops");
324 $$ = stat1(CONTINUE, NIL); }
325 | do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
326 { $$ = stat2(DO, $3, notnull($7)); }
327 | EXIT pattern st { $$ = stat1(EXIT, $2); }
328 | EXIT st { $$ = stat1(EXIT, NIL); }
329 | for
330 | if stmt else stmt { $$ = stat3(IF, $1, $2, $4); }
331 | if stmt { $$ = stat3(IF, $1, $2, NIL); }
332 | lbrace stmtlist rbrace { $$ = $2; }
333 | NEXT st { if (infunc)
334 SYNTAX("next is illegal inside a function");
335 $$ = stat1(NEXT, NIL); }
336 | NEXTFILE st { if (infunc)
337 SYNTAX("nextfile is illegal inside a function");
338 $$ = stat1(NEXTFILE, NIL); }
339 | RETURN pattern st { $$ = stat1(RETURN, $2); }
340 | RETURN st { $$ = stat1(RETURN, NIL); }
341 | simple_stmt st
342 | while {inloop++;} stmt { --inloop; $$ = stat2(WHILE, $1, $3); }
343 | ';' opt_nl { $$ = 0; }
344 ;
345
/* stmtlist: one or more statements chained with linkum(). */
346stmtlist:
347 stmt
348 | stmtlist stmt { $$ = linkum($1, $2); }
349 ;
350
/* subop: SUB or GSUB; the token value becomes the node type in `term`. */
351subop:
352 SUB | GSUB
353 ;
354
/* string: one STRING token, or several adjacent ones joined by catstr(). */
355string:
356 STRING
357 | string STRING { $$ = catstr($1, $2); }
358 ;
359
/*
 * term: operands, arithmetic and built-in function calls.
 *
 * Regular-expression arguments (gensub, match, split, sub/gsub) follow one
 * convention throughout:
 *   - a /re/ literal is compiled with makedfa() in the action and the
 *     pattern text from reg_expr is then released with free();
 *   - an expression that is a constant (constnode) is compiled the same
 *     way from strnode() and its node is freed;
 *   - any other expression is kept as a node and flagged by passing
 *     (Node *)1 instead of NIL as the first operand.
 * The two gensub alternatives that take a /re/ literal now free the
 * pattern text like every other literal-regex alternative; it used to be
 * leaked there.
 *
 * The order of the alternatives is significant to yacc and must be kept.
 */
term:
	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }
	| term '+' term			{ $$ = op2(ADD, $1, $3); }
	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
	| term '*' term			{ $$ = op2(MULT, $1, $3); }
	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
	| term '%' term			{ $$ = op2(MOD, $1, $3); }
	| term POWER term		{ $$ = op2(POWER, $1, $3); }
	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
	| '+' term %prec UMINUS		{ $$ = op1(UPLUS, $2); }
	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
	| CLOSE term			{ $$ = op1(CLOSE, $2); }
	| DECR var			{ $$ = op1(PREDECR, $2); }
	| INCR var			{ $$ = op1(PREINCR, $2); }
	| var DECR			{ $$ = op1(POSTDECR, $1); }
	| var INCR			{ $$ = op1(POSTINCR, $1); }
	/* gensub with three arguments: the target defaults to rectonode() */
	| GENSUB '(' reg_expr comma pattern comma pattern ')'
		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); free($3); }
	| GENSUB '(' pattern comma pattern comma pattern ')'
		{ if (constnode($3)) {
			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
			free($3);
		  } else
			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
		}
	/* gensub with an explicit fourth argument */
	| GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); free($3); }
	| GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
		{ if (constnode($3)) {
			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
			free($3);
		  } else
			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
		}
	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
	| INDEX '(' pattern comma pattern ')'
		{ $$ = op2(INDEX, $3, $5); }
	/* a /re/ literal is rejected here; the node is built only so the parse can go on */
	| INDEX '(' pattern comma reg_expr ')'
		{ SYNTAX("index() doesn't permit regular expressions");
		  $$ = op2(INDEX, $3, (Node*)$5); }
	| '(' pattern ')'		{ $$ = $2; }
	| MATCHFCN '(' pattern comma reg_expr ')'
		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); free($5); }
	| MATCHFCN '(' pattern comma pattern ')'
		{ if (constnode($5)) {
			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
			free($5);
		  } else
			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
	| NUMBER			{ $$ = celltonode($1, CCON); }
	/* split: the last op4 operand records the separator kind (STRING or REGEXPR) */
	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); free($7); }
	| SPLIT '(' pattern comma varname ')'
		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
	| string			{ $$ = celltonode($1, CCON); }
	/* sub/gsub with two arguments: the target defaults to rectonode() */
	| subop '(' reg_expr comma pattern ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); free($3); }
	| subop '(' pattern comma pattern ')'
		{ if (constnode($3)) {
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
			free($3);
		  } else
			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
	/* sub/gsub with an explicit target variable */
	| subop '(' reg_expr comma pattern comma var ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); free($3); }
	| subop '(' pattern comma pattern comma var ')'
		{ if (constnode($3)) {
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
			free($3);
		  } else
			$$ = op4($1, (Node *)1, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, NIL); }
	| var
	;
448
/*
 * var: something that can be assigned to or incremented: a plain name, a
 * subscripted array element (ARRAY node over makearr() of the name), an
 * IVAR cell wrapped in INDIRECT, or INDIRECT applied to a term.
 */
449var:
450 varname
451 | varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); }
452 | IVAR { $$ = op1(INDIRECT, celltonode($1, CVAR)); }
453 | INDIRECT term { $$ = op1(INDIRECT, $2); }
454 ;
455
/*
 * varlist: the parameter list of a function definition (possibly empty).
 * Every alternative also stores the list built so far in the global
 * `arglist`.  checkdup() reports a name that already occurs in the list
 * before the new parameter is appended.
 */
456varlist:
457 /* nothing */ { arglist = $$ = 0; }
458 | VAR { arglist = $$ = celltonode($1,CVAR); }
459 | varlist comma VAR {
460 checkdup($1, $3);
461 arglist = $$ = linkum($1,celltonode($3,CVAR)); }
462 ;
463
/*
 * varname: a bare name: a VAR cell turned into a CVAR node, an ARG token
 * whose integer value is wrapped with itonp(), or the VARNF cell wrapped
 * in a VARNF node.
 */
464varname:
465 VAR { $$ = celltonode($1, CVAR); }
466 | ARG { $$ = op1(ARG, itonp($1)); }
467 | VARNF { $$ = op1(VARNF, (Node *) $1); }
468 ;
469
470
/* while: `while (pattern)` header; its value is the condition wrapped by notnull(). */
471while:
472 WHILE '(' pattern rparen { $$ = notnull($3); }
473 ;
474
475%%
476
477void setfname(Cell *p)
478{
479 if (isarr(p))
480 SYNTAX("%s is an array, not a function", p->nval);
481 else if (isfcn(p))
482 SYNTAX("you can't define function %s more than once", p->nval);
483 curfname = p->nval;
484}
485
486int constnode(Node *p)
487{
488 return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
489}
490
491char *strnode(Node *p)
492{
493 return ((Cell *)(p->narg[0]))->sval;
494}
495
496Node *notnull(Node *n)
497{
498 switch (n->nobj) {
499 case LE: case LT: case EQ: case NE: case GT: case GE:
500 case BOR: case AND: case NOT:
501 return n;
502 default:
503 return op2(NE, n, nullnode);
504 }
505}
506
507void checkdup(Node *vl, Cell *cp) /* check if name already in list */
508{
509 char *s = cp->nval;
510 for ( ; vl; vl = vl->nnext) {
511 if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
512 SYNTAX("duplicate argument %s", s);
513 break;
514 }
515 }
516}