jcs's openbsd hax
openbsd
1/* $OpenBSD: run.c,v 1.89 2025/02/05 20:32:56 millert Exp $ */
2/****************************************************************
3Copyright (C) Lucent Technologies 1997
4All Rights Reserved
5
6Permission to use, copy, modify, and distribute this software and
7its documentation for any purpose and without fee is hereby
8granted, provided that the above copyright notice appear in all
9copies and that both that the copyright notice and this
10permission notice and warranty disclaimer appear in supporting
11documentation, and that the name Lucent Technologies or any of
12its entities not be used in advertising or publicity pertaining
13to distribution of the software without specific, written prior
14permission.
15
16LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23THIS SOFTWARE.
24****************************************************************/
25
26#define DEBUG
27#include <stdio.h>
28#include <ctype.h>
29#include <errno.h>
30#include <wctype.h>
31#include <fcntl.h>
32#include <setjmp.h>
33#include <limits.h>
34#include <math.h>
35#include <string.h>
36#include <stdlib.h>
37#include <time.h>
38#include <sys/types.h>
39#include <sys/stat.h>
40#include <sys/wait.h>
41#include "awk.h"
42#include "awkgram.tab.h"
43
44
/* file-local helpers that are used before their definitions */
static void stdinit(void);
static void flush_all(void);
static char *wide_char_to_byte_str(int rune, size_t *outlen);

/*
 * tempfree(x): give x back through tfree() when istemp(x) holds,
 * otherwise do nothing.  The macro form is the one compiled in; the
 * function form additionally warns about a Cell whose csub is out of
 * the CUNK..CFREE range.  Note the macro evaluates x more than once.
 */
#if 1
#define tempfree(x) \
	do { \
		if (istemp(x)) \
			tfree(x); \
	} while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p)
{
	if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE))
		WARNING("bad csub %d in Cell %d %s",
			p->csub, p->ctype, p->sval);
	if (istemp(p))
		tfree(p);
}
#endif
61
62/* do we really need these? */
63/* #ifdef _NFILE */
64/* #ifndef FOPEN_MAX */
65/* #define FOPEN_MAX _NFILE */
66/* #endif */
67/* #endif */
68/* */
69/* #ifndef FOPEN_MAX */
70/* #define FOPEN_MAX 40 */ /* max number of open files */
71/* #endif */
72/* */
73/* #ifndef RAND_MAX */
74/* #define RAND_MAX 32767 */ /* all that ansi guarantees */
75/* #endif */
76
77jmp_buf env;
78extern int pairstack[];
79extern Awkfloat srand_seed;
80
81Node *winner = NULL; /* root of parse tree */
82Cell *tmps; /* free temporary cells for execution */
83
84static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
85Cell *True = &truecell;
86static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
87Cell *False = &falsecell;
88static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
89Cell *jbreak = &breakcell;
90static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
91Cell *jcont = &contcell;
92static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
93Cell *jnext = &nextcell;
94static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
95Cell *jnextfile = &nextfilecell;
96static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
97Cell *jexit = &exitcell;
98static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
99Cell *jret = &retcell;
100static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
101
102Node *curnode = NULL; /* the node being executed, for debugging */
103
104/* buffer memory management */
/*
 * adjbuf: make sure a managed buffer holds at least minlen bytes.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum; a nonzero value rounds minlen up to
 *          the next multiple of it
 * pbptr:   address of movable pointer into buffer, or 0 if none; it is
 *          re-based onto the new buffer when the buffer is reallocated
 * whatrtn: name of the calling routine if failure should cause fatal error
 *
 * return 0 for realloc failure, !=0 for success
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	char *grown;
	int excess, offset;

	if (minlen <= *psiz)
		return 1;		/* already large enough */

	excess = quantum ? minlen % quantum : 0;
	offset = pbptr ? *pbptr - *pbuf : 0;
	if (excess != 0)
		minlen += quantum - excess;	/* round up to next multiple of quantum */

	grown = (char *) realloc(*pbuf, minlen);
	DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)grown);
	if (grown == NULL) {
		if (whatrtn)
			FATAL("out of memory in %s", whatrtn);
		return 0;
	}

	*pbuf = grown;
	*psiz = minlen;
	if (pbptr)
		*pbptr = grown + offset;
	return 1;
}
138
139void run(Node *a) /* execution of parse tree starts here */
140{
141
142 stdinit();
143 execute(a);
144 closeall();
145}
146
147Cell *execute(Node *u) /* execute a node of the parse tree */
148{
149 Cell *(*proc)(Node **, int);
150 Cell *x;
151 Node *a;
152
153 if (u == NULL)
154 return(True);
155 for (a = u; ; a = a->nnext) {
156 curnode = a;
157 if (isvalue(a)) {
158 x = (Cell *) (a->narg[0]);
159 if (isfld(x) && !donefld)
160 fldbld();
161 else if (isrec(x) && !donerec)
162 recbld();
163 return(x);
164 }
165 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */
166 FATAL("illegal statement");
167 proc = proctab[a->nobj-FIRSTTOKEN];
168 x = (*proc)(a->narg, a->nobj);
169 if (isfld(x) && !donefld)
170 fldbld();
171 else if (isrec(x) && !donerec)
172 recbld();
173 if (isexpr(a))
174 return(x);
175 if (isjump(x))
176 return(x);
177 if (a->nnext == NULL)
178 return(x);
179 tempfree(x);
180 }
181}
182
183
184Cell *program(Node **a, int n) /* execute an awk program */
185{ /* a[0] = BEGIN, a[1] = body, a[2] = END */
186 Cell *x;
187
188 if (setjmp(env) != 0)
189 goto ex;
190 if (a[0]) { /* BEGIN */
191 x = execute(a[0]);
192 if (isexit(x))
193 return(True);
194 if (isjump(x))
195 FATAL("illegal break, continue, next or nextfile from BEGIN");
196 tempfree(x);
197 }
198 if (a[1] || a[2])
199 while (getrec(&record, &recsize, true) > 0) {
200 x = execute(a[1]);
201 if (isexit(x))
202 break;
203 tempfree(x);
204 }
205 ex:
206 if (setjmp(env) != 0) /* handles exit within END */
207 goto ex1;
208 if (a[2]) { /* END */
209 x = execute(a[2]);
210 if (isbreak(x) || isnext(x) || iscont(x))
211 FATAL("illegal break, continue, next or nextfile from END");
212 tempfree(x);
213 }
214 ex1:
215 return(True);
216}
217
218struct Frame { /* stack frame for awk function calls */
219 int nargs; /* number of arguments in this call */
220 Cell *fcncell; /* pointer to Cell for function */
221 Cell **args; /* pointer to array of arguments after execute */
222 Cell *retval; /* return value */
223};
224
225#define NARGS 50 /* max args in a call */
226
227struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */
228int nframe = 0; /* number of frames allocated */
229struct Frame *frp = NULL; /* frame pointer. bottom level unused */
230
231Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
232{
233 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
234 int i, ncall, ndef;
235 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
236 Node *x;
237 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */
238 Cell *y, *z, *fcn;
239 char *s;
240
241 fcn = execute(a[0]); /* the function itself */
242 s = fcn->nval;
243 if (!isfcn(fcn))
244 FATAL("calling undefined function %s", s);
245 if (frame == NULL) {
246 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
247 if (frame == NULL)
248 FATAL("out of space for stack frames calling %s", s);
249 }
250 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */
251 ncall++;
252 ndef = (int) fcn->fval; /* args in defn */
253 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
254 if (ncall > ndef)
255 WARNING("function %s called with %d args, uses only %d",
256 s, ncall, ndef);
257 if (ncall + ndef > NARGS)
258 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
259 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */
260 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
261 y = execute(x);
262 oargs[i] = y;
263 DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
264 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
265 if (isfcn(y))
266 FATAL("can't use function %s as argument in %s", y->nval, s);
267 if (isarr(y))
268 args[i] = y; /* arrays by ref */
269 else
270 args[i] = copycell(y);
271 tempfree(y);
272 }
273 for ( ; i < ndef; i++) { /* add null args for ones not provided */
274 args[i] = gettemp();
275 *args[i] = newcopycell;
276 }
277 frp++; /* now ok to up frame */
278 if (frp >= frame + nframe) {
279 int dfp = frp - frame; /* old index */
280 frame = (struct Frame *) reallocarray(frame, (nframe += 100), sizeof(*frame));
281 if (frame == NULL)
282 FATAL("out of space for stack frames in %s", s);
283 frp = frame + dfp;
284 }
285 frp->fcncell = fcn;
286 frp->args = args;
287 frp->nargs = ndef; /* number defined with (excess are locals) */
288 frp->retval = gettemp();
289
290 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
291 y = execute((Node *)(fcn->sval)); /* execute body */
292 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));
293
294 for (i = 0; i < ndef; i++) {
295 Cell *t = frp->args[i];
296 if (isarr(t)) {
297 if (t->csub == CCOPY) {
298 if (i >= ncall) {
299 freesymtab(t);
300 t->csub = CTEMP;
301 tempfree(t);
302 } else {
303 oargs[i]->tval = t->tval;
304 oargs[i]->tval &= ~(STR|NUM|DONTFREE);
305 oargs[i]->sval = t->sval;
306 tempfree(t);
307 }
308 }
309 } else if (t != y) { /* kludge to prevent freeing twice */
310 t->csub = CTEMP;
311 tempfree(t);
312 } else if (t == y && t->csub == CCOPY) {
313 t->csub = CTEMP;
314 tempfree(t);
315 freed = 1;
316 }
317 }
318 tempfree(fcn);
319 if (isexit(y) || isnext(y))
320 return y;
321 if (freed == 0) {
322 tempfree(y); /* don't free twice! */
323 }
324 z = frp->retval; /* return value */
325 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
326 frp--;
327 return(z);
328}
329
330Cell *copycell(Cell *x) /* make a copy of a cell in a temp */
331{
332 Cell *y;
333
334 /* copy is not constant or field */
335
336 y = gettemp();
337 y->tval = x->tval & ~(CON|FLD|REC);
338 y->csub = CCOPY; /* prevents freeing until call is over */
339 y->nval = x->nval; /* BUG? */
340 if (isstr(x) /* || x->ctype == OCELL */) {
341 y->sval = tostring(x->sval);
342 y->tval &= ~DONTFREE;
343 } else
344 y->tval |= DONTFREE;
345 y->fval = x->fval;
346 return y;
347}
348
349Cell *arg(Node **a, int n) /* nth argument of a function */
350{
351
352 n = ptoi(a[0]); /* argument number, counting from 0 */
353 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
354 if (n+1 > frp->nargs)
355 FATAL("argument #%d of function %s was not supplied",
356 n+1, frp->fcncell->nval);
357 return frp->args[n];
358}
359
360Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
361{
362 Cell *y;
363
364 switch (n) {
365 case EXIT:
366 if (a[0] != NULL) {
367 y = execute(a[0]);
368 errorflag = (int) getfval(y);
369 tempfree(y);
370 }
371 longjmp(env, 1);
372 case RETURN:
373 if (a[0] != NULL) {
374 y = execute(a[0]);
375 if ((y->tval & (STR|NUM)) == (STR|NUM)) {
376 setsval(frp->retval, getsval(y));
377 frp->retval->fval = getfval(y);
378 frp->retval->tval |= NUM;
379 }
380 else if (y->tval & STR)
381 setsval(frp->retval, getsval(y));
382 else if (y->tval & NUM)
383 setfval(frp->retval, getfval(y));
384 else /* can't happen */
385 FATAL("bad type variable %d", y->tval);
386 tempfree(y);
387 }
388 return(jret);
389 case NEXT:
390 return(jnext);
391 case NEXTFILE:
392 nextfile();
393 return(jnextfile);
394 case BREAK:
395 return(jbreak);
396 case CONTINUE:
397 return(jcont);
398 default: /* can't happen */
399 FATAL("illegal jump type %d", n);
400 }
401 return 0; /* not reached */
402}
403
404Cell *awkgetline(Node **a, int n) /* get next line from specific input */
405{ /* a[0] is variable, a[1] is operator, a[2] is filename */
406 Cell *r, *x;
407 extern Cell **fldtab;
408 FILE *fp;
409 char *buf;
410 int bufsize = recsize;
411 int mode;
412 bool newflag;
413 double result;
414
415 if ((buf = (char *) malloc(bufsize)) == NULL)
416 FATAL("out of memory in getline");
417
418 fflush(stdout); /* in case someone is waiting for a prompt */
419 r = gettemp();
420 if (a[1] != NULL) { /* getline < file */
421 x = execute(a[2]); /* filename */
422 mode = ptoi(a[1]);
423 if (mode == '|') /* input pipe */
424 mode = LE; /* arbitrary flag */
425 fp = openfile(mode, getsval(x), &newflag);
426 tempfree(x);
427 if (fp == NULL)
428 n = -1;
429 else
430 n = readrec(&buf, &bufsize, fp, newflag);
431 if (n <= 0) {
432 ;
433 } else if (a[0] != NULL) { /* getline var <file */
434 x = execute(a[0]);
435 setsval(x, buf);
436 if (is_number(x->sval, & result)) {
437 x->fval = result;
438 x->tval |= NUM;
439 }
440 tempfree(x);
441 } else { /* getline <file */
442 setsval(fldtab[0], buf);
443 if (is_number(fldtab[0]->sval, & result)) {
444 fldtab[0]->fval = result;
445 fldtab[0]->tval |= NUM;
446 }
447 }
448 } else { /* bare getline; use current input */
449 if (a[0] == NULL) /* getline */
450 n = getrec(&record, &recsize, true);
451 else { /* getline var */
452 n = getrec(&buf, &bufsize, false);
453 if (n > 0) {
454 x = execute(a[0]);
455 setsval(x, buf);
456 if (is_number(x->sval, & result)) {
457 x->fval = result;
458 x->tval |= NUM;
459 }
460 tempfree(x);
461 }
462 }
463 }
464 setfval(r, (Awkfloat) n);
465 free(buf);
466 return r;
467}
468
469Cell *getnf(Node **a, int n) /* get NF */
470{
471 if (!donefld)
472 fldbld();
473 return (Cell *) a[0];
474}
475
476static char *
477makearraystring(Node *p, const char *func)
478{
479 char *buf;
480 int bufsz = recsize;
481 size_t blen;
482
483 if ((buf = (char *) malloc(bufsz)) == NULL) {
484 FATAL("%s: out of memory", func);
485 }
486
487 blen = 0;
488 buf[blen] = '\0';
489
490 for (; p; p = p->nnext) {
491 Cell *x = execute(p); /* expr */
492 char *s = getsval(x);
493 size_t seplen = strlen(getsval(subseploc));
494 size_t nsub = p->nnext ? seplen : 0;
495 size_t slen = strlen(s);
496 size_t tlen = blen + slen + nsub;
497
498 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
499 FATAL("%s: out of memory %s[%s...]",
500 func, x->nval, buf);
501 }
502 memcpy(buf + blen, s, slen);
503 if (nsub) {
504 memcpy(buf + blen + slen, *SUBSEP, nsub);
505 }
506 buf[tlen] = '\0';
507 blen = tlen;
508 tempfree(x);
509 }
510 return buf;
511}
512
513Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
514{
515 Cell *x, *z;
516 char *buf;
517
518 x = execute(a[0]); /* Cell* for symbol table */
519 buf = makearraystring(a[1], __func__);
520 if (!isarr(x)) {
521 DPRINTF("making %s into an array\n", NN(x->nval));
522 if (freeable(x))
523 xfree(x->sval);
524 x->tval &= ~(STR|NUM|DONTFREE);
525 x->tval |= ARR;
526 x->sval = (char *) makesymtab(NSYMTAB);
527 }
528 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
529 z->ctype = OCELL;
530 z->csub = CVAR;
531 tempfree(x);
532 free(buf);
533 return(z);
534}
535
536Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
537{
538 Cell *x;
539
540 x = execute(a[0]); /* Cell* for symbol table */
541 if (x == symtabloc) {
542 FATAL("cannot delete SYMTAB or its elements");
543 }
544 if (!isarr(x))
545 return True;
546 if (a[1] == NULL) { /* delete the elements, not the table */
547 freesymtab(x);
548 x->tval &= ~STR;
549 x->tval |= ARR;
550 x->sval = (char *) makesymtab(NSYMTAB);
551 } else {
552 char *buf = makearraystring(a[1], __func__);
553 freeelem(x, buf);
554 free(buf);
555 }
556 tempfree(x);
557 return True;
558}
559
560Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
561{
562 Cell *ap, *k;
563 char *buf;
564
565 ap = execute(a[1]); /* array name */
566 if (!isarr(ap)) {
567 DPRINTF("making %s into an array\n", ap->nval);
568 if (freeable(ap))
569 xfree(ap->sval);
570 ap->tval &= ~(STR|NUM|DONTFREE);
571 ap->tval |= ARR;
572 ap->sval = (char *) makesymtab(NSYMTAB);
573 }
574 buf = makearraystring(a[0], __func__);
575 k = lookup(buf, (Array *) ap->sval);
576 tempfree(ap);
577 free(buf);
578 if (k == NULL)
579 return(False);
580 else
581 return(True);
582}
583
584
585/* ======== utf-8 code ========== */
586
587/*
588 * Awk strings can contain ascii, random 8-bit items (eg Latin-1),
589 * or utf-8. u8_isutf tests whether a string starts with a valid
590 * utf-8 sequence, and returns 0 if not (e.g., high bit set).
591 * u8_nextlen returns length of next valid sequence, which is
592 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf.
593 * u8_strlen returns length of string in valid utf-8 sequences
594 * and/or high-bit bytes. Conversion functions go between byte
595 * number and character number.
596 *
597 * In theory, this behaves the same as before for non-utf8 bytes.
598 *
599 * Limited checking! This is a potential security hole.
600 */
601
602/* is s the beginning of a valid utf-8 string? */
603/* return length 1..4 if yes, 0 if no */
604static int u8_isutf(const char *s)
605{
606 int ret;
607 unsigned char c;
608
609 c = s[0];
610 if (c < 128 || awk_mb_cur_max == 1) {
611 ret = 1; /* what if it's 0? */
612 } else if (((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
613 ret = 2; /* 110xxxxx 10xxxxxx */
614 } else if (((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
615 && (s[2] & 0xC0) == 0x80) {
616 ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */
617 } else if (((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
618 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
619 ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
620 } else {
621 ret = 0;
622 }
623 return ret;
624}
625
626/* Convert (prefix of) utf8 string to utf-32 rune. */
627/* Sets *rune to the value, returns the length. */
628/* No error checking: watch out. */
629int u8_rune(int *rune, const char *s)
630{
631 int n, ret;
632 unsigned char c;
633
634 c = s[0];
635 if (c < 128 || awk_mb_cur_max == 1) {
636 *rune = c;
637 return 1;
638 }
639
640 n = strlen(s);
641 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
642 *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */
643 ret = 2;
644 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
645 && (s[2] & 0xC0) == 0x80) {
646 *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
647 /* 1110xxxx 10xxxxxx 10xxxxxx */
648 ret = 3;
649 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
650 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
651 *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
652 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
653 ret = 4;
654 } else {
655 *rune = c;
656 ret = 1;
657 }
658 return ret; /* returns one byte if sequence doesn't look like utf */
659}
660
/*
 * u8_nextlen: return length of next sequence: 1 for ascii or random,
 * 2..4 for valid utf8.  Never returns 0.
 */
int u8_nextlen(const char *s)
{
	int len = u8_isutf(s);

	return len != 0 ? len : 1;
}
671
672/* return number of utf characters or single non-utf bytes */
673static int u8_strlen(const char *s)
674{
675 int i, len, n, totlen;
676 unsigned char c;
677
678 n = strlen(s);
679 totlen = 0;
680 for (i = 0; i < n; i += len) {
681 c = s[i];
682 if (c < 128 || awk_mb_cur_max == 1) {
683 len = 1;
684 } else {
685 len = u8_nextlen(&s[i]);
686 }
687 totlen++;
688 if (i > n)
689 FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i);
690 }
691 return totlen;
692}
693
/*
 * u8_char2byte: convert utf-8 char number in a string to its byte
 * offset.
 *
 * The walk stops at the terminating NUL, so a charnum larger than the
 * number of characters in s yields the byte length of s instead of
 * stepping past the end of the string.
 */
static int u8_char2byte(const char *s, int charnum)
{
	int n;
	int bytenum = 0;

	while (charnum > 0 && *s != '\0') {
		n = u8_nextlen(s);
		s += n;
		bytenum += n;
		charnum--;
	}
	return bytenum;
}
708
/*
 * u8_byte2char: convert byte offset in s to utf-8 char number that
 * starts there.  The result is origin 1: byte offset 0 maps to
 * character 1.  Returns -1 when bytenum lies beyond the end of s.
 */
static int u8_byte2char(const char *s, int bytenum)
{
	int slen = strlen(s);
	int pos = 0;
	int nchars = 0;	/* BUG: what origin? */
			/* should be 0 to match start==0 which means no match */

	if (bytenum > slen)
		return -1;	/* ??? */

	while (pos <= bytenum) {
		pos += u8_nextlen(s+pos);
		nchars++;
	}
	return nchars;
}
726
/* runetochar() adapted from rune.c in the Plan 9 distribution */

enum
{
	Runeerror	= 128,		/* from somewhere else */
	Runemax		= 0x10FFFF,

	/* number of payload bits in the lead byte of an N-byte sequence */
	Bit1	= 7,
	Bitx	= 6,			/* payload bits in a continuation byte */
	Bit2	= 5,
	Bit3	= 4,
	Bit4	= 3,
	Bit5	= 2,

	/* tag bits of each kind of byte */
	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
	T5	= ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */

	/* largest value that fits in an N-byte sequence */
	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0000 0000 0111 1111 */
	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0000 0000 0111 1111 1111 */
	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 0000 0000 1111 1111 1111 1111 */
	Rune4	= (1<<(Bit4+3*Bitx))-1,		/* 0001 1111 1111 1111 1111 1111 */

	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */

};

/*
 * runetochar: store the encoding of rune c at str and return the number
 * of bytes written (1..4).  str must have room for 4 bytes; no
 * terminator is added.
 *
 *	00000-0007F   => 00-7F
 *	00080-007FF   => T2 Tx
 *	00800-0FFFF   => T3 Tx Tx
 *	010000-1FFFFF => T4 Tx Tx Tx
 *
 * A value above Runemax is replaced by Runeerror before the three- and
 * four-byte cases are considered.
 */
int runetochar(char *str, int c)
{
	int len, lead, i;

	if (c <= Rune1) {
		str[0] = c;
		return 1;
	}

	if (c <= Rune2) {
		len = 2;
		lead = T2;
	} else {
		if (c > Runemax)
			c = Runeerror;
		if (c <= Rune3) {
			len = 3;
			lead = T3;
		} else {
			len = 4;
			lead = T4;
		}
	}

	/* lead byte carries the topmost bits, each later byte the next Bitx bits */
	str[0] = lead | (c >> (len-1)*Bitx);
	for (i = 1; i < len; i++)
		str[i] = Tx | ((c >> (len-1-i)*Bitx) & Maskx);
	return len;
}
790
791
792/* ========== end of utf8 code =========== */
793
794
795
796Cell *matchop(Node **a, int n) /* ~ and match() */
797{
798 Cell *x, *y, *z;
799 char *s, *t;
800 int i;
801 int cstart, cpatlen, len;
802 fa *pfa;
803 int (*mf)(fa *, const char *) = match, mode = 0;
804
805 if (n == MATCHFCN) {
806 mf = pmatch;
807 mode = 1;
808 }
809 x = execute(a[1]); /* a[1] = target text */
810 s = getsval(x);
811 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */
812 i = (*mf)((fa *) a[2], s);
813 else {
814 y = execute(a[2]); /* a[2] = regular expr */
815 t = getsval(y);
816 pfa = makedfa(t, mode);
817 i = (*mf)(pfa, s);
818 tempfree(y);
819 }
820 z = x;
821 if (n == MATCHFCN) {
822 int start = patbeg - s + 1; /* origin 1 */
823 if (patlen < 0) {
824 start = 0; /* not found */
825 } else {
826 cstart = u8_byte2char(s, start-1);
827 cpatlen = 0;
828 for (i = 0; i < patlen; i += len) {
829 len = u8_nextlen(patbeg+i);
830 cpatlen++;
831 }
832
833 start = cstart;
834 patlen = cpatlen;
835 }
836
837 setfval(rstartloc, (Awkfloat) start);
838 setfval(rlengthloc, (Awkfloat) patlen);
839 x = gettemp();
840 x->tval = NUM;
841 x->fval = start;
842 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
843 x = True;
844 else
845 x = False;
846
847 tempfree(z);
848 return x;
849}
850
851
852Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */
853{
854 Cell *x, *y;
855 int i;
856
857 x = execute(a[0]);
858 i = istrue(x);
859 tempfree(x);
860 switch (n) {
861 case BOR:
862 if (i) return(True);
863 y = execute(a[1]);
864 i = istrue(y);
865 tempfree(y);
866 if (i) return(True);
867 else return(False);
868 case AND:
869 if ( !i ) return(False);
870 y = execute(a[1]);
871 i = istrue(y);
872 tempfree(y);
873 if (i) return(True);
874 else return(False);
875 case NOT:
876 if (i) return(False);
877 else return(True);
878 default: /* can't happen */
879 FATAL("unknown boolean operator %d", n);
880 }
881 return 0; /*NOTREACHED*/
882}
883
884Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
885{
886 int i;
887 Cell *x, *y;
888 Awkfloat j;
889 bool x_is_nan, y_is_nan;
890
891 x = execute(a[0]);
892 y = execute(a[1]);
893 x_is_nan = isnan(x->fval);
894 y_is_nan = isnan(y->fval);
895 if (x->tval&NUM && y->tval&NUM) {
896 if ((x_is_nan || y_is_nan) && n != NE)
897 return(False);
898 j = x->fval - y->fval;
899 i = j<0? -1: (j>0? 1: 0);
900 } else {
901 i = strcmp(getsval(x), getsval(y));
902 }
903 tempfree(x);
904 tempfree(y);
905 switch (n) {
906 case LT: if (i<0) return(True);
907 else return(False);
908 case LE: if (i<=0) return(True);
909 else return(False);
910 case NE: if (x_is_nan && y_is_nan) return(True);
911 else if (i!=0) return(True);
912 else return(False);
913 case EQ: if (i == 0) return(True);
914 else return(False);
915 case GE: if (i>=0) return(True);
916 else return(False);
917 case GT: if (i>0) return(True);
918 else return(False);
919 default: /* can't happen */
920 FATAL("unknown relational operator %d", n);
921 }
922 return 0; /*NOTREACHED*/
923}
924
925void tfree(Cell *a) /* free a tempcell */
926{
927 if (freeable(a)) {
928 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
929 xfree(a->sval);
930 }
931 if (a == tmps)
932 FATAL("tempcell list is curdled");
933 a->cnext = tmps;
934 tmps = a;
935}
936
937Cell *gettemp(void) /* get a tempcell */
938{ int i;
939 Cell *x;
940
941 if (!tmps) {
942 tmps = (Cell *) calloc(100, sizeof(*tmps));
943 if (!tmps)
944 FATAL("out of space for temporaries");
945 for (i = 1; i < 100; i++)
946 tmps[i-1].cnext = &tmps[i];
947 tmps[i-1].cnext = NULL;
948 }
949 x = tmps;
950 tmps = x->cnext;
951 *x = tempcell;
952 return(x);
953}
954
955Cell *indirect(Node **a, int n) /* $( a[0] ) */
956{
957 Awkfloat val;
958 Cell *x;
959 int m;
960
961 x = execute(a[0]);
962 val = getfval(x); /* freebsd: defend against super large field numbers */
963 if ((Awkfloat)INT_MAX < val)
964 FATAL("trying to access out of range field %s", x->nval);
965 m = (int) val;
966 tempfree(x);
967 x = fieldadr(m);
968 x->ctype = OCELL; /* BUG? why are these needed? */
969 x->csub = CFLD;
970 return(x);
971}
972
973Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
974{
975 int k, m, n;
976 int mb, nb;
977 char *s;
978 int temp;
979 Cell *x, *y, *z = NULL;
980
981 x = execute(a[0]);
982 y = execute(a[1]);
983 if (a[2] != NULL)
984 z = execute(a[2]);
985 s = getsval(x);
986 k = u8_strlen(s) + 1;
987 if (k <= 1) {
988 tempfree(x);
989 tempfree(y);
990 if (a[2] != NULL) {
991 tempfree(z);
992 }
993 x = gettemp();
994 setsval(x, "");
995 return(x);
996 }
997 m = (int) getfval(y);
998 if (m <= 0)
999 m = 1;
1000 else if (m > k)
1001 m = k;
1002 tempfree(y);
1003 if (a[2] != NULL) {
1004 n = (int) getfval(z);
1005 tempfree(z);
1006 } else
1007 n = k - 1;
1008 if (n < 0)
1009 n = 0;
1010 else if (n > k - m)
1011 n = k - m;
1012 /* m is start, n is length from there */
1013 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
1014 y = gettemp();
1015 mb = u8_char2byte(s, m-1); /* byte offset of start char in s */
1016 nb = mb + u8_char2byte(&s[mb], n); /* byte offset of end+1 char in s */
1017
1018 temp = s[nb]; /* with thanks to John Linderman */
1019 s[nb] = '\0';
1020 setsval(y, s + mb);
1021 s[nb] = temp;
1022 tempfree(x);
1023 return(y);
1024}
1025
1026Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
1027{
1028 Cell *x, *y, *z;
1029 char *s1, *s2, *p1, *p2, *q;
1030 Awkfloat v = 0.0;
1031
1032 x = execute(a[0]);
1033 s1 = getsval(x);
1034 y = execute(a[1]);
1035 s2 = getsval(y);
1036
1037 z = gettemp();
1038 for (p1 = s1; *p1 != '\0'; p1++) {
1039 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
1040 continue;
1041 if (*p2 == '\0') {
1042 /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */
1043
1044 /* should be a function: used in match() as well */
1045 int i, len;
1046 v = 0;
1047 for (i = 0; i < p1-s1+1; i += len) {
1048 len = u8_nextlen(s1+i);
1049 v++;
1050 }
1051 break;
1052 }
1053 }
1054 tempfree(x);
1055 tempfree(y);
1056 setfval(z, v);
1057 return(z);
1058}
1059
/*
 * has_utf8: return 1 if s contains any utf-8 (2 bytes or more)
 * character, 0 otherwise.
 */
static int has_utf8(char *s)
{
	while (*s != 0) {
		int step = u8_nextlen(s);

		if (step > 1)
			return 1;
		s += step;
	}
	return 0;
}
1071
1072#define MAXNUMSIZE 50
1073
1074int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */
1075{
1076 char *fmt;
1077 char *p, *t;
1078 const char *os;
1079 Cell *x;
1080 int flag = 0, n;
1081 int fmtwd; /* format width */
1082 int fmtsz = recsize;
1083 char *buf = *pbuf;
1084 int bufsize = *pbufsize;
1085#define FMTSZ(a) (fmtsz - ((a) - fmt))
1086#define BUFSZ(a) (bufsize - ((a) - buf))
1087
1088 static bool first = true;
1089 static bool have_a_format = false;
1090
1091 if (first) {
1092 char xbuf[100];
1093
1094 snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
1095 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
1096 first = false;
1097 }
1098
1099 os = s;
1100 p = buf;
1101 if ((fmt = (char *) malloc(fmtsz)) == NULL)
1102 FATAL("out of memory in format()");
1103 while (*s) {
1104 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
1105 if (*s != '%') {
1106 *p++ = *s++;
1107 continue;
1108 }
1109 if (*(s+1) == '%') {
1110 *p++ = '%';
1111 s += 2;
1112 continue;
1113 }
1114 fmtwd = atoi(s+1);
1115 if (fmtwd < 0)
1116 fmtwd = -fmtwd;
1117 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
1118 for (t = fmt; (*t++ = *s) != '\0'; s++) {
1119 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
1120 FATAL("format item %.30s... ran format() out of memory", os);
1121 /* Ignore size specifiers */
1122 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */
1123 t--;
1124 continue;
1125 }
1126 if (isalpha((uschar)*s))
1127 break;
1128 if (*s == '$') {
1129 FATAL("'$' not permitted in awk formats");
1130 }
1131 if (*s == '*') {
1132 if (a == NULL) {
1133 FATAL("not enough args in printf(%s)", os);
1134 }
1135 x = execute(a);
1136 a = a->nnext;
1137 snprintf(t - 1, FMTSZ(t - 1),
1138 "%d", fmtwd=(int) getfval(x));
1139 if (fmtwd < 0)
1140 fmtwd = -fmtwd;
1141 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
1142 t = fmt + strlen(fmt);
1143 tempfree(x);
1144 }
1145 }
1146 *t = '\0';
1147 if (fmtwd < 0)
1148 fmtwd = -fmtwd;
1149 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
1150 switch (*s) {
1151 case 'a': case 'A':
1152 if (have_a_format)
1153 flag = *s;
1154 else
1155 flag = 'f';
1156 break;
1157 case 'f': case 'e': case 'g': case 'E': case 'G':
1158 flag = 'f';
1159 break;
1160 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
1161 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
1162 *(t-1) = 'j';
1163 *t = *s;
1164 *++t = '\0';
1165 break;
1166 case 's':
1167 flag = 's';
1168 break;
1169 case 'c':
1170 flag = 'c';
1171 break;
1172 default:
1173 WARNING("weird printf conversion %s", fmt);
1174 flag = '?';
1175 break;
1176 }
1177 if (a == NULL)
1178 FATAL("not enough args in printf(%s)", os);
1179 x = execute(a);
1180 a = a->nnext;
1181 n = MAXNUMSIZE;
1182 if (fmtwd > n)
1183 n = fmtwd;
1184 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
1185 switch (flag) {
1186 case '?':
1187 snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */
1188 t = getsval(x);
1189 n = strlen(t);
1190 if (fmtwd > n)
1191 n = fmtwd;
1192 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
1193 p += strlen(p);
1194 snprintf(p, BUFSZ(p), "%s", t);
1195 break;
1196 case 'a':
1197 case 'A':
1198 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
1199 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
1200 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
1201
1202 case 's': {
1203 t = getsval(x);
1204 n = strlen(t);
1205 /* if simple format or no utf-8 in the string, sprintf works */
1206 if (!has_utf8(t) || strcmp(fmt,"%s") == 0) {
1207 if (fmtwd > n)
1208 n = fmtwd;
1209 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
1210 FATAL("huge string/format (%d chars) in printf %.30s..." \
1211 " ran format() out of memory", n, t);
1212 snprintf(p, BUFSZ(p), fmt, t);
1213 break;
1214 }
1215
1216 /* get here if string has utf-8 chars and fmt is not plain %s */
1217 /* "%-w.ps", where -, w and .p are all optional */
1218 /* '0' before the w is a flag character */
1219 /* fmt points at % */
1220 int ljust = 0, wid = 0, prec = n, pad = 0;
1221 char *f = fmt+1;
1222 if (f[0] == '-') {
1223 ljust = 1;
1224 f++;
1225 }
1226 // flags '0' and '+' are recognized but skipped
1227 if (f[0] == '0') {
1228 f++;
1229 if (f[0] == '+')
1230 f++;
1231 }
1232 if (f[0] == '+') {
1233 f++;
1234 if (f[0] == '0')
1235 f++;
1236 }
1237 if (isdigit((uschar)f[0])) { /* there is a wid */
1238 wid = strtol(f, &f, 10);
1239 }
1240 if (f[0] == '.') { /* there is a .prec */
1241 prec = strtol(++f, &f, 10);
1242 }
1243 if (prec > u8_strlen(t))
1244 prec = u8_strlen(t);
1245 pad = wid>prec ? wid - prec : 0; // has to be >= 0
1246 int i, precb;
1247
1248 if (ljust) { // print prec chars from t, then pad blanks
1249 precb = u8_char2byte(t, prec);
1250 for (i = 0; i < precb; i++) {
1251 //putchar(t[i]);
1252 *p++ = t[i];
1253 }
1254 for (i = 0; i < pad; i++) {
1255 //printf(" ");
1256 *p++ = ' ';
1257 }
1258 } else { // print pad blanks, then prec chars from t
1259 for (i = 0; i < pad; i++) {
1260 //printf(" ");
1261 *p++ = ' ';
1262 }
1263 precb = u8_char2byte(t, prec);
1264 for (i = 0; i < precb; i++) {
1265 //putchar(t[i]);
1266 *p++ = t[i];
1267 }
1268 }
1269 *p = 0;
1270 break;
1271 }
1272
1273 case 'c': {
1274 /*
1275 * If a numeric value is given, awk should just turn
1276 * it into a character and print it:
1277 * BEGIN { printf("%c\n", 65) }
1278 * prints "A".
1279 *
1280 * But what if the numeric value is > 128 and
1281 * represents a valid Unicode code point?!? We do
1282 * our best to convert it back into UTF-8. If we
1283 * can't, we output the encoding of the Unicode
1284 * "invalid character", 0xFFFD.
1285 */
1286 if (isnum(x)) {
1287 int charval = (int) getfval(x);
1288
1289 if (charval != 0) {
1290 if (charval < 128 || awk_mb_cur_max == 1)
1291 snprintf(p, BUFSZ(p), fmt, charval);
1292 else {
1293 // possible unicode character
1294 size_t count;
1295 char *bs = wide_char_to_byte_str(charval, &count);
1296
1297 if (bs == NULL) { // invalid character
1298 // use unicode invalid character, 0xFFFD
1299 static char invalid_char[] = "\357\277\275";
1300 bs = invalid_char;
1301 count = 3;
1302 }
1303 t = bs;
1304 n = count;
1305 goto format_percent_c;
1306 }
1307 } else {
1308 *p++ = '\0'; /* explicit null byte */
1309 *p = '\0'; /* next output will start here */
1310 }
1311 break;
1312 }
1313 t = getsval(x);
1314 n = u8_nextlen(t);
1315 format_percent_c:
1316 if (n < 2) { /* not utf8 */
1317 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
1318 break;
1319 }
1320
1321 // utf8 character, almost same song and dance as for %s
1322 int ljust = 0, wid = 0, prec = n, pad = 0;
1323 char *f = fmt+1;
1324 if (f[0] == '-') {
1325 ljust = 1;
1326 f++;
1327 }
1328 // flags '0' and '+' are recognized but skipped
1329 if (f[0] == '0') {
1330 f++;
1331 if (f[0] == '+')
1332 f++;
1333 }
1334 if (f[0] == '+') {
1335 f++;
1336 if (f[0] == '0')
1337 f++;
1338 }
1339 if (isdigit((uschar)f[0])) { /* there is a wid */
1340 wid = strtol(f, &f, 10);
1341 }
1342 if (f[0] == '.') { /* there is a .prec */
1343 prec = strtol(++f, &f, 10);
1344 }
1345 if (prec > 1) // %c --> only one character
1346 prec = 1;
1347 pad = wid>prec ? wid - prec : 0; // has to be >= 0
1348 int i;
1349
1350 if (ljust) { // print one char from t, then pad blanks
1351 for (i = 0; i < n; i++)
1352 *p++ = t[i];
1353 for (i = 0; i < pad; i++) {
1354 //printf(" ");
1355 *p++ = ' ';
1356 }
1357 } else { // print pad blanks, then prec chars from t
1358 for (i = 0; i < pad; i++) {
1359 //printf(" ");
1360 *p++ = ' ';
1361 }
1362 for (i = 0; i < n; i++)
1363 *p++ = t[i];
1364 }
1365 *p = 0;
1366 break;
1367 }
1368 default:
1369 FATAL("can't happen: bad conversion %c in format()", flag);
1370 }
1371
1372 tempfree(x);
1373 p += strlen(p);
1374 s++;
1375 }
1376 *p = '\0';
1377 free(fmt);
1378 for ( ; a; a = a->nnext) { /* evaluate any remaining args */
1379 x = execute(a);
1380 tempfree(x);
1381 }
1382 *pbuf = buf;
1383 *pbufsize = bufsize;
1384 return p - buf;
1385}
1386
1387Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */
1388{
1389 Cell *x;
1390 Node *y;
1391 char *buf;
1392 int bufsz=3*recsize;
1393
1394 if ((buf = (char *) malloc(bufsz)) == NULL)
1395 FATAL("out of memory in awksprintf");
1396 y = a[0]->nnext;
1397 x = execute(a[0]);
1398 if (format(&buf, &bufsz, getsval(x), y) == -1)
1399 FATAL("sprintf string %.30s... too long. can't happen.", buf);
1400 tempfree(x);
1401 x = gettemp();
1402 x->sval = buf;
1403 x->tval = STR;
1404 return(x);
1405}
1406
1407Cell *awkprintf(Node **a, int n) /* printf */
1408{ /* a[0] is list of args, starting with format string */
1409 /* a[1] is redirection operator, a[2] is redirection file */
1410 FILE *fp;
1411 Cell *x;
1412 Node *y;
1413 char *buf;
1414 int len;
1415 int bufsz=3*recsize;
1416
1417 if ((buf = (char *) malloc(bufsz)) == NULL)
1418 FATAL("out of memory in awkprintf");
1419 y = a[0]->nnext;
1420 x = execute(a[0]);
1421 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1422 FATAL("printf string %.30s... too long. can't happen.", buf);
1423 tempfree(x);
1424 if (a[1] == NULL) {
1425 /* fputs(buf, stdout); */
1426 fwrite(buf, len, 1, stdout);
1427 if (ferror(stdout))
1428 FATAL("write error on stdout");
1429 } else {
1430 fp = redirect(ptoi(a[1]), a[2]);
1431 /* fputs(buf, fp); */
1432 fwrite(buf, len, 1, fp);
1433 fflush(fp);
1434 if (ferror(fp))
1435 FATAL("write error on %s", filename(fp));
1436 }
1437 free(buf);
1438 return(True);
1439}
1440
1441Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
1442{
1443 Awkfloat i, j = 0;
1444 double v;
1445 Cell *x, *y, *z;
1446
1447 x = execute(a[0]);
1448 i = getfval(x);
1449 tempfree(x);
1450 if (n != UMINUS && n != UPLUS) {
1451 y = execute(a[1]);
1452 j = getfval(y);
1453 tempfree(y);
1454 }
1455 z = gettemp();
1456 switch (n) {
1457 case ADD:
1458 i += j;
1459 break;
1460 case MINUS:
1461 i -= j;
1462 break;
1463 case MULT:
1464 i *= j;
1465 break;
1466 case DIVIDE:
1467 if (j == 0)
1468 FATAL("division by zero");
1469 i /= j;
1470 break;
1471 case MOD:
1472 if (j == 0)
1473 FATAL("division by zero in mod");
1474 modf(i/j, &v);
1475 i = i - j * v;
1476 break;
1477 case UMINUS:
1478 i = -i;
1479 break;
1480 case UPLUS: /* handled by getfval(), above */
1481 break;
1482 case POWER:
1483 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
1484 i = ipow(i, (int) j);
1485 else {
1486 errno = 0;
1487 i = errcheck(pow(i, j), "pow");
1488 }
1489 break;
1490 default: /* can't happen */
1491 FATAL("illegal arithmetic operator %d", n);
1492 }
1493 setfval(z, i);
1494 return(z);
1495}
1496
/*
 * ipow - raise x to the integer power n by repeated squaring.
 *
 * Returns 1 for n <= 0.  The exponent bits are consumed from the most
 * significant end, so each step squares the running value and, for a set
 * bit, multiplies by x first: (x * acc) * acc.
 */
double ipow(double x, int n)
{
	double acc = 1;
	int bit;

	if (n <= 0)
		return 1;

	/* find the highest power of two that does not exceed n */
	for (bit = 1; bit <= n / 2; bit *= 2)
		continue;

	for (; bit > 0; bit /= 2) {
		if (n & bit)
			acc = x * acc * acc;
		else
			acc = acc * acc;
	}
	return acc;
}
1509
1510Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */
1511{
1512 Cell *x, *z;
1513 int k;
1514 Awkfloat xf;
1515
1516 x = execute(a[0]);
1517 xf = getfval(x);
1518 k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1519 if (n == PREINCR || n == PREDECR) {
1520 setfval(x, xf + k);
1521 return(x);
1522 }
1523 z = gettemp();
1524 setfval(z, xf);
1525 setfval(x, xf + k);
1526 tempfree(x);
1527 return(z);
1528}
1529
1530Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
1531{ /* this is subtle; don't muck with it. */
1532 Cell *x, *y;
1533 Awkfloat xf, yf;
1534 double v;
1535
1536 y = execute(a[1]);
1537 x = execute(a[0]);
1538 if (n == ASSIGN) { /* ordinary assignment */
1539 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
1540 ; /* self-assignment: leave alone unless it's a field or NF */
1541 else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1542 yf = getfval(y);
1543 setsval(x, getsval(y));
1544 x->fval = yf;
1545 x->tval |= NUM;
1546 }
1547 else if (isstr(y))
1548 setsval(x, getsval(y));
1549 else if (isnum(y))
1550 setfval(x, getfval(y));
1551 else
1552 funnyvar(y, "read value of");
1553 tempfree(y);
1554 return(x);
1555 }
1556 xf = getfval(x);
1557 yf = getfval(y);
1558 switch (n) {
1559 case ADDEQ:
1560 xf += yf;
1561 break;
1562 case SUBEQ:
1563 xf -= yf;
1564 break;
1565 case MULTEQ:
1566 xf *= yf;
1567 break;
1568 case DIVEQ:
1569 if (yf == 0)
1570 FATAL("division by zero in /=");
1571 xf /= yf;
1572 break;
1573 case MODEQ:
1574 if (yf == 0)
1575 FATAL("division by zero in %%=");
1576 modf(xf/yf, &v);
1577 xf = xf - yf * v;
1578 break;
1579 case POWEQ:
1580 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */
1581 xf = ipow(xf, (int) yf);
1582 else {
1583 errno = 0;
1584 xf = errcheck(pow(xf, yf), "pow");
1585 }
1586 break;
1587 default:
1588 FATAL("illegal assignment operator %d", n);
1589 break;
1590 }
1591 tempfree(y);
1592 setfval(x, xf);
1593 return(x);
1594}
1595
1596Cell *cat(Node **a, int q) /* a[0] cat a[1] */
1597{
1598 Cell *x, *y, *z;
1599 int n1, n2;
1600 char *s = NULL;
1601 int ssz = 0;
1602
1603 x = execute(a[0]);
1604 n1 = strlen(getsval(x));
1605 adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
1606 memcpy(s, x->sval, n1);
1607
1608 tempfree(x);
1609
1610 y = execute(a[1]);
1611 n2 = strlen(getsval(y));
1612 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1613 memcpy(s + n1, y->sval, n2);
1614 s[n1 + n2] = '\0';
1615
1616 tempfree(y);
1617
1618 z = gettemp();
1619 z->sval = s;
1620 z->tval = STR;
1621
1622 return(z);
1623}
1624
1625Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
1626{
1627 Cell *x;
1628
1629 if (a[0] == NULL)
1630 x = execute(a[1]);
1631 else {
1632 x = execute(a[0]);
1633 if (istrue(x)) {
1634 tempfree(x);
1635 x = execute(a[1]);
1636 }
1637 }
1638 return x;
1639}
1640
1641Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
1642{
1643 Cell *x;
1644 int pair;
1645
1646 pair = ptoi(a[3]);
1647 if (pairstack[pair] == 0) {
1648 x = execute(a[0]);
1649 if (istrue(x))
1650 pairstack[pair] = 1;
1651 tempfree(x);
1652 }
1653 if (pairstack[pair] == 1) {
1654 x = execute(a[1]);
1655 if (istrue(x))
1656 pairstack[pair] = 0;
1657 tempfree(x);
1658 x = execute(a[2]);
1659 return(x);
1660 }
1661 return(False);
1662}
1663
1664Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
1665{
1666 Cell *x = NULL, *y, *ap;
1667 const char *s, *origs, *t;
1668 const char *fs = NULL;
1669 char *origfs = NULL;
1670 int sep;
1671 char temp, num[50];
1672 int j, n, tempstat, arg3type;
1673 double result;
1674
1675 y = execute(a[0]); /* source string */
1676 origs = s = strdup(getsval(y));
1677 if (s == NULL)
1678 FATAL("out of space in split");
1679 tempfree(y);
1680 arg3type = ptoi(a[3]);
1681 if (a[2] == NULL) { /* BUG: CSV should override implicit fs but not explicit */
1682 fs = getsval(fsloc);
1683 } else if (arg3type == STRING) { /* split(str,arr,"string") */
1684 x = execute(a[2]);
1685 fs = origfs = strdup(getsval(x));
1686 if (fs == NULL)
1687 FATAL("out of space in split");
1688 tempfree(x);
1689 } else if (arg3type == REGEXPR) {
1690 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
1691 } else {
1692 FATAL("illegal type of split");
1693 }
1694 sep = *fs;
1695 ap = execute(a[1]); /* array name */
1696 /* BUG 7/26/22: this appears not to reset array: see C1/asplit */
1697 freesymtab(ap);
1698 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
1699 ap->tval &= ~STR;
1700 ap->tval |= ARR;
1701 ap->sval = (char *) makesymtab(NSYMTAB);
1702
1703 n = 0;
1704 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1705 /* split(s, a, //); have to arrange that it looks like empty sep */
1706 arg3type = 0;
1707 fs = "";
1708 sep = 0;
1709 }
1710 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */
1711 fa *pfa;
1712 if (arg3type == REGEXPR) { /* it's ready already */
1713 pfa = (fa *) a[2];
1714 } else {
1715 pfa = makedfa(fs, 1);
1716 }
1717 if (nematch(pfa,s)) {
1718 tempstat = pfa->initstat;
1719 pfa->initstat = 2;
1720 do {
1721 n++;
1722 snprintf(num, sizeof(num), "%d", n);
1723 temp = *patbeg;
1724 setptr(patbeg, '\0');
1725 if (is_number(s, & result))
1726 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1727 else
1728 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1729 setptr(patbeg, temp);
1730 s = patbeg + patlen;
1731 if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1732 n++;
1733 snprintf(num, sizeof(num), "%d", n);
1734 setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1735 pfa->initstat = tempstat;
1736 goto spdone;
1737 }
1738 } while (nematch(pfa,s));
1739 pfa->initstat = tempstat; /* bwk: has to be here to reset */
1740 /* cf gsub and refldbld */
1741 }
1742 n++;
1743 snprintf(num, sizeof(num), "%d", n);
1744 if (is_number(s, & result))
1745 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1746 else
1747 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1748 spdone:
1749 pfa = NULL;
1750
1751 } else if (a[2] == NULL && CSV) { /* CSV only if no explicit separator */
1752 char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */
1753 for (;;) {
1754 char *fr = newt;
1755 n++;
1756 if (*s == '"' ) { /* start of "..." */
1757 for (s++ ; *s != '\0'; ) {
1758 if (*s == '"' && s[1] != '\0' && s[1] == '"') {
1759 s += 2; /* doubled quote */
1760 *fr++ = '"';
1761 } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) {
1762 s++; /* skip over closing quote */
1763 break;
1764 } else {
1765 *fr++ = *s++;
1766 }
1767 }
1768 *fr++ = 0;
1769 } else { /* unquoted field */
1770 while (*s != ',' && *s != '\0')
1771 *fr++ = *s++;
1772 *fr++ = 0;
1773 }
1774 snprintf(num, sizeof(num), "%d", n);
1775 if (is_number(newt, &result))
1776 setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval);
1777 else
1778 setsymtab(num, newt, 0.0, STR, (Array *) ap->sval);
1779 if (*s++ == '\0')
1780 break;
1781 }
1782 free(newt);
1783
1784 } else if (!CSV && sep == ' ') { /* usual case: split on white space */
1785 for (n = 0; ; ) {
1786#define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
1787 while (ISWS(*s))
1788 s++;
1789 if (*s == '\0')
1790 break;
1791 n++;
1792 t = s;
1793 do
1794 s++;
1795 while (*s != '\0' && !ISWS(*s));
1796 temp = *s;
1797 setptr(s, '\0');
1798 snprintf(num, sizeof(num), "%d", n);
1799 if (is_number(t, & result))
1800 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1801 else
1802 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1803 setptr(s, temp);
1804 if (*s != '\0')
1805 s++;
1806 }
1807
1808 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
1809 for (n = 0; *s != '\0'; s += u8_nextlen(s)) {
1810 char buf[10];
1811 n++;
1812 snprintf(num, sizeof(num), "%d", n);
1813
1814 for (j = 0; j < u8_nextlen(s); j++) {
1815 buf[j] = s[j];
1816 }
1817 buf[j] = '\0';
1818
1819 if (isdigit((uschar)buf[0]))
1820 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1821 else
1822 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1823 }
1824
1825 } else if (*s != '\0') { /* some random single character */
1826 for (;;) {
1827 n++;
1828 t = s;
1829 while (*s != sep && *s != '\0')
1830 s++;
1831 temp = *s;
1832 setptr(s, '\0');
1833 snprintf(num, sizeof(num), "%d", n);
1834 if (is_number(t, & result))
1835 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1836 else
1837 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1838 setptr(s, temp);
1839 if (*s++ == '\0')
1840 break;
1841 }
1842 }
1843 tempfree(ap);
1844 xfree(origs);
1845 xfree(origfs);
1846 x = gettemp();
1847 x->tval = NUM;
1848 x->fval = n;
1849 return(x);
1850}
1851
1852Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */
1853{
1854 Cell *x;
1855
1856 x = execute(a[0]);
1857 if (istrue(x)) {
1858 tempfree(x);
1859 x = execute(a[1]);
1860 } else {
1861 tempfree(x);
1862 x = execute(a[2]);
1863 }
1864 return(x);
1865}
1866
1867Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
1868{
1869 Cell *x;
1870
1871 x = execute(a[0]);
1872 if (istrue(x)) {
1873 tempfree(x);
1874 x = execute(a[1]);
1875 } else if (a[2] != NULL) {
1876 tempfree(x);
1877 x = execute(a[2]);
1878 }
1879 return(x);
1880}
1881
1882Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */
1883{
1884 Cell *x;
1885
1886 for (;;) {
1887 x = execute(a[0]);
1888 if (!istrue(x))
1889 return(x);
1890 tempfree(x);
1891 x = execute(a[1]);
1892 if (isbreak(x)) {
1893 x = True;
1894 return(x);
1895 }
1896 if (isnext(x) || isexit(x) || isret(x))
1897 return(x);
1898 tempfree(x);
1899 }
1900}
1901
1902Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */
1903{
1904 Cell *x;
1905
1906 for (;;) {
1907 x = execute(a[0]);
1908 if (isbreak(x))
1909 return True;
1910 if (isnext(x) || isexit(x) || isret(x))
1911 return(x);
1912 tempfree(x);
1913 x = execute(a[1]);
1914 if (!istrue(x))
1915 return(x);
1916 tempfree(x);
1917 }
1918}
1919
1920Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
1921{
1922 Cell *x;
1923
1924 x = execute(a[0]);
1925 tempfree(x);
1926 for (;;) {
1927 if (a[1]!=NULL) {
1928 x = execute(a[1]);
1929 if (!istrue(x)) return(x);
1930 else tempfree(x);
1931 }
1932 x = execute(a[3]);
1933 if (isbreak(x)) /* turn off break */
1934 return True;
1935 if (isnext(x) || isexit(x) || isret(x))
1936 return(x);
1937 tempfree(x);
1938 x = execute(a[2]);
1939 tempfree(x);
1940 }
1941}
1942
1943Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
1944{
1945 Cell *x, *vp, *arrayp, *cp, *ncp;
1946 Array *tp;
1947 int i;
1948
1949 vp = execute(a[0]);
1950 arrayp = execute(a[1]);
1951 if (!isarr(arrayp)) {
1952 return True;
1953 }
1954 tp = (Array *) arrayp->sval;
1955 tempfree(arrayp);
1956 for (i = 0; i < tp->size; i++) { /* this routine knows too much */
1957 for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1958 setsval(vp, cp->nval);
1959 ncp = cp->cnext;
1960 x = execute(a[2]);
1961 if (isbreak(x)) {
1962 tempfree(vp);
1963 return True;
1964 }
1965 if (isnext(x) || isexit(x) || isret(x)) {
1966 tempfree(vp);
1967 return(x);
1968 }
1969 tempfree(x);
1970 }
1971 }
1972 return True;
1973}
1974
1975static char *nawk_convert(const char *s, int (*fun_c)(int),
1976 wint_t (*fun_wc)(wint_t))
1977{
1978 char *buf = NULL;
1979 char *pbuf = NULL;
1980 const char *ps = NULL;
1981 size_t n = 0;
1982 wchar_t wc;
1983 const size_t sz = awk_mb_cur_max;
1984
1985 if (sz == 1) {
1986 buf = tostring(s);
1987
1988 for (pbuf = buf; *pbuf; pbuf++)
1989 *pbuf = fun_c((uschar)*pbuf);
1990
1991 return buf;
1992 } else {
1993 /* upper/lower character may be shorter/longer */
1994 buf = tostringN(s, strlen(s) * sz + 1);
1995
1996 /* reset internal state */
1997 if (mbtowc(NULL, NULL, 0) == -1 || wctomb(NULL, L'\0') == -1)
1998 FATAL("unable to reset character conversion state");
1999
2000 ps = s;
2001 pbuf = buf;
2002 while (n = mbtowc(&wc, ps, sz),
2003 n > 0 && n != (size_t)-1 && n != (size_t)-2)
2004 {
2005 ps += n;
2006
2007 n = wctomb(pbuf, fun_wc(wc));
2008 if (n == (size_t)-1)
2009 FATAL("illegal wide character %s", s);
2010
2011 pbuf += n;
2012 }
2013
2014 *pbuf = '\0';
2015
2016 if (n)
2017 FATAL("illegal byte sequence %s", s);
2018
2019 return buf;
2020 }
2021}
2022
#ifdef __DJGPP__
/*
 * Local towupper()/towlower() definitions, compiled only when __DJGPP__
 * is defined.  Values below 256 are mapped with the byte-oriented
 * toupper()/tolower(); anything else is returned unchanged.
 */
static wint_t towupper(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? toupper(wc & 0xFF) : wc;
}

static wint_t towlower(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? tolower(wc & 0xFF) : wc;
}
#endif
2040
/* nawk_toupper - newly allocated upper-case copy of s (see nawk_convert). */
static char *nawk_toupper(const char *s)
{
	char *upper = nawk_convert(s, toupper, towupper);

	return upper;
}
2045
/* nawk_tolower - newly allocated lower-case copy of s (see nawk_convert). */
static char *nawk_tolower(const char *s)
{
	char *lower = nawk_convert(s, tolower, towlower);

	return lower;
}
2050
2051Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
2052{
2053 Cell *x, *y;
2054 Awkfloat u = 0;
2055 int t, sz;
2056 Awkfloat tmp;
2057 char *buf, *fmt;
2058 Node *nextarg;
2059 FILE *fp;
2060 int status = 0;
2061 time_t tv;
2062 struct tm *tm, tmbuf;
2063 int estatus = 0;
2064
2065 t = ptoi(a[0]);
2066 x = execute(a[1]);
2067 nextarg = a[1]->nnext;
2068 switch (t) {
2069 case FLENGTH:
2070 if (isarr(x))
2071 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
2072 else
2073 u = u8_strlen(getsval(x));
2074 break;
2075 case FLOG:
2076 errno = 0;
2077 u = errcheck(log(getfval(x)), "log");
2078 break;
2079 case FINT:
2080 modf(getfval(x), &u); break;
2081 case FEXP:
2082 errno = 0;
2083 u = errcheck(exp(getfval(x)), "exp");
2084 break;
2085 case FSQRT:
2086 errno = 0;
2087 u = errcheck(sqrt(getfval(x)), "sqrt");
2088 break;
2089 case FSIN:
2090 u = sin(getfval(x)); break;
2091 case FCOS:
2092 u = cos(getfval(x)); break;
2093 case FATAN:
2094 if (nextarg == NULL) {
2095 WARNING("atan2 requires two arguments; returning 1.0");
2096 u = 1.0;
2097 } else {
2098 y = execute(a[1]->nnext);
2099 u = atan2(getfval(x), getfval(y));
2100 tempfree(y);
2101 nextarg = nextarg->nnext;
2102 }
2103 break;
2104 case FCOMPL:
2105 u = ~((int)getfval(x));
2106 break;
2107 case FAND:
2108 if (nextarg == 0) {
2109 WARNING("and requires two arguments; returning 0");
2110 u = 0;
2111 break;
2112 }
2113 y = execute(a[1]->nnext);
2114 u = ((int)getfval(x)) & ((int)getfval(y));
2115 tempfree(y);
2116 nextarg = nextarg->nnext;
2117 break;
2118 case FFOR:
2119 if (nextarg == 0) {
2120 WARNING("or requires two arguments; returning 0");
2121 u = 0;
2122 break;
2123 }
2124 y = execute(a[1]->nnext);
2125 u = ((int)getfval(x)) | ((int)getfval(y));
2126 tempfree(y);
2127 nextarg = nextarg->nnext;
2128 break;
2129 case FXOR:
2130 if (nextarg == 0) {
2131 WARNING("xor requires two arguments; returning 0");
2132 u = 0;
2133 break;
2134 }
2135 y = execute(a[1]->nnext);
2136 u = ((int)getfval(x)) ^ ((int)getfval(y));
2137 tempfree(y);
2138 nextarg = nextarg->nnext;
2139 break;
2140 case FLSHIFT:
2141 if (nextarg == 0) {
2142 WARNING("lshift requires two arguments; returning 0");
2143 u = 0;
2144 break;
2145 }
2146 y = execute(a[1]->nnext);
2147 u = ((int)getfval(x)) << ((int)getfval(y));
2148 tempfree(y);
2149 nextarg = nextarg->nnext;
2150 break;
2151 case FRSHIFT:
2152 if (nextarg == 0) {
2153 WARNING("rshift requires two arguments; returning 0");
2154 u = 0;
2155 break;
2156 }
2157 y = execute(a[1]->nnext);
2158 u = ((int)getfval(x)) >> ((int)getfval(y));
2159 tempfree(y);
2160 nextarg = nextarg->nnext;
2161 break;
2162 case FSYSTEM:
2163 fflush(stdout); /* in case something is buffered already */
2164 estatus = status = system(getsval(x));
2165 if (status != -1) {
2166 if (WIFEXITED(status)) {
2167 estatus = WEXITSTATUS(status);
2168 } else if (WIFSIGNALED(status)) {
2169 estatus = WTERMSIG(status) + 256;
2170#ifdef WCOREDUMP
2171 if (WCOREDUMP(status))
2172 estatus += 256;
2173#endif
2174 } else /* something else?!? */
2175 estatus = 0;
2176 }
2177 /* else estatus was set to -1 */
2178 u = estatus;
2179 break;
2180 case FRAND:
2181 /* random() returns numbers in [0..2^31-1]
2182 * in order to get a number in [0, 1), divide it by 2^31
2183 */
2184 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
2185 break;
2186 case FSRAND:
2187 if (isrec(x)) { /* no argument provided */
2188 u = time(NULL);
2189 tmp = u;
2190 srandom((unsigned int) u);
2191 } else {
2192 u = getfval(x);
2193 tmp = u;
2194 srandom_deterministic((unsigned int) u);
2195 }
2196 u = srand_seed;
2197 srand_seed = tmp;
2198 break;
2199 case FTOUPPER:
2200 case FTOLOWER:
2201 if (t == FTOUPPER)
2202 buf = nawk_toupper(getsval(x));
2203 else
2204 buf = nawk_tolower(getsval(x));
2205 tempfree(x);
2206 x = gettemp();
2207 setsval(x, buf);
2208 free(buf);
2209 return x;
2210 case FFLUSH:
2211 if (isrec(x) || strlen(getsval(x)) == 0) {
2212 flush_all(); /* fflush() or fflush("") -> all */
2213 u = 0;
2214 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
2215 u = EOF;
2216 else
2217 u = fflush(fp);
2218 break;
2219 case FMKTIME:
2220 memset(&tmbuf, 0, sizeof(tmbuf));
2221 tm = &tmbuf;
2222 t = sscanf(getsval(x), "%d %d %d %d %d %d %d",
2223 &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour,
2224 &tm->tm_min, &tm->tm_sec, &tm->tm_isdst);
2225 switch (t) {
2226 case 6:
2227 tm->tm_isdst = -1; /* let mktime figure it out */
2228 /* FALLTHROUGH */
2229 case 7:
2230 tm->tm_year -= 1900;
2231 tm->tm_mon--;
2232 u = mktime(tm);
2233 break;
2234 default:
2235 u = -1;
2236 break;
2237 }
2238 break;
2239 case FSYSTIME:
2240 u = time((time_t *) 0);
2241 break;
2242 case FSTRFTIME:
2243 /* strftime([format [,timestamp]]) */
2244 if (nextarg) {
2245 y = execute(nextarg);
2246 nextarg = nextarg->nnext;
2247 tv = (time_t) getfval(y);
2248 tempfree(y);
2249 } else
2250 tv = time((time_t *) 0);
2251 tm = localtime(&tv);
2252 if (tm == NULL)
2253 FATAL("bad time %ld", (long)tv);
2254
2255 if (isrec(x)) {
2256 /* format argument not provided, use default */
2257 fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
2258 } else
2259 fmt = tostring(getsval(x));
2260
2261 sz = 32;
2262 buf = NULL;
2263 do {
2264 if ((buf = (char *) reallocarray(buf, 2, sz)) == NULL)
2265 FATAL("out of memory in strftime");
2266 sz *= 2;
2267 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
2268
2269 y = gettemp();
2270 setsval(y, buf);
2271 free(fmt);
2272 free(buf);
2273
2274 return y;
2275 default: /* can't happen */
2276 FATAL("illegal function type %d", t);
2277 break;
2278 }
2279 tempfree(x);
2280 x = gettemp();
2281 setfval(x, u);
2282 if (nextarg != NULL) {
2283 WARNING("warning: function has too many arguments");
2284 for ( ; nextarg; nextarg = nextarg->nnext) {
2285 y = execute(nextarg);
2286 tempfree(y);
2287 }
2288 }
2289 return(x);
2290}
2291
2292Cell *printstat(Node **a, int n) /* print a[0] */
2293{
2294 Node *x;
2295 Cell *y;
2296 FILE *fp;
2297
2298 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */
2299 fp = stdout;
2300 else
2301 fp = redirect(ptoi(a[1]), a[2]);
2302 for (x = a[0]; x != NULL; x = x->nnext) {
2303 y = execute(x);
2304 fputs(getpssval(y), fp);
2305 tempfree(y);
2306 if (x->nnext == NULL)
2307 fputs(getsval(orsloc), fp);
2308 else
2309 fputs(getsval(ofsloc), fp);
2310 }
2311 if (a[1] != NULL)
2312 fflush(fp);
2313 if (ferror(fp))
2314 FATAL("write error on %s", filename(fp));
2315 return(True);
2316}
2317
2318Cell *nullproc(Node **a, int n)
2319{
2320 return 0;
2321}
2322
2323
2324FILE *redirect(int a, Node *b) /* set up all i/o redirections */
2325{
2326 FILE *fp;
2327 Cell *x;
2328 char *fname;
2329
2330 x = execute(b);
2331 fname = getsval(x);
2332 fp = openfile(a, fname, NULL);
2333 if (fp == NULL)
2334 FATAL("can't open file %s", fname);
2335 tempfree(x);
2336 return fp;
2337}
2338
/*
 * Table of the streams known to the I/O routines in this file.
 * stdinit() allocates it and fills slots 0-2 with stdin, stdout and
 * stderr; openfile() fills further slots and grows the table when no
 * free slot is left; closefile() clears a slot again.
 */
2339struct files {
2340 FILE *fp; /* the stream; openfile() treats a NULL fp as a free slot */
2341 const char *fname; /* name the stream was opened under; set to NULL by closefile() */
2342 int mode; /* '|', 'a', 'w' => LE/LT, GT */
 /* (redirection operator used to open it; openfile() stores APPEND as GT) */
2343} *files;
2344
/* Number of slots currently allocated in files[]. */
2345size_t nfiles;
2346
2347static void stdinit(void) /* in case stdin, etc., are not constants */
2348{
2349 nfiles = FOPEN_MAX;
2350 files = (struct files *) calloc(nfiles, sizeof(*files));
2351 if (files == NULL)
2352 FATAL("can't allocate file memory for %zu files", nfiles);
2353 files[0].fp = stdin;
2354 files[0].fname = tostring("/dev/stdin");
2355 files[0].mode = LT;
2356 files[1].fp = stdout;
2357 files[1].fname = tostring("/dev/stdout");
2358 files[1].mode = GT;
2359 files[2].fp = stderr;
2360 files[2].fname = tostring("/dev/stderr");
2361 files[2].mode = GT;
2362}
2363
2364FILE *openfile(int a, const char *us, bool *pnewflag)
2365{
2366 const char *s = us;
2367 size_t i;
2368 int m;
2369 FILE *fp = NULL;
2370 struct stat sbuf;
2371
2372 if (*s == '\0')
2373 FATAL("null file name in print or getline");
2374
2375 for (i = 0; i < nfiles; i++)
2376 if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
2377 (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
2378 a == FFLUSH)) {
2379 if (pnewflag)
2380 *pnewflag = false;
2381 return files[i].fp;
2382 }
2383 if (a == FFLUSH) /* didn't find it, so don't create it! */
2384 return NULL;
2385 for (i = 0; i < nfiles; i++)
2386 if (files[i].fp == NULL)
2387 break;
2388 if (i >= nfiles) {
2389 struct files *nf;
2390 size_t nnf = nfiles + FOPEN_MAX;
2391 nf = (struct files *) reallocarray(files, nnf, sizeof(*nf));
2392 if (nf == NULL)
2393 FATAL("cannot grow files for %s and %zu files", s, nnf);
2394 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
2395 nfiles = nnf;
2396 files = nf;
2397 }
2398
2399 fflush(stdout); /* force a semblance of order */
2400
2401 /* don't try to read or write a directory */
2402 if (a == LT || a == GT || a == APPEND)
2403 if (stat(s, &sbuf) == 0 && S_ISDIR(sbuf.st_mode))
2404 return NULL;
2405
2406 m = a;
2407 if (a == GT) {
2408 fp = fopen(s, "w");
2409 } else if (a == APPEND) {
2410 fp = fopen(s, "a");
2411 m = GT; /* so can mix > and >> */
2412 } else if (a == '|') { /* output pipe */
2413 fp = popen(s, "w");
2414 } else if (a == LE) { /* input pipe */
2415 fp = popen(s, "r");
2416 } else if (a == LT) { /* getline <file */
2417 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */
2418 } else /* can't happen */
2419 FATAL("illegal redirection %d", a);
2420 if (fp != NULL) {
2421 files[i].fname = tostring(s);
2422 files[i].fp = fp;
2423 files[i].mode = m;
2424 if (pnewflag)
2425 *pnewflag = true;
2426 if (fp != stdin && fp != stdout && fp != stderr)
2427 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
2428 }
2429 return fp;
2430}
2431
2432const char *filename(FILE *fp)
2433{
2434 size_t i;
2435
2436 for (i = 0; i < nfiles; i++)
2437 if (fp == files[i].fp)
2438 return files[i].fname;
2439 return "???";
2440}
2441
2442Cell *closefile(Node **a, int n)
2443{
2444 Cell *x;
2445 size_t i;
2446 bool stat;
2447
2448 x = execute(a[0]);
2449 getsval(x);
2450 stat = true;
2451 for (i = 0; i < nfiles; i++) {
2452 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
2453 continue;
2454 if (files[i].mode == GT || files[i].mode == '|')
2455 fflush(files[i].fp);
2456 if (ferror(files[i].fp)) {
2457 if ((files[i].mode == GT && files[i].fp != stderr)
2458 || files[i].mode == '|')
2459 FATAL("write error on %s", files[i].fname);
2460 else
2461 WARNING("i/o error occurred on %s", files[i].fname);
2462 }
2463 if (files[i].fp == stdin || files[i].fp == stdout ||
2464 files[i].fp == stderr)
2465 stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
2466 else if (files[i].mode == '|' || files[i].mode == LE)
2467 stat = pclose(files[i].fp) == -1;
2468 else
2469 stat = fclose(files[i].fp) == EOF;
2470 if (stat)
2471 WARNING("i/o error occurred closing %s", files[i].fname);
2472 xfree(files[i].fname);
2473 files[i].fname = NULL; /* watch out for ref thru this */
2474 files[i].fp = NULL;
2475 break;
2476 }
2477 tempfree(x);
2478 x = gettemp();
2479 setfval(x, (Awkfloat) (stat ? -1 : 0));
2480 return(x);
2481}
2482
2483void closeall(void)
2484{
2485 size_t i;
2486 bool stat = false;
2487
2488 for (i = 0; i < nfiles; i++) {
2489 if (! files[i].fp)
2490 continue;
2491 if (files[i].mode == GT || files[i].mode == '|')
2492 fflush(files[i].fp);
2493 if (ferror(files[i].fp)) {
2494 if ((files[i].mode == GT && files[i].fp != stderr)
2495 || files[i].mode == '|')
2496 FATAL("write error on %s", files[i].fname);
2497 else
2498 WARNING("i/o error occurred on %s", files[i].fname);
2499 }
2500 if (files[i].fp == stdin || files[i].fp == stdout ||
2501 files[i].fp == stderr)
2502 continue;
2503 if (files[i].mode == '|' || files[i].mode == LE)
2504 stat = pclose(files[i].fp) == -1;
2505 else
2506 stat = fclose(files[i].fp) == EOF;
2507 if (stat)
2508 WARNING("i/o error occurred while closing %s", files[i].fname);
2509 }
2510}
2511
2512static void flush_all(void)
2513{
2514 size_t i;
2515
2516 for (i = 0; i < nfiles; i++)
2517 if (files[i].fp)
2518 fflush(files[i].fp);
2519}
2520
2521void backsub(char **pb_ptr, const char **sptr_ptr);
2522
2523Cell *dosub(Node **a, int subop) /* sub and gsub */
2524{
2525 fa *pfa;
2526 int tempstat = 0;
2527 char *repl;
2528 Cell *x;
2529
2530 char *buf = NULL;
2531 char *pb = NULL;
2532 int bufsz = recsize;
2533
2534 const char *r, *s;
2535 const char *start;
2536 const char *noempty = NULL; /* empty match disallowed here */
2537 size_t m = 0; /* match count */
2538 size_t whichm = 0; /* which match to select, 0 = global */
2539 int mtype; /* match type */
2540
2541 if (a[0] == NULL) { /* 0 => a[1] is already-compiled regexpr */
2542 pfa = (fa *) a[1];
2543 } else {
2544 x = execute(a[1]);
2545 pfa = makedfa(getsval(x), 1);
2546 tempfree(x);
2547 }
2548
2549 x = execute(a[2]); /* replacement string */
2550 repl = tostring(getsval(x));
2551 tempfree(x);
2552
2553 switch (subop) {
2554 case SUB:
2555 whichm = 1;
2556 x = execute(a[3]); /* source string */
2557 break;
2558 case GSUB:
2559 whichm = 0;
2560 x = execute(a[3]); /* source string */
2561 break;
2562 default:
2563 FATAL("dosub: unrecognized subop: %d", subop);
2564 }
2565
2566 start = getsval(x);
2567 while (pmatch(pfa, start)) {
2568 if (buf == NULL) {
2569 if ((pb = buf = (char *) malloc(bufsz)) == NULL)
2570 FATAL("out of memory in dosub");
2571 tempstat = pfa->initstat;
2572 pfa->initstat = 2;
2573 }
2574
2575 /* match types */
2576 #define MT_IGNORE 0 /* unselected or invalid */
2577 #define MT_INSERT 1 /* selected, empty */
2578 #define MT_REPLACE 2 /* selected, not empty */
2579
2580 /* an empty match just after replacement is invalid */
2581
2582 if (patbeg == noempty && patlen == 0) {
2583 mtype = MT_IGNORE; /* invalid, not counted */
2584 } else if (whichm == ++m || whichm == 0) {
2585 mtype = patlen ? MT_REPLACE : MT_INSERT;
2586 } else {
2587 mtype = MT_IGNORE; /* unselected, but counted */
2588 }
2589
2590 /* leading text: */
2591 if (patbeg > start) {
2592 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
2593 recsize, &pb, "dosub");
2594 s = start;
2595 while (s < patbeg)
2596 *pb++ = *s++;
2597 }
2598
2599 if (mtype == MT_IGNORE)
2600 goto matching_text; /* skip replacement text */
2601
2602 r = repl;
2603 while (*r != 0) {
2604 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
2605 if (*r == '\\') {
2606 backsub(&pb, &r);
2607 } else if (*r == '&') {
2608 r++;
2609 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
2610 &pb, "dosub");
2611 for (s = patbeg; s < patbeg+patlen; )
2612 *pb++ = *s++;
2613 } else {
2614 *pb++ = *r++;
2615 }
2616 }
2617
2618matching_text:
2619 if (mtype == MT_REPLACE || *patbeg == '\0')
2620 goto next_search; /* skip matching text */
2621
2622 if (patlen == 0)
2623 patlen = u8_nextlen(patbeg);
2624 adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
2625 s = patbeg;
2626 while (s < patbeg + patlen)
2627 *pb++ = *s++;
2628
2629next_search:
2630 start = patbeg + patlen;
2631 if (m == whichm || *patbeg == '\0')
2632 break;
2633 if (mtype == MT_REPLACE)
2634 noempty = start;
2635
2636 #undef MT_IGNORE
2637 #undef MT_INSERT
2638 #undef MT_REPLACE
2639 }
2640
2641 xfree(repl);
2642
2643 if (buf != NULL) {
2644 pfa->initstat = tempstat;
2645
2646 /* trailing text */
2647 adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
2648 while ((*pb++ = *start++) != '\0')
2649 ;
2650
2651 setsval(x, buf);
2652 free(buf);
2653 }
2654
2655 tempfree(x);
2656 x = gettemp();
2657 x->tval = NUM;
2658 x->fval = m;
2659 return x;
2660}
2661
2662Cell *gensub(Node **a, int nnn) /* global selective substitute */
2663 /* XXX incomplete - doesn't support backreferences \0 ... \9 */
2664{
2665 Cell *x, *y, *res, *h;
2666 char *rptr;
2667 const char *sptr;
2668 char *buf, *pb;
2669 const char *t, *q;
2670 fa *pfa;
2671 int mflag, tempstat, num, whichm;
2672 int bufsz = recsize;
2673
2674 if ((buf = (char *) malloc(bufsz)) == NULL)
2675 FATAL("out of memory in gensub");
2676 mflag = 0; /* if mflag == 0, can replace empty string */
2677 num = 0;
2678 x = execute(a[4]); /* source string */
2679 t = getsval(x);
2680 res = copycell(x); /* target string - initially copy of source */
2681 res->csub = CTEMP; /* result values are temporary */
2682 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
2683 pfa = (fa *) a[1]; /* regular expression */
2684 else {
2685 y = execute(a[1]);
2686 pfa = makedfa(getsval(y), 1);
2687 tempfree(y);
2688 }
2689 y = execute(a[2]); /* replacement string */
2690 h = execute(a[3]); /* which matches should be replaced */
2691 sptr = getsval(h);
2692 if (sptr[0] == 'g' || sptr[0] == 'G')
2693 whichm = -1;
2694 else {
2695 /*
2696 * The specified number is index of replacement, starting
2697 * from 1. GNU awk treats index lower than 0 same as
2698 * 1, we do same for compatibility.
2699 */
2700 whichm = (int) getfval(h) - 1;
2701 if (whichm < 0)
2702 whichm = 0;
2703 }
2704 tempfree(h);
2705
2706 if (pmatch(pfa, t)) {
2707 char *sl;
2708
2709 tempstat = pfa->initstat;
2710 pfa->initstat = 2;
2711 pb = buf;
2712 rptr = getsval(y);
2713 /*
2714 * XXX if there are any backreferences in subst string,
2715 * complain now.
2716 */
2717 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
2718 if (strchr("0123456789", sl[1])) {
2719 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
2720 }
2721 }
2722
2723 do {
2724 if (whichm >= 0 && whichm != num) {
2725 num++;
2726 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
2727
2728 /* copy the part of string up to and including
2729 * match to output buffer */
2730 while (t < patbeg + patlen)
2731 *pb++ = *t++;
2732 continue;
2733 }
2734
2735 if (patlen == 0 && *patbeg != 0) { /* matched empty string */
2736 if (mflag == 0) { /* can replace empty */
2737 num++;
2738 sptr = rptr;
2739 while (*sptr != 0) {
2740 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2741 if (*sptr == '\\') {
2742 backsub(&pb, &sptr);
2743 } else if (*sptr == '&') {
2744 sptr++;
2745 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2746 for (q = patbeg; q < patbeg+patlen; )
2747 *pb++ = *q++;
2748 } else
2749 *pb++ = *sptr++;
2750 }
2751 }
2752 if (*t == 0) /* at end */
2753 goto done;
2754 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
2755 *pb++ = *t++;
2756 if (pb > buf + bufsz) /* BUG: not sure of this test */
2757 FATAL("gensub result0 %.30s too big; can't happen", buf);
2758 mflag = 0;
2759 }
2760 else { /* matched nonempty string */
2761 num++;
2762 sptr = t;
2763 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
2764 while (sptr < patbeg)
2765 *pb++ = *sptr++;
2766 sptr = rptr;
2767 while (*sptr != 0) {
2768 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2769 if (*sptr == '\\') {
2770 backsub(&pb, &sptr);
2771 } else if (*sptr == '&') {
2772 sptr++;
2773 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2774 for (q = patbeg; q < patbeg+patlen; )
2775 *pb++ = *q++;
2776 } else
2777 *pb++ = *sptr++;
2778 }
2779 t = patbeg + patlen;
2780 if (patlen == 0 || *t == 0 || *(t-1) == 0)
2781 goto done;
2782 if (pb > buf + bufsz)
2783 FATAL("gensub result1 %.30s too big; can't happen", buf);
2784 mflag = 1;
2785 }
2786 } while (pmatch(pfa,t));
2787 sptr = t;
2788 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
2789 while ((*pb++ = *sptr++) != 0)
2790 ;
2791 done: if (pb > buf + bufsz)
2792 FATAL("gensub result2 %.30s too big; can't happen", buf);
2793 *pb = '\0';
2794 setsval(res, buf);
2795 pfa->initstat = tempstat;
2796 }
2797 tempfree(x);
2798 tempfree(y);
2799 free(buf);
2800 return(res);
2801}
2802
2803void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */
2804{ /* sptr[0] == '\\' */
2805 char *pb = *pb_ptr;
2806 const char *sptr = *sptr_ptr;
2807
2808 if (sptr[1] == '\\') {
2809 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */
2810 *pb++ = '\\';
2811 *pb++ = '&';
2812 sptr += 4;
2813 } else if (sptr[2] == '&') { /* \\& -> \ + matched */
2814 *pb++ = '\\';
2815 sptr += 2;
2816 } else if (do_posix) { /* \\x -> \x */
2817 sptr++;
2818 *pb++ = *sptr++;
2819 } else { /* \\x -> \\x */
2820 *pb++ = *sptr++;
2821 *pb++ = *sptr++;
2822 }
2823 } else if (sptr[1] == '&') { /* literal & */
2824 sptr++;
2825 *pb++ = *sptr++;
2826 } else /* literal \ */
2827 *pb++ = *sptr++;
2828
2829 *pb_ptr = pb;
2830 *sptr_ptr = sptr;
2831}
2832
/*
 * wide_char_to_byte_str - encode a code point as a UTF-8 byte sequence.
 *
 * rune must lie in 0 .. 0x10FFFF, otherwise NULL is returned and *outlen is
 * left untouched.  On success *outlen is the number of encoded bytes (1-4)
 * and the return value points to a static, NUL-terminated buffer that is
 * overwritten by the next call.
 */
static char *wide_char_to_byte_str(int rune, size_t *outlen)
{
	static char buf[5];
	int lead;	/* marker bits of the first byte */
	int ntrail;	/* number of 10xxxxxx continuation bytes */
	int i;

	if (rune < 0 || rune > 0x10FFFF)
		return NULL;

	memset(buf, 0, sizeof(buf));

	if (rune <= 0x0000007F) {
		// 0xxxxxxx
		lead = 0x00;
		ntrail = 0;
	} else if (rune <= 0x000007FF) {
		// 110xxxxx 10xxxxxx
		lead = 0xC0;
		ntrail = 1;
	} else if (rune <= 0x0000FFFF) {
		// 1110xxxx 10xxxxxx 10xxxxxx
		lead = 0xE0;
		ntrail = 2;
	} else {
		// 0x00010000 - 0x10FFFF
		// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		lead = 0xF0;
		ntrail = 3;
	}

	/* first byte carries the topmost payload bits */
	buf[0] = lead | (rune >> (6 * ntrail));

	/* each following byte carries the next six bits, high to low */
	for (i = 1; i <= ntrail; i++)
		buf[i] = 0x80 | ((rune >> (6 * (ntrail - i))) & 0x3F);

	*outlen = ntrail + 1;
	buf[ntrail + 1] = '\0';

	return buf;
}