diff --git a/backport-CVE-2021-42373.patch b/backport-CVE-2021-42373.patch deleted file mode 100644 index 2edec1bf7d982117b5e073d6904c1bbd1a00f911..0000000000000000000000000000000000000000 --- a/backport-CVE-2021-42373.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 6dc5bd57af2f5cc6b8c953d2b223d3b012b2400b Mon Sep 17 00:00:00 2001 -From: xiechengliang -Date: Fri, 19 Nov 2021 18:34:10 +0800 -Subject: [PATCH] busybox: fix CVE-2021-42373 - -backport from upstream: -https://git.busybox.net/busybox/commit/?id=4d4fc5ca5ee4faae5dc4237f801d9527a3fb20cc - -Signed-off-by: xiechengliang ---- - miscutils/man.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/miscutils/man.c b/miscutils/man.c -index 722f6641e..d319e8bba 100644 ---- a/miscutils/man.c -+++ b/miscutils/man.c -@@ -324,7 +324,7 @@ int man_main(int argc UNUSED_PARAM, char **argv) - - /* is 1st ARG a SECTION? */ - sec_list = conf_sec_list; -- if (is_section_name(conf_sec_list, *argv)) { -+ if (is_section_name(conf_sec_list, *argv) && argv[1]) { - /* yes */ - sec_list = *argv++; - } --- -2.27.0 - diff --git a/backport-CVE-2021-42374.patch b/backport-CVE-2021-42374.patch deleted file mode 100644 index 546f6add2bb9c66e6efc866c450f1b3bc0ad2780..0000000000000000000000000000000000000000 --- a/backport-CVE-2021-42374.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 479e2e47de5f2a9a3ecedda264976bde6945ce60 Mon Sep 17 00:00:00 2001 -From: jikui -Date: Mon, 22 Nov 2021 10:24:24 +0800 -Subject: [PATCH] busybox: fix CVE-2021-42374 - -backport from upstream: -https://git.busybox.net/busybox/patch/?h=1_34_stable&id=04f052c56ded5ab6a904e3a264a73dc0412b2e78 - -Signed-off-by: jikui ---- - archival/libarchive/decompress_unlzma.c | 5 ++++- - testsuite/unlzma.tests | 10 ++++++---- - 2 files changed, 10 insertions(+), 5 deletions(-) - -diff --git a/archival/libarchive/decompress_unlzma.c b/archival/libarchive/decompress_unlzma.c -index 0744f23..fb5aac8 100644 ---- a/archival/libarchive/decompress_unlzma.c -+++ 
b/archival/libarchive/decompress_unlzma.c -@@ -290,8 +290,11 @@ unpack_lzma_stream(transformer_state_t *xstate) - uint32_t pos; - - pos = buffer_pos - rep0; -- if ((int32_t)pos < 0) -+ if ((int32_t)pos < 0) { - pos += header.dict_size; -+ if ((int32_t)pos < 0) -+ goto bad; -+ } - match_byte = buffer[pos]; - do { - int bit; -diff --git a/testsuite/unlzma.tests b/testsuite/unlzma.tests -index 0e98afe..8c120b1 100755 ---- a/testsuite/unlzma.tests -+++ b/testsuite/unlzma.tests -@@ -8,14 +8,16 @@ - - # Damaged encrypted streams - testing "unlzma (bad archive 1)" \ -- "unlzma <unlzma_issue_1.lzma >/dev/null; echo \$?" \ --"1 -+ "unlzma <unlzma_issue_1.lzma 2>&1 >/dev/null; echo \$?" \ -+"unlzma: corrupted data -+1 - " "" "" - - # Damaged encrypted streams - testing "unlzma (bad archive 2)" \ -- "unlzma <unlzma_issue_2.lzma >/dev/null; echo \$?" \ --"1 -+ "unlzma <unlzma_issue_2.lzma 2>&1 >/dev/null; echo \$?" \ -+"unlzma: corrupted data -+1 - " "" "" - - exit $FAILCOUNT --- -2.25.1 - diff --git a/backport-CVE-2021-42375.patch b/backport-CVE-2021-42375.patch deleted file mode 100644 index 802e4541d39d3a00517049f2fc7ae7a16da8b41c..0000000000000000000000000000000000000000 --- a/backport-CVE-2021-42375.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 9ac1dd9017b2b4acba4734f6f989b88da2ad7616 Mon Sep 17 00:00:00 2001 -From: xiechengliang -Date: Wed, 24 Nov 2021 19:15:25 +0800 -Subject: [PATCH 2/2] ash: parser: Fix VSLENGTH parsing with trailing garbage - -Let's adopt Herbert Xu's patch, not waiting for it to reach dash git: -hush already has a similar fix. 
- -backport from upstream: -https://git.busybox.net/busybox/commit/?id=53a7a9cd8c15d64fcc2278cf8981ba526dfbe0d2 - -Signed-off-by: Denys Vlasenko ---- - shell/ash.c | 9 +++------ - 1 file changed, 3 insertions(+), 6 deletions(-) - -diff --git a/shell/ash.c b/shell/ash.c -index a33ab0626..1ca45f9c1 100644 ---- a/shell/ash.c -+++ b/shell/ash.c -@@ -12635,7 +12635,7 @@ parsesub: { - do { - STPUTC(c, out); - c = pgetc_eatbnl(); -- } while (!subtype && isdigit(c)); -+ } while ((subtype == 0 || subtype == VSLENGTH) && isdigit(c)); - } else if (c != '}') { - /* $[{[#]][}] */ - int cc = c; -@@ -12665,11 +12665,6 @@ parsesub: { - } else - goto badsub; - -- if (c != '}' && subtype == VSLENGTH) { -- /* ${#VAR didn't end with } */ -- goto badsub; -- } -- - if (subtype == 0) { - static const char types[] ALIGN1 = "}-+?="; - /* ${VAR...} but not $VAR or ${#VAR} */ -@@ -12726,6 +12721,8 @@ parsesub: { - #endif - } - } else { -+ if (subtype == VSLENGTH && c != '}') -+ subtype = 0; - badsub: - pungetc(); - } --- -2.27.0 - diff --git a/backport-CVE-2021-42376.patch b/backport-CVE-2021-42376.patch deleted file mode 100644 index de0665c39c5b6c43cadf48865e01afeedceaa326..0000000000000000000000000000000000000000 --- a/backport-CVE-2021-42376.patch +++ /dev/null @@ -1,133 +0,0 @@ -From 251452bc54477ed41da27a1c020a88882aa2eaaf Mon Sep 17 00:00:00 2001 -From: xiechengliang -Date: Sat, 20 Nov 2021 12:01:23 +0800 -Subject: [PATCH 1/2] hush: fix handling of \^C and "^C" - -function old new delta -parse_stream 2238 2252 +14 -encode_string 243 256 +13 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 2/0 up/down: 27/0) Total: 27 bytes - -backport from upstream: -https://git.busybox.net/busybox/commit/?id=1b7a9b68d0e9aa19147d7fda16eb9a6b54156985 -Signed-off-by: Denys Vlasenko ---- - shell/ash_test/ash-misc/control_char3.right | 1 + - shell/ash_test/ash-misc/control_char3.tests | 2 ++ - shell/ash_test/ash-misc/control_char4.right | 1 + - 
shell/ash_test/ash-misc/control_char4.tests | 2 ++ - shell/hush.c | 11 +++++++++++ - shell/hush_test/hush-misc/control_char3.right | 1 + - shell/hush_test/hush-misc/control_char3.tests | 2 ++ - shell/hush_test/hush-misc/control_char4.right | 1 + - shell/hush_test/hush-misc/control_char4.tests | 2 ++ - 9 files changed, 23 insertions(+) - create mode 100644 shell/ash_test/ash-misc/control_char3.right - create mode 100755 shell/ash_test/ash-misc/control_char3.tests - create mode 100644 shell/ash_test/ash-misc/control_char4.right - create mode 100755 shell/ash_test/ash-misc/control_char4.tests - create mode 100644 shell/hush_test/hush-misc/control_char3.right - create mode 100755 shell/hush_test/hush-misc/control_char3.tests - create mode 100644 shell/hush_test/hush-misc/control_char4.right - create mode 100755 shell/hush_test/hush-misc/control_char4.tests - -diff --git a/shell/ash_test/ash-misc/control_char3.right b/shell/ash_test/ash-misc/control_char3.right -new file mode 100644 -index 000000000..283e02cbb ---- /dev/null -+++ b/shell/ash_test/ash-misc/control_char3.right -@@ -0,0 +1 @@ -+SHELL: line 1: : not found -diff --git a/shell/ash_test/ash-misc/control_char3.tests b/shell/ash_test/ash-misc/control_char3.tests -new file mode 100755 -index 000000000..4359db3f3 ---- /dev/null -+++ b/shell/ash_test/ash-misc/control_char3.tests -@@ -0,0 +1,2 @@ -+# (set argv0 to "SHELL" to avoid "/path/to/shell: blah" in error messages) -+$THIS_SH -c '\' SHELL -diff --git a/shell/ash_test/ash-misc/control_char4.right b/shell/ash_test/ash-misc/control_char4.right -new file mode 100644 -index 000000000..2bf18e684 ---- /dev/null -+++ b/shell/ash_test/ash-misc/control_char4.right -@@ -0,0 +1 @@ -+SHELL: line 1: -: not found -diff --git a/shell/ash_test/ash-misc/control_char4.tests b/shell/ash_test/ash-misc/control_char4.tests -new file mode 100755 -index 000000000..48010f154 ---- /dev/null -+++ b/shell/ash_test/ash-misc/control_char4.tests -@@ -0,0 +1,2 @@ -+# (set argv0 to "SHELL" to 
avoid "/path/to/shell: blah" in error messages) -+$THIS_SH -c '"-"' SHELL -diff --git a/shell/hush.c b/shell/hush.c -index 9fead37da..249728b9d 100644 ---- a/shell/hush.c -+++ b/shell/hush.c -@@ -5235,6 +5235,11 @@ static int encode_string(o_string *as_string, - } - #endif - o_addQchr(dest, ch); -+ if (ch == SPECIAL_VAR_SYMBOL) { -+ /* Convert "^C" to corresponding special variable reference */ -+ o_addchr(dest, SPECIAL_VAR_QUOTED_SVS); -+ o_addchr(dest, SPECIAL_VAR_SYMBOL); -+ } - goto again; - #undef as_string - } -@@ -5346,6 +5351,11 @@ static struct pipe *parse_stream(char **pstring, - if (ch == '\n') - continue; /* drop \, get next char */ - nommu_addchr(&ctx.as_string, '\\'); -+ if (ch == SPECIAL_VAR_SYMBOL) { -+ nommu_addchr(&ctx.as_string, ch); -+ /* Convert \^C to corresponding special variable reference */ -+ goto case_SPECIAL_VAR_SYMBOL; -+ } - o_addchr(&ctx.word, '\\'); - if (ch == EOF) { - /* Testcase: eval 'echo Ok\' */ -@@ -5670,6 +5680,7 @@ static struct pipe *parse_stream(char **pstring, - /* Note: nommu_addchr(&ctx.as_string, ch) is already done */ - - switch (ch) { -+ case_SPECIAL_VAR_SYMBOL: - case SPECIAL_VAR_SYMBOL: - /* Convert raw ^C to corresponding special variable reference */ - o_addchr(&ctx.word, SPECIAL_VAR_SYMBOL); -diff --git a/shell/hush_test/hush-misc/control_char3.right b/shell/hush_test/hush-misc/control_char3.right -new file mode 100644 -index 000000000..94b4f8699 ---- /dev/null -+++ b/shell/hush_test/hush-misc/control_char3.right -@@ -0,0 +1 @@ -+hush: can't execute '': No such file or directory -diff --git a/shell/hush_test/hush-misc/control_char3.tests b/shell/hush_test/hush-misc/control_char3.tests -new file mode 100755 -index 000000000..4359db3f3 ---- /dev/null -+++ b/shell/hush_test/hush-misc/control_char3.tests -@@ -0,0 +1,2 @@ -+# (set argv0 to "SHELL" to avoid "/path/to/shell: blah" in error messages) -+$THIS_SH -c '\' SHELL -diff --git a/shell/hush_test/hush-misc/control_char4.right 
b/shell/hush_test/hush-misc/control_char4.right -new file mode 100644 -index 000000000..698e21427 ---- /dev/null -+++ b/shell/hush_test/hush-misc/control_char4.right -@@ -0,0 +1 @@ -+hush: can't execute '-': No such file or directory -diff --git a/shell/hush_test/hush-misc/control_char4.tests b/shell/hush_test/hush-misc/control_char4.tests -new file mode 100755 -index 000000000..48010f154 ---- /dev/null -+++ b/shell/hush_test/hush-misc/control_char4.tests -@@ -0,0 +1,2 @@ -+# (set argv0 to "SHELL" to avoid "/path/to/shell: blah" in error messages) -+$THIS_SH -c '"-"' SHELL --- -2.27.0 - diff --git a/backport-CVE-2021-42377.patch b/backport-CVE-2021-42377.patch deleted file mode 100644 index acf583bfcc945f1327cd4ad39dd461ed5ce4c551..0000000000000000000000000000000000000000 --- a/backport-CVE-2021-42377.patch +++ /dev/null @@ -1,42 +0,0 @@ -From f56e2f2ef9d131b1f62dad4427da1113f9b417c5 Mon Sep 17 00:00:00 2001 -From: jikui -Date: Mon, 22 Nov 2021 16:45:39 +0800 -Subject: [PATCH] busybox: fix CVE-2021-42377 - -backport from upstream: -https://git.busybox.net/busybox/commit/?h=1_34_stable&id=83a4967e50422867f340328d404994553e56b839 - -Signed-off-by: jikui ---- - shell/hush.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/shell/hush.c b/shell/hush.c -index 9fead37..48856f2 100644 ---- a/shell/hush.c -+++ b/shell/hush.c -@@ -3694,9 +3694,10 @@ static void debug_print_tree(struct pipe *pi, int lvl) - - pin = 0; - while (pi) { -- fdprintf(2, "%*spipe %d %sres_word=%s followup=%d %s\n", -+ fdprintf(2, "%*spipe %d #cmds:%d %sres_word=%s followup=%d %s\n", - lvl*2, "", - pin, -+ pi->num_cmds, - (IF_HAS_KEYWORDS(pi->pi_inverted ? "! 
" :) ""), - RES[pi->res_word], - pi->followup, PIPE[pi->followup] -@@ -3839,6 +3840,9 @@ static void done_pipe(struct parse_context *ctx, pipe_style type) - #endif - /* Replace all pipes in ctx with one newly created */ - ctx->list_head = ctx->pipe = pi; -+ /* for case like "cmd && &", do not be tricked by last command -+ * being null - the entire {...} & is NOT null! */ -+ not_null = 1; - } else { - no_conv: - ctx->pipe->followup = type; --- -2.25.1 - diff --git a/backport-CVE-2022-28391.patch b/backport-CVE-2022-28391.patch new file mode 100644 index 0000000000000000000000000000000000000000..0e400c27e3bec52e0d4c97249573add76b417a9e --- /dev/null +++ b/backport-CVE-2022-28391.patch @@ -0,0 +1,87 @@ +From 547745e674728aab32291bd13945d2d134054ffc Mon Sep 17 00:00:00 2001 +From: jikui +Date: Tue, 19 Apr 2022 10:45:22 +0800 +Subject: [PATCH] busybox: fix CVE-2022-28391 + +backport from upstream: +https://git.alpinelinux.org/aports/plain/main/busybox/0001-libbb-sockaddr2str-ensure-only-printable-characters-.patch +https://git.alpinelinux.org/aports/plain/main/busybox/0002-nslookup-sanitize-all-printed-strings-with-printable.patch + +Signed-off-by: jikui +--- + libbb/xconnect.c | 5 +++-- + networking/nslookup.c | 10 +++++----- + 2 files changed, 8 insertions(+), 7 deletions(-) + +diff --git a/libbb/xconnect.c b/libbb/xconnect.c +index 5dd9cfd..264b987 100644 +--- a/libbb/xconnect.c ++++ b/libbb/xconnect.c +@@ -505,12 +505,13 @@ static char* FAST_FUNC sockaddr2str(const struct sockaddr *sa, int flags) + ); + if (rc) + return NULL; ++ /* ensure host contains only printable characters */ + if (flags & IGNORE_PORT) +- return xstrdup(host); ++ return xstrdup(printable_string(host)); + #if ENABLE_FEATURE_IPV6 + if (sa->sa_family == AF_INET6) { + if (strchr(host, ':')) /* heh, it's not a resolved hostname */ +- return xasprintf("[%s]:%s", host, serv); ++ return xasprintf("[%s]:%s", printable_string(host), serv); + /*return xasprintf("%s:%s", host, serv);*/ + /* - fall through 
instead */ + } +diff --git a/networking/nslookup.c b/networking/nslookup.c +index de7b5c0..0ba4adc 100644 +--- a/networking/nslookup.c ++++ b/networking/nslookup.c +@@ -407,7 +407,7 @@ static int parse_reply(const unsigned char *msg, size_t len) + //printf("Unable to uncompress domain: %s\n", strerror(errno)); + return -1; + } +- printf(format, ns_rr_name(rr), dname); ++ printf(format, ns_rr_name(rr), printable_string(dname)); + break; + + case ns_t_mx: +@@ -422,7 +422,7 @@ static int parse_reply(const unsigned char *msg, size_t len) + //printf("Cannot uncompress MX domain: %s\n", strerror(errno)); + return -1; + } +- printf("%s\tmail exchanger = %d %s\n", ns_rr_name(rr), n, dname); ++ printf("%s\tmail exchanger = %d %s\n", ns_rr_name(rr), n, printable_string(dname)); + break; + + case ns_t_txt: +@@ -434,7 +434,7 @@ static int parse_reply(const unsigned char *msg, size_t len) + if (n > 0) { + memset(dname, 0, sizeof(dname)); + memcpy(dname, ns_rr_rdata(rr) + 1, n); +- printf("%s\ttext = \"%s\"\n", ns_rr_name(rr), dname); ++ printf("%s\ttext = \"%s\"\n", ns_rr_name(rr), printable_string(dname)); + } + break; + +@@ -454,7 +454,7 @@ static int parse_reply(const unsigned char *msg, size_t len) + } + + printf("%s\tservice = %u %u %u %s\n", ns_rr_name(rr), +- ns_get16(cp), ns_get16(cp + 2), ns_get16(cp + 4), dname); ++ ns_get16(cp), ns_get16(cp + 2), ns_get16(cp + 4), printable_string(dname)); + break; + + case ns_t_soa: +@@ -483,7 +483,7 @@ static int parse_reply(const unsigned char *msg, size_t len) + return -1; + } + +- printf("\tmail addr = %s\n", dname); ++ printf("\tmail addr = %s\n", printable_string(dname)); + cp += n; + + printf("\tserial = %lu\n", ns_get32(cp)); +-- +2.17.1 + diff --git a/backport-fix-awk-cve.patch b/backport-fix-awk-cve.patch deleted file mode 100644 index 743f57635701a8e1afbff8f364ca65ac8bff1305..0000000000000000000000000000000000000000 --- a/backport-fix-awk-cve.patch +++ /dev/null @@ -1,7363 +0,0 @@ -From 
aec213c228426fbad3cd9d4038dffecaf92947bf Mon Sep 17 00:00:00 2001 -From: Ron Yorston -Date: Wed, 27 Jan 2021 11:19:14 +0000 -Subject: [PATCH 01/61] awk: allow printf('%c') to output NUL, closes 13486 - -Treat the output of printf as binary rather than a null-terminated -string so that NUL characters can be output. - -This is considered to be a GNU extension, though it's also available -in mawk and FreeBSD's awk. - -function old new delta -evaluate 3487 3504 +17 -awk_printf 504 519 +15 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 2/0 up/down: 32/0) Total: 32 bytes - -Signed-off-by: Ron Yorston -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 18 +++++++++++++++--- - testsuite/awk.tests | 5 +++++ - 2 files changed, 20 insertions(+), 3 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 2c15f9e4e..b4f6a3741 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -2155,7 +2155,10 @@ static int fmt_num(char *b, int size, const char *format, double n, int int_as_i - } - - /* formatted output into an allocated buffer, return ptr to buffer */ --static char *awk_printf(node *n) -+#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS -+# define awk_printf(a, b) awk_printf(a) -+#endif -+static char *awk_printf(node *n, int *len) - { - char *b = NULL; - char *fmt, *s, *f; -@@ -2209,6 +2212,10 @@ static char *awk_printf(node *n) - nvfree(v); - b = xrealloc(b, i + 1); - b[i] = '\0'; -+#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS -+ if (len) -+ *len = i; -+#endif - return b; - } - -@@ -2666,6 +2673,7 @@ static var *evaluate(node *op, var *res) - case XC( OC_PRINT ): - case XC( OC_PRINTF ): { - FILE *F = stdout; -+ IF_FEATURE_AWK_GNU_EXTENSIONS(int len;) - - if (op->r.n) { - rstream *rsm = newfile(R.s); -@@ -2703,8 +2711,12 @@ static var *evaluate(node *op, var *res) - fputs(getvar_s(intvar[ORS]), F); - - } else { /* OC_PRINTF */ -- char *s = awk_printf(op1); -+ char *s = awk_printf(op1, &len); -+#if 
ENABLE_FEATURE_AWK_GNU_EXTENSIONS -+ fwrite(s, len, 1, F); -+#else - fputs(s, F); -+#endif - free(s); - } - fflush(F); -@@ -2978,7 +2990,7 @@ static var *evaluate(node *op, var *res) - break; - - case XC( OC_SPRINTF ): -- setvar_p(res, awk_printf(op1)); -+ setvar_p(res, awk_printf(op1, NULL)); - break; - - case XC( OC_UNARY ): { -diff --git a/testsuite/awk.tests b/testsuite/awk.tests -index 92c83d719..cf9b722dc 100755 ---- a/testsuite/awk.tests -+++ b/testsuite/awk.tests -@@ -383,6 +383,11 @@ testing "awk errors on missing delete arg" \ - "awk -e '{delete}' 2>&1" "awk: cmd. line:1: Too few arguments\n" "" "" - SKIP= - -+optional FEATURE_AWK_GNU_EXTENSIONS -+testing "awk printf('%c') can output NUL" \ -+ "awk '{printf(\"hello%c null\n\", 0)}'" "hello\0 null\n" "" "\n" -+SKIP= -+ - # testing "description" "command" "result" "infile" "stdin" - testing 'awk negative field access' \ - 'awk 2>&1 -- '\''{ $(-1) }'\' \ --- -2.27.0 - - -From 9dcd2d5cc91bde2d6cdd038ed23408057d6f6429 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Wed, 16 Jun 2021 09:18:08 +0200 -Subject: [PATCH 02/61] awk: fix use-after-free in "$BIGNUM1 $BIGGERNUM2" - concat op - -Second reference to a field reallocs/moves Fields[] array, but first ref -still tries to use the element where it was before move. 
- -function old new delta -fsrealloc 94 106 +12 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 85 ++++++++++++++++++++++++++++++++++++++++++--------- - 1 file changed, 71 insertions(+), 14 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index b4f6a3741..48836298c 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -1745,12 +1745,22 @@ static char* qrealloc(char *b, int n, int *size) - /* resize field storage space */ - static void fsrealloc(int size) - { -- int i; -+ int i, newsize; - - if (size >= maxfields) { -+ /* Sanity cap, easier than catering for overflows */ -+ if (size > 0xffffff) -+ bb_die_memory_exhausted(); -+ - i = maxfields; - maxfields = size + 16; -- Fields = xrealloc(Fields, maxfields * sizeof(Fields[0])); -+ -+ newsize = maxfields * sizeof(Fields[0]); -+ debug_printf_eval("fsrealloc: xrealloc(%p, %u)\n", Fields, newsize); -+ Fields = xrealloc(Fields, newsize); -+ debug_printf_eval("fsrealloc: Fields=%p..%p\n", Fields, (char*)Fields + newsize - 1); -+ /* ^^^ did Fields[] move? debug aid for L.v getting "upstaged" by R.v in evaluate() */ -+ - for (; i < maxfields; i++) { - Fields[i].type = VF_SPECIAL; - Fields[i].string = NULL; -@@ -2614,20 +2624,30 @@ static var *evaluate(node *op, var *res) - /* execute inevitable things */ - if (opinfo & OF_RES1) - L.v = evaluate(op1, v1); -- if (opinfo & OF_RES2) -- R.v = evaluate(op->r.n, v1+1); - if (opinfo & OF_STR1) { - L.s = getvar_s(L.v); - debug_printf_eval("L.s:'%s'\n", L.s); - } -- if (opinfo & OF_STR2) { -- R.s = getvar_s(R.v); -- debug_printf_eval("R.s:'%s'\n", R.s); -- } - if (opinfo & OF_NUM1) { - L_d = getvar_i(L.v); - debug_printf_eval("L_d:%f\n", L_d); - } -+ /* NB: Must get string/numeric values of L (done above) -+ * _before_ evaluate()'ing R.v: if both L and R are $NNNs, -+ * and right one is large, then L.v points to Fields[NNN1], -+ * second evaluate() reallocates and moves (!) Fields[], -+ * R.v points to Fields[NNN2] but L.v now points to freed mem! 
-+ * (Seen trying to evaluate "$444 $44444") -+ */ -+ if (opinfo & OF_RES2) { -+ R.v = evaluate(op->r.n, v1+1); -+ //TODO: L.v may be invalid now, set L.v to NULL to catch bugs? -+ //L.v = NULL; -+ } -+ if (opinfo & OF_STR2) { -+ R.s = getvar_s(R.v); -+ debug_printf_eval("R.s:'%s'\n", R.s); -+ } - - debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK)); - switch (XC(opinfo & OPCLSMASK)) { -@@ -2636,6 +2656,7 @@ static var *evaluate(node *op, var *res) - - /* test pattern */ - case XC( OC_TEST ): -+ debug_printf_eval("TEST\n"); - if ((op1->info & OPCLSMASK) == OC_COMMA) { - /* it's range pattern */ - if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) { -@@ -2653,25 +2674,32 @@ static var *evaluate(node *op, var *res) - - /* just evaluate an expression, also used as unconditional jump */ - case XC( OC_EXEC ): -+ debug_printf_eval("EXEC\n"); - break; - - /* branch, used in if-else and various loops */ - case XC( OC_BR ): -+ debug_printf_eval("BR\n"); - op = istrue(L.v) ? op->a.n : op->r.n; - break; - - /* initialize for-in loop */ - case XC( OC_WALKINIT ): -+ debug_printf_eval("WALKINIT\n"); - hashwalk_init(L.v, iamarray(R.v)); - break; - - /* get next array item */ - case XC( OC_WALKNEXT ): -+ debug_printf_eval("WALKNEXT\n"); - op = hashwalk_next(L.v) ? 
op->a.n : op->r.n; - break; - - case XC( OC_PRINT ): -- case XC( OC_PRINTF ): { -+ debug_printf_eval("PRINT /\n"); -+ case XC( OC_PRINTF ): -+ debug_printf_eval("PRINTF\n"); -+ { - FILE *F = stdout; - IF_FEATURE_AWK_GNU_EXTENSIONS(int len;) - -@@ -2726,22 +2754,28 @@ static var *evaluate(node *op, var *res) - /* case XC( OC_DELETE ): - moved to happen before arg evaluation */ - - case XC( OC_NEWSOURCE ): -+ debug_printf_eval("NEWSOURCE\n"); - g_progname = op->l.new_progname; - break; - - case XC( OC_RETURN ): -+ debug_printf_eval("RETURN\n"); - copyvar(res, L.v); - break; - - case XC( OC_NEXTFILE ): -+ debug_printf_eval("NEXTFILE\n"); - nextfile = TRUE; - case XC( OC_NEXT ): -+ debug_printf_eval("NEXT\n"); - nextrec = TRUE; - case XC( OC_DONE ): -+ debug_printf_eval("DONE\n"); - clrvar(res); - break; - - case XC( OC_EXIT ): -+ debug_printf_eval("EXIT\n"); - awk_exit(L_d); - - /* -- recursive node type -- */ -@@ -2761,15 +2795,18 @@ static var *evaluate(node *op, var *res) - break; - - case XC( OC_IN ): -+ debug_printf_eval("IN\n"); - setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0); - break; - - case XC( OC_REGEXP ): -+ debug_printf_eval("REGEXP\n"); - op1 = op; - L.s = getvar_s(intvar[F0]); - goto re_cont; - - case XC( OC_MATCH ): -+ debug_printf_eval("MATCH\n"); - op1 = op->r.n; - re_cont: - { -@@ -2795,6 +2832,7 @@ static var *evaluate(node *op, var *res) - break; - - case XC( OC_TERNARY ): -+ debug_printf_eval("TERNARY\n"); - if ((op->r.n->info & OPCLSMASK) != OC_COLON) - syntax_error(EMSG_POSSIBLE_ERROR); - res = evaluate(istrue(L.v) ? 
op->r.n->l.n : op->r.n->r.n, res); -@@ -2803,6 +2841,7 @@ static var *evaluate(node *op, var *res) - case XC( OC_FUNC ): { - var *vbeg, *v; - const char *sv_progname; -+ debug_printf_eval("FUNC\n"); - - /* The body might be empty, still has to eval the args */ - if (!op->r.n->info && !op->r.f->body.first) -@@ -2832,7 +2871,10 @@ static var *evaluate(node *op, var *res) - } - - case XC( OC_GETLINE ): -- case XC( OC_PGETLINE ): { -+ debug_printf_eval("GETLINE /\n"); -+ case XC( OC_PGETLINE ): -+ debug_printf_eval("PGETLINE\n"); -+ { - rstream *rsm; - int i; - -@@ -2873,6 +2915,7 @@ static var *evaluate(node *op, var *res) - /* simple builtins */ - case XC( OC_FBLTIN ): { - double R_d = R_d; /* for compiler */ -+ debug_printf_eval("FBLTIN\n"); - - switch (opn) { - case F_in: -@@ -2986,14 +3029,18 @@ static var *evaluate(node *op, var *res) - } - - case XC( OC_BUILTIN ): -+ debug_printf_eval("BUILTIN\n"); - res = exec_builtin(op, res); - break; - - case XC( OC_SPRINTF ): -+ debug_printf_eval("SPRINTF\n"); - setvar_p(res, awk_printf(op1, NULL)); - break; - -- case XC( OC_UNARY ): { -+ case XC( OC_UNARY ): -+ debug_printf_eval("UNARY\n"); -+ { - double Ld, R_d; - - Ld = R_d = getvar_i(R.v); -@@ -3023,7 +3070,9 @@ static var *evaluate(node *op, var *res) - break; - } - -- case XC( OC_FIELD ): { -+ case XC( OC_FIELD ): -+ debug_printf_eval("FIELD\n"); -+ { - int i = (int)getvar_i(R.v); - if (i < 0) - syntax_error(EMSG_NEGATIVE_FIELD); -@@ -3040,8 +3089,10 @@ static var *evaluate(node *op, var *res) - - /* concatenation (" ") and index joining (",") */ - case XC( OC_CONCAT ): -+ debug_printf_eval("CONCAT /\n"); - case XC( OC_COMMA ): { - const char *sep = ""; -+ debug_printf_eval("COMMA\n"); - if ((opinfo & OPCLSMASK) == OC_COMMA) - sep = getvar_s(intvar[SUBSEP]); - setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s)); -@@ -3049,17 +3100,22 @@ static var *evaluate(node *op, var *res) - } - - case XC( OC_LAND ): -+ debug_printf_eval("LAND\n"); - setvar_i(res, istrue(L.v) ? 
ptest(op->r.n) : 0); - break; - - case XC( OC_LOR ): -+ debug_printf_eval("LOR\n"); - setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n)); - break; - - case XC( OC_BINARY ): -- case XC( OC_REPLACE ): { -+ debug_printf_eval("BINARY /\n"); -+ case XC( OC_REPLACE ): -+ debug_printf_eval("REPLACE\n"); -+ { - double R_d = getvar_i(R.v); -- debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn); -+ debug_printf_eval("R_d:%f opn:%c\n", R_d, opn); - switch (opn) { - case '+': - L_d += R_d; -@@ -3095,6 +3151,7 @@ static var *evaluate(node *op, var *res) - case XC( OC_COMPARE ): { - int i = i; /* for compiler */ - double Ld; -+ debug_printf_eval("COMPARE\n"); - - if (is_numeric(L.v) && is_numeric(R.v)) { - Ld = getvar_i(L.v) - getvar_i(R.v); --- -2.27.0 - - -From 1d5e5492dd8368ee3870bcd390754aa7c3f8956c Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Fri, 18 Jun 2021 16:35:27 +0200 -Subject: [PATCH 03/61] awk: after preinc/dec, only allow variable, field ref, - array ref, or another preinc/dec - -Accepting nonsense like "--4", and even "-- -4" is confusing. - -function old new delta -parse_expr 917 938 +21 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 87 ++++++++++++++++++++++++++++++++++++++++----------- - 1 file changed, 69 insertions(+), 18 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 48836298c..2563722f9 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -66,6 +66,8 @@ - #endif - #ifndef debug_printf_parse - # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__)) -+#else -+# define debug_parse_print_tc(...) 
((void)0) - #endif - - -@@ -210,13 +212,13 @@ typedef struct tsplitter_s { - #define TC_SEQTERM (1 << 1) /* ) */ - #define TC_REGEXP (1 << 2) /* /.../ */ - #define TC_OUTRDR (1 << 3) /* | > >> */ --#define TC_UOPPOST (1 << 4) /* unary postfix operator */ --#define TC_UOPPRE1 (1 << 5) /* unary prefix operator */ -+#define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */ -+#define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */ - #define TC_BINOPX (1 << 6) /* two-opnd operator */ - #define TC_IN (1 << 7) - #define TC_COMMA (1 << 8) - #define TC_PIPE (1 << 9) /* input redirection pipe */ --#define TC_UOPPRE2 (1 << 10) /* unary prefix operator */ -+#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */ - #define TC_ARRTERM (1 << 11) /* ] */ - #define TC_GRPSTART (1 << 12) /* { */ - #define TC_GRPTERM (1 << 13) /* } */ -@@ -243,14 +245,51 @@ typedef struct tsplitter_s { - #define TC_STRING (1 << 29) - #define TC_NUMBER (1 << 30) - --#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) -+#ifndef debug_parse_print_tc -+#define debug_parse_print_tc(n) do { \ -+if ((n) & TC_SEQSTART) debug_printf_parse(" SEQSTART"); \ -+if ((n) & TC_SEQTERM ) debug_printf_parse(" SEQTERM" ); \ -+if ((n) & TC_REGEXP ) debug_printf_parse(" REGEXP" ); \ -+if ((n) & TC_OUTRDR ) debug_printf_parse(" OUTRDR" ); \ -+if ((n) & TC_UOPPOST ) debug_printf_parse(" UOPPOST" ); \ -+if ((n) & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" ); \ -+if ((n) & TC_BINOPX ) debug_printf_parse(" BINOPX" ); \ -+if ((n) & TC_IN ) debug_printf_parse(" IN" ); \ -+if ((n) & TC_COMMA ) debug_printf_parse(" COMMA" ); \ -+if ((n) & TC_PIPE ) debug_printf_parse(" PIPE" ); \ -+if ((n) & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" ); \ -+if ((n) & TC_ARRTERM ) debug_printf_parse(" ARRTERM" ); \ -+if ((n) & TC_GRPSTART) debug_printf_parse(" GRPSTART"); \ -+if ((n) & TC_GRPTERM ) debug_printf_parse(" GRPTERM" ); \ -+if ((n) & TC_SEMICOL ) debug_printf_parse(" SEMICOL" ); \ -+if ((n) & TC_NEWLINE ) debug_printf_parse(" 
NEWLINE" ); \ -+if ((n) & TC_STATX ) debug_printf_parse(" STATX" ); \ -+if ((n) & TC_WHILE ) debug_printf_parse(" WHILE" ); \ -+if ((n) & TC_ELSE ) debug_printf_parse(" ELSE" ); \ -+if ((n) & TC_BUILTIN ) debug_printf_parse(" BUILTIN" ); \ -+if ((n) & TC_LENGTH ) debug_printf_parse(" LENGTH" ); \ -+if ((n) & TC_GETLINE ) debug_printf_parse(" GETLINE" ); \ -+if ((n) & TC_FUNCDECL) debug_printf_parse(" FUNCDECL"); \ -+if ((n) & TC_BEGIN ) debug_printf_parse(" BEGIN" ); \ -+if ((n) & TC_END ) debug_printf_parse(" END" ); \ -+if ((n) & TC_EOF ) debug_printf_parse(" EOF" ); \ -+if ((n) & TC_VARIABLE) debug_printf_parse(" VARIABLE"); \ -+if ((n) & TC_ARRAY ) debug_printf_parse(" ARRAY" ); \ -+if ((n) & TC_FUNCTION) debug_printf_parse(" FUNCTION"); \ -+if ((n) & TC_STRING ) debug_printf_parse(" STRING" ); \ -+if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ -+} while (0) -+#endif - - /* combined token classes */ -+#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) -+ - #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN) - //#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST) - #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ - | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ - | TC_SEQSTART | TC_STRING | TC_NUMBER) -+#define TC_LVALUE (TC_VARIABLE | TC_ARRAY) - - #define TC_STATEMNT (TC_STATX | TC_WHILE) - #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE) -@@ -284,7 +323,6 @@ typedef struct tsplitter_s { - #define OF_CHECKED 0x200000 - #define OF_REQUIRED 0x400000 - -- - /* combined operator flags */ - #define xx 0 - #define xV OF_RES2 -@@ -313,10 +351,8 @@ typedef struct tsplitter_s { - #define PRIMASK2 0x7E000000 - - /* Operation classes */ -- - #define SHIFT_TIL_THIS 0x0600 - #define RECUR_FROM_THIS 0x1000 -- - enum { - OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300, - OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600, -@@ -411,7 +447,9 @@ static const uint32_t tokeninfo[] ALIGN4 = { - OC_REGEXP, - xS|'a', xS|'w', xS|'|', - OC_UNARY|xV|P(9)|'p', 
OC_UNARY|xV|P(9)|'m', -- OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5), -+#define TI_PREINC (OC_UNARY|xV|P(9)|'P') -+#define TI_PREDEC (OC_UNARY|xV|P(9)|'M') -+ TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5), - OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-', - OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&', - OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', -@@ -1070,6 +1108,10 @@ static uint32_t next_token(uint32_t expected) - uint32_t tc; - const uint32_t *ti; - -+ debug_printf_parse("%s() expected(%x):", __func__, expected); -+ debug_parse_print_tc(expected); -+ debug_printf_parse("\n"); -+ - if (t_rollback) { - debug_printf_parse("%s: using rolled-back token\n", __func__); - t_rollback = FALSE; -@@ -1226,7 +1268,9 @@ static uint32_t next_token(uint32_t expected) - EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN); - } - -- debug_printf_parse("%s: returning, ltclass:%x t_double:%f\n", __func__, ltclass, t_double); -+ debug_printf_parse("%s: returning, t_double:%f ltclass:", __func__, t_double); -+ debug_parse_print_tc(ltclass); -+ debug_printf_parse("\n"); - return ltclass; - #undef concat_inserted - #undef save_tclass -@@ -1266,7 +1310,7 @@ static node *condition(void) - - /* parse expression terminated by given argument, return ptr - * to built subtree. 
Terminator is eaten by parse_expr */ --static node *parse_expr(uint32_t iexp) -+static node *parse_expr(uint32_t term_tc) - { - node sn; - node *cn = &sn; -@@ -1274,13 +1318,15 @@ static node *parse_expr(uint32_t iexp) - uint32_t tc, xtc; - var *v; - -- debug_printf_parse("%s(%x)\n", __func__, iexp); -+ debug_printf_parse("%s() term_tc(%x):", __func__, term_tc); -+ debug_parse_print_tc(term_tc); -+ debug_printf_parse("\n"); - - sn.info = PRIMASK; - sn.r.n = sn.a.n = glptr = NULL; -- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp; -+ xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | term_tc; - -- while (!((tc = next_token(xtc)) & iexp)) { -+ while (!((tc = next_token(xtc)) & term_tc)) { - - if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) { - /* input redirection (<) attached to glptr node */ -@@ -1313,25 +1359,28 @@ static node *parse_expr(uint32_t iexp) - next_token(TC_GETLINE); - /* give maximum priority to this pipe */ - cn->info &= ~PRIMASK; -- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp; -+ xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc; - } - } else { - cn->r.n = vn; -- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp; -+ xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc; - } - vn->a.n = cn; - - } else { -- debug_printf_parse("%s: other\n", __func__); -+ debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info); - /* for operands and prefix-unary operators, attach them - * to last node */ - vn = cn; - cn = vn->r.n = new_node(t_info); - cn->a.n = vn; -+ - xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; -+ if (t_info == TI_PREINC || t_info == TI_PREDEC) -+ xtc = TC_LVALUE | TC_UOPPRE1; - if (tc & (TC_OPERAND | TC_REGEXP)) { - debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__); -- xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp; -+ xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | term_tc; - /* one should be very careful with switch on tclass - - * only simple tclasses should be used! 
*/ - switch (tc) { -@@ -1388,7 +1437,7 @@ static node *parse_expr(uint32_t iexp) - case TC_GETLINE: - debug_printf_parse("%s: TC_GETLINE\n", __func__); - glptr = cn; -- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp; -+ xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc; - break; - - case TC_BUILTIN: -@@ -1603,6 +1652,8 @@ static void parse_program(char *p) - func *f; - var *v; - -+ debug_printf_parse("%s()\n", __func__); -+ - g_pos = p; - t_lineno = 1; - while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART | --- -2.27.0 - - -From 3d0acb8934f496021a63471ef9e29c87520612a0 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Sun, 20 Jun 2021 22:52:29 +0200 -Subject: [PATCH 04/61] qwk: make code clearer, no actual code changes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 2563722f9..5f1d670a4 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -455,7 +455,8 @@ static const uint32_t tokeninfo[] ALIGN4 = { - OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', - OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', - OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, -- OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), -+#define TI_LESS (OC_COMPARE|VV|P(39)|2) -+ TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), - OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':', - OC_IN|SV|P(49), /* TC_IN */ - OC_COMMA|SS|P(80), -@@ -1328,7 +1329,7 @@ static node *parse_expr(uint32_t term_tc) - - while (!((tc = next_token(xtc)) & term_tc)) { - -- if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) { -+ if (glptr && (t_info == TI_LESS)) { - /* input redirection (<) attached to glptr node */ - debug_printf_parse("%s: input redir\n", __func__); - cn = 
glptr->l.n = new_node(OC_CONCAT | SS | P(37)); --- -2.27.0 - - -From 3c18df6595f8efc0229d7afc948b8ef38fb6f1aa Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Fri, 25 Jun 2021 19:38:27 +0200 -Subject: [PATCH 05/61] awk: more efficient -f FILE, document what "some trick - in next_token" is - -function old new delta -awk_main 890 898 +8 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 33 ++++++++++++++++++++++++--------- - 1 file changed, 24 insertions(+), 9 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 5f1d670a4..1b23c17d2 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -1217,6 +1217,8 @@ static uint32_t next_token(uint32_t expected) - if (!isalnum_(*p)) - syntax_error(EMSG_UNEXP_TOKEN); /* no */ - /* yes */ -+/* "move name one char back" trick: we need a byte for NUL terminator */ -+/* NB: this results in argv[i][-1] being used (!!!) in e.g. "awk -e 'NAME'" case */ - t_string = --p; - while (isalnum_(*++p)) { - p[-1] = *p; -@@ -3345,7 +3347,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS - llist_t *list_e = NULL; - #endif -- int i, j; -+ int i; - var *v; - var tv; - char **envp; -@@ -3417,30 +3419,43 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - bb_show_usage(); - } - while (list_f) { -- char *s = NULL; -- FILE *from_file; -+ int fd; -+ char *s; - - g_progname = llist_pop(&list_f); -- from_file = xfopen_stdin(g_progname); -- /* one byte is reserved for some trick in next_token */ -- for (i = j = 1; j > 0; i += j) { -- s = xrealloc(s, i + 4096); -- j = fread(s + i, 1, 4094, from_file); -+ fd = xopen_stdin(g_progname); -+ /* 1st byte is reserved for "move name one char back" trick in next_token */ -+ i = 1; -+ s = NULL; -+ for (;;) { -+ int sz; -+ s = xrealloc(s, i + 1000); -+ sz = safe_read(fd, s + i, 1000); -+ if (sz <= 0) -+ break; -+ i += sz; - } -+ s = xrealloc(s, i + 1); /* trim unused 999 bytes */ - s[i] = '\0'; -- fclose(from_file); -+ close(fd); - parse_program(s + 1); 
- free(s); - } - g_progname = "cmd. line"; - #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS - while (list_e) { -+ /* NB: "move name one char back" trick in next_token -+ * can use argv[i][-1] here. -+ */ - parse_program(llist_pop(&list_e)); - } - #endif - if (!(opt & (OPT_f | OPT_e))) { - if (!*argv) - bb_show_usage(); -+ /* NB: "move name one char back" trick in next_token -+ * can use argv[i][-1] here. -+ */ - parse_program(*argv++); - } - --- -2.27.0 - - -From f8243879801f8d9d9fffbde592aee4264aa30d71 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Fri, 25 Jun 2021 19:41:05 +0200 -Subject: [PATCH 06/61] awk: move locals deeper into scopes where they are - used, no logic changes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 62 ++++++++++++++++++++++++++------------------------- - 1 file changed, 32 insertions(+), 30 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 1b23c17d2..86076d7b6 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -3254,20 +3254,19 @@ static var *evaluate(node *op, var *res) - - static int awk_exit(int r) - { -- var tv; - unsigned i; -- hash_item *hi; -- -- zero_out_var(&tv); - - if (!exiting) { -+ var tv; - exiting = TRUE; - nextrec = FALSE; -+ zero_out_var(&tv); - evaluate(endseq.first, &tv); - } - - /* waiting for children */ - for (i = 0; i < fdhash->csize; i++) { -+ hash_item *hi; - hi = fdhash->items[i]; - while (hi) { - if (hi->data.rs.F && hi->data.rs.is_pipe) -@@ -3348,11 +3347,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - llist_t *list_e = NULL; - #endif - int i; -- var *v; - var tv; -- char **envp; -- char *vnames = (char *)vNames; /* cheat */ -- char *vvalues = (char *)vValues; - - INIT_G(); - -@@ -3361,8 +3356,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - if (ENABLE_LOCALE_SUPPORT) - setlocale(LC_NUMERIC, "C"); - -- zero_out_var(&tv); -- - /* allocate global buffer */ - g_buf = xmalloc(MAXVARFMT + 1); - -@@ -3372,16 +3365,21 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - fnhash 
= hash_init(); - - /* initialize variables */ -- for (i = 0; *vnames; i++) { -- intvar[i] = v = newvar(nextword(&vnames)); -- if (*vvalues != '\377') -- setvar_s(v, nextword(&vvalues)); -- else -- setvar_i(v, 0); -- -- if (*vnames == '*') { -- v->type |= VF_SPECIAL; -- vnames++; -+ { -+ char *vnames = (char *)vNames; /* cheat */ -+ char *vvalues = (char *)vValues; -+ for (i = 0; *vnames; i++) { -+ var *v; -+ intvar[i] = v = newvar(nextword(&vnames)); -+ if (*vvalues != '\377') -+ setvar_s(v, nextword(&vvalues)); -+ else -+ setvar_i(v, 0); -+ -+ if (*vnames == '*') { -+ v->type |= VF_SPECIAL; -+ vnames++; -+ } - } - } - -@@ -3393,16 +3391,19 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - newfile("/dev/stderr")->F = stderr; - - /* Huh, people report that sometimes environ is NULL. Oh well. */ -- if (environ) for (envp = environ; *envp; envp++) { -- /* environ is writable, thus we don't strdup it needlessly */ -- char *s = *envp; -- char *s1 = strchr(s, '='); -- if (s1) { -- *s1 = '\0'; -- /* Both findvar and setvar_u take const char* -- * as 2nd arg -> environment is not trashed */ -- setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1); -- *s1 = '='; -+ if (environ) { -+ char **envp; -+ for (envp = environ; *envp; envp++) { -+ /* environ is writable, thus we don't strdup it needlessly */ -+ char *s = *envp; -+ char *s1 = strchr(s, '='); -+ if (s1) { -+ *s1 = '\0'; -+ /* Both findvar and setvar_u take const char* -+ * as 2nd arg -> environment is not trashed */ -+ setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1); -+ *s1 = '='; -+ } - } - } - opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL); -@@ -3466,6 +3467,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - setari_u(intvar[ARGV], ++i, *argv++); - setvar_i(intvar[ARGC], i + 1); - -+ zero_out_var(&tv); - evaluate(beginseq.first, &tv); - if (!mainseq.first && !endseq.first) - awk_exit(EXIT_SUCCESS); --- -2.27.0 - - -From 
b52a50128d64e1f601e17507ffc118c180ef7b3d Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Tue, 29 Jun 2021 01:03:42 +0200 -Subject: [PATCH 07/61] awk: remove redundant check - -function old new delta -next_token 785 784 -1 -parse_program 337 328 -9 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-10) Total: -10 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 32 ++++++++++++++++++-------------- - 1 file changed, 18 insertions(+), 14 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 86076d7b6..9826a57c6 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -1093,8 +1093,9 @@ static void nvfree(var *v) - - /* ------- awk program text parsing ------- */ - --/* Parse next token pointed by global pos, place results into global ttt. -- * If token isn't expected, give away. Return token class -+/* Parse next token pointed by global pos, place results into global t_XYZ variables. -+ * If token isn't expected, print error message and die. -+ * Return token class (also store it in t_tclass). - */ - static uint32_t next_token(uint32_t expected) - { -@@ -1248,33 +1249,35 @@ static uint32_t next_token(uint32_t expected) - goto readnext; - - /* insert concatenation operator when needed */ -- debug_printf_parse("%s: %x %x %x concat_inserted?\n", __func__, -- (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP)); -+ debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__, -+ (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP), -+ !(ltclass == TC_LENGTH && tc == TC_SEQSTART)); - if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP) - && !(ltclass == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." 
*/ - ) { - concat_inserted = TRUE; - save_tclass = tc; - save_info = t_info; -- tc = TC_BINOP; -+ tc = TC_BINOPX; - t_info = OC_CONCAT | SS | P(35); - } - -- debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, t_tclass); - t_tclass = tc; -+ debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, tc); - } -- ltclass = t_tclass; -- - /* Are we ready for this? */ -- if (!(ltclass & expected)) { -+ if (!(t_tclass & expected)) { - syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ? - EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN); - } - -- debug_printf_parse("%s: returning, t_double:%f ltclass:", __func__, t_double); -- debug_parse_print_tc(ltclass); -+ debug_printf_parse("%s: returning, t_double:%f t_tclass:", __func__, t_double); -+ debug_parse_print_tc(t_tclass); - debug_printf_parse("\n"); -- return ltclass; -+ -+ ltclass = t_tclass; -+ -+ return t_tclass; - #undef concat_inserted - #undef save_tclass - #undef save_info -@@ -1700,8 +1703,9 @@ static void parse_program(char *p) - /* Arg followed either by end of arg list or 1 comma */ - if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM) - break; -- if (t_tclass != TC_COMMA) -- syntax_error(EMSG_UNEXP_TOKEN); -+//Impossible: next_token() above would error out and die -+// if (t_tclass != TC_COMMA) -+// syntax_error(EMSG_UNEXP_TOKEN); - } - seq = &f->body; - chain_group(); --- -2.27.0 - - -From 96368c3613c1b01c42b7b382d01142a07c919f60 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Tue, 29 Jun 2021 01:09:08 +0200 -Subject: [PATCH 08/61] awk: make ltclass ("last token class") local to - next_token() - -function old new delta -next_token 784 790 +6 -next_input_file 219 216 -3 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 1/1 up/down: 6/-3) Total: 3 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 24 ++++++++++-------------- - 1 file changed, 10 insertions(+), 14 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 9826a57c6..418bda160 100644 
---- a/editors/awk.c -+++ b/editors/awk.c -@@ -556,7 +556,6 @@ struct globals2 { - - uint32_t next_token__save_tclass; - uint32_t next_token__save_info; -- uint32_t next_token__ltclass; - smallint next_token__concat_inserted; - - smallint next_input_file__files_happen; -@@ -615,7 +614,7 @@ struct globals2 { - #define rsplitter (G.rsplitter ) - #define INIT_G() do { \ - SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \ -- G.next_token__ltclass = TC_OPTERM; \ -+ t_tclass = TC_OPTERM; \ - G.evaluate__seed = 1; \ - } while (0) - -@@ -1102,13 +1101,13 @@ static uint32_t next_token(uint32_t expected) - #define concat_inserted (G.next_token__concat_inserted) - #define save_tclass (G.next_token__save_tclass) - #define save_info (G.next_token__save_info) --/* Initialized to TC_OPTERM: */ --#define ltclass (G.next_token__ltclass) - - char *p, *s; - const char *tl; -- uint32_t tc; - const uint32_t *ti; -+ uint32_t tc, last_token_class; -+ -+ last_token_class = t_tclass; /* t_tclass is initialized to TC_OPTERM */ - - debug_printf_parse("%s() expected(%x):", __func__, expected); - debug_parse_print_tc(expected); -@@ -1245,15 +1244,15 @@ static uint32_t next_token(uint32_t expected) - g_pos = p; - - /* skipping newlines in some cases */ -- if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE)) -+ if ((last_token_class & TC_NOTERM) && (tc & TC_NEWLINE)) - goto readnext; - - /* insert concatenation operator when needed */ - debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__, -- (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP), -- !(ltclass == TC_LENGTH && tc == TC_SEQSTART)); -- if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP) -- && !(ltclass == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." 
*/ -+ (last_token_class & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP), -+ !(last_token_class == TC_LENGTH && tc == TC_SEQSTART)); -+ if ((last_token_class & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP) -+ && !(last_token_class == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */ - ) { - concat_inserted = TRUE; - save_tclass = tc; -@@ -1267,7 +1266,7 @@ static uint32_t next_token(uint32_t expected) - } - /* Are we ready for this? */ - if (!(t_tclass & expected)) { -- syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ? -+ syntax_error((last_token_class & (TC_NEWLINE | TC_EOF)) ? - EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN); - } - -@@ -1275,13 +1274,10 @@ static uint32_t next_token(uint32_t expected) - debug_parse_print_tc(t_tclass); - debug_printf_parse("\n"); - -- ltclass = t_tclass; -- - return t_tclass; - #undef concat_inserted - #undef save_tclass - #undef save_info --#undef ltclass - } - - static void rollback_token(void) --- -2.27.0 - - -From 8b51ddd054a3454171440035ed7f125483e9697c Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Tue, 29 Jun 2021 01:23:37 +0200 -Subject: [PATCH 09/61] awk: use TS_foo for combined token classes. 
No code - changes - -Confusion with "simple" classes was the cause of a bug fixed by previous commit - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 128 +++++++++++++++++++++++++------------------------- - 1 file changed, 64 insertions(+), 64 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 418bda160..764a3dd49 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -281,39 +281,39 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ - } while (0) - #endif - --/* combined token classes */ --#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) -+/* combined token classes ("token [class] sets") */ -+#define TS_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) - --#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN) --//#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST) --#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ -- | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ -- | TC_SEQSTART | TC_STRING | TC_NUMBER) --#define TC_LVALUE (TC_VARIABLE | TC_ARRAY) -+#define TS_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN) -+//#define TS_UNARYOP (TS_UOPPRE | TC_UOPPOST) -+#define TS_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ -+ | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ -+ | TC_SEQSTART | TC_STRING | TC_NUMBER) - --#define TC_STATEMNT (TC_STATX | TC_WHILE) --#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE) -+#define TS_LVALUE (TC_VARIABLE | TC_ARRAY) -+#define TS_STATEMNT (TC_STATX | TC_WHILE) -+#define TS_OPTERM (TC_SEMICOL | TC_NEWLINE) - - /* word tokens, cannot mean something else if not expected */ --#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE \ -- | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ -- | TC_FUNCDECL | TC_BEGIN | TC_END) -+#define TS_WORD (TC_IN | TS_STATEMNT | TC_ELSE \ -+ | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ -+ | TC_FUNCDECL | TC_BEGIN | TC_END) - - /* discard newlines after these */ --#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \ -- | TC_BINOP | TC_OPTERM) -+#define TS_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \ -+ | TS_BINOP | TS_OPTERM) - - /* 
what can expression begin with */ --#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP) -+#define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP) - /* what can group begin with */ --#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART) -+#define TS_GRPSEQ (TS_OPSEQ | TS_OPTERM | TS_STATEMNT | TC_GRPSTART) - --/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */ -+/* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */ - /* operator is inserted between them */ --#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \ -+#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \ - | TC_STRING | TC_NUMBER | TC_UOPPOST \ - | TC_LENGTH) --#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE) -+#define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE) - - #define OF_RES1 0x010000 - #define OF_RES2 0x020000 -@@ -614,7 +614,7 @@ struct globals2 { - #define rsplitter (G.rsplitter ) - #define INIT_G() do { \ - SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \ -- t_tclass = TC_OPTERM; \ -+ t_tclass = TS_OPTERM; \ - G.evaluate__seed = 1; \ - } while (0) - -@@ -1107,7 +1107,7 @@ static uint32_t next_token(uint32_t expected) - const uint32_t *ti; - uint32_t tc, last_token_class; - -- last_token_class = t_tclass; /* t_tclass is initialized to TC_OPTERM */ -+ last_token_class = t_tclass; /* t_tclass is initialized to TS_OPTERM */ - - debug_printf_parse("%s() expected(%x):", __func__, expected); - debug_parse_print_tc(expected); -@@ -1198,9 +1198,9 @@ static uint32_t next_token(uint32_t expected) - * token matches, - * and it's not a longer word, - */ -- if ((tc & (expected | TC_WORD | TC_NEWLINE)) -+ if ((tc & (expected | TS_WORD | TC_NEWLINE)) - && strncmp(p, tl, l) == 0 -- && !((tc & TC_WORD) && isalnum_(p[l])) -+ && !((tc & TS_WORD) && isalnum_(p[l])) - ) { - /* then this is what we are looking for */ - t_info = *ti; -@@ -1244,14 +1244,14 @@ static uint32_t next_token(uint32_t expected) - g_pos = p; - - /* 
skipping newlines in some cases */ -- if ((last_token_class & TC_NOTERM) && (tc & TC_NEWLINE)) -+ if ((last_token_class & TS_NOTERM) && (tc & TC_NEWLINE)) - goto readnext; - - /* insert concatenation operator when needed */ - debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__, -- (last_token_class & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP), -+ (last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP), - !(last_token_class == TC_LENGTH && tc == TC_SEQSTART)); -- if ((last_token_class & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP) -+ if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP) - && !(last_token_class == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */ - ) { - concat_inserted = TRUE; -@@ -1317,7 +1317,7 @@ static node *parse_expr(uint32_t term_tc) - node sn; - node *cn = &sn; - node *vn, *glptr; -- uint32_t tc, xtc; -+ uint32_t tc, expected_tc; - var *v; - - debug_printf_parse("%s() term_tc(%x):", __func__, term_tc); -@@ -1326,20 +1326,20 @@ static node *parse_expr(uint32_t term_tc) - - sn.info = PRIMASK; - sn.r.n = sn.a.n = glptr = NULL; -- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | term_tc; -+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc; - -- while (!((tc = next_token(xtc)) & term_tc)) { -+ while (!((tc = next_token(expected_tc)) & term_tc)) { - - if (glptr && (t_info == TI_LESS)) { - /* input redirection (<) attached to glptr node */ - debug_printf_parse("%s: input redir\n", __func__); - cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37)); - cn->a.n = glptr; -- xtc = TC_OPERAND | TC_UOPPRE; -+ expected_tc = TS_OPERAND | TS_UOPPRE; - glptr = NULL; - -- } else if (tc & (TC_BINOP | TC_UOPPOST)) { -- debug_printf_parse("%s: TC_BINOP | TC_UOPPOST tc:%x\n", __func__, tc); -+ } else if (tc & (TS_BINOP | TC_UOPPOST)) { -+ debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc); - /* for binary and postfix-unary 
operators, jump back over - * previous operators with higher priority */ - vn = cn; -@@ -1353,19 +1353,19 @@ static node *parse_expr(uint32_t term_tc) - t_info += P(6); - cn = vn->a.n->r.n = new_node(t_info); - cn->a.n = vn->a.n; -- if (tc & TC_BINOP) { -+ if (tc & TS_BINOP) { - cn->l.n = vn; -- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; -+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; - if ((t_info & OPCLSMASK) == OC_PGETLINE) { - /* it's a pipe */ - next_token(TC_GETLINE); - /* give maximum priority to this pipe */ - cn->info &= ~PRIMASK; -- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc; -+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; - } - } else { - cn->r.n = vn; -- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc; -+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; - } - vn->a.n = cn; - -@@ -1377,14 +1377,14 @@ static node *parse_expr(uint32_t term_tc) - cn = vn->r.n = new_node(t_info); - cn->a.n = vn; - -- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; -+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; - if (t_info == TI_PREINC || t_info == TI_PREDEC) -- xtc = TC_LVALUE | TC_UOPPRE1; -- if (tc & (TC_OPERAND | TC_REGEXP)) { -- debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__); -- xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | term_tc; -+ expected_tc = TS_LVALUE | TC_UOPPRE1; -+ if (tc & (TS_OPERAND | TC_REGEXP)) { -+ debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__); -+ expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc; - /* one should be very careful with switch on tclass - -- * only simple tclasses should be used! 
*/ -+ * only simple tclasses should be used (TC_xyz, not TS_xyz) */ - switch (tc) { - case TC_VARIABLE: - case TC_ARRAY: -@@ -1412,7 +1412,7 @@ static node *parse_expr(uint32_t term_tc) - setvar_i(v, t_double); - else { - setvar_s(v, t_string); -- xtc &= ~TC_UOPPOST; /* "str"++ is not allowed */ -+ expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */ - } - break; - -@@ -1439,7 +1439,7 @@ static node *parse_expr(uint32_t term_tc) - case TC_GETLINE: - debug_printf_parse("%s: TC_GETLINE\n", __func__); - glptr = cn; -- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc; -+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; - break; - - case TC_BUILTIN: -@@ -1450,7 +1450,7 @@ static node *parse_expr(uint32_t term_tc) - case TC_LENGTH: - debug_printf_parse("%s: TC_LENGTH\n", __func__); - next_token(TC_SEQSTART /* length(...) */ -- | TC_OPTERM /* length; (or newline)*/ -+ | TS_OPTERM /* length; (or newline)*/ - | TC_GRPTERM /* length } */ - | TC_BINOPX /* length NUM */ - | TC_COMMA /* print length, 1 */ -@@ -1464,7 +1464,7 @@ static node *parse_expr(uint32_t term_tc) - } - } - } -- } -+ } /* while() */ - - debug_printf_parse("%s() returns %p\n", __func__, sn.r.n); - return sn.r.n; -@@ -1497,7 +1497,7 @@ static void chain_expr(uint32_t info) - - n = chain_node(info); - -- n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM); -+ n->l.n = parse_expr(TS_OPTERM | TC_GRPTERM); - if ((info & OF_REQUIRED) && !n->l.n) - syntax_error(EMSG_TOO_FEW_ARGS); - -@@ -1535,12 +1535,12 @@ static void chain_group(void) - node *n, *n2, *n3; - - do { -- c = next_token(TC_GRPSEQ); -+ c = next_token(TS_GRPSEQ); - } while (c & TC_NEWLINE); - - if (c & TC_GRPSTART) { - debug_printf_parse("%s: TC_GRPSTART\n", __func__); -- while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) { -+ while (next_token(TS_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) { - debug_printf_parse("%s: !TC_GRPTERM\n", __func__); - if (t_tclass & TC_NEWLINE) - continue; -@@ -1548,13 +1548,13 @@ static void chain_group(void) - 
chain_group(); - } - debug_printf_parse("%s: TC_GRPTERM\n", __func__); -- } else if (c & (TC_OPSEQ | TC_OPTERM)) { -- debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__); -+ } else if (c & (TS_OPSEQ | TS_OPTERM)) { -+ debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__); - rollback_token(); - chain_expr(OC_EXEC | Vx); - } else { -- /* TC_STATEMNT */ -- debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__); -+ /* TS_STATEMNT */ -+ debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__); - switch (t_info & OPCLSMASK) { - case ST_IF: - debug_printf_parse("%s: ST_IF\n", __func__); -@@ -1563,7 +1563,7 @@ static void chain_group(void) - chain_group(); - n2 = chain_node(OC_EXEC); - n->r.n = seq->last; -- if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) { -+ if (next_token(TS_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) { - chain_group(); - n2->a.n = seq->last; - } else { -@@ -1616,10 +1616,10 @@ static void chain_group(void) - case OC_PRINTF: - debug_printf_parse("%s: OC_PRINT[F]\n", __func__); - n = chain_node(t_info); -- n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM); -+ n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_GRPTERM); - if (t_tclass & TC_OUTRDR) { - n->info |= t_info; -- n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM); -+ n->r.n = parse_expr(TS_OPTERM | TC_GRPTERM); - } - if (t_tclass & TC_GRPTERM) - rollback_token(); -@@ -1658,11 +1658,11 @@ static void parse_program(char *p) - - g_pos = p; - t_lineno = 1; -- while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART | -- TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { -+ while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_GRPSTART | -+ TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { - -- if (tclass & TC_OPTERM) { -- debug_printf_parse("%s: TC_OPTERM\n", __func__); -+ if (tclass & TS_OPTERM) { -+ debug_printf_parse("%s: TS_OPTERM\n", __func__); - continue; - } - -@@ -1706,11 +1706,11 @@ static void parse_program(char *p) - seq = &f->body; - chain_group(); - 
clear_array(ahash); -- } else if (tclass & TC_OPSEQ) { -- debug_printf_parse("%s: TC_OPSEQ\n", __func__); -+ } else if (tclass & TS_OPSEQ) { -+ debug_printf_parse("%s: TS_OPSEQ\n", __func__); - rollback_token(); - cn = chain_node(OC_TEST); -- cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART); -+ cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_GRPSTART); - if (t_tclass & TC_GRPSTART) { - debug_printf_parse("%s: TC_GRPSTART\n", __func__); - rollback_token(); --- -2.27.0 - - -From 01cbacb45972e14aa3072bf539c391dd03ed3955 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Tue, 29 Jun 2021 01:30:49 +0200 -Subject: [PATCH 10/61] awk: deindent code block, no code changes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 177 +++++++++++++++++++++++++------------------------- - 1 file changed, 90 insertions(+), 87 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 764a3dd49..9a3b63df6 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -1337,8 +1337,9 @@ static node *parse_expr(uint32_t term_tc) - cn->a.n = glptr; - expected_tc = TS_OPERAND | TS_UOPPRE; - glptr = NULL; -- -- } else if (tc & (TS_BINOP | TC_UOPPOST)) { -+ continue; -+ } -+ if (tc & (TS_BINOP | TC_UOPPOST)) { - debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc); - /* for binary and postfix-unary operators, jump back over - * previous operators with higher priority */ -@@ -1368,101 +1369,103 @@ static node *parse_expr(uint32_t term_tc) - expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; - } - vn->a.n = cn; -+ continue; -+ } - -- } else { -- debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info); -- /* for operands and prefix-unary operators, attach them -- * to last node */ -- vn = cn; -- cn = vn->r.n = new_node(t_info); -- cn->a.n = vn; -+ debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info); -+ /* for operands and prefix-unary operators, attach them -+ * to last node */ -+ vn = cn; -+ cn = vn->r.n = new_node(t_info); -+ cn->a.n = vn; - 
-- expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; -- if (t_info == TI_PREINC || t_info == TI_PREDEC) -- expected_tc = TS_LVALUE | TC_UOPPRE1; -- if (tc & (TS_OPERAND | TC_REGEXP)) { -- debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__); -- expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc; -- /* one should be very careful with switch on tclass - -- * only simple tclasses should be used (TC_xyz, not TS_xyz) */ -- switch (tc) { -- case TC_VARIABLE: -- case TC_ARRAY: -- debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__); -- cn->info = OC_VAR; -- v = hash_search(ahash, t_string); -- if (v != NULL) { -- cn->info = OC_FNARG; -- cn->l.aidx = v->x.aidx; -- } else { -- cn->l.v = newvar(t_string); -- } -- if (tc & TC_ARRAY) { -- cn->info |= xS; -- cn->r.n = parse_expr(TC_ARRTERM); -- } -- break; -+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; -+ if (t_info == TI_PREINC || t_info == TI_PREDEC) -+ expected_tc = TS_LVALUE | TC_UOPPRE1; - -- case TC_NUMBER: -- case TC_STRING: -- debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__); -- cn->info = OC_VAR; -- v = cn->l.v = xzalloc(sizeof(var)); -- if (tc & TC_NUMBER) -- setvar_i(v, t_double); -- else { -- setvar_s(v, t_string); -- expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */ -- } -- break; -+ if (!(tc & (TS_OPERAND | TC_REGEXP))) -+ continue; - -- case TC_REGEXP: -- debug_printf_parse("%s: TC_REGEXP\n", __func__); -- mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2)); -- break; -+ debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__); -+ expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc; -+ /* one should be very careful with switch on tclass - -+ * only simple tclasses should be used (TC_xyz, not TS_xyz) */ -+ switch (tc) { -+ case TC_VARIABLE: -+ case TC_ARRAY: -+ debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__); -+ cn->info = OC_VAR; -+ v = hash_search(ahash, t_string); -+ if (v != NULL) { -+ cn->info = OC_FNARG; -+ 
cn->l.aidx = v->x.aidx; -+ } else { -+ cn->l.v = newvar(t_string); -+ } -+ if (tc & TC_ARRAY) { -+ cn->info |= xS; -+ cn->r.n = parse_expr(TC_ARRTERM); -+ } -+ break; - -- case TC_FUNCTION: -- debug_printf_parse("%s: TC_FUNCTION\n", __func__); -- cn->info = OC_FUNC; -- cn->r.f = newfunc(t_string); -- cn->l.n = condition(); -- break; -+ case TC_NUMBER: -+ case TC_STRING: -+ debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__); -+ cn->info = OC_VAR; -+ v = cn->l.v = xzalloc(sizeof(var)); -+ if (tc & TC_NUMBER) -+ setvar_i(v, t_double); -+ else { -+ setvar_s(v, t_string); -+ expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */ -+ } -+ break; - -- case TC_SEQSTART: -- debug_printf_parse("%s: TC_SEQSTART\n", __func__); -- cn = vn->r.n = parse_expr(TC_SEQTERM); -- if (!cn) -- syntax_error("Empty sequence"); -- cn->a.n = vn; -- break; -+ case TC_REGEXP: -+ debug_printf_parse("%s: TC_REGEXP\n", __func__); -+ mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2)); -+ break; - -- case TC_GETLINE: -- debug_printf_parse("%s: TC_GETLINE\n", __func__); -- glptr = cn; -- expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; -- break; -+ case TC_FUNCTION: -+ debug_printf_parse("%s: TC_FUNCTION\n", __func__); -+ cn->info = OC_FUNC; -+ cn->r.f = newfunc(t_string); -+ cn->l.n = condition(); -+ break; - -- case TC_BUILTIN: -- debug_printf_parse("%s: TC_BUILTIN\n", __func__); -- cn->l.n = condition(); -- break; -+ case TC_SEQSTART: -+ debug_printf_parse("%s: TC_SEQSTART\n", __func__); -+ cn = vn->r.n = parse_expr(TC_SEQTERM); -+ if (!cn) -+ syntax_error("Empty sequence"); -+ cn->a.n = vn; -+ break; - -- case TC_LENGTH: -- debug_printf_parse("%s: TC_LENGTH\n", __func__); -- next_token(TC_SEQSTART /* length(...) */ -- | TS_OPTERM /* length; (or newline)*/ -- | TC_GRPTERM /* length } */ -- | TC_BINOPX /* length NUM */ -- | TC_COMMA /* print length, 1 */ -- ); -- rollback_token(); -- if (t_tclass & TC_SEQSTART) { -- /* It was a "(" token. 
Handle just like TC_BUILTIN */ -- cn->l.n = condition(); -- } -- break; -- } -+ case TC_GETLINE: -+ debug_printf_parse("%s: TC_GETLINE\n", __func__); -+ glptr = cn; -+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; -+ break; -+ -+ case TC_BUILTIN: -+ debug_printf_parse("%s: TC_BUILTIN\n", __func__); -+ cn->l.n = condition(); -+ break; -+ -+ case TC_LENGTH: -+ debug_printf_parse("%s: TC_LENGTH\n", __func__); -+ next_token(TC_SEQSTART /* length(...) */ -+ | TS_OPTERM /* length; (or newline)*/ -+ | TC_GRPTERM /* length } */ -+ | TC_BINOPX /* length NUM */ -+ | TC_COMMA /* print length, 1 */ -+ ); -+ rollback_token(); -+ if (t_tclass & TC_SEQSTART) { -+ /* It was a "(" token. Handle just like TC_BUILTIN */ -+ cn->l.n = condition(); - } -+ break; - } - } /* while() */ - --- -2.27.0 - - -From acea2fffaa696b855d5189a8a1cd7591fac8891d Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Tue, 29 Jun 2021 01:50:47 +0200 -Subject: [PATCH 11/61] awk: rename TC_SEQSTART/END to L/RPAREN, no code - changes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 94 +++++++++++++++++++++++++-------------------------- - 1 file changed, 47 insertions(+), 47 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 9a3b63df6..d31b97d86 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -207,48 +207,48 @@ typedef struct tsplitter_s { - } tsplitter; - - /* simple token classes */ --/* Order and hex values are very important!!! See next_token() */ --#define TC_SEQSTART (1 << 0) /* ( */ --#define TC_SEQTERM (1 << 1) /* ) */ -+/* order and hex values are very important!!! 
See next_token() */ -+#define TC_LPAREN (1 << 0) /* ( */ -+#define TC_RPAREN (1 << 1) /* ) */ - #define TC_REGEXP (1 << 2) /* /.../ */ - #define TC_OUTRDR (1 << 3) /* | > >> */ - #define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */ - #define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */ - #define TC_BINOPX (1 << 6) /* two-opnd operator */ --#define TC_IN (1 << 7) --#define TC_COMMA (1 << 8) --#define TC_PIPE (1 << 9) /* input redirection pipe */ -+#define TC_IN (1 << 7) /* 'in' */ -+#define TC_COMMA (1 << 8) /* , */ -+#define TC_PIPE (1 << 9) /* input redirection pipe | */ - #define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */ - #define TC_ARRTERM (1 << 11) /* ] */ - #define TC_GRPSTART (1 << 12) /* { */ - #define TC_GRPTERM (1 << 13) /* } */ --#define TC_SEMICOL (1 << 14) -+#define TC_SEMICOL (1 << 14) /* ; */ - #define TC_NEWLINE (1 << 15) - #define TC_STATX (1 << 16) /* ctl statement (for, next...) */ --#define TC_WHILE (1 << 17) --#define TC_ELSE (1 << 18) -+#define TC_WHILE (1 << 17) /* 'while' */ -+#define TC_ELSE (1 << 18) /* 'else' */ - #define TC_BUILTIN (1 << 19) - /* This costs ~50 bytes of code. - * A separate class to support deprecated "length" form. If we don't need that - * (i.e. 
if we demand that only "length()" with () is valid), then TC_LENGTH - * can be merged with TC_BUILTIN: - */ --#define TC_LENGTH (1 << 20) --#define TC_GETLINE (1 << 21) -+#define TC_LENGTH (1 << 20) /* 'length' */ -+#define TC_GETLINE (1 << 21) /* 'getline' */ - #define TC_FUNCDECL (1 << 22) /* 'function' 'func' */ --#define TC_BEGIN (1 << 23) --#define TC_END (1 << 24) -+#define TC_BEGIN (1 << 23) /* 'BEGIN' */ -+#define TC_END (1 << 24) /* 'END' */ - #define TC_EOF (1 << 25) --#define TC_VARIABLE (1 << 26) --#define TC_ARRAY (1 << 27) --#define TC_FUNCTION (1 << 28) --#define TC_STRING (1 << 29) -+#define TC_VARIABLE (1 << 26) /* name */ -+#define TC_ARRAY (1 << 27) /* name[ */ -+#define TC_FUNCTION (1 << 28) /* name( - but unlike TC_ARRAY, parser does not consume '(' */ -+#define TC_STRING (1 << 29) /* "..." */ - #define TC_NUMBER (1 << 30) - - #ifndef debug_parse_print_tc - #define debug_parse_print_tc(n) do { \ --if ((n) & TC_SEQSTART) debug_printf_parse(" SEQSTART"); \ --if ((n) & TC_SEQTERM ) debug_printf_parse(" SEQTERM" ); \ -+if ((n) & TC_LPAREN ) debug_printf_parse(" LPAREN" ); \ -+if ((n) & TC_RPAREN ) debug_printf_parse(" RPAREN" ); \ - if ((n) & TC_REGEXP ) debug_printf_parse(" REGEXP" ); \ - if ((n) & TC_OUTRDR ) debug_printf_parse(" OUTRDR" ); \ - if ((n) & TC_UOPPOST ) debug_printf_parse(" UOPPOST" ); \ -@@ -288,7 +288,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ - //#define TS_UNARYOP (TS_UOPPRE | TC_UOPPOST) - #define TS_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ - | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ -- | TC_SEQSTART | TC_STRING | TC_NUMBER) -+ | TC_LPAREN | TC_STRING | TC_NUMBER) - - #define TS_LVALUE (TC_VARIABLE | TC_ARRAY) - #define TS_STATEMNT (TC_STATX | TC_WHILE) -@@ -310,7 +310,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ - - /* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */ - /* operator is inserted between them */ --#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | 
TC_SEQTERM \ -+#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_RPAREN \ - | TC_STRING | TC_NUMBER | TC_UOPPOST \ - | TC_LENGTH) - #define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE) -@@ -394,8 +394,8 @@ enum { - #define NTCC '\377' - - static const char tokenlist[] ALIGN1 = -- "\1(" NTC /* TC_SEQSTART */ -- "\1)" NTC /* TC_SEQTERM */ -+ "\1(" NTC /* TC_LPAREN */ -+ "\1)" NTC /* TC_RPAREN */ - "\1/" NTC /* TC_REGEXP */ - "\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */ - "\2++" "\2--" NTC /* TC_UOPPOST */ -@@ -1250,9 +1250,9 @@ static uint32_t next_token(uint32_t expected) - /* insert concatenation operator when needed */ - debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__, - (last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP), -- !(last_token_class == TC_LENGTH && tc == TC_SEQSTART)); -+ !(last_token_class == TC_LENGTH && tc == TC_LPAREN)); - if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP) -- && !(last_token_class == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */ -+ && !(last_token_class == TC_LENGTH && tc == TC_LPAREN) /* but not for "length(..." 
*/ - ) { - concat_inserted = TRUE; - save_tclass = tc; -@@ -1304,10 +1304,10 @@ static void mk_re_node(const char *s, node *n, regex_t *re) - xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); - } - --static node *condition(void) -+static node *parse_lrparen_list(void) - { -- next_token(TC_SEQSTART); -- return parse_expr(TC_SEQTERM); -+ next_token(TC_LPAREN); -+ return parse_expr(TC_RPAREN); - } - - /* parse expression terminated by given argument, return ptr -@@ -1430,12 +1430,12 @@ static node *parse_expr(uint32_t term_tc) - debug_printf_parse("%s: TC_FUNCTION\n", __func__); - cn->info = OC_FUNC; - cn->r.f = newfunc(t_string); -- cn->l.n = condition(); -+ cn->l.n = parse_lrparen_list(); - break; - -- case TC_SEQSTART: -- debug_printf_parse("%s: TC_SEQSTART\n", __func__); -- cn = vn->r.n = parse_expr(TC_SEQTERM); -+ case TC_LPAREN: -+ debug_printf_parse("%s: TC_LPAREN\n", __func__); -+ cn = vn->r.n = parse_expr(TC_RPAREN); - if (!cn) - syntax_error("Empty sequence"); - cn->a.n = vn; -@@ -1449,21 +1449,21 @@ static node *parse_expr(uint32_t term_tc) - - case TC_BUILTIN: - debug_printf_parse("%s: TC_BUILTIN\n", __func__); -- cn->l.n = condition(); -+ cn->l.n = parse_lrparen_list(); - break; - - case TC_LENGTH: - debug_printf_parse("%s: TC_LENGTH\n", __func__); -- next_token(TC_SEQSTART /* length(...) */ -+ next_token(TC_LPAREN /* length(...) */ - | TS_OPTERM /* length; (or newline)*/ - | TC_GRPTERM /* length } */ - | TC_BINOPX /* length NUM */ - | TC_COMMA /* print length, 1 */ - ); - rollback_token(); -- if (t_tclass & TC_SEQSTART) { -+ if (t_tclass & TC_LPAREN) { - /* It was a "(" token. 
Handle just like TC_BUILTIN */ -- cn->l.n = condition(); -+ cn->l.n = parse_lrparen_list(); - } - break; - } -@@ -1562,7 +1562,7 @@ static void chain_group(void) - case ST_IF: - debug_printf_parse("%s: ST_IF\n", __func__); - n = chain_node(OC_BR | Vx); -- n->l.n = condition(); -+ n->l.n = parse_lrparen_list(); - chain_group(); - n2 = chain_node(OC_EXEC); - n->r.n = seq->last; -@@ -1576,7 +1576,7 @@ static void chain_group(void) - - case ST_WHILE: - debug_printf_parse("%s: ST_WHILE\n", __func__); -- n2 = condition(); -+ n2 = parse_lrparen_list(); - n = chain_loop(NULL); - n->l.n = n2; - break; -@@ -1587,14 +1587,14 @@ static void chain_group(void) - n = chain_loop(NULL); - n2->a.n = n->a.n; - next_token(TC_WHILE); -- n->l.n = condition(); -+ n->l.n = parse_lrparen_list(); - break; - - case ST_FOR: - debug_printf_parse("%s: ST_FOR\n", __func__); -- next_token(TC_SEQSTART); -- n2 = parse_expr(TC_SEMICOL | TC_SEQTERM); -- if (t_tclass & TC_SEQTERM) { /* for-in */ -+ next_token(TC_LPAREN); -+ n2 = parse_expr(TC_SEMICOL | TC_RPAREN); -+ if (t_tclass & TC_RPAREN) { /* for-in */ - if (!n2 || (n2->info & OPCLSMASK) != OC_IN) - syntax_error(EMSG_UNEXP_TOKEN); - n = chain_node(OC_WALKINIT | VV); -@@ -1607,7 +1607,7 @@ static void chain_group(void) - n = chain_node(OC_EXEC | Vx); - n->l.n = n2; - n2 = parse_expr(TC_SEMICOL); -- n3 = parse_expr(TC_SEQTERM); -+ n3 = parse_expr(TC_RPAREN); - n = chain_loop(n3); - n->l.n = n2; - if (!n2) -@@ -1686,13 +1686,13 @@ static void parse_program(char *p) - f->body.first = NULL; - f->nargs = 0; - /* Match func arg list: a comma sep list of >= 0 args, and a close paren */ -- while (next_token(TC_VARIABLE | TC_SEQTERM | TC_COMMA)) { -+ while (next_token(TC_VARIABLE | TC_RPAREN | TC_COMMA)) { - /* Either an empty arg list, or trailing comma from prev iter - * must be followed by an arg */ -- if (f->nargs == 0 && t_tclass == TC_SEQTERM) -+ if (f->nargs == 0 && t_tclass == TC_RPAREN) - break; - -- /* TC_SEQSTART/TC_COMMA must be followed by 
TC_VARIABLE */ -+ /* TC_LPAREN/TC_COMMA must be followed by TC_VARIABLE */ - if (t_tclass != TC_VARIABLE) - syntax_error(EMSG_UNEXP_TOKEN); - -@@ -1700,7 +1700,7 @@ static void parse_program(char *p) - v->x.aidx = f->nargs++; - - /* Arg followed either by end of arg list or 1 comma */ -- if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM) -+ if (next_token(TC_COMMA | TC_RPAREN) & TC_RPAREN) - break; - //Impossible: next_token() above would error out and die - // if (t_tclass != TC_COMMA) --- -2.27.0 - - -From 100c649a6d5b8085be19fdcbf02218cf2bcb3cae Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Tue, 29 Jun 2021 02:32:32 +0200 -Subject: [PATCH 12/61] awk: simplify parsing of function declaration - -function old new delta -parse_program 328 313 -15 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 26 ++++++++++---------------- - 1 file changed, 10 insertions(+), 16 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index d31b97d86..08ff02adb 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -769,7 +769,7 @@ static void hash_remove(xhash *hash, const char *name) - - static char *skip_spaces(char *p) - { -- while (1) { -+ for (;;) { - if (*p == '\\' && p[1] == '\n') { - p++; - t_lineno++; -@@ -1685,26 +1685,20 @@ static void parse_program(char *p) - f = newfunc(t_string); - f->body.first = NULL; - f->nargs = 0; -- /* Match func arg list: a comma sep list of >= 0 args, and a close paren */ -- while (next_token(TC_VARIABLE | TC_RPAREN | TC_COMMA)) { -- /* Either an empty arg list, or trailing comma from prev iter -- * must be followed by an arg */ -- if (f->nargs == 0 && t_tclass == TC_RPAREN) -- break; -- -- /* TC_LPAREN/TC_COMMA must be followed by TC_VARIABLE */ -- if (t_tclass != TC_VARIABLE) -+ /* func arg list: comma sep list of args, and a close paren */ -+ for (;;) { -+ if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) { -+ if (f->nargs == 0) -+ break; /* func() is ok */ -+ /* func(a,) is not ok */ - syntax_error(EMSG_UNEXP_TOKEN); 
-- -+ } - v = findvar(ahash, t_string); - v->x.aidx = f->nargs++; -- - /* Arg followed either by end of arg list or 1 comma */ -- if (next_token(TC_COMMA | TC_RPAREN) & TC_RPAREN) -+ if (next_token(TC_COMMA | TC_RPAREN) == TC_RPAREN) - break; --//Impossible: next_token() above would error out and die --// if (t_tclass != TC_COMMA) --// syntax_error(EMSG_UNEXP_TOKEN); -+ /* it was a comma, we ate it */ - } - seq = &f->body; - chain_group(); --- -2.27.0 - - -From 38cbb39458b554d5bcfb5d326dd235f81e3c9b9d Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Tue, 29 Jun 2021 02:43:02 +0200 -Subject: [PATCH 13/61] awk: g_buf[] does not need a separate allocation - -function old new delta -exec_builtin 1400 1414 +14 -evaluate 3132 3141 +9 -getvar_s 121 125 +4 -awk_main 902 886 -16 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 3/1 up/down: 27/-16) Total: 11 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 8 +++----- - 1 file changed, 3 insertions(+), 5 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 08ff02adb..7e4f0d142 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -535,7 +535,6 @@ struct globals { - var *Fields; - nvblock *g_cb; - char *g_pos; -- char *g_buf; - smallint icase; - smallint exiting; - smallint nextrec; -@@ -571,6 +570,8 @@ struct globals2 { - - /* biggest and least used members go last */ - tsplitter fsplitter, rsplitter; -+ -+ char g_buf[MAXVARFMT + 1]; - }; - #define G1 (ptr_to_globals[-1]) - #define G (*(struct globals2 *)ptr_to_globals) -@@ -598,7 +599,6 @@ struct globals2 { - #define Fields (G1.Fields ) - #define g_cb (G1.g_cb ) - #define g_pos (G1.g_pos ) --#define g_buf (G1.g_buf ) - #define icase (G1.icase ) - #define exiting (G1.exiting ) - #define nextrec (G1.nextrec ) -@@ -612,6 +612,7 @@ struct globals2 { - #define intvar (G.intvar ) - #define fsplitter (G.fsplitter ) - #define rsplitter (G.rsplitter ) -+#define g_buf (G.g_buf ) - #define 
INIT_G() do { \ - SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \ - t_tclass = TS_OPTERM; \ -@@ -3353,9 +3354,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - if (ENABLE_LOCALE_SUPPORT) - setlocale(LC_NUMERIC, "C"); - -- /* allocate global buffer */ -- g_buf = xmalloc(MAXVARFMT + 1); -- - vhash = hash_init(); - ahash = hash_init(); - fdhash = hash_init(); --- -2.27.0 - - -From 743b012550834fe032bdc71257e646e202eac2b2 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Tue, 29 Jun 2021 03:02:21 +0200 -Subject: [PATCH 14/61] awk: when parsing TC_FUNCTION token, eat its opening - '(' - -...like we do for array references. - -function old new delta -parse_expr 938 948 +10 -next_token 788 791 +3 -parse_program 313 310 -3 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 2/1 up/down: 13/-3) Total: 10 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 30 +++++++++++++++--------------- - 1 file changed, 15 insertions(+), 15 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 7e4f0d142..1a4468a53 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -241,7 +241,7 @@ typedef struct tsplitter_s { - #define TC_EOF (1 << 25) - #define TC_VARIABLE (1 << 26) /* name */ - #define TC_ARRAY (1 << 27) /* name[ */ --#define TC_FUNCTION (1 << 28) /* name( - but unlike TC_ARRAY, parser does not consume '(' */ -+#define TC_FUNCTION (1 << 28) /* name( */ - #define TC_STRING (1 << 29) /* "..." */ - #define TC_NUMBER (1 << 30) - -@@ -959,6 +959,7 @@ static double getvar_i(var *v) - v->number = my_strtod(&s); - debug_printf_eval("%f (s:'%s')\n", v->number, s); - if (v->type & VF_USER) { -+//TODO: skip_spaces() also skips backslash+newline, is it intended here? 
- s = skip_spaces(s); - if (*s != '\0') - v->type &= ~VF_USER; -@@ -1103,7 +1104,7 @@ static uint32_t next_token(uint32_t expected) - #define save_tclass (G.next_token__save_tclass) - #define save_info (G.next_token__save_info) - -- char *p, *s; -+ char *p; - const char *tl; - const uint32_t *ti; - uint32_t tc, last_token_class; -@@ -1131,15 +1132,12 @@ static uint32_t next_token(uint32_t expected) - while (*p != '\n' && *p != '\0') - p++; - -- if (*p == '\n') -- t_lineno++; -- - if (*p == '\0') { - tc = TC_EOF; - debug_printf_parse("%s: token found: TC_EOF\n", __func__); - } else if (*p == '\"') { - /* it's a string */ -- t_string = s = ++p; -+ char *s = t_string = ++p; - while (*p != '\"') { - char *pp; - if (*p == '\0' || *p == '\n') -@@ -1154,7 +1152,7 @@ static uint32_t next_token(uint32_t expected) - debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string); - } else if ((expected & TC_REGEXP) && *p == '/') { - /* it's regexp */ -- t_string = s = ++p; -+ char *s = t_string = ++p; - while (*p != '/') { - if (*p == '\0' || *p == '\n') - syntax_error(EMSG_UNEXP_EOS); -@@ -1185,6 +1183,9 @@ static uint32_t next_token(uint32_t expected) - tc = TC_NUMBER; - debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double); - } else { -+ if (*p == '\n') -+ t_lineno++; -+ - /* search for something known */ - tl = tokenlist; - tc = 0x00000001; -@@ -1230,15 +1231,15 @@ static uint32_t next_token(uint32_t expected) - if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY)) - p = skip_spaces(p); - if (*p == '(') { -+ p++; - tc = TC_FUNCTION; - debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string); -+ } else if (*p == '[') { -+ p++; -+ tc = TC_ARRAY; -+ debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); - } else { -- if (*p == '[') { -- p++; -- tc = TC_ARRAY; -- debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); -- } else -- debug_printf_parse("%s: token found:'%s' 
TC_VARIABLE\n", __func__, t_string); -+ debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); - } - } - token_found: -@@ -1431,7 +1432,7 @@ static node *parse_expr(uint32_t term_tc) - debug_printf_parse("%s: TC_FUNCTION\n", __func__); - cn->info = OC_FUNC; - cn->r.f = newfunc(t_string); -- cn->l.n = parse_lrparen_list(); -+ cn->l.n = parse_expr(TC_RPAREN); - break; - - case TC_LPAREN: -@@ -1682,7 +1683,6 @@ static void parse_program(char *p) - } else if (tclass & TC_FUNCDECL) { - debug_printf_parse("%s: TC_FUNCDECL\n", __func__); - next_token(TC_FUNCTION); -- g_pos++; - f = newfunc(t_string); - f->body.first = NULL; - f->nargs = 0; --- -2.27.0 - - -From f80dfb802b4a0984293d50f80cd41519b109b524 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Tue, 29 Jun 2021 03:27:07 +0200 -Subject: [PATCH 15/61] awk: get rid of "move name one char back" trick in - next_token() - -function old new delta -next_token 791 812 +21 -awk_main 886 831 -55 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 1/1 up/down: 21/-55) Total: -34 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 54 +++++++++++++++++++++++++-------------------------- - 1 file changed, 27 insertions(+), 27 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 1a4468a53..fb1e5d59b 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -535,6 +535,7 @@ struct globals { - var *Fields; - nvblock *g_cb; - char *g_pos; -+ char g_saved_ch; - smallint icase; - smallint exiting; - smallint nextrec; -@@ -599,6 +600,7 @@ struct globals2 { - #define Fields (G1.Fields ) - #define g_cb (G1.g_cb ) - #define g_pos (G1.g_pos ) -+#define g_saved_ch (G1.g_saved_ch ) - #define icase (G1.icase ) - #define exiting (G1.exiting ) - #define nextrec (G1.nextrec ) -@@ -1125,6 +1127,10 @@ static uint32_t next_token(uint32_t expected) - t_info = save_info; - } else { - p = g_pos; -+ if (g_saved_ch != '\0') { -+ *p = g_saved_ch; -+ 
g_saved_ch = '\0'; -+ } - readnext: - p = skip_spaces(p); - g_lineno = t_lineno; -@@ -1183,6 +1189,8 @@ static uint32_t next_token(uint32_t expected) - tc = TC_NUMBER; - debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double); - } else { -+ char *end_of_name; -+ - if (*p == '\n') - t_lineno++; - -@@ -1219,16 +1227,14 @@ static uint32_t next_token(uint32_t expected) - if (!isalnum_(*p)) - syntax_error(EMSG_UNEXP_TOKEN); /* no */ - /* yes */ --/* "move name one char back" trick: we need a byte for NUL terminator */ --/* NB: this results in argv[i][-1] being used (!!!) in e.g. "awk -e 'NAME'" case */ -- t_string = --p; -- while (isalnum_(*++p)) { -- p[-1] = *p; -- } -- p[-1] = '\0'; -+ t_string = p; -+ while (isalnum_(*p)) -+ p++; -+ end_of_name = p; - tc = TC_VARIABLE; - /* also consume whitespace between functionname and bracket */ - if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY)) -+//TODO: why if variable can be here (but not array ref), skipping is not allowed? Example where it matters? - p = skip_spaces(p); - if (*p == '(') { - p++; -@@ -1240,7 +1246,19 @@ static uint32_t next_token(uint32_t expected) - debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); - } else { - debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); -+ if (end_of_name == p) { -+ /* there is no space for trailing NUL in t_string! -+ * We need to save the char we are going to NUL. -+ * (we'll use it in future call to next_token()) -+ */ -+ g_saved_ch = *end_of_name; -+// especially pathological example is V="abc"; V.2 - it's V concatenated to .2 -+// (it evaluates to "abc0.2"). Because of this case, we can't simply cache -+// '.' and analyze it later: we also have to *store it back* in next -+// next_token(), in order to give my_strtod() the undamaged ".2" string. 
-+ } - } -+ *end_of_name = '\0'; /* terminate t_string */ - } - token_found: - g_pos = p; -@@ -3420,38 +3438,20 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - - g_progname = llist_pop(&list_f); - fd = xopen_stdin(g_progname); -- /* 1st byte is reserved for "move name one char back" trick in next_token */ -- i = 1; -- s = NULL; -- for (;;) { -- int sz; -- s = xrealloc(s, i + 1000); -- sz = safe_read(fd, s + i, 1000); -- if (sz <= 0) -- break; -- i += sz; -- } -- s = xrealloc(s, i + 1); /* trim unused 999 bytes */ -- s[i] = '\0'; -+ s = xmalloc_read(fd, NULL); /* it's NUL-terminated */ - close(fd); -- parse_program(s + 1); -+ parse_program(s); - free(s); - } - g_progname = "cmd. line"; - #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS - while (list_e) { -- /* NB: "move name one char back" trick in next_token -- * can use argv[i][-1] here. -- */ - parse_program(llist_pop(&list_e)); - } - #endif - if (!(opt & (OPT_f | OPT_e))) { - if (!*argv) - bb_show_usage(); -- /* NB: "move name one char back" trick in next_token -- * can use argv[i][-1] here. 
-- */ - parse_program(*argv++); - } - --- -2.27.0 - - -From 7fbe3864b057dd6c1ba39d7b5071502c09280767 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Tue, 29 Jun 2021 03:44:56 +0200 -Subject: [PATCH 16/61] awk: code shrink - -function old new delta -parse_expr 948 945 -3 -chain_expr 65 62 -3 -chain_group 655 649 -6 -parse_program 310 303 -7 -rollback_token 10 - -10 ------------------------------------------------------------------------------- -(add/remove: 0/1 grow/shrink: 0/4 up/down: 0/-29) Total: -29 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 18 +++++++++++------- - 1 file changed, 11 insertions(+), 7 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index fb1e5d59b..3d1c04a32 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -1300,7 +1300,7 @@ static uint32_t next_token(uint32_t expected) - #undef save_info - } - --static void rollback_token(void) -+static ALWAYS_INLINE void rollback_token(void) - { - t_rollback = TRUE; - } -@@ -1474,14 +1474,14 @@ static node *parse_expr(uint32_t term_tc) - - case TC_LENGTH: - debug_printf_parse("%s: TC_LENGTH\n", __func__); -- next_token(TC_LPAREN /* length(...) */ -+ tc = next_token(TC_LPAREN /* length(...) */ - | TS_OPTERM /* length; (or newline)*/ - | TC_GRPTERM /* length } */ - | TC_BINOPX /* length NUM */ - | TC_COMMA /* print length, 1 */ - ); - rollback_token(); -- if (t_tclass & TC_LPAREN) { -+ if (tc & TC_LPAREN) { - /* It was a "(" token. 
Handle just like TC_BUILTIN */ - cn->l.n = parse_lrparen_list(); - } -@@ -1563,19 +1563,23 @@ static void chain_group(void) - - if (c & TC_GRPSTART) { - debug_printf_parse("%s: TC_GRPSTART\n", __func__); -- while (next_token(TS_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) { -+ while ((c = next_token(TS_GRPSEQ | TC_GRPTERM)) != TC_GRPTERM) { - debug_printf_parse("%s: !TC_GRPTERM\n", __func__); -- if (t_tclass & TC_NEWLINE) -+ if (c & TC_NEWLINE) - continue; - rollback_token(); - chain_group(); - } - debug_printf_parse("%s: TC_GRPTERM\n", __func__); -- } else if (c & (TS_OPSEQ | TS_OPTERM)) { -+ return; -+ } -+ if (c & (TS_OPSEQ | TS_OPTERM)) { - debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__); - rollback_token(); - chain_expr(OC_EXEC | Vx); -- } else { -+ return; -+ } -+ { - /* TS_STATEMNT */ - debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__); - switch (t_info & OPCLSMASK) { --- -2.27.0 - - -From 9dba9fae14ec415943d1fda31b6b48d56d5cb0d0 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Tue, 29 Jun 2021 03:47:46 +0200 -Subject: [PATCH 17/61] awk: deindent a block, no code changes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 167 +++++++++++++++++++++++++------------------------- - 1 file changed, 83 insertions(+), 84 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 3d1c04a32..34bcc1798 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -1579,98 +1579,97 @@ static void chain_group(void) - chain_expr(OC_EXEC | Vx); - return; - } -- { -- /* TS_STATEMNT */ -- debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__); -- switch (t_info & OPCLSMASK) { -- case ST_IF: -- debug_printf_parse("%s: ST_IF\n", __func__); -- n = chain_node(OC_BR | Vx); -- n->l.n = parse_lrparen_list(); -+ -+ /* TS_STATEMNT */ -+ debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__); -+ switch (t_info & OPCLSMASK) { -+ case ST_IF: -+ debug_printf_parse("%s: ST_IF\n", __func__); -+ n = chain_node(OC_BR | Vx); -+ n->l.n = parse_lrparen_list(); -+ chain_group(); -+ n2 = 
chain_node(OC_EXEC); -+ n->r.n = seq->last; -+ if (next_token(TS_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) { - chain_group(); -- n2 = chain_node(OC_EXEC); -- n->r.n = seq->last; -- if (next_token(TS_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) { -- chain_group(); -- n2->a.n = seq->last; -- } else { -- rollback_token(); -- } -- break; -+ n2->a.n = seq->last; -+ } else { -+ rollback_token(); -+ } -+ break; - -- case ST_WHILE: -- debug_printf_parse("%s: ST_WHILE\n", __func__); -- n2 = parse_lrparen_list(); -- n = chain_loop(NULL); -- n->l.n = n2; -- break; -+ case ST_WHILE: -+ debug_printf_parse("%s: ST_WHILE\n", __func__); -+ n2 = parse_lrparen_list(); -+ n = chain_loop(NULL); -+ n->l.n = n2; -+ break; - -- case ST_DO: -- debug_printf_parse("%s: ST_DO\n", __func__); -- n2 = chain_node(OC_EXEC); -- n = chain_loop(NULL); -- n2->a.n = n->a.n; -- next_token(TC_WHILE); -- n->l.n = parse_lrparen_list(); -- break; -+ case ST_DO: -+ debug_printf_parse("%s: ST_DO\n", __func__); -+ n2 = chain_node(OC_EXEC); -+ n = chain_loop(NULL); -+ n2->a.n = n->a.n; -+ next_token(TC_WHILE); -+ n->l.n = parse_lrparen_list(); -+ break; - -- case ST_FOR: -- debug_printf_parse("%s: ST_FOR\n", __func__); -- next_token(TC_LPAREN); -- n2 = parse_expr(TC_SEMICOL | TC_RPAREN); -- if (t_tclass & TC_RPAREN) { /* for-in */ -- if (!n2 || (n2->info & OPCLSMASK) != OC_IN) -- syntax_error(EMSG_UNEXP_TOKEN); -- n = chain_node(OC_WALKINIT | VV); -- n->l.n = n2->l.n; -- n->r.n = n2->r.n; -- n = chain_loop(NULL); -- n->info = OC_WALKNEXT | Vx; -- n->l.n = n2->l.n; -- } else { /* for (;;) */ -- n = chain_node(OC_EXEC | Vx); -- n->l.n = n2; -- n2 = parse_expr(TC_SEMICOL); -- n3 = parse_expr(TC_RPAREN); -- n = chain_loop(n3); -- n->l.n = n2; -- if (!n2) -- n->info = OC_EXEC; -- } -- break; -+ case ST_FOR: -+ debug_printf_parse("%s: ST_FOR\n", __func__); -+ next_token(TC_LPAREN); -+ n2 = parse_expr(TC_SEMICOL | TC_RPAREN); -+ if (t_tclass & TC_RPAREN) { /* for-in */ -+ if (!n2 || (n2->info & OPCLSMASK) != OC_IN) -+ 
syntax_error(EMSG_UNEXP_TOKEN); -+ n = chain_node(OC_WALKINIT | VV); -+ n->l.n = n2->l.n; -+ n->r.n = n2->r.n; -+ n = chain_loop(NULL); -+ n->info = OC_WALKNEXT | Vx; -+ n->l.n = n2->l.n; -+ } else { /* for (;;) */ -+ n = chain_node(OC_EXEC | Vx); -+ n->l.n = n2; -+ n2 = parse_expr(TC_SEMICOL); -+ n3 = parse_expr(TC_RPAREN); -+ n = chain_loop(n3); -+ n->l.n = n2; -+ if (!n2) -+ n->info = OC_EXEC; -+ } -+ break; - -- case OC_PRINT: -- case OC_PRINTF: -- debug_printf_parse("%s: OC_PRINT[F]\n", __func__); -- n = chain_node(t_info); -- n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_GRPTERM); -- if (t_tclass & TC_OUTRDR) { -- n->info |= t_info; -- n->r.n = parse_expr(TS_OPTERM | TC_GRPTERM); -- } -- if (t_tclass & TC_GRPTERM) -- rollback_token(); -- break; -+ case OC_PRINT: -+ case OC_PRINTF: -+ debug_printf_parse("%s: OC_PRINT[F]\n", __func__); -+ n = chain_node(t_info); -+ n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_GRPTERM); -+ if (t_tclass & TC_OUTRDR) { -+ n->info |= t_info; -+ n->r.n = parse_expr(TS_OPTERM | TC_GRPTERM); -+ } -+ if (t_tclass & TC_GRPTERM) -+ rollback_token(); -+ break; - -- case OC_BREAK: -- debug_printf_parse("%s: OC_BREAK\n", __func__); -- n = chain_node(OC_EXEC); -- n->a.n = break_ptr; -- chain_expr(t_info); -- break; -+ case OC_BREAK: -+ debug_printf_parse("%s: OC_BREAK\n", __func__); -+ n = chain_node(OC_EXEC); -+ n->a.n = break_ptr; -+ chain_expr(t_info); -+ break; - -- case OC_CONTINUE: -- debug_printf_parse("%s: OC_CONTINUE\n", __func__); -- n = chain_node(OC_EXEC); -- n->a.n = continue_ptr; -- chain_expr(t_info); -- break; -+ case OC_CONTINUE: -+ debug_printf_parse("%s: OC_CONTINUE\n", __func__); -+ n = chain_node(OC_EXEC); -+ n->a.n = continue_ptr; -+ chain_expr(t_info); -+ break; - -- /* delete, next, nextfile, return, exit */ -- default: -- debug_printf_parse("%s: default\n", __func__); -- chain_expr(t_info); -- } -+ /* delete, next, nextfile, return, exit */ -+ default: -+ debug_printf_parse("%s: default\n", __func__); -+ 
chain_expr(t_info); - } - } - --- -2.27.0 - - -From bc9e60546c860c130ed9c312517fbbaf0ad51871 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Tue, 29 Jun 2021 12:16:36 +0200 -Subject: [PATCH 18/61] awk: fix parsing of expressions such as "v (a)" - -function old new delta -next_token 812 825 +13 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 22 ++++++++++++++++++---- - testsuite/awk.tests | 11 +++++++++++ - 2 files changed, 29 insertions(+), 4 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 34bcc1798..ce860dc04 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -1231,11 +1231,24 @@ static uint32_t next_token(uint32_t expected) - while (isalnum_(*p)) - p++; - end_of_name = p; -- tc = TC_VARIABLE; -- /* also consume whitespace between functionname and bracket */ -- if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY)) --//TODO: why if variable can be here (but not array ref), skipping is not allowed? Example where it matters? -+ -+ if (last_token_class == TC_FUNCDECL) -+ /* eat space in "function FUNC (...) {...}" declaration */ - p = skip_spaces(p); -+ else if (expected & TC_ARRAY) { -+ /* eat space between array name and [ */ -+ char *s = skip_spaces(p); -+ if (*s == '[') /* array ref, not just a name? */ -+ p = s; -+ } -+ /* else: do NOT consume whitespace after variable name! -+ * gawk allows definition "function FUNC (p) {...}" - note space, -+ * but disallows the call "FUNC (p)" because it isn't one - -+ * expression "v (a)" should NOT be parsed as TC_FUNCTION: -+ * it is a valid concatenation if "v" is a variable, -+ * not a function name (and type of name is not known at parse time). 
-+ */ -+ - if (*p == '(') { - p++; - tc = TC_FUNCTION; -@@ -1245,6 +1258,7 @@ static uint32_t next_token(uint32_t expected) - tc = TC_ARRAY; - debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); - } else { -+ tc = TC_VARIABLE; - debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); - if (end_of_name == p) { - /* there is no space for trailing NUL in t_string! -diff --git a/testsuite/awk.tests b/testsuite/awk.tests -index cf9b722dc..6e35d33dd 100755 ---- a/testsuite/awk.tests -+++ b/testsuite/awk.tests -@@ -71,6 +71,17 @@ testing "awk properly handles undefined function" \ - "L1\n\nawk: cmd. line:5: Call to undefined function\n" \ - "" "" - -+prg=' -+BEGIN { -+ v=1 -+ a=2 -+ print v (a) -+}' -+testing "'v (a)' is not a function call, it is a concatenation" \ -+ "awk '$prg' 2>&1" \ -+ "12\n" \ -+ "" "" -+ - - optional DESKTOP - testing "awk hex const 1" "awk '{ print or(0xffffffff,1) }'" "4294967295\n" "" "\n" --- -2.27.0 - - -From 08444111ee05f6514bcf6a8c8898ab4e4b827982 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Tue, 29 Jun 2021 14:33:04 +0200 -Subject: [PATCH 19/61] awk: document which hashes are used at what state - (parse/execute) - -We can free them after they are no longer needed. -(Currently, being a NOEXEC applet is much larger waste of memory -for the case of long-running awk script). 
- -function old new delta -awk_main 831 827 -4 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 30 ++++++++++++++++++++---------- - 1 file changed, 20 insertions(+), 10 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index ce860dc04..6142144bb 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -527,7 +527,10 @@ struct globals { - chain *seq; - node *break_ptr, *continue_ptr; - rstream *iF; -- xhash *vhash, *ahash, *fdhash, *fnhash; -+ xhash *ahash; /* argument names, used only while parsing function bodies */ -+ xhash *fnhash; /* function names, used only in parsing stage */ -+ xhash *vhash; /* variables and arrays */ -+ xhash *fdhash; /* file objects, used only in execution stage */ - const char *g_progname; - int g_lineno; - int nfields; -@@ -1719,6 +1722,7 @@ static void parse_program(char *p) - debug_printf_parse("%s: TC_FUNCDECL\n", __func__); - next_token(TC_FUNCTION); - f = newfunc(t_string); -+//FIXME: dup check: functions can't be redefined, this is not ok: awk 'func f(){}; func f(){}' - f->body.first = NULL; - f->nargs = 0; - /* func arg list: comma sep list of args, and a close paren */ -@@ -3389,12 +3393,8 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - if (ENABLE_LOCALE_SUPPORT) - setlocale(LC_NUMERIC, "C"); - -- vhash = hash_init(); -- ahash = hash_init(); -- fdhash = hash_init(); -- fnhash = hash_init(); -- - /* initialize variables */ -+ vhash = hash_init(); - { - char *vnames = (char *)vNames; /* cheat */ - char *vvalues = (char *)vValues; -@@ -3416,10 +3416,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - handle_special(intvar[FS]); - handle_special(intvar[RS]); - -- newfile("/dev/stdin")->F = stdin; -- newfile("/dev/stdout")->F = stdout; -- newfile("/dev/stderr")->F = stderr; -- - /* Huh, people report that sometimes environ is NULL. Oh well. 
*/ - if (environ) { - char **envp; -@@ -3449,6 +3445,10 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - if (!is_assignment(llist_pop(&list_v))) - bb_show_usage(); - } -+ -+ /* Parse all supplied programs */ -+ fnhash = hash_init(); -+ ahash = hash_init(); - while (list_f) { - int fd; - char *s; -@@ -3471,6 +3471,11 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - bb_show_usage(); - parse_program(*argv++); - } -+ //free_hash(ahash) // ~250 bytes, arg names, used only during parse of function bodies -+ //ahash = NULL; // debug -+ //free_hash(fnhash) // ~250 bytes, used only for function names -+ //fnhash = NULL; // debug -+ /* parsing done, on to executing */ - - /* fill in ARGV array */ - setari_u(intvar[ARGV], 0, "awk"); -@@ -3479,6 +3484,11 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - setari_u(intvar[ARGV], ++i, *argv++); - setvar_i(intvar[ARGC], i + 1); - -+ fdhash = hash_init(); -+ newfile("/dev/stdin")->F = stdin; -+ newfile("/dev/stdout")->F = stdout; -+ newfile("/dev/stderr")->F = stderr; -+ - zero_out_var(&tv); - evaluate(beginseq.first, &tv); - if (!mainseq.first && !endseq.first) --- -2.27.0 - - -From ce151c62189985344d90fc554f8780c7305112f8 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Tue, 29 Jun 2021 18:33:25 +0200 -Subject: [PATCH 20/61] awk: free unused parsing structures after parse is done - -function old new delta -hash_clear - 90 +90 -awk_main 827 849 +22 -clear_array 90 - -90 ------------------------------------------------------------------------------- -(add/remove: 1/1 grow/shrink: 1/0 up/down: 112/-90) Total: 22 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 74 ++++++++++++++++++++++++++++++++------------------- - 1 file changed, 47 insertions(+), 27 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 6142144bb..4e29b28cf 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -530,7 +530,8 @@ struct globals { - xhash *ahash; /* argument names, used only while parsing function bodies */ - 
xhash *fnhash; /* function names, used only in parsing stage */ - xhash *vhash; /* variables and arrays */ -- xhash *fdhash; /* file objects, used only in execution stage */ -+ //xhash *fdhash; /* file objects, used only in execution stage */ -+ //we are reusing ahash as fdhash, via define (see later) - const char *g_progname; - int g_lineno; - int nfields; -@@ -592,10 +593,13 @@ struct globals2 { - #define break_ptr (G1.break_ptr ) - #define continue_ptr (G1.continue_ptr) - #define iF (G1.iF ) --#define vhash (G1.vhash ) - #define ahash (G1.ahash ) --#define fdhash (G1.fdhash ) - #define fnhash (G1.fnhash ) -+#define vhash (G1.vhash ) -+#define fdhash ahash -+//^^^^^^^^^^^^^^^^^^ ahash is cleared after every function parsing, -+// and ends up empty after parsing phase. Thus, we can simply reuse it -+// for fdhash in execution stage. - #define g_progname (G1.g_progname ) - #define g_lineno (G1.g_lineno ) - #define nfields (G1.nfields ) -@@ -682,6 +686,33 @@ static xhash *hash_init(void) - return newhash; - } - -+static void hash_clear(xhash *hash) -+{ -+ unsigned i; -+ hash_item *hi, *thi; -+ -+ for (i = 0; i < hash->csize; i++) { -+ hi = hash->items[i]; -+ while (hi) { -+ thi = hi; -+ hi = hi->next; -+ free(thi->data.v.string); -+ free(thi); -+ } -+ hash->items[i] = NULL; -+ } -+ hash->glen = hash->nel = 0; -+} -+ -+#if 0 //UNUSED -+static void hash_free(xhash *hash) -+{ -+ hash_clear(hash); -+ free(hash->items); -+ free(hash); -+} -+#endif -+ - /* find item in hash, return ptr to data, NULL if not found */ - static void *hash_search(xhash *hash, const char *name) - { -@@ -869,23 +900,7 @@ static xhash *iamarray(var *v) - return a->x.array; - } - --static void clear_array(xhash *array) --{ -- unsigned i; -- hash_item *hi, *thi; -- -- for (i = 0; i < array->csize; i++) { -- hi = array->items[i]; -- while (hi) { -- thi = hi; -- hi = hi->next; -- free(thi->data.v.string); -- free(thi); -- } -- array->items[i] = NULL; -- } -- array->glen = array->nel = 0; --} 
-+#define clear_array(array) hash_clear(array) - - /* clear a variable */ - static var *clrvar(var *v) -@@ -1742,7 +1757,7 @@ static void parse_program(char *p) - } - seq = &f->body; - chain_group(); -- clear_array(ahash); -+ hash_clear(ahash); - } else if (tclass & TS_OPSEQ) { - debug_printf_parse("%s: TS_OPSEQ\n", __func__); - rollback_token(); -@@ -3471,11 +3486,16 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - bb_show_usage(); - parse_program(*argv++); - } -- //free_hash(ahash) // ~250 bytes, arg names, used only during parse of function bodies -- //ahash = NULL; // debug -- //free_hash(fnhash) // ~250 bytes, used only for function names -- //fnhash = NULL; // debug -- /* parsing done, on to executing */ -+ /* Free unused parse structures */ -+ //hash_free(fnhash); // ~250 bytes when empty, used only for function names -+ //^^^^^^^^^^^^^^^^^ does not work, hash_clear() inside SEGVs -+ // (IOW: hash_clear() assumes it's a hash of variables. fnhash is not). -+ free(fnhash->items); -+ free(fnhash); -+ fnhash = NULL; // debug -+ //hash_free(ahash); // empty after parsing, will reuse as fdhash instead of freeing -+ -+ /* Parsing done, on to executing */ - - /* fill in ARGV array */ - setari_u(intvar[ARGV], 0, "awk"); -@@ -3484,7 +3504,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - setari_u(intvar[ARGV], ++i, *argv++); - setvar_i(intvar[ARGC], i + 1); - -- fdhash = hash_init(); -+ //fdhash = ahash - done via define - newfile("/dev/stdin")->F = stdin; - newfile("/dev/stdout")->F = stdout; - newfile("/dev/stderr")->F = stderr; --- -2.27.0 - - -From 465eba0f032c96966d2547f116784fb0d8751943 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Tue, 29 Jun 2021 19:07:36 +0200 -Subject: [PATCH 21/61] awk: assorted optimizations - -hash_find(): do not caclculate hash twice. Do not divide - can use -cheap multiply-by-8 shift. - -nextword(): do not repeatedly increment in-memory value, do it in register, -then store final result. 
- -hashwalk_init(): do not strlen() twice. - -function old new delta -hash_search3 - 49 +49 -hash_find 259 281 +22 -nextword 19 16 -3 -evaluate 3141 3137 -4 -hash_search 54 28 -26 ------------------------------------------------------------------------------- -(add/remove: 1/0 grow/shrink: 1/3 up/down: 71/-33) Total: 38 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 26 +++++++++++++++++--------- - 1 file changed, 17 insertions(+), 9 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 4e29b28cf..a4cd3cf93 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -696,6 +696,7 @@ static void hash_clear(xhash *hash) - while (hi) { - thi = hi; - hi = hi->next; -+//FIXME: this assumes that it's a hash of *variables*: - free(thi->data.v.string); - free(thi); - } -@@ -714,11 +715,11 @@ static void hash_free(xhash *hash) - #endif - - /* find item in hash, return ptr to data, NULL if not found */ --static void *hash_search(xhash *hash, const char *name) -+static NOINLINE void *hash_search3(xhash *hash, const char *name, unsigned idx) - { - hash_item *hi; - -- hi = hash->items[hashidx(name) % hash->csize]; -+ hi = hash->items[idx % hash->csize]; - while (hi) { - if (strcmp(hi->name, name) == 0) - return &hi->data; -@@ -727,6 +728,11 @@ static void *hash_search(xhash *hash, const char *name) - return NULL; - } - -+static void *hash_search(xhash *hash, const char *name) -+{ -+ return hash_search3(hash, name, hashidx(name)); -+} -+ - /* grow hash if it becomes too big */ - static void hash_rebuild(xhash *hash) - { -@@ -762,16 +768,17 @@ static void *hash_find(xhash *hash, const char *name) - unsigned idx; - int l; - -- hi = hash_search(hash, name); -+ idx = hashidx(name); -+ hi = hash_search3(hash, name, idx); - if (!hi) { -- if (++hash->nel / hash->csize > 10) -+ if (++hash->nel > hash->csize * 8) - hash_rebuild(hash); - - l = strlen(name) + 1; - hi = xzalloc(sizeof(*hi) + l); - strcpy(hi->name, name); - -- idx = hashidx(name) % hash->csize; -+ idx = 
idx % hash->csize; - hi->next = hash->items[idx]; - hash->items[idx] = hi; - hash->glen += l; -@@ -822,8 +829,10 @@ static char *skip_spaces(char *p) - static char *nextword(char **s) - { - char *p = *s; -- while (*(*s)++ != '\0') -+ char *q = p; -+ while (*q++ != '\0') - continue; -+ *s = q; - return p; - } - -@@ -2116,8 +2125,7 @@ static void hashwalk_init(var *v, xhash *array) - for (i = 0; i < array->csize; i++) { - hi = array->items[i]; - while (hi) { -- strcpy(w->end, hi->name); -- nextword(&w->end); -+ w->end = stpcpy(w->end, hi->name) + 1; - hi = hi->next; - } - } -@@ -3504,7 +3512,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - setari_u(intvar[ARGV], ++i, *argv++); - setvar_i(intvar[ARGC], i + 1); - -- //fdhash = ahash - done via define -+ //fdhash = ahash; // done via define - newfile("/dev/stdin")->F = stdin; - newfile("/dev/stdout")->F = stdout; - newfile("/dev/stderr")->F = stderr; --- -2.27.0 - - -From 467708ee9c852a4535d554214bb70b916743335a Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Wed, 30 Jun 2021 02:12:27 +0200 -Subject: [PATCH 22/61] awk: remove custom pool allocator for temporary awk - variables - -It seems to be designed to reduce overhead of malloc's auxiliary data, -by allocating at least 64 variables as a block. -With "struct var" being about 20-32 bytes long (32/64 bits), -malloc overhead for one temporary indeed is high, ~33% more memory used -than needed. 
- -function old new delta -evaluate 3137 3145 +8 -modprobe_main 798 803 +5 -exec_builtin 1414 1419 +5 -awk_printf 476 481 +5 -as_regex 132 137 +5 -EMSG_INTERNAL_ERROR 15 - -15 -nvfree 169 116 -53 -nvalloc 145 - -145 ------------------------------------------------------------------------------- -(add/remove: 0/2 grow/shrink: 5/1 up/down: 28/-213) Total: -185 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 164 +++++++++++++++++++------------------------------- - 1 file changed, 61 insertions(+), 103 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index a4cd3cf93..35c11ec58 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -93,7 +93,6 @@ enum { - }; - - #define MAXVARFMT 240 --#define MINNVBLOCK 64 - - /* variable flags */ - #define VF_NUMBER 0x0001 /* 1 = primary type is number */ -@@ -120,8 +119,8 @@ typedef struct walker_list { - /* Variable */ - typedef struct var_s { - unsigned type; /* flags */ -- double number; - char *string; -+ double number; - union { - int aidx; /* func arg idx (for compilation stage) */ - struct xhash_s *array; /* array ptr */ -@@ -192,15 +191,6 @@ typedef struct node_s { - } a; - } node; - --/* Block of temporary variables */ --typedef struct nvblock_s { -- int size; -- var *pos; -- struct nvblock_s *prev; -- struct nvblock_s *next; -- var nv[]; --} nvblock; -- - typedef struct tsplitter_s { - node n; - regex_t re[2]; -@@ -537,7 +527,6 @@ struct globals { - int nfields; - int maxfields; /* used in fsrealloc() only */ - var *Fields; -- nvblock *g_cb; - char *g_pos; - char g_saved_ch; - smallint icase; -@@ -605,7 +594,6 @@ struct globals2 { - #define nfields (G1.nfields ) - #define maxfields (G1.maxfields ) - #define Fields (G1.Fields ) --#define g_cb (G1.g_cb ) - #define g_pos (G1.g_pos ) - #define g_saved_ch (G1.g_saved_ch ) - #define icase (G1.icase ) -@@ -640,7 +628,6 @@ static int awk_exit(int) NORETURN; - - /* ---- error handling ---- */ - --static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal 
error"; - static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string"; - static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token"; - static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero"; -@@ -1050,77 +1037,6 @@ static int istrue(var *v) - return (v->string && v->string[0]); - } - --/* temporary variables allocator. Last allocated should be first freed */ --static var *nvalloc(int n) --{ -- nvblock *pb = NULL; -- var *v, *r; -- int size; -- -- while (g_cb) { -- pb = g_cb; -- if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) -- break; -- g_cb = g_cb->next; -- } -- -- if (!g_cb) { -- size = (n <= MINNVBLOCK) ? MINNVBLOCK : n; -- g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var)); -- g_cb->size = size; -- g_cb->pos = g_cb->nv; -- g_cb->prev = pb; -- /*g_cb->next = NULL; - xzalloc did it */ -- if (pb) -- pb->next = g_cb; -- } -- -- v = r = g_cb->pos; -- g_cb->pos += n; -- -- while (v < g_cb->pos) { -- v->type = 0; -- v->string = NULL; -- v++; -- } -- -- return r; --} -- --static void nvfree(var *v) --{ -- var *p; -- -- if (v < g_cb->nv || v >= g_cb->pos) -- syntax_error(EMSG_INTERNAL_ERROR); -- -- for (p = v; p < g_cb->pos; p++) { -- if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) { -- clear_array(iamarray(p)); -- free(p->x.array->items); -- free(p->x.array); -- } -- if (p->type & VF_WALK) { -- walker_list *n; -- walker_list *w = p->x.walker; -- debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker); -- p->x.walker = NULL; -- while (w) { -- n = w->prev; -- debug_printf_walker(" free(%p)\n", w); -- free(w); -- w = n; -- } -- } -- clrvar(p); -- } -- -- g_cb->pos = v; -- while (g_cb->prev && g_cb->pos == g_cb->nv) { -- g_cb = g_cb->prev; -- } --} -- - /* ------- awk program text parsing ------- */ - - /* Parse next token pointed by global pos, place results into global t_XYZ variables. 
-@@ -1793,6 +1709,41 @@ static void parse_program(char *p) - - /* -------- program execution part -------- */ - -+/* temporary variables allocator */ -+static var *nvalloc(int sz) -+{ -+ return xzalloc(sz * sizeof(var)); -+} -+ -+static void nvfree(var *v, int sz) -+{ -+ var *p = v; -+ -+ while (--sz >= 0) { -+ if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) { -+ clear_array(iamarray(p)); -+ free(p->x.array->items); -+ free(p->x.array); -+ } -+ if (p->type & VF_WALK) { -+ walker_list *n; -+ walker_list *w = p->x.walker; -+ debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker); -+ p->x.walker = NULL; -+ while (w) { -+ n = w->prev; -+ debug_printf_walker(" free(%p)\n", w); -+ free(w); -+ w = n; -+ } -+ } -+ clrvar(p); -+ p++; -+ } -+ -+ free(v); -+} -+ - static node *mk_splitter(const char *s, tsplitter *spl) - { - regex_t *re, *ire; -@@ -1814,9 +1765,9 @@ static node *mk_splitter(const char *s, tsplitter *spl) - return n; - } - --/* use node as a regular expression. Supplied with node ptr and regex_t -+/* Use node as a regular expression. Supplied with node ptr and regex_t - * storage space. Return ptr to regex (if result points to preg, it should -- * be later regfree'd manually -+ * be later regfree'd manually). - */ - static regex_t *as_regex(node *op, regex_t *preg) - { -@@ -1840,7 +1791,7 @@ static regex_t *as_regex(node *op, regex_t *preg) - cflags &= ~REG_EXTENDED; - xregcomp(preg, s, cflags); - } -- nvfree(v); -+ nvfree(v, 1); - return preg; - } - -@@ -2292,6 +2243,8 @@ static char *awk_printf(node *n, int *len) - var *v, *arg; - - v = nvalloc(1); -+//TODO: above, to avoid allocating a single temporary var, take a pointer -+//to a temporary that our caller (evaluate()) already has? 
- fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v))); - - i = 0; -@@ -2333,7 +2286,7 @@ static char *awk_printf(node *n, int *len) - } - - free(fmt); -- nvfree(v); -+ nvfree(v, 1); - b = xrealloc(b, i + 1); - b[i] = '\0'; - #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS -@@ -2661,14 +2614,14 @@ static NOINLINE var *exec_builtin(node *op, var *res) - break; - } - -- nvfree(tv); -+ nvfree(tv, 4); - return res; - #undef tspl - } - - /* - * Evaluate node - the heart of the program. Supplied with subtree -- * and place where to store result. returns ptr to result. -+ * and place where to store result. Returns ptr to result. - */ - #define XC(n) ((n) >> 8) - -@@ -2953,33 +2906,38 @@ static var *evaluate(node *op, var *res) - break; - - case XC( OC_FUNC ): { -- var *vbeg, *v; -+ var *tv, *sv_fnargs; - const char *sv_progname; -+ int nargs1, i; -+ - debug_printf_eval("FUNC\n"); - -- /* The body might be empty, still has to eval the args */ - if (!op->r.n->info && !op->r.f->body.first) - syntax_error(EMSG_UNDEF_FUNC); - -- vbeg = v = nvalloc(op->r.f->nargs + 1); -+ /* The body might be empty, still has to eval the args */ -+ nargs1 = op->r.f->nargs + 1; -+ tv = nvalloc(nargs1); -+ i = 0; - while (op1) { -+//TODO: explain why one iteration is done even for the case p->r.f->nargs == 0 - var *arg = evaluate(nextarg(&op1), v1); -- copyvar(v, arg); -- v->type |= VF_CHILD; -- v->x.parent = arg; -- if (++v - vbeg >= op->r.f->nargs) -+ copyvar(&tv[i], arg); -+ tv[i].type |= VF_CHILD; -+ tv[i].x.parent = arg; -+ if (++i >= op->r.f->nargs) - break; - } - -- v = fnargs; -- fnargs = vbeg; -+ sv_fnargs = fnargs; - sv_progname = g_progname; - -+ fnargs = tv; - res = evaluate(op->r.f->body.first, res); -+ nvfree(fnargs, nargs1); - - g_progname = sv_progname; -- nvfree(fnargs); -- fnargs = v; -+ fnargs = sv_fnargs; - - break; - } -@@ -3301,7 +3259,7 @@ static var *evaluate(node *op, var *res) - break; - } /* while (op) */ - -- nvfree(v1); -+ nvfree(v1, 2); - debug_printf_eval("returning from 
%s(): %p\n", __func__, res); - return res; - #undef fnargs --- -2.27.0 - - -From c5ddfb36e34c93d63546bc3a7f458b946fa64825 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Wed, 30 Jun 2021 12:12:20 +0200 -Subject: [PATCH 23/61] awk: replace incorrect use of union in undefined - function check (no code changes) - -...which reveals that it's buggy: it thinks "func f(){}" is an undefined function! - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 35c11ec58..1115085da 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -2912,7 +2912,7 @@ static var *evaluate(node *op, var *res) - - debug_printf_eval("FUNC\n"); - -- if (!op->r.n->info && !op->r.f->body.first) -+ if (op->r.f->nargs == 0 && !op->r.f->body.first) - syntax_error(EMSG_UNDEF_FUNC); - - /* The body might be empty, still has to eval the args */ --- -2.27.0 - - -From 1295da1db50adb2b6db53c6d057fdcc952b0bc78 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Wed, 30 Jun 2021 12:23:51 +0200 -Subject: [PATCH 24/61] awk: allow empty fuinctions with no arguments, disallow - function redefinitions - -function old new delta -.rodata 103681 103700 +19 -parse_program 303 307 +4 -evaluate 3145 3141 -4 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 2/1 up/down: 23/-4) Total: 19 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 11 +++++++---- - testsuite/awk.tests | 10 ++++++++++ - 2 files changed, 17 insertions(+), 4 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 1115085da..c05d5d651 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -139,6 +139,7 @@ typedef struct chain_s { - /* Function */ - typedef struct func_s { - unsigned nargs; -+ smallint defined; - struct chain_s body; - } func; - -@@ -1662,9 +1663,11 @@ static void parse_program(char *p) - debug_printf_parse("%s: TC_FUNCDECL\n", __func__); - 
next_token(TC_FUNCTION); - f = newfunc(t_string); --//FIXME: dup check: functions can't be redefined, this is not ok: awk 'func f(){}; func f(){}' -- f->body.first = NULL; -- f->nargs = 0; -+ if (f->defined) -+ syntax_error("Duplicate function"); -+ f->defined = 1; -+ //f->body.first = NULL; - already is -+ //f->nargs = 0; - already is - /* func arg list: comma sep list of args, and a close paren */ - for (;;) { - if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) { -@@ -2912,7 +2915,7 @@ static var *evaluate(node *op, var *res) - - debug_printf_eval("FUNC\n"); - -- if (op->r.f->nargs == 0 && !op->r.f->body.first) -+ if (!op->r.f->defined) - syntax_error(EMSG_UNDEF_FUNC); - - /* The body might be empty, still has to eval the args */ -diff --git a/testsuite/awk.tests b/testsuite/awk.tests -index 6e35d33dd..873cc3680 100755 ---- a/testsuite/awk.tests -+++ b/testsuite/awk.tests -@@ -44,6 +44,16 @@ testing "awk handles empty function f(arg){}" \ - "L1\n\nL2\n\n" \ - "" "" - -+prg=' -+function empty_fun(){} -+END {empty_fun() -+ print "Ok" -+}' -+testing "awk handles empty function f(){}" \ -+ "awk '$prg'" \ -+ "Ok\n" \ -+ "" "" -+ - prg=' - function outer_fun() { - return 1 --- -2.27.0 - - -From d88539017ebe731ba507fda8def65969bd14e582 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Wed, 30 Jun 2021 12:42:39 +0200 -Subject: [PATCH 25/61] awk: rewrite "print" logic a bit to make it clearer - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 9 ++++----- - 1 file changed, 4 insertions(+), 5 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index c05d5d651..0fbca0433 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -2792,7 +2792,7 @@ static var *evaluate(node *op, var *res) - if (!op1) { - fputs(getvar_s(intvar[F0]), F); - } else { -- while (op1) { -+ for (;;) { - var *v = evaluate(nextarg(&op1), v1); - if (v->type & VF_NUMBER) { - fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]), -@@ -2801,13 +2801,12 @@ static var *evaluate(node *op, var *res) 
- } else { - fputs(getvar_s(v), F); - } -- -- if (op1) -- fputs(getvar_s(intvar[OFS]), F); -+ if (!op1) -+ break; -+ fputs(getvar_s(intvar[OFS]), F); - } - } - fputs(getvar_s(intvar[ORS]), F); -- - } else { /* OC_PRINTF */ - char *s = awk_printf(op1, &len); - #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS --- -2.27.0 - - -From 04a90dbf88727415f4bcd3d1125d463255557d55 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Wed, 30 Jun 2021 12:52:51 +0200 -Subject: [PATCH 26/61] awk: evaluate all, even superfluous function args - -function old new delta -evaluate 3128 3135 +7 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 19 ++++++++++++------- - testsuite/awk.tests | 8 +++++++- - 2 files changed, 19 insertions(+), 8 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 0fbca0433..47bbc10a6 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -2910,7 +2910,7 @@ static var *evaluate(node *op, var *res) - case XC( OC_FUNC ): { - var *tv, *sv_fnargs; - const char *sv_progname; -- int nargs1, i; -+ int nargs, i; - - debug_printf_eval("FUNC\n"); - -@@ -2918,17 +2918,22 @@ static var *evaluate(node *op, var *res) - syntax_error(EMSG_UNDEF_FUNC); - - /* The body might be empty, still has to eval the args */ -- nargs1 = op->r.f->nargs + 1; -- tv = nvalloc(nargs1); -+ nargs = op->r.f->nargs; -+ tv = nvalloc(nargs); - i = 0; - while (op1) { --//TODO: explain why one iteration is done even for the case p->r.f->nargs == 0 - var *arg = evaluate(nextarg(&op1), v1); -+ if (i == nargs) { -+ /* call with more arguments than function takes. -+ * (gawk warns: "warning: function 'f' called with more arguments than declared"). 
-+ * They are still evaluated, but discarded: */ -+ clrvar(arg); -+ continue; -+ } - copyvar(&tv[i], arg); - tv[i].type |= VF_CHILD; - tv[i].x.parent = arg; -- if (++i >= op->r.f->nargs) -- break; -+ i++; - } - - sv_fnargs = fnargs; -@@ -2936,7 +2941,7 @@ static var *evaluate(node *op, var *res) - - fnargs = tv; - res = evaluate(op->r.f->body.first, res); -- nvfree(fnargs, nargs1); -+ nvfree(fnargs, nargs); - - g_progname = sv_progname; - fnargs = sv_fnargs; -diff --git a/testsuite/awk.tests b/testsuite/awk.tests -index 873cc3680..3c230393f 100755 ---- a/testsuite/awk.tests -+++ b/testsuite/awk.tests -@@ -87,11 +87,17 @@ BEGIN { - a=2 - print v (a) - }' --testing "'v (a)' is not a function call, it is a concatenation" \ -+testing "awk 'v (a)' is not a function call, it is a concatenation" \ - "awk '$prg' 2>&1" \ - "12\n" \ - "" "" - -+prg='func f(){print"F"};func g(){print"G"};BEGIN{f(g(),g())}' -+testing "awk unused function args are evaluated" \ -+ "awk '$prg' 2>&1" \ -+ "G\nG\nF\n" \ -+ "" "" -+ - - optional DESKTOP - testing "awk hex const 1" "awk '{ print or(0xffffffff,1) }'" "4294967295\n" "" "\n" --- -2.27.0 - - -From fd5451c7894cd617a812d095a5d4d3cdc215b218 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Thu, 1 Jul 2021 16:02:16 +0200 -Subject: [PATCH 27/61] awk: rename temp variables, no code changes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 76 +++++++++++++++++++++++++++++++-------------------- - 1 file changed, 46 insertions(+), 30 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 47bbc10a6..2c2cb74d7 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -1775,14 +1775,14 @@ static node *mk_splitter(const char *s, tsplitter *spl) - static regex_t *as_regex(node *op, regex_t *preg) - { - int cflags; -- var *v; -+ var *tmpvar; - const char *s; - - if ((op->info & OPCLSMASK) == OC_REGEXP) { - return icase ? 
op->r.ire : op->l.re; - } -- v = nvalloc(1); -- s = getvar_s(evaluate(op, v)); -+ tmpvar = nvalloc(1); -+ s = getvar_s(evaluate(op, tmpvar)); - - cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED; - /* Testcase where REG_EXTENDED fails (unpaired '{'): -@@ -1794,7 +1794,7 @@ static regex_t *as_regex(node *op, regex_t *preg) - cflags &= ~REG_EXTENDED; - xregcomp(preg, s, cflags); - } -- nvfree(v, 1); -+ nvfree(tmpvar, 1); - return preg; - } - -@@ -2243,12 +2243,12 @@ static char *awk_printf(node *n, int *len) - const char *s1; - int i, j, incr, bsize; - char c, c1; -- var *v, *arg; -+ var *tmpvar, *arg; - -- v = nvalloc(1); -+ tmpvar = nvalloc(1); - //TODO: above, to avoid allocating a single temporary var, take a pointer - //to a temporary that our caller (evaluate()) already has? -- fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v))); -+ fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), tmpvar))); - - i = 0; - while (*f) { -@@ -2268,7 +2268,7 @@ static char *awk_printf(node *n, int *len) - f++; - c1 = *f; - *f = '\0'; -- arg = evaluate(nextarg(&n), v); -+ arg = evaluate(nextarg(&n), tmpvar); - - j = i; - if (c == 'c' || !c) { -@@ -2289,7 +2289,7 @@ static char *awk_printf(node *n, int *len) - } - - free(fmt); -- nvfree(v, 1); -+ nvfree(tmpvar, 1); - b = xrealloc(b, i + 1); - b[i] = '\0'; - #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS -@@ -2429,7 +2429,7 @@ static NOINLINE var *exec_builtin(node *op, var *res) - { - #define tspl (G.exec_builtin__tspl) - -- var *tv; -+ var *tmpvars; - node *an[4]; - var *av[4]; - const char *as[4]; -@@ -2441,7 +2441,12 @@ static NOINLINE var *exec_builtin(node *op, var *res) - time_t tt; - int i, l, ll, n; - -- tv = nvalloc(4); -+ tmpvars = nvalloc(4); -+#define TMPVAR0 (tmpvars) -+#define TMPVAR1 (tmpvars + 1) -+#define TMPVAR2 (tmpvars + 2) -+#define TMPVAR3 (tmpvars + 3) -+#define TMPVAR(i) (tmpvars + (i)) - isr = info = op->info; - op = op->l.n; - -@@ -2449,7 +2454,7 @@ static NOINLINE var *exec_builtin(node *op, var *res) - for 
(i = 0; i < 4 && op; i++) { - an[i] = nextarg(&op); - if (isr & 0x09000000) -- av[i] = evaluate(an[i], &tv[i]); -+ av[i] = evaluate(an[i], TMPVAR(i)); - if (isr & 0x08000000) - as[i] = getvar_s(av[i]); - isr >>= 1; -@@ -2474,7 +2479,7 @@ static NOINLINE var *exec_builtin(node *op, var *res) - - if (nargs > 2) { - spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ? -- an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl); -+ an[2] : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl); - } else { - spl = &fsplitter.n; - } -@@ -2617,7 +2622,13 @@ static NOINLINE var *exec_builtin(node *op, var *res) - break; - } - -- nvfree(tv, 4); -+ nvfree(tmpvars, 4); -+#undef TMPVAR0 -+#undef TMPVAR1 -+#undef TMPVAR2 -+#undef TMPVAR3 -+#undef TMPVAR -+ - return res; - #undef tspl - } -@@ -2636,14 +2647,16 @@ static var *evaluate(node *op, var *res) - #define seed (G.evaluate__seed) - #define sreg (G.evaluate__sreg) - -- var *v1; -+ var *tmpvars; -+#define TMPVAR0 (tmpvars) -+#define TMPVAR1 (tmpvars + 1) - - if (!op) - return setvar_s(res, NULL); - - debug_printf_eval("entered %s()\n", __func__); - -- v1 = nvalloc(2); -+ tmpvars = nvalloc(2); - - while (op) { - struct { -@@ -2683,7 +2696,7 @@ static var *evaluate(node *op, var *res) - } - if (op1->r.n) { /* array ref? 
*/ - const char *s; -- s = getvar_s(evaluate(op1->r.n, v1)); -+ s = getvar_s(evaluate(op1->r.n, TMPVAR0)); - hash_remove(iamarray(v), s); - } else { - clear_array(iamarray(v)); -@@ -2693,7 +2706,7 @@ static var *evaluate(node *op, var *res) - - /* execute inevitable things */ - if (opinfo & OF_RES1) -- L.v = evaluate(op1, v1); -+ L.v = evaluate(op1, TMPVAR0); - if (opinfo & OF_STR1) { - L.s = getvar_s(L.v); - debug_printf_eval("L.s:'%s'\n", L.s); -@@ -2710,7 +2723,7 @@ static var *evaluate(node *op, var *res) - * (Seen trying to evaluate "$444 $44444") - */ - if (opinfo & OF_RES2) { -- R.v = evaluate(op->r.n, v1+1); -+ R.v = evaluate(op->r.n, TMPVAR1); - //TODO: L.v may be invalid now, set L.v to NULL to catch bugs? - //L.v = NULL; - } -@@ -2793,7 +2806,7 @@ static var *evaluate(node *op, var *res) - fputs(getvar_s(intvar[F0]), F); - } else { - for (;;) { -- var *v = evaluate(nextarg(&op1), v1); -+ var *v = evaluate(nextarg(&op1), TMPVAR0); - if (v->type & VF_NUMBER) { - fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]), - getvar_i(v), TRUE); -@@ -2892,7 +2905,7 @@ static var *evaluate(node *op, var *res) - /* if source is a temporary string, jusk relink it to dest */ - //Disabled: if R.v is numeric but happens to have cached R.v->string, - //then L.v ends up being a string, which is wrong --// if (R.v == v1+1 && R.v->string) { -+// if (R.v == TMPVAR1 && R.v->string) { - // res = setvar_p(L.v, R.v->string); - // R.v->string = NULL; - // } else { -@@ -2908,7 +2921,7 @@ static var *evaluate(node *op, var *res) - break; - - case XC( OC_FUNC ): { -- var *tv, *sv_fnargs; -+ var *argvars, *sv_fnargs; - const char *sv_progname; - int nargs, i; - -@@ -2919,10 +2932,10 @@ static var *evaluate(node *op, var *res) - - /* The body might be empty, still has to eval the args */ - nargs = op->r.f->nargs; -- tv = nvalloc(nargs); -+ argvars = nvalloc(nargs); - i = 0; - while (op1) { -- var *arg = evaluate(nextarg(&op1), v1); -+ var *arg = evaluate(nextarg(&op1), TMPVAR0); - if (i == 
nargs) { - /* call with more arguments than function takes. - * (gawk warns: "warning: function 'f' called with more arguments than declared"). -@@ -2930,18 +2943,18 @@ static var *evaluate(node *op, var *res) - clrvar(arg); - continue; - } -- copyvar(&tv[i], arg); -- tv[i].type |= VF_CHILD; -- tv[i].x.parent = arg; -+ copyvar(&argvars[i], arg); -+ argvars[i].type |= VF_CHILD; -+ argvars[i].x.parent = arg; - i++; - } - - sv_fnargs = fnargs; - sv_progname = g_progname; - -- fnargs = tv; -+ fnargs = argvars; - res = evaluate(op->r.f->body.first, res); -- nvfree(fnargs, nargs); -+ nvfree(argvars, nargs); - - g_progname = sv_progname; - fnargs = sv_fnargs; -@@ -3266,7 +3279,10 @@ static var *evaluate(node *op, var *res) - break; - } /* while (op) */ - -- nvfree(v1, 2); -+ nvfree(tmpvars, 2); -+#undef TMPVAR0 -+#undef TMPVAR1 -+ - debug_printf_eval("returning from %s(): %p\n", __func__, res); - return res; - #undef fnargs --- -2.27.0 - - -From b1abb8374ff4bd36d9e850a92ab7a3a7668615d2 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Thu, 1 Jul 2021 17:50:26 +0200 -Subject: [PATCH 28/61] awk: use static tmpvars instead of nvalloc(1)ed ones - -ptest() was using this idea already. - -As far as I can see, this is safe. Ttestsuite passes. - -One downside is that a temporary from e.g. printf invocation -won't be freed until the next printf call. 
- -function old new delta -awk_printf 481 468 -13 -as_regex 137 111 -26 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-39) Total: -39 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 49 ++++++++++++++++++++++++++++++++++--------------- - 1 file changed, 34 insertions(+), 15 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 2c2cb74d7..0be044eef 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -559,7 +559,9 @@ struct globals2 { - unsigned evaluate__seed; - regex_t evaluate__sreg; - -- var ptest__v; -+ var ptest__tmpvar; -+ var awk_printf__tmpvar; -+ var as_regex__tmpvar; - - tsplitter exec_builtin__tspl; - -@@ -1775,14 +1777,19 @@ static node *mk_splitter(const char *s, tsplitter *spl) - static regex_t *as_regex(node *op, regex_t *preg) - { - int cflags; -- var *tmpvar; - const char *s; - - if ((op->info & OPCLSMASK) == OC_REGEXP) { - return icase ? op->r.ire : op->l.re; - } -- tmpvar = nvalloc(1); -- s = getvar_s(evaluate(op, tmpvar)); -+ -+#define TMPVAR (&G.as_regex__tmpvar) -+ //tmpvar = nvalloc(1); -+ // We use a single "static" tmpvar (instead of on-stack or malloced one) -+ // to decrease memory consumption in deeply-recursive awk programs. -+ // The rule to work safely is to never call evaluate() while our static -+ // TMPVAR's value is still needed. -+ s = getvar_s(evaluate(op, TMPVAR)); - - cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED; - /* Testcase where REG_EXTENDED fails (unpaired '{'): -@@ -1794,7 +1801,8 @@ static regex_t *as_regex(node *op, regex_t *preg) - cflags &= ~REG_EXTENDED; - xregcomp(preg, s, cflags); - } -- nvfree(tmpvar, 1); -+ //nvfree(tmpvar, 1); -+#undef TMPVAR - return preg; - } - -@@ -2105,8 +2113,11 @@ static int hashwalk_next(var *v) - /* evaluate node, return 1 when result is true, 0 otherwise */ - static int ptest(node *pattern) - { -- /* ptest__v is "static": to save stack space? 
*/ -- return istrue(evaluate(pattern, &G.ptest__v)); -+ // We use a single "static" tmpvar (instead of on-stack or malloced one) -+ // to decrease memory consumption in deeply-recursive awk programs. -+ // The rule to work safely is to never call evaluate() while our static -+ // TMPVAR's value is still needed. -+ return istrue(evaluate(pattern, &G.ptest__tmpvar)); - } - - /* read next record from stream rsm into a variable v */ -@@ -2243,12 +2254,18 @@ static char *awk_printf(node *n, int *len) - const char *s1; - int i, j, incr, bsize; - char c, c1; -- var *tmpvar, *arg; -- -- tmpvar = nvalloc(1); --//TODO: above, to avoid allocating a single temporary var, take a pointer --//to a temporary that our caller (evaluate()) already has? -- fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), tmpvar))); -+ var *arg; -+ -+ //tmpvar = nvalloc(1); -+#define TMPVAR (&G.awk_printf__tmpvar) -+ // We use a single "static" tmpvar (instead of on-stack or malloced one) -+ // to decrease memory consumption in deeply-recursive awk programs. -+ // The rule to work safely is to never call evaluate() while our static -+ // TMPVAR's value is still needed. -+ fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), TMPVAR))); -+ // ^^^^^^^^^ here we immediately strdup() the value, so the later call -+ // to evaluate() potentially recursing into another awk_printf() can't -+ // mangle the value. 
- - i = 0; - while (*f) { -@@ -2268,7 +2285,7 @@ static char *awk_printf(node *n, int *len) - f++; - c1 = *f; - *f = '\0'; -- arg = evaluate(nextarg(&n), tmpvar); -+ arg = evaluate(nextarg(&n), TMPVAR); - - j = i; - if (c == 'c' || !c) { -@@ -2289,7 +2306,9 @@ static char *awk_printf(node *n, int *len) - } - - free(fmt); -- nvfree(tmpvar, 1); -+// nvfree(tmpvar, 1); -+#undef TMPVAR -+ - b = xrealloc(b, i + 1); - b[i] = '\0'; - #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS --- -2.27.0 - - -From de5007b20bc226273fb50130f2cb0fcaf7abfd3b Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Fri, 2 Jul 2021 14:27:40 +0200 -Subject: [PATCH 29/61] awk: shuffle functions to reduce forward declarations, - no code changes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 192 ++++++++++++++++++++++++-------------------------- - 1 file changed, 94 insertions(+), 98 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 0be044eef..6833c2f0d 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -619,18 +619,6 @@ struct globals2 { - G.evaluate__seed = 1; \ - } while (0) - -- --/* function prototypes */ --static void handle_special(var *); --static node *parse_expr(uint32_t); --static void chain_group(void); --static var *evaluate(node *, var *); --static rstream *next_input_file(void); --static int fmt_num(char *, int, const char *, double, int); --static int awk_exit(int) NORETURN; -- --/* ---- error handling ---- */ -- - static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string"; - static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token"; - static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero"; -@@ -642,10 +630,7 @@ static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function"; - static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in"; - static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field"; - --static void zero_out_var(var *vp) --{ -- memset(vp, 0, sizeof(*vp)); --} -+static int 
awk_exit(int) NORETURN; - - static void syntax_error(const char *message) NORETURN; - static void syntax_error(const char *message) -@@ -653,6 +638,11 @@ static void syntax_error(const char *message) - bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message); - } - -+static void zero_out_var(var *vp) -+{ -+ memset(vp, 0, sizeof(*vp)); -+} -+ - /* ---- hash stuff ---- */ - - static unsigned hashidx(const char *name) -@@ -885,10 +875,29 @@ static double my_strtod(char **pp) - - /* -------- working with variables (set/get/copy/etc) -------- */ - --static xhash *iamarray(var *v) -+static int fmt_num(char *b, int size, const char *format, double n, int int_as_int) - { -- var *a = v; -+ int r = 0; -+ char c; -+ const char *s = format; -+ -+ if (int_as_int && n == (long long)n) { -+ r = snprintf(b, size, "%lld", (long long)n); -+ } else { -+ do { c = *s; } while (c && *++s); -+ if (strchr("diouxX", c)) { -+ r = snprintf(b, size, format, (int)n); -+ } else if (strchr("eEfgG", c)) { -+ r = snprintf(b, size, format, n); -+ } else { -+ syntax_error(EMSG_INV_FMT); -+ } -+ } -+ return r; -+} - -+static xhash *iamarray(var *a) -+{ - while (a->type & VF_CHILD) - a = a->x.parent; - -@@ -913,6 +922,8 @@ static var *clrvar(var *v) - return v; - } - -+static void handle_special(var *); -+ - /* assign string value to variable */ - static var *setvar_p(var *v, char *value) - { -@@ -1284,6 +1295,8 @@ static void mk_re_node(const char *s, node *n, regex_t *re) - xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); - } - -+static node *parse_expr(uint32_t); -+ - static node *parse_lrparen_list(void) - { - next_token(TC_LPAREN); -@@ -1488,6 +1501,8 @@ static void chain_expr(uint32_t info) - rollback_token(); - } - -+static void chain_group(void); -+ - static node *chain_loop(node *nn) - { - node *n, *n2, *save_brk, *save_cont; -@@ -1770,6 +1785,8 @@ static node *mk_splitter(const char *s, tsplitter *spl) - return n; - } - -+static var *evaluate(node *, var *); -+ - /* Use node as a 
regular expression. Supplied with node ptr and regex_t - * storage space. Return ptr to regex (if result points to preg, it should - * be later regfree'd manually). -@@ -2222,27 +2239,6 @@ static int awk_getline(rstream *rsm, var *v) - return r; - } - --static int fmt_num(char *b, int size, const char *format, double n, int int_as_int) --{ -- int r = 0; -- char c; -- const char *s = format; -- -- if (int_as_int && n == (long long)n) { -- r = snprintf(b, size, "%lld", (long long)n); -- } else { -- do { c = *s; } while (c && *++s); -- if (strchr("diouxX", c)) { -- r = snprintf(b, size, format, (int)n); -- } else if (strchr("eEfgG", c)) { -- r = snprintf(b, size, format, n); -- } else { -- syntax_error(EMSG_INV_FMT); -- } -- } -- return r; --} -- - /* formatted output into an allocated buffer, return ptr to buffer */ - #if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS - # define awk_printf(a, b) awk_printf(a) -@@ -2306,7 +2302,7 @@ static char *awk_printf(node *n, int *len) - } - - free(fmt); --// nvfree(tmpvar, 1); -+ //nvfree(tmpvar, 1); - #undef TMPVAR - - b = xrealloc(b, i + 1); -@@ -2652,6 +2648,64 @@ static NOINLINE var *exec_builtin(node *op, var *res) - #undef tspl - } - -+/* if expr looks like "var=value", perform assignment and return 1, -+ * otherwise return 0 */ -+static int is_assignment(const char *expr) -+{ -+ char *exprc, *val; -+ -+ if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) { -+ return FALSE; -+ } -+ -+ exprc = xstrdup(expr); -+ val = exprc + (val - expr); -+ *val++ = '\0'; -+ -+ unescape_string_in_place(val); -+ setvar_u(newvar(exprc), val); -+ free(exprc); -+ return TRUE; -+} -+ -+/* switch to next input file */ -+static rstream *next_input_file(void) -+{ -+#define rsm (G.next_input_file__rsm) -+#define files_happen (G.next_input_file__files_happen) -+ -+ FILE *F; -+ const char *fname, *ind; -+ -+ if (rsm.F) -+ fclose(rsm.F); -+ rsm.F = NULL; -+ rsm.pos = rsm.adv = 0; -+ -+ for (;;) { -+ if (getvar_i(intvar[ARGIND])+1 >= 
getvar_i(intvar[ARGC])) { -+ if (files_happen) -+ return NULL; -+ fname = "-"; -+ F = stdin; -+ break; -+ } -+ ind = getvar_s(incvar(intvar[ARGIND])); -+ fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); -+ if (fname && *fname && !is_assignment(fname)) { -+ F = xfopen_stdin(fname); -+ break; -+ } -+ } -+ -+ files_happen = TRUE; -+ setvar_s(intvar[FILENAME], fname); -+ rsm.F = F; -+ return &rsm; -+#undef rsm -+#undef files_happen -+} -+ - /* - * Evaluate node - the heart of the program. Supplied with subtree - * and place where to store result. Returns ptr to result. -@@ -3338,64 +3392,6 @@ static int awk_exit(int r) - exit(r); - } - --/* if expr looks like "var=value", perform assignment and return 1, -- * otherwise return 0 */ --static int is_assignment(const char *expr) --{ -- char *exprc, *val; -- -- if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) { -- return FALSE; -- } -- -- exprc = xstrdup(expr); -- val = exprc + (val - expr); -- *val++ = '\0'; -- -- unescape_string_in_place(val); -- setvar_u(newvar(exprc), val); -- free(exprc); -- return TRUE; --} -- --/* switch to next input file */ --static rstream *next_input_file(void) --{ --#define rsm (G.next_input_file__rsm) --#define files_happen (G.next_input_file__files_happen) -- -- FILE *F; -- const char *fname, *ind; -- -- if (rsm.F) -- fclose(rsm.F); -- rsm.F = NULL; -- rsm.pos = rsm.adv = 0; -- -- for (;;) { -- if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) { -- if (files_happen) -- return NULL; -- fname = "-"; -- F = stdin; -- break; -- } -- ind = getvar_s(incvar(intvar[ARGIND])); -- fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); -- if (fname && *fname && !is_assignment(fname)) { -- F = xfopen_stdin(fname); -- break; -- } -- } -- -- files_happen = TRUE; -- setvar_s(intvar[FILENAME], fname); -- rsm.F = F; -- return &rsm; --#undef rsm --#undef files_happen --} -- - int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; - int awk_main(int argc UNUSED_PARAM, char **argv) 
- { --- -2.27.0 - - -From c14ab33f2d8eb07dbf27570be30121cc9734ba04 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Fri, 2 Jul 2021 14:29:01 +0200 -Subject: [PATCH 30/61] awk: when parsing length(), simplify eating of LPAREN - -function old new delta -parse_expr 945 948 +3 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 6833c2f0d..f65449a09 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -1453,10 +1453,11 @@ static node *parse_expr(uint32_t term_tc) - | TC_BINOPX /* length NUM */ - | TC_COMMA /* print length, 1 */ - ); -- rollback_token(); -- if (tc & TC_LPAREN) { -+ if (tc != TC_LPAREN) -+ rollback_token(); -+ else { - /* It was a "(" token. Handle just like TC_BUILTIN */ -- cn->l.n = parse_lrparen_list(); -+ cn->l.n = parse_expr(TC_RPAREN); - } - break; - } --- -2.27.0 - - -From 8be97151d5ba9f98f27f58068416c203565708d0 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Fri, 2 Jul 2021 14:33:13 +0200 -Subject: [PATCH 31/61] awk: use "static" tmpvars in main and exit - -function old new delta -awk_exit 103 93 -10 -awk_main 850 832 -18 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-28) Total: -28 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 17 +++++------------ - 1 file changed, 5 insertions(+), 12 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index f65449a09..9f5a94037 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -562,6 +562,8 @@ struct globals2 { - var ptest__tmpvar; - var awk_printf__tmpvar; - var as_regex__tmpvar; -+ var exit__tmpvar; -+ var main__tmpvar; - - tsplitter exec_builtin__tspl; - -@@ -638,11 +640,6 @@ static void syntax_error(const char *message) - bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message); - } - --static void zero_out_var(var *vp) --{ -- memset(vp, 0, sizeof(*vp)); --} -- - /* 
---- hash stuff ---- */ - - static unsigned hashidx(const char *name) -@@ -3372,11 +3369,9 @@ static int awk_exit(int r) - unsigned i; - - if (!exiting) { -- var tv; - exiting = TRUE; - nextrec = FALSE; -- zero_out_var(&tv); -- evaluate(endseq.first, &tv); -+ evaluate(endseq.first, &G.exit__tmpvar); - } - - /* waiting for children */ -@@ -3404,7 +3399,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - llist_t *list_e = NULL; - #endif - int i; -- var tv; - - INIT_G(); - -@@ -3514,8 +3508,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - newfile("/dev/stdout")->F = stdout; - newfile("/dev/stderr")->F = stderr; - -- zero_out_var(&tv); -- evaluate(beginseq.first, &tv); -+ evaluate(beginseq.first, &G.main__tmpvar); - if (!mainseq.first && !endseq.first) - awk_exit(EXIT_SUCCESS); - -@@ -3532,7 +3525,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - nextrec = FALSE; - incvar(intvar[NR]); - incvar(intvar[FNR]); -- evaluate(mainseq.first, &tv); -+ evaluate(mainseq.first, &G.main__tmpvar); - - if (nextfile) - break; --- -2.27.0 - - -From 7f4cd583daf8dcb431f07fd3402ca7ddc11b21ab Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Fri, 2 Jul 2021 14:53:52 +0200 -Subject: [PATCH 32/61] awk: shuffle globals for smaller offsets - -function old new delta -awk_main 832 829 -3 -evaluate 3229 3223 -6 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-9) Total: -9 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 25 +++++++++++++------------ - 1 file changed, 13 insertions(+), 12 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 9f5a94037..068ed687b 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -536,6 +536,11 @@ struct globals { - smallint nextfile; - smallint is_f0_split; - smallint t_rollback; -+ -+ /* former statics from various functions */ -+ smallint next_token__concat_inserted; -+ uint32_t next_token__save_tclass; -+ uint32_t next_token__save_info; - 
}; - struct globals2 { - uint32_t t_info; /* often used */ -@@ -548,15 +553,11 @@ struct globals2 { - /* former statics from various functions */ - char *split_f0__fstrings; - -- uint32_t next_token__save_tclass; -- uint32_t next_token__save_info; -- smallint next_token__concat_inserted; -- -- smallint next_input_file__files_happen; - rstream next_input_file__rsm; -+ smallint next_input_file__files_happen; - -- var *evaluate__fnargs; - unsigned evaluate__seed; -+ var *evaluate__fnargs; - regex_t evaluate__sreg; - - var ptest__tmpvar; -@@ -575,10 +576,10 @@ struct globals2 { - #define G1 (ptr_to_globals[-1]) - #define G (*(struct globals2 *)ptr_to_globals) - /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */ --/*char G1size[sizeof(G1)]; - 0x74 */ --/*char Gsize[sizeof(G)]; - 0x1c4 */ -+//char G1size[sizeof(G1)]; // 0x70 -+//char Gsize[sizeof(G)]; // 0x2f8 - /* Trying to keep most of members accessible with short offsets: */ --/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */ -+//char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; // 0x7c - #define t_double (G1.t_double ) - #define beginseq (G1.beginseq ) - #define mainseq (G1.mainseq ) -@@ -1056,9 +1057,9 @@ static int istrue(var *v) - */ - static uint32_t next_token(uint32_t expected) - { --#define concat_inserted (G.next_token__concat_inserted) --#define save_tclass (G.next_token__save_tclass) --#define save_info (G.next_token__save_info) -+#define concat_inserted (G1.next_token__concat_inserted) -+#define save_tclass (G1.next_token__save_tclass) -+#define save_info (G1.next_token__save_info) - - char *p; - const char *tl; --- -2.27.0 - - -From 51262cc2c47f586d9478cc3c4f4977d98b36222b Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Fri, 2 Jul 2021 15:19:14 +0200 -Subject: [PATCH 33/61] awk: do not special-case "delete" - -Rework of the previous fix: -Can use operation attributes to disable arg evaluation instead of special-casing. 
- -function old new delta -.rodata 104032 104036 +4 -evaluate 3223 3215 -8 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 1/1 up/down: 4/-8) Total: -4 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 56 +++++++++++++++++++++++++-------------------------- - 1 file changed, 27 insertions(+), 29 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 068ed687b..a3dda6959 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -319,7 +319,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ - #define xV OF_RES2 - #define xS (OF_RES2 | OF_STR2) - #define Vx OF_RES1 --#define Rx (OF_RES1 | OF_NUM1 | OF_REQUIRED) -+#define Rx OF_REQUIRED - #define VV (OF_RES1 | OF_RES2) - #define Nx (OF_RES1 | OF_NUM1) - #define NV (OF_RES1 | OF_NUM1 | OF_RES2) -@@ -2750,32 +2750,6 @@ static var *evaluate(node *op, var *res) - op1 = op->l.n; - debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn); - -- /* "delete" is special: -- * "delete array[var--]" must evaluate index expr only once, -- * must not evaluate it in "execute inevitable things" part. -- */ -- if (XC(opinfo & OPCLSMASK) == XC(OC_DELETE)) { -- uint32_t info = op1->info & OPCLSMASK; -- var *v; -- -- debug_printf_eval("DELETE\n"); -- if (info == OC_VAR) { -- v = op1->l.v; -- } else if (info == OC_FNARG) { -- v = &fnargs[op1->l.aidx]; -- } else { -- syntax_error(EMSG_NOT_ARRAY); -- } -- if (op1->r.n) { /* array ref? 
*/ -- const char *s; -- s = getvar_s(evaluate(op1->r.n, TMPVAR0)); -- hash_remove(iamarray(v), s); -- } else { -- clear_array(iamarray(v)); -- } -- goto next; -- } -- - /* execute inevitable things */ - if (opinfo & OF_RES1) - L.v = evaluate(op1, TMPVAR0); -@@ -2905,7 +2879,31 @@ static var *evaluate(node *op, var *res) - break; - } - -- /* case XC( OC_DELETE ): - moved to happen before arg evaluation */ -+ case XC( OC_DELETE ): -+ debug_printf_eval("DELETE\n"); -+ { -+ /* "delete" is special: -+ * "delete array[var--]" must evaluate index expr only once. -+ */ -+ uint32_t info = op1->info & OPCLSMASK; -+ var *v; -+ -+ if (info == OC_VAR) { -+ v = op1->l.v; -+ } else if (info == OC_FNARG) { -+ v = &fnargs[op1->l.aidx]; -+ } else { -+ syntax_error(EMSG_NOT_ARRAY); -+ } -+ if (op1->r.n) { /* array ref? */ -+ const char *s; -+ s = getvar_s(evaluate(op1->r.n, TMPVAR0)); -+ hash_remove(iamarray(v), s); -+ } else { -+ clear_array(iamarray(v)); -+ } -+ break; -+ } - - case XC( OC_NEWSOURCE ): - debug_printf_eval("NEWSOURCE\n"); -@@ -3342,7 +3340,7 @@ static var *evaluate(node *op, var *res) - default: - syntax_error(EMSG_POSSIBLE_ERROR); - } /* switch */ -- next: -+ - if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS) - op = op->a.n; - if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS) --- -2.27.0 - - -From 2f36bdf0eb01846b23c1a340ff6f19fd9377ed6a Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Fri, 2 Jul 2021 17:32:08 +0200 -Subject: [PATCH 34/61] awk: make builtin definitions more understandable, no - code changes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 71 +++++++++++++++++++++++++++++++++++---------------- - 1 file changed, 49 insertions(+), 22 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index a3dda6959..fb841687e 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -331,8 +331,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ - #define OPNMASK 0x007F - - /* operator priority is a highest byte (even: r->l, odd: l->r grouping) -- * 
For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1, -- * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string -+ * (for builtins it has different meaning) - */ - #undef P - #undef PRIMASK -@@ -430,8 +429,6 @@ static const char tokenlist[] ALIGN1 = - /* compiler adds trailing "\0" */ - ; - --#define OC_B OC_BUILTIN -- - static const uint32_t tokeninfo[] ALIGN4 = { - 0, - 0, -@@ -464,20 +461,43 @@ static const uint32_t tokeninfo[] ALIGN4 = { - OC_RETURN|Vx, OC_EXIT|Nx, - ST_WHILE, - 0, /* else */ -- OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83), -- OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83), -- OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83), -- OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, -- OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, -- OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */ -- OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6), -- OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b), -- OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49), -- OC_FBLTIN|Sx|F_le, /* TC_LENGTH */ -- OC_GETLINE|SV|P(0), -- 0, 0, -- 0, -- 0 /* TC_END */ -+// OC_B's are builtins with enforced minimum number of arguments (two upper bits). -+// Highest byte bit pattern: nn s3s2s1 v3v2v1 -+// nn - min. 
number of args, sN - resolve Nth arg to string, vN - resolve to var -+// OC_FBLTIN's are builtins with one optional argument, -+// TODO: enforce exactly one arg for: system, close, cos, sin, exp, int, log, sqrt -+// zero args for: rand systime -+// Do have one optional arg: fflush, srand, length -+#define OC_B OC_BUILTIN -+#define A1 P(0x40) /*one arg*/ -+#define A2 P(0x80) /*two args*/ -+#define A3 P(0xc0) /*three args*/ -+#define __v P(1) -+#define _vv P(3) -+#define __s__v P(9) -+#define __s_vv P(0x0b) -+#define __svvv P(0x0f) -+#define _ss_vv P(0x1b) -+#define _s_vv_ P(0x16) -+#define ss_vv_ P(0x36) -+ OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or -+ OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor -+ OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|_vv|A2, // close system fflush atan2 -+ OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, // cos exp int log -+ OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, // rand sin sqrt srand -+ OC_B|B_ge|_s_vv_|A3, OC_B|B_gs|ss_vv_|A2, OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/ -+ OC_B|B_ma|__s__v|A2, OC_B|B_sp|__s_vv|A2, OC_SPRINTF, OC_B|B_su|ss_vv_|A2, // match split sprintf sub -+ OC_B|B_ss|__svvv|A2, OC_FBLTIN|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime -+ OC_B|B_lo|__s__v|A1, OC_B|B_up|__s__v|A1, // tolower toupper -+ OC_FBLTIN|Sx|F_le, // length -+ OC_GETLINE|SV, // getline -+ 0, 0, // func function -+ 0, // BEGIN -+ 0 // END -+#undef A1 -+#undef A2 -+#undef A3 -+#undef OC_B - }; - - /* internal variable names and their initial values */ -@@ -1630,6 +1650,7 @@ static void chain_group(void) - debug_printf_parse("%s: OC_BREAK\n", __func__); - n = chain_node(OC_EXEC); - n->a.n = break_ptr; -+//TODO: if break_ptr is NULL, syntax error (not in the loop)? 
- chain_expr(t_info); - break; - -@@ -1637,6 +1658,7 @@ static void chain_group(void) - debug_printf_parse("%s: OC_CONTINUE\n", __func__); - n = chain_node(OC_EXEC); - n->a.n = continue_ptr; -+//TODO: if continue_ptr is NULL, syntax error (not in the loop)? - chain_expr(t_info); - break; - -@@ -1799,8 +1821,8 @@ static regex_t *as_regex(node *op, regex_t *preg) - return icase ? op->r.ire : op->l.re; - } - --#define TMPVAR (&G.as_regex__tmpvar) - //tmpvar = nvalloc(1); -+#define TMPVAR (&G.as_regex__tmpvar) - // We use a single "static" tmpvar (instead of on-stack or malloced one) - // to decrease memory consumption in deeply-recursive awk programs. - // The rule to work safely is to never call evaluate() while our static -@@ -2720,8 +2742,6 @@ static var *evaluate(node *op, var *res) - #define sreg (G.evaluate__sreg) - - var *tmpvars; --#define TMPVAR0 (tmpvars) --#define TMPVAR1 (tmpvars + 1) - - if (!op) - return setvar_s(res, NULL); -@@ -2729,6 +2749,8 @@ static var *evaluate(node *op, var *res) - debug_printf_eval("entered %s()\n", __func__); - - tmpvars = nvalloc(2); -+#define TMPVAR0 (tmpvars) -+#define TMPVAR1 (tmpvars + 1) - - while (op) { - struct { -@@ -3166,7 +3188,7 @@ static var *evaluate(node *op, var *res) - rstream *rsm; - int err = 0; - rsm = (rstream *)hash_search(fdhash, L.s); -- debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm); -+ debug_printf_eval("OC_FBLTIN close: op1:%p s:'%s' rsm:%p\n", op1, L.s, rsm); - if (rsm) { - debug_printf_eval("OC_FBLTIN F_cl " - "rsm->is_pipe:%d, ->F:%p\n", -@@ -3177,6 +3199,11 @@ static var *evaluate(node *op, var *res) - */ - if (rsm->F) - err = rsm->is_pipe ? 
pclose(rsm->F) : fclose(rsm->F); -+//TODO: fix this case: -+// $ awk 'BEGIN { print close(""); print ERRNO }' -+// -1 -+// close of redirection that was never opened -+// (we print 0, 0) - free(rsm->buffer); - hash_remove(fdhash, L.s); - } --- -2.27.0 - - -From 8eb26034fb7225862c73f1dfa947a5d4910a0935 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Fri, 2 Jul 2021 18:28:12 +0200 -Subject: [PATCH 35/61] awk: enforce simple builtins' argument number - -function old new delta -evaluate 3215 3303 +88 -.rodata 104036 104107 +71 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 2/0 up/down: 159/0) Total: 159 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 45 ++++++++++++++++++++++++++++----------------- - 1 file changed, 28 insertions(+), 17 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index fb841687e..1925e0771 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -464,11 +464,11 @@ static const uint32_t tokeninfo[] ALIGN4 = { - // OC_B's are builtins with enforced minimum number of arguments (two upper bits). - // Highest byte bit pattern: nn s3s2s1 v3v2v1 - // nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var --// OC_FBLTIN's are builtins with one optional argument, --// TODO: enforce exactly one arg for: system, close, cos, sin, exp, int, log, sqrt --// zero args for: rand systime --// Do have one optional arg: fflush, srand, length --#define OC_B OC_BUILTIN -+// OC_FBLTIN's are builtins with zero or one argument. -+// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt. -+// Check for no args is present in builtins' code (not in this table): rand, systime. 
-+// Have one _optional_ arg: fflush, srand, length -+#define OC_B OC_BUILTIN - #define A1 P(0x40) /*one arg*/ - #define A2 P(0x80) /*two args*/ - #define A3 P(0xc0) /*three args*/ -@@ -480,15 +480,15 @@ static const uint32_t tokeninfo[] ALIGN4 = { - #define _ss_vv P(0x1b) - #define _s_vv_ P(0x16) - #define ss_vv_ P(0x36) -- OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or -- OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor -- OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|_vv|A2, // close system fflush atan2 -- OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, // cos exp int log -- OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, // rand sin sqrt srand -- OC_B|B_ge|_s_vv_|A3, OC_B|B_gs|ss_vv_|A2, OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/ -- OC_B|B_ma|__s__v|A2, OC_B|B_sp|__s_vv|A2, OC_SPRINTF, OC_B|B_su|ss_vv_|A2, // match split sprintf sub -- OC_B|B_ss|__svvv|A2, OC_FBLTIN|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime -- OC_B|B_lo|__s__v|A1, OC_B|B_up|__s__v|A1, // tolower toupper -+ OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or -+ OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor -+ OC_FBLTIN|Sx|Rx|F_cl,OC_FBLTIN|Sx|Rx|F_sy,OC_FBLTIN|Sx|F_ff, OC_B|B_a2|_vv|A2, // close system fflush atan2 -+ OC_FBLTIN|Nx|Rx|F_co,OC_FBLTIN|Nx|Rx|F_ex,OC_FBLTIN|Nx|Rx|F_in,OC_FBLTIN|Nx|Rx|F_lg,// cos exp int log -+ OC_FBLTIN|F_rn, OC_FBLTIN|Nx|Rx|F_si,OC_FBLTIN|Nx|Rx|F_sq,OC_FBLTIN|Nx|F_sr, // rand sin sqrt srand -+ OC_B|B_ge|_s_vv_|A3, OC_B|B_gs|ss_vv_|A2, OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/ -+ OC_B|B_ma|__s__v|A2, OC_B|B_sp|__s_vv|A2, OC_SPRINTF, OC_B|B_su|ss_vv_|A2, // match split sprintf sub -+ OC_B|B_ss|__svvv|A2, OC_FBLTIN|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime -+ OC_B|B_lo|__s__v|A1, 
OC_B|B_up|__s__v|A1, // tolower toupper - OC_FBLTIN|Sx|F_le, // length - OC_GETLINE|SV, // getline - 0, 0, // func function -@@ -2773,8 +2773,11 @@ static var *evaluate(node *op, var *res) - debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn); - - /* execute inevitable things */ -- if (opinfo & OF_RES1) -+ if (opinfo & OF_RES1) { -+ if ((opinfo & OF_REQUIRED) && !op1) -+ syntax_error(EMSG_TOO_FEW_ARGS); - L.v = evaluate(op1, TMPVAR0); -+ } - if (opinfo & OF_STR1) { - L.s = getvar_s(L.v); - debug_printf_eval("L.s:'%s'\n", L.s); -@@ -3101,12 +3104,18 @@ static var *evaluate(node *op, var *res) - double R_d = R_d; /* for compiler */ - debug_printf_eval("FBLTIN\n"); - -+ if (op1 && (op1->info & OPCLSMASK) == OC_COMMA) -+ /* Simple builtins take one arg maximum */ -+ syntax_error("Too many arguments"); -+ - switch (opn) { - case F_in: - R_d = (long long)L_d; - break; - -- case F_rn: -+ case F_rn: /*rand*/ -+ if (op1) -+ syntax_error("Too many arguments"); - R_d = (double)rand() / (double)RAND_MAX; - break; - -@@ -3149,7 +3158,9 @@ static var *evaluate(node *op, var *res) - srand(seed); - break; - -- case F_ti: -+ case F_ti: /*systime*/ -+ if (op1) -+ syntax_error("Too many arguments"); - R_d = time(NULL); - break; - --- -2.27.0 - - -From bd554e662f7246fd1518db37049aaf9ecf61bce9 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Fri, 2 Jul 2021 18:55:00 +0200 -Subject: [PATCH 36/61] awk: beautify builtins table, no code changes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 26 ++++++++++++++------------ - 1 file changed, 14 insertions(+), 12 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 1925e0771..8d7777ca6 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -464,11 +464,12 @@ static const uint32_t tokeninfo[] ALIGN4 = { - // OC_B's are builtins with enforced minimum number of arguments (two upper bits). - // Highest byte bit pattern: nn s3s2s1 v3v2v1 - // nn - min. 
number of args, sN - resolve Nth arg to string, vN - resolve to var --// OC_FBLTIN's are builtins with zero or one argument. -+// OC_F's are builtins with zero or one argument. - // |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt. - // Check for no args is present in builtins' code (not in this table): rand, systime. - // Have one _optional_ arg: fflush, srand, length - #define OC_B OC_BUILTIN -+#define OC_F OC_FBLTIN - #define A1 P(0x40) /*one arg*/ - #define A2 P(0x80) /*two args*/ - #define A3 P(0xc0) /*three args*/ -@@ -480,17 +481,17 @@ static const uint32_t tokeninfo[] ALIGN4 = { - #define _ss_vv P(0x1b) - #define _s_vv_ P(0x16) - #define ss_vv_ P(0x36) -- OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or -- OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor -- OC_FBLTIN|Sx|Rx|F_cl,OC_FBLTIN|Sx|Rx|F_sy,OC_FBLTIN|Sx|F_ff, OC_B|B_a2|_vv|A2, // close system fflush atan2 -- OC_FBLTIN|Nx|Rx|F_co,OC_FBLTIN|Nx|Rx|F_ex,OC_FBLTIN|Nx|Rx|F_in,OC_FBLTIN|Nx|Rx|F_lg,// cos exp int log -- OC_FBLTIN|F_rn, OC_FBLTIN|Nx|Rx|F_si,OC_FBLTIN|Nx|Rx|F_sq,OC_FBLTIN|Nx|F_sr, // rand sin sqrt srand -- OC_B|B_ge|_s_vv_|A3, OC_B|B_gs|ss_vv_|A2, OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/ -- OC_B|B_ma|__s__v|A2, OC_B|B_sp|__s_vv|A2, OC_SPRINTF, OC_B|B_su|ss_vv_|A2, // match split sprintf sub -- OC_B|B_ss|__svvv|A2, OC_FBLTIN|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime -- OC_B|B_lo|__s__v|A1, OC_B|B_up|__s__v|A1, // tolower toupper -- OC_FBLTIN|Sx|F_le, // length -- OC_GETLINE|SV, // getline -+ OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or -+ OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor -+ OC_F|F_cl|Sx|Rx, OC_F|F_sy|Sx|Rx, OC_F|F_ff|Sx, OC_B|B_a2|_vv|A2, // close system fflush atan2 -+ OC_F|F_co|Nx|Rx, OC_F|F_ex|Nx|Rx, OC_F|F_in|Nx|Rx, OC_F|F_lg|Nx|Rx, // cos exp int log -+ OC_F|F_rn, OC_F|F_si|Nx|Rx, 
OC_F|F_sq|Nx|Rx, OC_F|F_sr|Nx, // rand sin sqrt srand -+ OC_B|B_ge|_s_vv_|A3,OC_B|B_gs|ss_vv_|A2,OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/ -+ OC_B|B_ma|__s__v|A2,OC_B|B_sp|__s_vv|A2,OC_SPRINTF, OC_B|B_su|ss_vv_|A2,// match split sprintf sub -+ OC_B|B_ss|__svvv|A2,OC_F|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime -+ OC_B|B_lo|__s__v|A1,OC_B|B_up|__s__v|A1, // tolower toupper -+ OC_F|F_le|Sx, // length -+ OC_GETLINE|SV, // getline - 0, 0, // func function - 0, // BEGIN - 0 // END -@@ -498,6 +499,7 @@ static const uint32_t tokeninfo[] ALIGN4 = { - #undef A2 - #undef A3 - #undef OC_B -+#undef OC_F - }; - - /* internal variable names and their initial values */ --- -2.27.0 - - -From 2fcb86ed0176fcfe85d279d637a3d1b15ecf24bb Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Fri, 2 Jul 2021 19:38:03 +0200 -Subject: [PATCH 37/61] awk: rand() could return 1.0, fix this - should be in - [0,1) - -While at it, make it finer-grained (63 bits of randomness) - -function old new delta -evaluate 3303 3336 +33 -.rodata 104107 104111 +4 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 2/0 up/down: 37/0) Total: 37 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 15 +++++++++++++-- - 1 file changed, 13 insertions(+), 2 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 8d7777ca6..64fe81be4 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -3118,9 +3118,20 @@ static var *evaluate(node *op, var *res) - case F_rn: /*rand*/ - if (op1) - syntax_error("Too many arguments"); -- R_d = (double)rand() / (double)RAND_MAX; -+ { -+#if RAND_MAX >= 0x7fffffff -+ uint32_t u = ((uint32_t)rand() << 16) ^ rand(); -+ uint64_t v = ((uint64_t)rand() << 32) | u; -+ /* the above shift+or is optimized out on 32-bit arches */ -+# if RAND_MAX > 0x7fffffff -+ v &= 0x7fffffffffffffffUL; -+# endif -+ R_d = (double)v / 0x8000000000000000UL; -+#else -+# error Not 
implemented for this value of RAND_MAX -+#endif - break; -- -+ } - case F_co: - if (ENABLE_FEATURE_AWK_LIBM) { - R_d = cos(L_d); --- -2.27.0 - - -From c4aa325fa23237d1c9452ed2be468730d6e2c615 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Fri, 2 Jul 2021 22:28:51 +0200 -Subject: [PATCH 38/61] awk: fix beavior of "exit" without parameter - -function old new delta -evaluate 3336 3339 +3 -awk_exit 93 94 +1 -awk_main 829 827 -2 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 2/1 up/down: 4/-2) Total: 2 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 40 ++++++++++++++++++++++------------------ - testsuite/awk.tests | 5 +++++ - 2 files changed, 27 insertions(+), 18 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 64fe81be4..86cb7a95f 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -578,6 +578,8 @@ struct globals2 { - rstream next_input_file__rsm; - smallint next_input_file__files_happen; - -+ smalluint exitcode; -+ - unsigned evaluate__seed; - var *evaluate__fnargs; - regex_t evaluate__sreg; -@@ -655,7 +657,7 @@ static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function"; - static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in"; - static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field"; - --static int awk_exit(int) NORETURN; -+static int awk_exit(void) NORETURN; - - static void syntax_error(const char *message) NORETURN; - static void syntax_error(const char *message) -@@ -2779,14 +2781,14 @@ static var *evaluate(node *op, var *res) - if ((opinfo & OF_REQUIRED) && !op1) - syntax_error(EMSG_TOO_FEW_ARGS); - L.v = evaluate(op1, TMPVAR0); -- } -- if (opinfo & OF_STR1) { -- L.s = getvar_s(L.v); -- debug_printf_eval("L.s:'%s'\n", L.s); -- } -- if (opinfo & OF_NUM1) { -- L_d = getvar_i(L.v); -- debug_printf_eval("L_d:%f\n", L_d); -+ if (opinfo & OF_STR1) { -+ L.s = getvar_s(L.v); -+ debug_printf_eval("L.s:'%s'\n", 
L.s); -+ } -+ if (opinfo & OF_NUM1) { -+ L_d = getvar_i(L.v); -+ debug_printf_eval("L_d:%f\n", L_d); -+ } - } - /* NB: Must get string/numeric values of L (done above) - * _before_ evaluate()'ing R.v: if both L and R are $NNNs, -@@ -2799,10 +2801,10 @@ static var *evaluate(node *op, var *res) - R.v = evaluate(op->r.n, TMPVAR1); - //TODO: L.v may be invalid now, set L.v to NULL to catch bugs? - //L.v = NULL; -- } -- if (opinfo & OF_STR2) { -- R.s = getvar_s(R.v); -- debug_printf_eval("R.s:'%s'\n", R.s); -+ if (opinfo & OF_STR2) { -+ R.s = getvar_s(R.v); -+ debug_printf_eval("R.s:'%s'\n", R.s); -+ } - } - - debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK)); -@@ -2955,7 +2957,9 @@ static var *evaluate(node *op, var *res) - - case XC( OC_EXIT ): - debug_printf_eval("EXIT\n"); -- awk_exit(L_d); -+ if (op1) -+ G.exitcode = (int)L_d; -+ awk_exit(); - - /* -- recursive node type -- */ - -@@ -3414,7 +3418,7 @@ static var *evaluate(node *op, var *res) - - /* -------- main & co. -------- */ - --static int awk_exit(int r) -+static int awk_exit(void) - { - unsigned i; - -@@ -3435,7 +3439,7 @@ static int awk_exit(int r) - } - } - -- exit(r); -+ exit(G.exitcode); - } - - int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; -@@ -3560,7 +3564,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - - evaluate(beginseq.first, &G.main__tmpvar); - if (!mainseq.first && !endseq.first) -- awk_exit(EXIT_SUCCESS); -+ awk_exit(); - - /* input file could already be opened in BEGIN block */ - if (!iF) -@@ -3587,6 +3591,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - iF = next_input_file(); - } - -- awk_exit(EXIT_SUCCESS); -+ awk_exit(); - /*return 0;*/ - } -diff --git a/testsuite/awk.tests b/testsuite/awk.tests -index 3c230393f..770d8ffce 100755 ---- a/testsuite/awk.tests -+++ b/testsuite/awk.tests -@@ -445,4 +445,9 @@ testing 'awk $NF is empty' \ - '' \ - 'a=====123=' - -+testing "awk exit N propagates through END's exit" \ -+ "awk 'BEGIN { exit 42 } END { exit }'; 
echo \$?" \ -+ "42\n" \ -+ '' '' -+ - exit $FAILCOUNT --- -2.27.0 - - -From 1829a5b292a37553e8cc8f544448c591b3a7b3f6 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Fri, 2 Jul 2021 23:07:21 +0200 -Subject: [PATCH 39/61] awk: fix detection of VAR=VAL arguments - -1NAME=VAL is not it, neither is VA.R=VAL - -function old new delta -next_input_file 216 214 -2 -is_assignment 115 91 -24 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-26) Total: -26 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 9 ++++----- - 1 file changed, 4 insertions(+), 5 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 86cb7a95f..9f14f0f9a 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -2679,7 +2679,8 @@ static int is_assignment(const char *expr) - { - char *exprc, *val; - -- if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) { -+ val = (char*)endofname(expr); -+ if (val == (char*)expr || *val != '=') { - return FALSE; - } - -@@ -2699,7 +2700,6 @@ static rstream *next_input_file(void) - #define rsm (G.next_input_file__rsm) - #define files_happen (G.next_input_file__files_happen) - -- FILE *F; - const char *fname, *ind; - - if (rsm.F) -@@ -2712,20 +2712,19 @@ static rstream *next_input_file(void) - if (files_happen) - return NULL; - fname = "-"; -- F = stdin; -+ rsm.F = stdin; - break; - } - ind = getvar_s(incvar(intvar[ARGIND])); - fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); - if (fname && *fname && !is_assignment(fname)) { -- F = xfopen_stdin(fname); -+ rsm.F = xfopen_stdin(fname); - break; - } - } - - files_happen = TRUE; - setvar_s(intvar[FILENAME], fname); -- rsm.F = F; - return &rsm; - #undef rsm - #undef files_happen --- -2.27.0 - - -From 2e495deee760595d6b0df37f1f9b7d1e4ecab1ed Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Fri, 2 Jul 2021 23:24:52 +0200 -Subject: [PATCH 40/61] awk: use smaller regmatch_t arrays, they had 2 elements - for no 
apparent reason - -function old new delta -exec_builtin 1479 1434 -45 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 9f14f0f9a..c06dd2304 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -1937,7 +1937,7 @@ static int awk_split(const char *s, node *spl, char **slist) - n++; /* at least one field will be there */ - do { - int l; -- regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... -+ regmatch_t pmatch[1]; - - l = strcspn(s, c+2); /* len till next NUL or \n */ - if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0 -@@ -2166,7 +2166,7 @@ static int ptest(node *pattern) - static int awk_getline(rstream *rsm, var *v) - { - char *b; -- regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... -+ regmatch_t pmatch[1]; - int size, a, p, pp = 0; - int fd, so, eo, r, rp; - char c, *m, *s; -@@ -2473,7 +2473,7 @@ static NOINLINE var *exec_builtin(node *op, var *res) - node *an[4]; - var *av[4]; - const char *as[4]; -- regmatch_t pmatch[2]; -+ regmatch_t pmatch[1]; - regex_t sreg, *re; - node *spl; - uint32_t isr, info; -@@ -3533,6 +3533,8 @@ int awk_main(int argc UNUSED_PARAM, char **argv) - parse_program(llist_pop(&list_e)); - } - #endif -+//FIXME: preserve order of -e and -f -+//TODO: implement -i LIBRARY and -E FILE too, they are easy-ish - if (!(opt & (OPT_f | OPT_e))) { - if (!*argv) - bb_show_usage(); --- -2.27.0 - - -From bb55cde906cbaf136d6487ed7738003aa41b4bd5 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Fri, 2 Jul 2021 23:38:50 +0200 -Subject: [PATCH 41/61] awk: move match() code out-of-line - -function old new delta -exec_builtin_match - 202 +202 -exec_builtin 1434 1157 -277 ------------------------------------------------------------------------------- -(add/remove: 1/0 grow/shrink: 0/1 up/down: 202/-277) Total: -75 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 45 
++++++++++++++++++++++++++++----------------- - 1 file changed, 28 insertions(+), 17 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index c06dd2304..96e06db25 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -2465,6 +2465,30 @@ static NOINLINE int do_mktime(const char *ds) - return mktime(&then); - } - -+/* Reduce stack usage in exec_builtin() by keeping match() code separate */ -+static NOINLINE void exec_builtin_match(node *an1, const char *as0, var *res) -+{ -+ regmatch_t pmatch[1]; -+ regex_t sreg, *re; -+ int n; -+ -+ re = as_regex(an1, &sreg); -+ n = regexec(re, as0, 1, pmatch, 0); -+ if (n == 0) { -+ pmatch[0].rm_so++; -+ pmatch[0].rm_eo++; -+ } else { -+ pmatch[0].rm_so = 0; -+ pmatch[0].rm_eo = -1; -+ } -+ if (re == &sreg) -+ regfree(re); -+ setvar_i(newvar("RSTART"), pmatch[0].rm_so); -+ setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so); -+ setvar_i(res, pmatch[0].rm_so); -+} -+ -+/* Reduce stack usage in evaluate() by keeping builtins' code separate */ - static NOINLINE var *exec_builtin(node *op, var *res) - { - #define tspl (G.exec_builtin__tspl) -@@ -2473,8 +2497,6 @@ static NOINLINE var *exec_builtin(node *op, var *res) - node *an[4]; - var *av[4]; - const char *as[4]; -- regmatch_t pmatch[1]; -- regex_t sreg, *re; - node *spl; - uint32_t isr, info; - int nargs; -@@ -2633,20 +2655,7 @@ static NOINLINE var *exec_builtin(node *op, var *res) - break; - - case B_ma: -- re = as_regex(an[1], &sreg); -- n = regexec(re, as[0], 1, pmatch, 0); -- if (n == 0) { -- pmatch[0].rm_so++; -- pmatch[0].rm_eo++; -- } else { -- pmatch[0].rm_so = 0; -- pmatch[0].rm_eo = -1; -- } -- setvar_i(newvar("RSTART"), pmatch[0].rm_so); -- setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so); -- setvar_i(res, pmatch[0].rm_so); -- if (re == &sreg) -- regfree(re); -+ exec_builtin_match(an[1], as[0], res); - break; - - case B_ge: -@@ -2732,7 +2741,9 @@ static rstream *next_input_file(void) - - /* - * Evaluate node - the heart of the program. 
Supplied with subtree -- * and place where to store result. Returns ptr to result. -+ * and "res" variable to assign the result to if we evaluate an expression. -+ * If node refers to e.g. a variable or a field, no assignment happens. -+ * Return ptr to the result (which may or may not be the "res" variable!) - */ - #define XC(n) ((n) >> 8) - --- -2.27.0 - - -From a76f1b553545e144f5456c84398a0d98a81ff70d Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Sat, 3 Jul 2021 00:39:55 +0200 -Subject: [PATCH 42/61] awk: rename GRPSTART/END to L/RBRACE, no code changes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 60 ++++++++++++++++++++++++++++----------------------- - 1 file changed, 33 insertions(+), 27 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 96e06db25..a1a2afd1d 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -211,8 +211,8 @@ typedef struct tsplitter_s { - #define TC_PIPE (1 << 9) /* input redirection pipe | */ - #define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */ - #define TC_ARRTERM (1 << 11) /* ] */ --#define TC_GRPSTART (1 << 12) /* { */ --#define TC_GRPTERM (1 << 13) /* } */ -+#define TC_LBRACE (1 << 12) /* { */ -+#define TC_RBRACE (1 << 13) /* } */ - #define TC_SEMICOL (1 << 14) /* ; */ - #define TC_NEWLINE (1 << 15) - #define TC_STATX (1 << 16) /* ctl statement (for, next...) 
*/ -@@ -250,8 +250,8 @@ if ((n) & TC_COMMA ) debug_printf_parse(" COMMA" ); \ - if ((n) & TC_PIPE ) debug_printf_parse(" PIPE" ); \ - if ((n) & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" ); \ - if ((n) & TC_ARRTERM ) debug_printf_parse(" ARRTERM" ); \ --if ((n) & TC_GRPSTART) debug_printf_parse(" GRPSTART"); \ --if ((n) & TC_GRPTERM ) debug_printf_parse(" GRPTERM" ); \ -+if ((n) & TC_LBRACE ) debug_printf_parse(" LBRACE" ); \ -+if ((n) & TC_RBRACE ) debug_printf_parse(" RBRACE" ); \ - if ((n) & TC_SEMICOL ) debug_printf_parse(" SEMICOL" ); \ - if ((n) & TC_NEWLINE ) debug_printf_parse(" NEWLINE" ); \ - if ((n) & TC_STATX ) debug_printf_parse(" STATX" ); \ -@@ -291,13 +291,13 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ - | TC_FUNCDECL | TC_BEGIN | TC_END) - - /* discard newlines after these */ --#define TS_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \ -+#define TS_NOTERM (TC_COMMA | TC_LBRACE | TC_RBRACE \ - | TS_BINOP | TS_OPTERM) - - /* what can expression begin with */ - #define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP) - /* what can group begin with */ --#define TS_GRPSEQ (TS_OPSEQ | TS_OPTERM | TS_STATEMNT | TC_GRPSTART) -+#define TS_GRPSEQ (TS_OPSEQ | TS_OPTERM | TS_STATEMNT | TC_LBRACE) - - /* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */ - /* operator is inserted between them */ -@@ -402,8 +402,8 @@ static const char tokenlist[] ALIGN1 = - "\1|" NTC /* TC_PIPE */ - "\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */ - "\1]" NTC /* TC_ARRTERM */ -- "\1{" NTC /* TC_GRPSTART */ -- "\1}" NTC /* TC_GRPTERM */ -+ "\1{" NTC /* TC_LBRACE */ -+ "\1}" NTC /* TC_RBRACE */ - "\1;" NTC /* TC_SEMICOL */ - "\1\n" NTC /* TC_NEWLINE */ - "\2if" "\2do" "\3for" "\5break" /* TC_STATX */ -@@ -1471,7 +1471,7 @@ static node *parse_expr(uint32_t term_tc) - debug_printf_parse("%s: TC_LENGTH\n", __func__); - tc = next_token(TC_LPAREN /* length(...) 
*/ - | TS_OPTERM /* length; (or newline)*/ -- | TC_GRPTERM /* length } */ -+ | TC_RBRACE /* length } */ - | TC_BINOPX /* length NUM */ - | TC_COMMA /* print length, 1 */ - ); -@@ -1516,11 +1516,11 @@ static void chain_expr(uint32_t info) - - n = chain_node(info); - -- n->l.n = parse_expr(TS_OPTERM | TC_GRPTERM); -+ n->l.n = parse_expr(TS_OPTERM | TC_RBRACE); - if ((info & OF_REQUIRED) && !n->l.n) - syntax_error(EMSG_TOO_FEW_ARGS); - -- if (t_tclass & TC_GRPTERM) -+ if (t_tclass & TC_RBRACE) - rollback_token(); - } - -@@ -1559,16 +1559,16 @@ static void chain_group(void) - c = next_token(TS_GRPSEQ); - } while (c & TC_NEWLINE); - -- if (c & TC_GRPSTART) { -- debug_printf_parse("%s: TC_GRPSTART\n", __func__); -- while ((c = next_token(TS_GRPSEQ | TC_GRPTERM)) != TC_GRPTERM) { -- debug_printf_parse("%s: !TC_GRPTERM\n", __func__); -+ if (c & TC_LBRACE) { -+ debug_printf_parse("%s: TC_LBRACE\n", __func__); -+ while ((c = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) { -+ debug_printf_parse("%s: !TC_RBRACE\n", __func__); - if (c & TC_NEWLINE) - continue; - rollback_token(); - chain_group(); - } -- debug_printf_parse("%s: TC_GRPTERM\n", __func__); -+ debug_printf_parse("%s: TC_RBRACE\n", __func__); - return; - } - if (c & (TS_OPSEQ | TS_OPTERM)) { -@@ -1588,7 +1588,7 @@ static void chain_group(void) - chain_group(); - n2 = chain_node(OC_EXEC); - n->r.n = seq->last; -- if (next_token(TS_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) { -+ if (next_token(TS_GRPSEQ | TC_RBRACE | TC_ELSE) == TC_ELSE) { - chain_group(); - n2->a.n = seq->last; - } else { -@@ -1641,12 +1641,12 @@ static void chain_group(void) - case OC_PRINTF: - debug_printf_parse("%s: OC_PRINT[F]\n", __func__); - n = chain_node(t_info); -- n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_GRPTERM); -+ n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_RBRACE); - if (t_tclass & TC_OUTRDR) { - n->info |= t_info; -- n->r.n = parse_expr(TS_OPTERM | TC_GRPTERM); -+ n->r.n = parse_expr(TS_OPTERM | TC_RBRACE); - } -- if 
(t_tclass & TC_GRPTERM) -+ if (t_tclass & TC_RBRACE) - rollback_token(); - break; - -@@ -1684,7 +1684,7 @@ static void parse_program(char *p) - - g_pos = p; - t_lineno = 1; -- while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_GRPSTART | -+ while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE | - TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { - - if (tclass & TS_OPTERM) { -@@ -1696,10 +1696,14 @@ static void parse_program(char *p) - if (tclass & TC_BEGIN) { - debug_printf_parse("%s: TC_BEGIN\n", __func__); - seq = &beginseq; -+//TODO: ensure there is no newline between BEGIN and { -+//next_token(TC_LBRACE); rollback_token(); - chain_group(); - } else if (tclass & TC_END) { - debug_printf_parse("%s: TC_END\n", __func__); - seq = &endseq; -+//TODO: ensure there is no newline between END and { -+//next_token(TC_LBRACE); rollback_token(); - chain_group(); - } else if (tclass & TC_FUNCDECL) { - debug_printf_parse("%s: TC_FUNCDECL\n", __func__); -@@ -1726,24 +1730,26 @@ static void parse_program(char *p) - /* it was a comma, we ate it */ - } - seq = &f->body; -+//TODO: ensure there is { after "func F(...)" - but newlines are allowed -+//while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) continue; rollback_token(); - chain_group(); - hash_clear(ahash); - } else if (tclass & TS_OPSEQ) { - debug_printf_parse("%s: TS_OPSEQ\n", __func__); - rollback_token(); - cn = chain_node(OC_TEST); -- cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_GRPSTART); -- if (t_tclass & TC_GRPSTART) { -- debug_printf_parse("%s: TC_GRPSTART\n", __func__); -+ cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE); -+ if (t_tclass & TC_LBRACE) { -+ debug_printf_parse("%s: TC_LBRACE\n", __func__); - rollback_token(); - chain_group(); - } else { -- debug_printf_parse("%s: !TC_GRPSTART\n", __func__); -+ debug_printf_parse("%s: !TC_LBRACE\n", __func__); - chain_node(OC_PRINT); - } - cn->r.n = mainseq.last; -- } else /* if (tclass & TC_GRPSTART) */ { -- debug_printf_parse("%s: 
TC_GRPSTART(?)\n", __func__); -+ } else /* if (tclass & TC_LBRACE) */ { -+ debug_printf_parse("%s: TC_LBRACE(?)\n", __func__); - rollback_token(); - chain_group(); - } --- -2.27.0 - - -From df7698f1df2ed5a82a1558e167ba3262d1c614cb Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Sat, 3 Jul 2021 01:16:48 +0200 -Subject: [PATCH 43/61] awk: tighten rules in action parsing - -Disallow: - BEGIN - { action } - must start on the same line -Disallow: - func f() - print "hello" - must be in {...} - -function old new delta -chain_until_rbrace - 41 +41 -parse_program 307 336 +29 -chain_group 649 616 -33 ------------------------------------------------------------------------------- -(add/remove: 1/0 grow/shrink: 1/1 up/down: 70/-33) Total: 37 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 108 ++++++++++++++++++++++++++++++-------------------- - 1 file changed, 66 insertions(+), 42 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index a1a2afd1d..c68416873 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -1549,29 +1549,35 @@ static node *chain_loop(node *nn) - return n; - } - -+static void chain_until_rbrace(void) -+{ -+ uint32_t tc; -+ while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) { -+ debug_printf_parse("%s: !TC_RBRACE\n", __func__); -+ if (tc == TC_NEWLINE) -+ continue; -+ rollback_token(); -+ chain_group(); -+ } -+ debug_printf_parse("%s: TC_RBRACE\n", __func__); -+} -+ - /* parse group and attach it to chain */ - static void chain_group(void) - { -- uint32_t c; -+ uint32_t tc; - node *n, *n2, *n3; - - do { -- c = next_token(TS_GRPSEQ); -- } while (c & TC_NEWLINE); -+ tc = next_token(TS_GRPSEQ); -+ } while (tc == TC_NEWLINE); - -- if (c & TC_LBRACE) { -+ if (tc == TC_LBRACE) { - debug_printf_parse("%s: TC_LBRACE\n", __func__); -- while ((c = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) { -- debug_printf_parse("%s: !TC_RBRACE\n", __func__); -- if (c & TC_NEWLINE) -- continue; -- rollback_token(); -- chain_group(); -- } 
-- debug_printf_parse("%s: TC_RBRACE\n", __func__); -+ chain_until_rbrace(); - return; - } -- if (c & (TS_OPSEQ | TS_OPTERM)) { -+ if (tc & (TS_OPSEQ | TS_OPTERM)) { - debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__); - rollback_token(); - chain_expr(OC_EXEC | Vx); -@@ -1675,37 +1681,48 @@ static void chain_group(void) - - static void parse_program(char *p) - { -- uint32_t tclass; -- node *cn; -- func *f; -- var *v; -- - debug_printf_parse("%s()\n", __func__); - - g_pos = p; - t_lineno = 1; -- while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE | -- TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { -+ for (;;) { -+ uint32_t tclass; - -- if (tclass & TS_OPTERM) { -+ tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE | -+ TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL); -+ -+ if (tclass == TC_EOF) { -+ debug_printf_parse("%s: TC_EOF\n", __func__); -+ break; -+ } -+ if (tclass & TS_OPTERM) { /* ; or */ - debug_printf_parse("%s: TS_OPTERM\n", __func__); -+//NB: gawk allows many newlines, but does not allow more than one semicolon: -+// BEGIN {...};; -+//would complain "each rule must have a pattern or an action part". 
-+//Same message for -+// ; BEGIN {...} - continue; - } -- -- seq = &mainseq; -- if (tclass & TC_BEGIN) { -+ if (tclass == TC_BEGIN) { - debug_printf_parse("%s: TC_BEGIN\n", __func__); - seq = &beginseq; --//TODO: ensure there is no newline between BEGIN and { --//next_token(TC_LBRACE); rollback_token(); -- chain_group(); -- } else if (tclass & TC_END) { -+ /* ensure there is no newline between BEGIN and { */ -+ next_token(TC_LBRACE); -+ chain_until_rbrace(); -+ continue; -+ } -+ if (tclass == TC_END) { - debug_printf_parse("%s: TC_END\n", __func__); - seq = &endseq; --//TODO: ensure there is no newline between END and { --//next_token(TC_LBRACE); rollback_token(); -- chain_group(); -- } else if (tclass & TC_FUNCDECL) { -+ /* ensure there is no newline between END and { */ -+ next_token(TC_LBRACE); -+ chain_until_rbrace(); -+ continue; -+ } -+ if (tclass == TC_FUNCDECL) { -+ func *f; -+ - debug_printf_parse("%s: TC_FUNCDECL\n", __func__); - next_token(TC_FUNCTION); - f = newfunc(t_string); -@@ -1716,6 +1733,7 @@ static void parse_program(char *p) - //f->nargs = 0; - already is - /* func arg list: comma sep list of args, and a close paren */ - for (;;) { -+ var *v; - if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) { - if (f->nargs == 0) - break; /* func() is ok */ -@@ -1730,31 +1748,37 @@ static void parse_program(char *p) - /* it was a comma, we ate it */ - } - seq = &f->body; --//TODO: ensure there is { after "func F(...)" - but newlines are allowed --//while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) continue; rollback_token(); -- chain_group(); -+ /* ensure there is { after "func F(...)" - but newlines are allowed */ -+ while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) -+ continue; -+ chain_until_rbrace(); - hash_clear(ahash); -- } else if (tclass & TS_OPSEQ) { -+ continue; -+ } -+ seq = &mainseq; -+ if (tclass & TS_OPSEQ) { -+ node *cn; -+ - debug_printf_parse("%s: TS_OPSEQ\n", __func__); - rollback_token(); - cn = chain_node(OC_TEST); - 
cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE); -- if (t_tclass & TC_LBRACE) { -+ if (t_tclass == TC_LBRACE) { - debug_printf_parse("%s: TC_LBRACE\n", __func__); - rollback_token(); - chain_group(); - } else { -+ /* no action, assume default "{ print }" */ - debug_printf_parse("%s: !TC_LBRACE\n", __func__); - chain_node(OC_PRINT); - } - cn->r.n = mainseq.last; -- } else /* if (tclass & TC_LBRACE) */ { -- debug_printf_parse("%s: TC_LBRACE(?)\n", __func__); -- rollback_token(); -- chain_group(); -+ continue; - } -+ /* tclass == TC_LBRACE */ -+ debug_printf_parse("%s: TC_LBRACE(?)\n", __func__); -+ chain_until_rbrace(); - } -- debug_printf_parse("%s: TC_EOF\n", __func__); - } - - --- -2.27.0 - - -From bebe1432529281f66d2004e07194718a47207d5d Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Sat, 3 Jul 2021 01:32:03 +0200 -Subject: [PATCH 44/61] awk: open-code TS_OPTERM, no logic changes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 33 +++++++++++++++++---------------- - 1 file changed, 17 insertions(+), 16 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index c68416873..8c471d693 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -283,7 +283,6 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ - - #define TS_LVALUE (TC_VARIABLE | TC_ARRAY) - #define TS_STATEMNT (TC_STATX | TC_WHILE) --#define TS_OPTERM (TC_SEMICOL | TC_NEWLINE) - - /* word tokens, cannot mean something else if not expected */ - #define TS_WORD (TC_IN | TS_STATEMNT | TC_ELSE \ -@@ -291,13 +290,14 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ - | TC_FUNCDECL | TC_BEGIN | TC_END) - - /* discard newlines after these */ --#define TS_NOTERM (TC_COMMA | TC_LBRACE | TC_RBRACE \ -- | TS_BINOP | TS_OPTERM) -+#define TS_NOTERM (TS_BINOP | TC_COMMA | TC_LBRACE | TC_RBRACE \ -+ | TC_SEMICOL | TC_NEWLINE) - - /* what can expression begin with */ - #define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP) - /* what can group begin with */ --#define TS_GRPSEQ 
(TS_OPSEQ | TS_OPTERM | TS_STATEMNT | TC_LBRACE) -+#define TS_GRPSEQ (TS_OPSEQ | TS_STATEMNT \ -+ | TC_SEMICOL | TC_NEWLINE | TC_LBRACE) - - /* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */ - /* operator is inserted between them */ -@@ -642,7 +642,7 @@ struct globals2 { - #define g_buf (G.g_buf ) - #define INIT_G() do { \ - SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \ -- t_tclass = TS_OPTERM; \ -+ t_tclass = TC_NEWLINE; \ - G.evaluate__seed = 1; \ - } while (0) - -@@ -1090,7 +1090,7 @@ static uint32_t next_token(uint32_t expected) - const uint32_t *ti; - uint32_t tc, last_token_class; - -- last_token_class = t_tclass; /* t_tclass is initialized to TS_OPTERM */ -+ last_token_class = t_tclass; /* t_tclass is initialized to TC_NEWLINE */ - - debug_printf_parse("%s() expected(%x):", __func__, expected); - debug_parse_print_tc(expected); -@@ -1470,7 +1470,8 @@ static node *parse_expr(uint32_t term_tc) - case TC_LENGTH: - debug_printf_parse("%s: TC_LENGTH\n", __func__); - tc = next_token(TC_LPAREN /* length(...) 
*/ -- | TS_OPTERM /* length; (or newline)*/ -+ | TC_SEMICOL /* length; */ -+ | TC_NEWLINE /* length */ - | TC_RBRACE /* length } */ - | TC_BINOPX /* length NUM */ - | TC_COMMA /* print length, 1 */ -@@ -1516,7 +1517,7 @@ static void chain_expr(uint32_t info) - - n = chain_node(info); - -- n->l.n = parse_expr(TS_OPTERM | TC_RBRACE); -+ n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE); - if ((info & OF_REQUIRED) && !n->l.n) - syntax_error(EMSG_TOO_FEW_ARGS); - -@@ -1577,8 +1578,8 @@ static void chain_group(void) - chain_until_rbrace(); - return; - } -- if (tc & (TS_OPSEQ | TS_OPTERM)) { -- debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__); -+ if (tc & (TS_OPSEQ | TC_SEMICOL | TC_NEWLINE)) { -+ debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL | TC_NEWLINE\n", __func__); - rollback_token(); - chain_expr(OC_EXEC | Vx); - return; -@@ -1647,10 +1648,10 @@ static void chain_group(void) - case OC_PRINTF: - debug_printf_parse("%s: OC_PRINT[F]\n", __func__); - n = chain_node(t_info); -- n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_RBRACE); -+ n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_OUTRDR | TC_RBRACE); - if (t_tclass & TC_OUTRDR) { - n->info |= t_info; -- n->r.n = parse_expr(TS_OPTERM | TC_RBRACE); -+ n->r.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE); - } - if (t_tclass & TC_RBRACE) - rollback_token(); -@@ -1689,14 +1690,14 @@ static void parse_program(char *p) - uint32_t tclass; - - tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE | -- TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL); -+ TC_SEMICOL | TC_NEWLINE | TC_BEGIN | TC_END | TC_FUNCDECL); - - if (tclass == TC_EOF) { - debug_printf_parse("%s: TC_EOF\n", __func__); - break; - } -- if (tclass & TS_OPTERM) { /* ; or */ -- debug_printf_parse("%s: TS_OPTERM\n", __func__); -+ if (tclass & (TC_SEMICOL | TC_NEWLINE)) { -+ debug_printf_parse("%s: TC_SEMICOL | TC_NEWLINE\n", __func__); - //NB: gawk allows many newlines, but does not allow more than one semicolon: - // BEGIN {...};; - //would 
complain "each rule must have a pattern or an action part". -@@ -1762,7 +1763,7 @@ static void parse_program(char *p) - debug_printf_parse("%s: TS_OPSEQ\n", __func__); - rollback_token(); - cn = chain_node(OC_TEST); -- cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE); -+ cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE); - if (t_tclass == TC_LBRACE) { - debug_printf_parse("%s: TC_LBRACE\n", __func__); - rollback_token(); --- -2.27.0 - - -From be80050f2cff5967de7a50eb3aed2f95c39357cd Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Sat, 3 Jul 2021 01:59:36 +0200 -Subject: [PATCH 45/61] awk: support %F %a %A in printf - -function old new delta -.rodata 104111 104120 +9 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 8c471d693..2c3b49bc8 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -909,7 +909,7 @@ static int fmt_num(char *b, int size, const char *format, double n, int int_as_i - do { c = *s; } while (c && *++s); - if (strchr("diouxX", c)) { - r = snprintf(b, size, format, (int)n); -- } else if (strchr("eEfgG", c)) { -+ } else if (strchr("eEfFgGaA", c)) { - r = snprintf(b, size, format, n); - } else { - syntax_error(EMSG_INV_FMT); --- -2.27.0 - - -From 8b97bd49bdd5181c211f5d7b64108edf9e8962f4 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Sat, 3 Jul 2021 11:54:01 +0200 -Subject: [PATCH 46/61] awk: do not use a copy of g_progname for - node->l.new_progname - -We never destroy g_progname's, the strings still exist, no need to copy - -function old new delta -chain_node 104 97 -7 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 2c3b49bc8..4119253ec 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -179,7 +179,7 @@ typedef struct node_s { - struct node_s *n; - var *v; - int aidx; -- char *new_progname; 
-+ const char *new_progname; - regex_t *re; - } l; - union { -@@ -1501,7 +1501,7 @@ static node *chain_node(uint32_t info) - if (seq->programname != g_progname) { - seq->programname = g_progname; - n = chain_node(OC_NEWSOURCE); -- n->l.new_progname = xstrdup(g_progname); -+ n->l.new_progname = g_progname; - } - - n = seq->last; --- -2.27.0 - - -From 61dc1b3f2201368a310b0754a74e6152fe6b015d Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Sat, 3 Jul 2021 11:57:59 +0200 -Subject: [PATCH 47/61] awk: rand(): 64-bit constants should be ULL - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 4119253ec..e4dd6684c 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -3169,9 +3169,9 @@ static var *evaluate(node *op, var *res) - uint64_t v = ((uint64_t)rand() << 32) | u; - /* the above shift+or is optimized out on 32-bit arches */ - # if RAND_MAX > 0x7fffffff -- v &= 0x7fffffffffffffffUL; -+ v &= 0x7fffffffffffffffULL; - # endif -- R_d = (double)v / 0x8000000000000000UL; -+ R_d = (double)v / 0x8000000000000000ULL; - #else - # error Not implemented for this value of RAND_MAX - #endif --- -2.27.0 - - -From a6468234691fb0718fa0d57b9de4a7748f805af9 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Sat, 3 Jul 2021 12:20:36 +0200 -Subject: [PATCH 48/61] awk: match(): code shrink - -function old new delta -do_match - 165 +165 -exec_builtin_match 202 - -202 ------------------------------------------------------------------------------- -(add/remove: 1/1 grow/shrink: 0/0 up/down: 165/-202) Total: -37 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 24 +++++++++++------------- - 1 file changed, 11 insertions(+), 13 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index e4dd6684c..649198d15 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -2497,26 +2497,24 @@ static NOINLINE int do_mktime(const char *ds) - } - - /* Reduce stack usage in 
exec_builtin() by keeping match() code separate */ --static NOINLINE void exec_builtin_match(node *an1, const char *as0, var *res) -+static NOINLINE var *do_match(node *an1, const char *as0) - { - regmatch_t pmatch[1]; - regex_t sreg, *re; -- int n; -+ int n, start, len; - - re = as_regex(an1, &sreg); - n = regexec(re, as0, 1, pmatch, 0); -- if (n == 0) { -- pmatch[0].rm_so++; -- pmatch[0].rm_eo++; -- } else { -- pmatch[0].rm_so = 0; -- pmatch[0].rm_eo = -1; -- } - if (re == &sreg) - regfree(re); -- setvar_i(newvar("RSTART"), pmatch[0].rm_so); -- setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so); -- setvar_i(res, pmatch[0].rm_so); -+ start = 0; -+ len = -1; -+ if (n == 0) { -+ start = pmatch[0].rm_so + 1; -+ len = pmatch[0].rm_eo - pmatch[0].rm_so; -+ } -+ setvar_i(newvar("RLENGTH"), len); -+ return setvar_i(newvar("RSTART"), start); - } - - /* Reduce stack usage in evaluate() by keeping builtins' code separate */ -@@ -2686,7 +2684,7 @@ static NOINLINE var *exec_builtin(node *op, var *res) - break; - - case B_ma: -- exec_builtin_match(an[1], as[0], res); -+ res = do_match(an[1], as[0]); - break; - - case B_ge: --- -2.27.0 - - -From 9642f8123d92f8a1db9078178b04d22015d5e03a Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Sat, 3 Jul 2021 13:29:32 +0200 -Subject: [PATCH 49/61] awk: restore strdup elision optimization in assignment - -function old new delta -evaluate 3339 3387 +48 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 25 +++++++++++++++++-------- - 1 file changed, 17 insertions(+), 8 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 649198d15..20672db9a 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -102,7 +102,7 @@ enum { - #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */ - #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */ - #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */ --#define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */ 
-+#define VF_FSTR 0x1000 /* 1 = don't free() var::string (not malloced, or is owned by something else) */ - #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */ - #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */ - -@@ -1371,6 +1371,12 @@ static node *parse_expr(uint32_t term_tc) - cn->a.n = vn->a.n; - if (tc & TS_BINOP) { - cn->l.n = vn; -+//FIXME: this is the place to detect and reject assignments to non-lvalues. -+//Currently we allow "assignments" to consts and temporaries, nonsense like this: -+// awk 'BEGIN { "qwe" = 1 }' -+// awk 'BEGIN { 7 *= 7 }' -+// awk 'BEGIN { length("qwe") = 1 }' -+// awk 'BEGIN { (1+1) += 3 }' - expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; - if ((t_info & OPCLSMASK) == OC_PGETLINE) { - /* it's a pipe */ -@@ -3043,14 +3049,17 @@ static var *evaluate(node *op, var *res) - case XC( OC_MOVE ): - debug_printf_eval("MOVE\n"); - /* if source is a temporary string, jusk relink it to dest */ --//Disabled: if R.v is numeric but happens to have cached R.v->string, --//then L.v ends up being a string, which is wrong --// if (R.v == TMPVAR1 && R.v->string) { --// res = setvar_p(L.v, R.v->string); --// R.v->string = NULL; --// } else { -+ if (R.v == TMPVAR1 -+ && !(R.v->type & VF_NUMBER) -+ /* Why check !NUMBER? if R.v is a number but has cached R.v->string, -+ * L.v ends up a string, which is wrong */ -+ /*&& R.v->string - always not NULL (right?) */ -+ ) { -+ res = setvar_p(L.v, R.v->string); /* avoids strdup */ -+ R.v->string = NULL; -+ } else { - res = copyvar(L.v, R.v); --// } -+ } - break; - - case XC( OC_TERNARY ): --- -2.27.0 - - -From c49ba79e1ce45367a1d994b12d972daae0698beb Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Sat, 3 Jul 2021 13:57:47 +0200 -Subject: [PATCH 50/61] awk: simplify tests for operation class - -Usually, an operation class has only one possible value of "info" word. -In this case, just compare the entire info word, do not bother -to mask OPCLSMASK bits. 
- -(Example where this is not the case: OC_REPLACE for "=") - -function old new delta -mk_splitter 106 100 -6 -chain_group 616 610 -6 -nextarg 40 32 -8 -exec_builtin 1157 1149 -8 -as_regex 111 103 -8 -awk_split 553 543 -10 -parse_expr 948 936 -12 -awk_getline 656 642 -14 -evaluate 3387 3343 -44 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 0/9 up/down: 0/-116) Total: -116 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 64 +++++++++++++++++++++++++++++---------------------- - 1 file changed, 36 insertions(+), 28 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 20672db9a..cd135ef64 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -432,7 +432,8 @@ static const char tokenlist[] ALIGN1 = - static const uint32_t tokeninfo[] ALIGN4 = { - 0, - 0, -- OC_REGEXP, -+#define TI_REGEXP OC_REGEXP -+ TI_REGEXP, - xS|'a', xS|'w', xS|'|', - OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', - #define TI_PREINC (OC_UNARY|xV|P(9)|'P') -@@ -443,12 +444,17 @@ static const uint32_t tokeninfo[] ALIGN4 = { - OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', - OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', - OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, --#define TI_LESS (OC_COMPARE|VV|P(39)|2) -+#define TI_LESS (OC_COMPARE|VV|P(39)|2) - TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), -- OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':', -- OC_IN|SV|P(49), /* TC_IN */ -- OC_COMMA|SS|P(80), -- OC_PGETLINE|SV|P(37), -+#define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?') -+#define TI_COLON (OC_COLON|xx|P(67)|':') -+ OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON, -+#define TI_IN (OC_IN|SV|P(49)) -+ TI_IN, -+#define TI_COMMA (OC_COMMA|SS|P(80)) -+ TI_COMMA, -+#define TI_PGETLINE (OC_PGETLINE|SV|P(37)) -+ TI_PGETLINE, - OC_UNARY|xV|P(19)|'+', 
OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', - 0, /* ] */ - 0, -@@ -456,7 +462,8 @@ static const uint32_t tokeninfo[] ALIGN4 = { - 0, - 0, /* \n */ - ST_IF, ST_DO, ST_FOR, OC_BREAK, -- OC_CONTINUE, OC_DELETE|Rx, OC_PRINT, -+#define TI_PRINT OC_PRINT -+ OC_CONTINUE, OC_DELETE|Rx, TI_PRINT, - OC_PRINTF, OC_NEXT, OC_NEXTFILE, - OC_RETURN|Vx, OC_EXIT|Nx, - ST_WHILE, -@@ -465,8 +472,8 @@ static const uint32_t tokeninfo[] ALIGN4 = { - // Highest byte bit pattern: nn s3s2s1 v3v2v1 - // nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var - // OC_F's are builtins with zero or one argument. --// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt. --// Check for no args is present in builtins' code (not in this table): rand, systime. -+// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt -+// Check for no args is present in builtins' code (not in this table): rand, systime - // Have one _optional_ arg: fflush, srand, length - #define OC_B OC_BUILTIN - #define OC_F OC_FBLTIN -@@ -1310,7 +1317,7 @@ static node *new_node(uint32_t info) - - static void mk_re_node(const char *s, node *n, regex_t *re) - { -- n->info = OC_REGEXP; -+ n->info = TI_REGEXP; - n->l.re = re; - n->r.ire = re + 1; - xregcomp(re, s, REG_EXTENDED); -@@ -1360,12 +1367,13 @@ static node *parse_expr(uint32_t term_tc) - * previous operators with higher priority */ - vn = cn; - while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2)) -- || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) -+ || ((t_info == vn->info) && t_info == TI_COLON) - ) { - vn = vn->a.n; - if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN); - } -- if ((t_info & OPCLSMASK) == OC_TERNARY) -+ if (t_info == TI_TERNARY) -+//TODO: why? 
- t_info += P(6); - cn = vn->a.n->r.n = new_node(t_info); - cn->a.n = vn->a.n; -@@ -1378,7 +1386,7 @@ static node *parse_expr(uint32_t term_tc) - // awk 'BEGIN { length("qwe") = 1 }' - // awk 'BEGIN { (1+1) += 3 }' - expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; -- if ((t_info & OPCLSMASK) == OC_PGETLINE) { -+ if (t_info == TI_PGETLINE) { - /* it's a pipe */ - next_token(TC_GETLINE); - /* give maximum priority to this pipe */ -@@ -1630,7 +1638,7 @@ static void chain_group(void) - next_token(TC_LPAREN); - n2 = parse_expr(TC_SEMICOL | TC_RPAREN); - if (t_tclass & TC_RPAREN) { /* for-in */ -- if (!n2 || (n2->info & OPCLSMASK) != OC_IN) -+ if (!n2 || n2->info != TI_IN) - syntax_error(EMSG_UNEXP_TOKEN); - n = chain_node(OC_WALKINIT | VV); - n->l.n = n2->l.n; -@@ -1834,7 +1842,7 @@ static node *mk_splitter(const char *s, tsplitter *spl) - re = &spl->re[0]; - ire = &spl->re[1]; - n = &spl->n; -- if ((n->info & OPCLSMASK) == OC_REGEXP) { -+ if (n->info == TI_REGEXP) { - regfree(re); - regfree(ire); // TODO: nuke ire, use re+1? - } -@@ -1858,7 +1866,7 @@ static regex_t *as_regex(node *op, regex_t *preg) - int cflags; - const char *s; - -- if ((op->info & OPCLSMASK) == OC_REGEXP) { -+ if (op->info == TI_REGEXP) { - return icase ? 
op->r.ire : op->l.re; - } - -@@ -1968,7 +1976,7 @@ static int awk_split(const char *s, node *spl, char **slist) - c[2] = '\n'; - - n = 0; -- if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */ -+ if (spl->info == TI_REGEXP) { /* regex split */ - if (!*s) - return n; /* "": zero fields */ - n++; /* at least one field will be there */ -@@ -2135,7 +2143,7 @@ static node *nextarg(node **pn) - node *n; - - n = *pn; -- if (n && (n->info & OPCLSMASK) == OC_COMMA) { -+ if (n && n->info == TI_COMMA) { - *pn = n->r.n; - n = n->l.n; - } else { -@@ -2229,7 +2237,7 @@ static int awk_getline(rstream *rsm, var *v) - so = eo = p; - r = 1; - if (p > 0) { -- if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) { -+ if (rsplitter.n.info == TI_REGEXP) { - if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re, - b, 1, pmatch, 0) == 0) { - so = pmatch[0].rm_so; -@@ -2575,8 +2583,8 @@ static NOINLINE var *exec_builtin(node *op, var *res) - char *s, *s1; - - if (nargs > 2) { -- spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ? -- an[2] : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl); -+ spl = (an[2]->info == TI_REGEXP) ? 
an[2] -+ : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl); - } else { - spl = &fsplitter.n; - } -@@ -2860,7 +2868,7 @@ static var *evaluate(node *op, var *res) - /* test pattern */ - case XC( OC_TEST ): - debug_printf_eval("TEST\n"); -- if ((op1->info & OPCLSMASK) == OC_COMMA) { -+ if (op1->info == TI_COMMA) { - /* it's range pattern */ - if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) { - op->info |= OF_CHECKED; -@@ -2921,7 +2929,7 @@ static var *evaluate(node *op, var *res) - F = rsm->F; - } - -- if ((opinfo & OPCLSMASK) == OC_PRINT) { -+ if (opinfo == TI_PRINT) { - if (!op1) { - fputs(getvar_s(intvar[F0]), F); - } else { -@@ -2940,7 +2948,7 @@ static var *evaluate(node *op, var *res) - } - } - fputs(getvar_s(intvar[ORS]), F); -- } else { /* OC_PRINTF */ -+ } else { /* PRINTF */ - char *s = awk_printf(op1, &len); - #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS - fwrite(s, len, 1, F); -@@ -3064,7 +3072,7 @@ static var *evaluate(node *op, var *res) - - case XC( OC_TERNARY ): - debug_printf_eval("TERNARY\n"); -- if ((op->r.n->info & OPCLSMASK) != OC_COLON) -+ if (op->r.n->info != TI_COLON) - syntax_error(EMSG_POSSIBLE_ERROR); - res = evaluate(istrue(L.v) ? 
op->r.n->l.n : op->r.n->r.n, res); - break; -@@ -3122,7 +3130,7 @@ static var *evaluate(node *op, var *res) - if (op1) { - rsm = newfile(L.s); - if (!rsm->F) { -- if ((opinfo & OPCLSMASK) == OC_PGETLINE) { -+ if (opinfo == TI_PGETLINE) { - rsm->F = popen(L.s, "r"); - rsm->is_pipe = TRUE; - } else { -@@ -3158,7 +3166,7 @@ static var *evaluate(node *op, var *res) - double R_d = R_d; /* for compiler */ - debug_printf_eval("FBLTIN\n"); - -- if (op1 && (op1->info & OPCLSMASK) == OC_COMMA) -+ if (op1 && op1->info == TI_COMMA) - /* Simple builtins take one arg maximum */ - syntax_error("Too many arguments"); - -@@ -3358,7 +3366,7 @@ static var *evaluate(node *op, var *res) - case XC( OC_COMMA ): { - const char *sep = ""; - debug_printf_eval("COMMA\n"); -- if ((opinfo & OPCLSMASK) == OC_COMMA) -+ if (opinfo == TI_COMMA) - sep = getvar_s(intvar[SUBSEP]); - setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s)); - break; --- -2.27.0 - - -From 39122ab01367775898f3f46394942138176b4101 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Sun, 4 Jul 2021 01:25:34 +0200 -Subject: [PATCH 51/61] awk: fix printf buffer overflow - -function old new delta -awk_printf 468 546 +78 -fmt_num 239 247 +8 -getvar_s 125 111 -14 -evaluate 3343 3329 -14 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 2/2 up/down: 86/-28) Total: 58 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 94 ++++++++++++++++++++++++++++++--------------------- - 1 file changed, 55 insertions(+), 39 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index cd135ef64..a440a6234 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -904,25 +904,23 @@ static double my_strtod(char **pp) - - /* -------- working with variables (set/get/copy/etc) -------- */ - --static int fmt_num(char *b, int size, const char *format, double n, int int_as_int) -+static void fmt_num(const char *format, double n) - { -- int r = 0; -- char c; -- const char *s = format; 
-- -- if (int_as_int && n == (long long)n) { -- r = snprintf(b, size, "%lld", (long long)n); -+ if (n == (long long)n) { -+ snprintf(g_buf, MAXVARFMT, "%lld", (long long)n); - } else { -+ const char *s = format; -+ char c; -+ - do { c = *s; } while (c && *++s); - if (strchr("diouxX", c)) { -- r = snprintf(b, size, format, (int)n); -+ snprintf(g_buf, MAXVARFMT, format, (int)n); - } else if (strchr("eEfFgGaA", c)) { -- r = snprintf(b, size, format, n); -+ snprintf(g_buf, MAXVARFMT, format, n); - } else { - syntax_error(EMSG_INV_FMT); - } - } -- return r; - } - - static xhash *iamarray(var *a) -@@ -999,7 +997,7 @@ static const char *getvar_s(var *v) - { - /* if v is numeric and has no cached string, convert it to string */ - if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) { -- fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE); -+ fmt_num(getvar_s(intvar[CONVFMT]), v->number); - v->string = xstrdup(g_buf); - v->type |= VF_CACHED; - } -@@ -2315,12 +2313,9 @@ static int awk_getline(rstream *rsm, var *v) - #endif - static char *awk_printf(node *n, int *len) - { -- char *b = NULL; -- char *fmt, *s, *f; -- const char *s1; -- int i, j, incr, bsize; -- char c, c1; -- var *arg; -+ char *b; -+ char *fmt, *f; -+ int i; - - //tmpvar = nvalloc(1); - #define TMPVAR (&G.awk_printf__tmpvar) -@@ -2333,8 +2328,14 @@ static char *awk_printf(node *n, int *len) - // to evaluate() potentially recursing into another awk_printf() can't - // mangle the value. 
- -+ b = NULL; - i = 0; -- while (*f) { -+ while (*f) { /* "print one format spec" loop */ -+ char *s; -+ char c; -+ char sv; -+ var *arg; -+ - s = f; - while (*f && (*f != '%' || *++f == '%')) - f++; -@@ -2343,40 +2344,55 @@ static char *awk_printf(node *n, int *len) - syntax_error("%*x formats are not supported"); - f++; - } -- -- incr = (f - s) + MAXVARFMT; -- b = qrealloc(b, incr + i, &bsize); - c = *f; -- if (c != '\0') -- f++; -- c1 = *f; -+ if (!c) { -+ /* Tail of fmt with no percent chars, -+ * or "....%" (percent seen, but no format specifier char found) -+ */ -+ goto tail; -+ } -+ sv = *++f; - *f = '\0'; - arg = evaluate(nextarg(&n), TMPVAR); - -- j = i; -- if (c == 'c' || !c) { -- i += sprintf(b+i, s, is_numeric(arg) ? -+ /* Result can be arbitrarily long. Example: -+ * printf "%99999s", "BOOM" -+ */ -+ if (c == 'c') { -+ s = xasprintf(s, is_numeric(arg) ? - (char)getvar_i(arg) : *getvar_s(arg)); - } else if (c == 's') { -- s1 = getvar_s(arg); -- b = qrealloc(b, incr+i+strlen(s1), &bsize); -- i += sprintf(b+i, s, s1); -+ s = xasprintf(s, getvar_s(arg)); - } else { -- i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE); -+ double d = getvar_i(arg); -+ if (strchr("diouxX", c)) { -+//TODO: make it wider here (%x -> %llx etc)? -+ s = xasprintf(s, (int)d); -+ } else if (strchr("eEfFgGaA", c)) { -+ s = xasprintf(s, d); -+ } else { -+ syntax_error(EMSG_INV_FMT); -+ } - } -- *f = c1; -+ *f = sv; - -- /* if there was an error while sprintf, return value is negative */ -- if (i < j) -- i = j; -+ if (i == 0) { -+ b = s; -+ i = strlen(b); -+ continue; -+ } -+ tail: -+ b = xrealloc(b, i + strlen(s) + 1); -+ i = stpcpy(b + i, s) - b; -+ if (!c) /* tail? 
*/ -+ break; -+ free(s); - } - - free(fmt); - //nvfree(tmpvar, 1); - #undef TMPVAR - -- b = xrealloc(b, i + 1); -- b[i] = '\0'; - #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS - if (len) - *len = i; -@@ -2936,8 +2952,8 @@ static var *evaluate(node *op, var *res) - for (;;) { - var *v = evaluate(nextarg(&op1), TMPVAR0); - if (v->type & VF_NUMBER) { -- fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]), -- getvar_i(v), TRUE); -+ fmt_num(getvar_s(intvar[OFMT]), -+ getvar_i(v)); - fputs(g_buf, F); - } else { - fputs(getvar_s(v), F); --- -2.27.0 - - -From 9c55f6ae3f528a3416368e0aff9942d5b4ed216d Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Sun, 11 Jul 2021 11:46:21 +0200 -Subject: [PATCH 52/61] awk: rollback_token() + chain_group() == - chain_until_rbrace() - -function old new delta -parse_program 336 332 -4 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index a440a6234..755e68fc7 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -1778,8 +1778,7 @@ static void parse_program(char *p) - cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE); - if (t_tclass == TC_LBRACE) { - debug_printf_parse("%s: TC_LBRACE\n", __func__); -- rollback_token(); -- chain_group(); -+ chain_until_rbrace(); - } else { - /* no action, assume default "{ print }" */ - debug_printf_parse("%s: !TC_LBRACE\n", __func__); --- -2.27.0 - - -From bd0d2c3b5bf5c9337e67b43222bafcdf80c4e36a Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Sun, 11 Jul 2021 12:00:31 +0200 -Subject: [PATCH 53/61] awk: undo TI_PRINT, it introduced a bug (print with any - redirect acting as printf) - -function old new delta -evaluate 3329 3337 +8 - -Patch by Ron Yorston - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 8 +++++--- - testsuite/awk.tests | 5 +++++ - 2 files changed, 10 insertions(+), 3 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 755e68fc7..0aa7c0804 100644 ---- 
a/editors/awk.c -+++ b/editors/awk.c -@@ -462,8 +462,7 @@ static const uint32_t tokeninfo[] ALIGN4 = { - 0, - 0, /* \n */ - ST_IF, ST_DO, ST_FOR, OC_BREAK, --#define TI_PRINT OC_PRINT -- OC_CONTINUE, OC_DELETE|Rx, TI_PRINT, -+ OC_CONTINUE, OC_DELETE|Rx, OC_PRINT, - OC_PRINTF, OC_NEXT, OC_NEXTFILE, - OC_RETURN|Vx, OC_EXIT|Nx, - ST_WHILE, -@@ -2944,7 +2943,10 @@ static var *evaluate(node *op, var *res) - F = rsm->F; - } - -- if (opinfo == TI_PRINT) { -+ /* Can't just check 'opinfo == OC_PRINT' here, parser ORs -+ * additional bits to opinfos of print/printf with redirects -+ */ -+ if ((opinfo & OPCLSMASK) == OC_PRINT) { - if (!op1) { - fputs(getvar_s(intvar[F0]), F); - } else { -diff --git a/testsuite/awk.tests b/testsuite/awk.tests -index 770d8ffce..6b23b91cb 100755 ---- a/testsuite/awk.tests -+++ b/testsuite/awk.tests -@@ -450,4 +450,9 @@ testing "awk exit N propagates through END's exit" \ - "42\n" \ - '' '' - -+testing "awk print + redirect" \ -+ "awk 'BEGIN { print \"STDERR %s\" >\"/dev/stderr\" }' 2>&1" \ -+ "STDERR %s\n" \ -+ '' '' -+ - exit $FAILCOUNT --- -2.27.0 - - -From 5ed199c07d9ffc947443118dda0e0af6569588d5 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Sun, 11 Jul 2021 12:25:33 +0200 -Subject: [PATCH 54/61] awk: unbreak "printf('%c') can output NUL" testcase - -function old new delta -awk_printf 546 593 +47 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 43 ++++++++++++++++++++++++++----------------- - 1 file changed, 26 insertions(+), 17 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 0aa7c0804..e765d3fcf 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -2309,11 +2309,11 @@ static int awk_getline(rstream *rsm, var *v) - #if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS - # define awk_printf(a, b) awk_printf(a) - #endif --static char *awk_printf(node *n, int *len) -+static char *awk_printf(node *n, size_t *len) - { - char *b; - char *fmt, *f; -- int i; -+ size_t i; - - //tmpvar = nvalloc(1); - #define TMPVAR 
(&G.awk_printf__tmpvar) -@@ -2333,6 +2333,7 @@ static char *awk_printf(node *n, int *len) - char c; - char sv; - var *arg; -+ size_t slen; - - s = f; - while (*f && (*f != '%' || *++f == '%')) -@@ -2347,6 +2348,7 @@ static char *awk_printf(node *n, int *len) - /* Tail of fmt with no percent chars, - * or "....%" (percent seen, but no format specifier char found) - */ -+ slen = strlen(s); - goto tail; - } - sv = *++f; -@@ -2357,31 +2359,38 @@ static char *awk_printf(node *n, int *len) - * printf "%99999s", "BOOM" - */ - if (c == 'c') { -- s = xasprintf(s, is_numeric(arg) ? -- (char)getvar_i(arg) : *getvar_s(arg)); -- } else if (c == 's') { -- s = xasprintf(s, getvar_s(arg)); -+ c = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg); -+ s = xasprintf(s, c); -+ /* + 1 if c == NUL: handle printf "%c" 0 case -+ * (and printf "%22c" 0 etc, but still fails for e.g. printf "%-22c" 0) */ -+ slen = strlen(s) + (c == '\0'); - } else { -- double d = getvar_i(arg); -- if (strchr("diouxX", c)) { --//TODO: make it wider here (%x -> %llx etc)? -- s = xasprintf(s, (int)d); -- } else if (strchr("eEfFgGaA", c)) { -- s = xasprintf(s, d); -+ if (c == 's') { -+ s = xasprintf(s, getvar_s(arg)); - } else { -- syntax_error(EMSG_INV_FMT); -+ double d = getvar_i(arg); -+ if (strchr("diouxX", c)) { -+//TODO: make it wider here (%x -> %llx etc)? -+ s = xasprintf(s, (int)d); -+ } else if (strchr("eEfFgGaA", c)) { -+ s = xasprintf(s, d); -+ } else { -+ syntax_error(EMSG_INV_FMT); -+ } - } -+ slen = strlen(s); - } - *f = sv; - - if (i == 0) { - b = s; -- i = strlen(b); -+ i = slen; - continue; - } - tail: -- b = xrealloc(b, i + strlen(s) + 1); -- i = stpcpy(b + i, s) - b; -+ b = xrealloc(b, i + slen + 1); -+ strcpy(b + i, s); -+ i += slen; - if (!c) /* tail? 
*/ - break; - free(s); -@@ -2926,7 +2935,6 @@ static var *evaluate(node *op, var *res) - debug_printf_eval("PRINTF\n"); - { - FILE *F = stdout; -- IF_FEATURE_AWK_GNU_EXTENSIONS(int len;) - - if (op->r.n) { - rstream *rsm = newfile(R.s); -@@ -2966,6 +2974,7 @@ static var *evaluate(node *op, var *res) - } - fputs(getvar_s(intvar[ORS]), F); - } else { /* PRINTF */ -+ IF_FEATURE_AWK_GNU_EXTENSIONS(size_t len;) - char *s = awk_printf(op1, &len); - #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS - fwrite(s, len, 1, F); --- -2.27.0 - - -From f38b2d9bcddd00432150567bef8f8a2bf0d1ed43 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Sun, 11 Jul 2021 12:51:43 +0200 -Subject: [PATCH 55/61] awk: unbreak "cmd" | getline - -function old new delta -evaluate 3337 3343 +6 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 3 ++- - testsuite/awk.tests | 5 +++++ - 2 files changed, 7 insertions(+), 1 deletion(-) - -diff --git a/editors/awk.c b/editors/awk.c -index e765d3fcf..6c60a0615 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -3156,7 +3156,8 @@ static var *evaluate(node *op, var *res) - if (op1) { - rsm = newfile(L.s); - if (!rsm->F) { -- if (opinfo == TI_PGETLINE) { -+ /* NB: can't use "opinfo == TI_PGETLINE", would break "cmd" | getline */ -+ if ((opinfo & OPCLSMASK) == OC_PGETLINE) { - rsm->F = popen(L.s, "r"); - rsm->is_pipe = TRUE; - } else { -diff --git a/testsuite/awk.tests b/testsuite/awk.tests -index 6b23b91cb..242c897d1 100755 ---- a/testsuite/awk.tests -+++ b/testsuite/awk.tests -@@ -455,4 +455,9 @@ testing "awk print + redirect" \ - "STDERR %s\n" \ - '' '' - -+testing "awk \"cmd\" | getline" \ -+ "awk 'BEGIN { \"echo HELLO\" | getline; print }'" \ -+ "HELLO\n" \ -+ '' '' -+ - exit $FAILCOUNT --- -2.27.0 - - -From 3a759a81580a1f7d9b4428e30c623324ec2e3699 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Sun, 11 Jul 2021 18:16:10 +0200 -Subject: [PATCH 56/61] awk: fix corner case in awk_printf - -Example where it wasn't working: - awk 'BEGIN { printf "qwe %s rty 
%c uio\n", "a", 0, "c" }' -- the NUL printing in %c caused premature stop of printing. - -function old new delta -awk_printf 593 596 +3 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 6c60a0615..465033f5f 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -2359,11 +2359,11 @@ static char *awk_printf(node *n, size_t *len) - * printf "%99999s", "BOOM" - */ - if (c == 'c') { -- c = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg); -- s = xasprintf(s, c); -- /* + 1 if c == NUL: handle printf "%c" 0 case -+ char cc = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg); -+ s = xasprintf(s, cc); -+ /* + 1 if cc == NUL: handle printf "%c" 0 case - * (and printf "%22c" 0 etc, but still fails for e.g. printf "%-22c" 0) */ -- slen = strlen(s) + (c == '\0'); -+ slen = strlen(s) + (cc == '\0'); - } else { - if (c == 's') { - s = xasprintf(s, getvar_s(arg)); --- -2.27.0 - - -From e62366d32f13e059266e2996a68be023bef309ef Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Mon, 12 Jul 2021 11:27:11 +0200 -Subject: [PATCH 57/61] awk: fix printf "%-10c", 0 - -function old new delta -awk_printf 596 626 +30 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 9 +++++---- - testsuite/awk.tests | 8 ++++++++ - 2 files changed, 13 insertions(+), 4 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 465033f5f..437d87ecf 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -2360,10 +2360,11 @@ static char *awk_printf(node *n, size_t *len) - */ - if (c == 'c') { - char cc = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg); -- s = xasprintf(s, cc); -- /* + 1 if cc == NUL: handle printf "%c" 0 case -- * (and printf "%22c" 0 etc, but still fails for e.g. printf "%-22c" 0) */ -- slen = strlen(s) + (cc == '\0'); -+ char *r = xasprintf(s, cc ? 
cc : '^' /* else strlen will be wrong */); -+ slen = strlen(r); -+ if (cc == '\0') /* if cc is NUL, re-format the string with it */ -+ sprintf(r, s, cc); -+ s = r; - } else { - if (c == 's') { - s = xasprintf(s, getvar_s(arg)); -diff --git a/testsuite/awk.tests b/testsuite/awk.tests -index 242c897d1..3cddb4dd4 100755 ---- a/testsuite/awk.tests -+++ b/testsuite/awk.tests -@@ -415,6 +415,14 @@ testing "awk printf('%c') can output NUL" \ - "awk '{printf(\"hello%c null\n\", 0)}'" "hello\0 null\n" "" "\n" - SKIP= - -+optional FEATURE_AWK_GNU_EXTENSIONS -+testing "awk printf('%-10c') can output NUL" \ -+ "awk 'BEGIN { printf \"[%-10c]\n\", 0 }' | od -tx1" "\ -+0000000 5b 00 20 20 20 20 20 20 20 20 20 5d 0a -+0000015 -+" "" "" -+SKIP= -+ - # testing "description" "command" "result" "infile" "stdin" - testing 'awk negative field access' \ - 'awk 2>&1 -- '\''{ $(-1) }'\' \ --- -2.27.0 - - -From 258057e67d4403d43f48788fabdf874c1bb59502 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Mon, 12 Jul 2021 13:30:30 +0200 -Subject: [PATCH 58/61] awk: in parsing, remove superfluous NEWLINE check; - optimize builtin arg evaluation - -function old new delta -exec_builtin 1149 1145 -4 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 437d87ecf..7a282356d 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -1589,8 +1589,8 @@ static void chain_group(void) - chain_until_rbrace(); - return; - } -- if (tc & (TS_OPSEQ | TC_SEMICOL | TC_NEWLINE)) { -- debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL | TC_NEWLINE\n", __func__); -+ if (tc & (TS_OPSEQ | TC_SEMICOL)) { -+ debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL\n", __func__); - rollback_token(); - chain_expr(OC_EXEC | Vx); - return; -@@ -2582,10 +2582,11 @@ static NOINLINE var *exec_builtin(node *op, var *res) - av[2] = av[3] = NULL; - for (i = 0; i < 4 && op; i++) { - an[i] = nextarg(&op); -- if (isr & 0x09000000) 
-+ if (isr & 0x09000000) { - av[i] = evaluate(an[i], TMPVAR(i)); -- if (isr & 0x08000000) -- as[i] = getvar_s(av[i]); -+ if (isr & 0x08000000) -+ as[i] = getvar_s(av[i]); -+ } - isr >>= 1; - } - --- -2.27.0 - - -From 18fe636700ac5d795027d920922340410f65640e Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Wed, 14 Jul 2021 14:25:07 +0200 -Subject: [PATCH 59/61] awk: tighten parsing - disallow extra semicolons - -'; BEGIN {...}' and 'BEGIN {...} ;; {...}' are not accepted by gawk - -function old new delta -parse_program 332 353 +21 - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 40 ++++++++++++++++++++++++---------------- - 1 file changed, 24 insertions(+), 16 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 7a282356d..2f8a18c8e 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -1634,7 +1634,7 @@ static void chain_group(void) - debug_printf_parse("%s: ST_FOR\n", __func__); - next_token(TC_LPAREN); - n2 = parse_expr(TC_SEMICOL | TC_RPAREN); -- if (t_tclass & TC_RPAREN) { /* for-in */ -+ if (t_tclass & TC_RPAREN) { /* for (I in ARRAY) */ - if (!n2 || n2->info != TI_IN) - syntax_error(EMSG_UNEXP_TOKEN); - n = chain_node(OC_WALKINIT | VV); -@@ -1700,20 +1700,15 @@ static void parse_program(char *p) - for (;;) { - uint32_t tclass; - -- tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE | -- TC_SEMICOL | TC_NEWLINE | TC_BEGIN | TC_END | TC_FUNCDECL); -- -+ tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL -+ | TC_EOF | TC_NEWLINE /* but not TC_SEMICOL */); -+ got_tok: - if (tclass == TC_EOF) { - debug_printf_parse("%s: TC_EOF\n", __func__); - break; - } -- if (tclass & (TC_SEMICOL | TC_NEWLINE)) { -- debug_printf_parse("%s: TC_SEMICOL | TC_NEWLINE\n", __func__); --//NB: gawk allows many newlines, but does not allow more than one semicolon: --// BEGIN {...};; --//would complain "each rule must have a pattern or an action part". 
--//Same message for --// ; BEGIN {...} -+ if (tclass == TC_NEWLINE) { -+ debug_printf_parse("%s: TC_NEWLINE\n", __func__); - continue; - } - if (tclass == TC_BEGIN) { -@@ -1722,7 +1717,7 @@ static void parse_program(char *p) - /* ensure there is no newline between BEGIN and { */ - next_token(TC_LBRACE); - chain_until_rbrace(); -- continue; -+ goto next_tok; - } - if (tclass == TC_END) { - debug_printf_parse("%s: TC_END\n", __func__); -@@ -1730,7 +1725,7 @@ static void parse_program(char *p) - /* ensure there is no newline between END and { */ - next_token(TC_LBRACE); - chain_until_rbrace(); -- continue; -+ goto next_tok; - } - if (tclass == TC_FUNCDECL) { - func *f; -@@ -1765,7 +1760,7 @@ static void parse_program(char *p) - continue; - chain_until_rbrace(); - hash_clear(ahash); -- continue; -+ goto next_tok; - } - seq = &mainseq; - if (tclass & TS_OPSEQ) { -@@ -1784,12 +1779,25 @@ static void parse_program(char *p) - chain_node(OC_PRINT); - } - cn->r.n = mainseq.last; -- continue; -+ goto next_tok; - } - /* tclass == TC_LBRACE */ - debug_printf_parse("%s: TC_LBRACE(?)\n", __func__); - chain_until_rbrace(); -- } -+ next_tok: -+ /* Same as next_token() at the top of the loop, + TC_SEMICOL */ -+ tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL -+ | TC_EOF | TC_NEWLINE | TC_SEMICOL); -+ /* gawk allows many newlines, but does not allow more than one semicolon: -+ * BEGIN {...};; -+ * would complain "each rule must have a pattern or an action part". 
-+ * Same message for -+ * ; BEGIN {...} -+ */ -+ if (tclass != TC_SEMICOL) -+ goto got_tok; /* use this token */ -+ /* else: loop back - ate the semicolon, get and use _next_ token */ -+ } /* for (;;) */ - } - - --- -2.27.0 - - -From 9b502f61277aa48a412dd1a18e7a30b5d4c3d71a Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Wed, 14 Jul 2021 14:33:37 +0200 -Subject: [PATCH 60/61] awk: disallow break/continue outside of loops - -function old new delta -.rodata 104139 104186 +47 -chain_group 610 633 +23 ------------------------------------------------------------------------------- -(add/remove: 0/0 grow/shrink: 2/0 up/down: 70/0) Total: 70 bytes - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 6 ++++-- - testsuite/awk.tests | 9 ++------- - 2 files changed, 6 insertions(+), 9 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 2f8a18c8e..607d69487 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -1671,16 +1671,18 @@ static void chain_group(void) - case OC_BREAK: - debug_printf_parse("%s: OC_BREAK\n", __func__); - n = chain_node(OC_EXEC); -+ if (!break_ptr) -+ syntax_error("'break' not in a loop"); - n->a.n = break_ptr; --//TODO: if break_ptr is NULL, syntax error (not in the loop)? - chain_expr(t_info); - break; - - case OC_CONTINUE: - debug_printf_parse("%s: OC_CONTINUE\n", __func__); - n = chain_node(OC_EXEC); -+ if (!continue_ptr) -+ syntax_error("'continue' not in a loop"); - n->a.n = continue_ptr; --//TODO: if continue_ptr is NULL, syntax error (not in the loop)? - chain_expr(t_info); - break; - -diff --git a/testsuite/awk.tests b/testsuite/awk.tests -index 3cddb4dd4..f53b1efe2 100755 ---- a/testsuite/awk.tests -+++ b/testsuite/awk.tests -@@ -379,19 +379,14 @@ testing "awk -e and ARGC" \ - "" - SKIP= - --# The examples are in fact not valid awk programs (break/continue --# can only be used inside loops). --# But we do accept them outside of loops. --# We had a bug with misparsing "break ; else" sequence. 
--# Test that *that* bug is fixed, using simplest possible scripts: - testing "awk break" \ - "awk -f - 2>&1; echo \$?" \ -- "0\n" \ -+ "awk: -:1: 'break' not in a loop\n1\n" \ - "" \ - 'BEGIN { if (1) break; else a = 1 }' - testing "awk continue" \ - "awk -f - 2>&1; echo \$?" \ -- "0\n" \ -+ "awk: -:1: 'continue' not in a loop\n1\n" \ - "" \ - 'BEGIN { if (1) continue; else a = 1 }' - --- -2.27.0 - - -From 027b43ab6700b85f037fb69c08ad052cff6a7384 Mon Sep 17 00:00:00 2001 -From: Denys Vlasenko -Date: Wed, 14 Jul 2021 16:58:05 +0200 -Subject: [PATCH 61/61] awk: whitespace and debugging tweaks - -Signed-off-by: Denys Vlasenko ---- - editors/awk.c | 133 +++++++++++++++++++++++++------------------------- - 1 file changed, 66 insertions(+), 67 deletions(-) - -diff --git a/editors/awk.c b/editors/awk.c -index 607d69487..3adbca7aa 100644 ---- a/editors/awk.c -+++ b/editors/awk.c -@@ -199,77 +199,78 @@ typedef struct tsplitter_s { - - /* simple token classes */ - /* order and hex values are very important!!! See next_token() */ --#define TC_LPAREN (1 << 0) /* ( */ --#define TC_RPAREN (1 << 1) /* ) */ --#define TC_REGEXP (1 << 2) /* /.../ */ --#define TC_OUTRDR (1 << 3) /* | > >> */ --#define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */ --#define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */ --#define TC_BINOPX (1 << 6) /* two-opnd operator */ --#define TC_IN (1 << 7) /* 'in' */ --#define TC_COMMA (1 << 8) /* , */ --#define TC_PIPE (1 << 9) /* input redirection pipe | */ --#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */ --#define TC_ARRTERM (1 << 11) /* ] */ --#define TC_LBRACE (1 << 12) /* { */ --#define TC_RBRACE (1 << 13) /* } */ --#define TC_SEMICOL (1 << 14) /* ; */ --#define TC_NEWLINE (1 << 15) --#define TC_STATX (1 << 16) /* ctl statement (for, next...) 
*/ --#define TC_WHILE (1 << 17) /* 'while' */ --#define TC_ELSE (1 << 18) /* 'else' */ --#define TC_BUILTIN (1 << 19) -+#define TC_LPAREN (1 << 0) /* ( */ -+#define TC_RPAREN (1 << 1) /* ) */ -+#define TC_REGEXP (1 << 2) /* /.../ */ -+#define TC_OUTRDR (1 << 3) /* | > >> */ -+#define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */ -+#define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */ -+#define TC_BINOPX (1 << 6) /* two-opnd operator */ -+#define TC_IN (1 << 7) /* 'in' */ -+#define TC_COMMA (1 << 8) /* , */ -+#define TC_PIPE (1 << 9) /* input redirection pipe | */ -+#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */ -+#define TC_ARRTERM (1 << 11) /* ] */ -+#define TC_LBRACE (1 << 12) /* { */ -+#define TC_RBRACE (1 << 13) /* } */ -+#define TC_SEMICOL (1 << 14) /* ; */ -+#define TC_NEWLINE (1 << 15) -+#define TC_STATX (1 << 16) /* ctl statement (for, next...) */ -+#define TC_WHILE (1 << 17) /* 'while' */ -+#define TC_ELSE (1 << 18) /* 'else' */ -+#define TC_BUILTIN (1 << 19) - /* This costs ~50 bytes of code. - * A separate class to support deprecated "length" form. If we don't need that - * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH - * can be merged with TC_BUILTIN: - */ --#define TC_LENGTH (1 << 20) /* 'length' */ --#define TC_GETLINE (1 << 21) /* 'getline' */ --#define TC_FUNCDECL (1 << 22) /* 'function' 'func' */ --#define TC_BEGIN (1 << 23) /* 'BEGIN' */ --#define TC_END (1 << 24) /* 'END' */ --#define TC_EOF (1 << 25) --#define TC_VARIABLE (1 << 26) /* name */ --#define TC_ARRAY (1 << 27) /* name[ */ --#define TC_FUNCTION (1 << 28) /* name( */ --#define TC_STRING (1 << 29) /* "..." 
*/ --#define TC_NUMBER (1 << 30) -+#define TC_LENGTH (1 << 20) /* 'length' */ -+#define TC_GETLINE (1 << 21) /* 'getline' */ -+#define TC_FUNCDECL (1 << 22) /* 'function' 'func' */ -+#define TC_BEGIN (1 << 23) /* 'BEGIN' */ -+#define TC_END (1 << 24) /* 'END' */ -+#define TC_EOF (1 << 25) -+#define TC_VARIABLE (1 << 26) /* name */ -+#define TC_ARRAY (1 << 27) /* name[ */ -+#define TC_FUNCTION (1 << 28) /* name( */ -+#define TC_STRING (1 << 29) /* "..." */ -+#define TC_NUMBER (1 << 30) - - #ifndef debug_parse_print_tc --#define debug_parse_print_tc(n) do { \ --if ((n) & TC_LPAREN ) debug_printf_parse(" LPAREN" ); \ --if ((n) & TC_RPAREN ) debug_printf_parse(" RPAREN" ); \ --if ((n) & TC_REGEXP ) debug_printf_parse(" REGEXP" ); \ --if ((n) & TC_OUTRDR ) debug_printf_parse(" OUTRDR" ); \ --if ((n) & TC_UOPPOST ) debug_printf_parse(" UOPPOST" ); \ --if ((n) & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" ); \ --if ((n) & TC_BINOPX ) debug_printf_parse(" BINOPX" ); \ --if ((n) & TC_IN ) debug_printf_parse(" IN" ); \ --if ((n) & TC_COMMA ) debug_printf_parse(" COMMA" ); \ --if ((n) & TC_PIPE ) debug_printf_parse(" PIPE" ); \ --if ((n) & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" ); \ --if ((n) & TC_ARRTERM ) debug_printf_parse(" ARRTERM" ); \ --if ((n) & TC_LBRACE ) debug_printf_parse(" LBRACE" ); \ --if ((n) & TC_RBRACE ) debug_printf_parse(" RBRACE" ); \ --if ((n) & TC_SEMICOL ) debug_printf_parse(" SEMICOL" ); \ --if ((n) & TC_NEWLINE ) debug_printf_parse(" NEWLINE" ); \ --if ((n) & TC_STATX ) debug_printf_parse(" STATX" ); \ --if ((n) & TC_WHILE ) debug_printf_parse(" WHILE" ); \ --if ((n) & TC_ELSE ) debug_printf_parse(" ELSE" ); \ --if ((n) & TC_BUILTIN ) debug_printf_parse(" BUILTIN" ); \ --if ((n) & TC_LENGTH ) debug_printf_parse(" LENGTH" ); \ --if ((n) & TC_GETLINE ) debug_printf_parse(" GETLINE" ); \ --if ((n) & TC_FUNCDECL) debug_printf_parse(" FUNCDECL"); \ --if ((n) & TC_BEGIN ) debug_printf_parse(" BEGIN" ); \ --if ((n) & TC_END ) debug_printf_parse(" END" ); 
\ --if ((n) & TC_EOF ) debug_printf_parse(" EOF" ); \ --if ((n) & TC_VARIABLE) debug_printf_parse(" VARIABLE"); \ --if ((n) & TC_ARRAY ) debug_printf_parse(" ARRAY" ); \ --if ((n) & TC_FUNCTION) debug_printf_parse(" FUNCTION"); \ --if ((n) & TC_STRING ) debug_printf_parse(" STRING" ); \ --if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ --} while (0) -+static void debug_parse_print_tc(uint32_t n) -+{ -+ if (n & TC_LPAREN ) debug_printf_parse(" LPAREN" ); -+ if (n & TC_RPAREN ) debug_printf_parse(" RPAREN" ); -+ if (n & TC_REGEXP ) debug_printf_parse(" REGEXP" ); -+ if (n & TC_OUTRDR ) debug_printf_parse(" OUTRDR" ); -+ if (n & TC_UOPPOST ) debug_printf_parse(" UOPPOST" ); -+ if (n & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" ); -+ if (n & TC_BINOPX ) debug_printf_parse(" BINOPX" ); -+ if (n & TC_IN ) debug_printf_parse(" IN" ); -+ if (n & TC_COMMA ) debug_printf_parse(" COMMA" ); -+ if (n & TC_PIPE ) debug_printf_parse(" PIPE" ); -+ if (n & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" ); -+ if (n & TC_ARRTERM ) debug_printf_parse(" ARRTERM" ); -+ if (n & TC_LBRACE ) debug_printf_parse(" LBRACE" ); -+ if (n & TC_RBRACE ) debug_printf_parse(" RBRACE" ); -+ if (n & TC_SEMICOL ) debug_printf_parse(" SEMICOL" ); -+ if (n & TC_NEWLINE ) debug_printf_parse(" NEWLINE" ); -+ if (n & TC_STATX ) debug_printf_parse(" STATX" ); -+ if (n & TC_WHILE ) debug_printf_parse(" WHILE" ); -+ if (n & TC_ELSE ) debug_printf_parse(" ELSE" ); -+ if (n & TC_BUILTIN ) debug_printf_parse(" BUILTIN" ); -+ if (n & TC_LENGTH ) debug_printf_parse(" LENGTH" ); -+ if (n & TC_GETLINE ) debug_printf_parse(" GETLINE" ); -+ if (n & TC_FUNCDECL) debug_printf_parse(" FUNCDECL"); -+ if (n & TC_BEGIN ) debug_printf_parse(" BEGIN" ); -+ if (n & TC_END ) debug_printf_parse(" END" ); -+ if (n & TC_EOF ) debug_printf_parse(" EOF" ); -+ if (n & TC_VARIABLE) debug_printf_parse(" VARIABLE"); -+ if (n & TC_ARRAY ) debug_printf_parse(" ARRAY" ); -+ if (n & TC_FUNCTION) debug_printf_parse(" FUNCTION"); -+ if (n 
& TC_STRING ) debug_printf_parse(" STRING" ); -+ if (n & TC_NUMBER ) debug_printf_parse(" NUMBER" ); -+} - #endif - - /* combined token classes ("token [class] sets") */ -@@ -417,7 +418,7 @@ static const char tokenlist[] ALIGN1 = - "\5close" "\6system" "\6fflush" "\5atan2" - "\3cos" "\3exp" "\3int" "\3log" - "\4rand" "\3sin" "\4sqrt" "\5srand" -- "\6gensub" "\4gsub" "\5index" /* "\6length" was here */ -+ "\6gensub" "\4gsub" "\5index" /* "\6length" was here */ - "\5match" "\5split" "\7sprintf" "\3sub" - "\6substr" "\7systime" "\10strftime" "\6mktime" - "\7tolower" "\7toupper" NTC -@@ -1802,7 +1803,6 @@ static void parse_program(char *p) - } /* for (;;) */ - } - -- - /* -------- program execution part -------- */ - - /* temporary variables allocator */ -@@ -3510,7 +3510,6 @@ static var *evaluate(node *op, var *res) - #undef sreg - } - -- - /* -------- main & co. -------- */ - - static int awk_exit(void) --- -2.27.0 - diff --git a/busybox-1.33.1.tar.bz2 b/busybox-1.33.1.tar.bz2 deleted file mode 100644 index e0bf11226e61a29ec1c1ef7e46674846e8355c6b..0000000000000000000000000000000000000000 Binary files a/busybox-1.33.1.tar.bz2 and /dev/null differ diff --git a/busybox-1.34.1.tar.bz2 b/busybox-1.34.1.tar.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..5f08f774543760b5355a23ae777beaec93ac3070 Binary files /dev/null and b/busybox-1.34.1.tar.bz2 differ diff --git a/busybox.spec b/busybox.spec index d7b2cedbbc1c6c267ad96f07ab360588f4ba41b1..e613b37c3891cf0d1e81897f3a71d850fd36f171 100644 --- a/busybox.spec +++ b/busybox.spec @@ -1,11 +1,12 @@ #spec file for busybox %if "%{!?VERSION:1}" -%define VERSION 1.33.1 +%define VERSION 1.34.1 %endif %if "%{!?RELEASE:1}" -%define RELEASE 12 +%define RELEASE 16 %endif +Epoch: 1 Name: busybox Version: %{VERSION} @@ -19,13 +20,7 @@ Source1: busybox-static.config Source2: busybox-petitboot.config Source3: busybox-dynamic.config -#backport -Patch6000: backport-CVE-2021-42374.patch -Patch6001: 
backport-CVE-2021-42377.patch -Patch6002: backport-CVE-2021-42373.patch -Patch6003: backport-CVE-2021-42375.patch -Patch6004: backport-CVE-2021-42376.patch -Patch6005: backport-fix-awk-cve.patch +Patch6000: backport-CVE-2022-28391.patch BuildRoot: %_topdir/BUILDROOT #Dependency @@ -101,6 +96,30 @@ install -m 644 docs/busybox.dynamic.1 $RPM_BUILD_ROOT/%{_mandir}/man1/busybox.1 %{_mandir}/man1/busybox.petitboot.1.gz %changelog +* Thu Jul 28 2022 jikui - 1:1.34.1-16 +- Type:bugfix +- Id:NA +- SUG:NA +- DESC:sync openEuler-22.03-LTS + +* Thu May 5 2022 jikui - 1:1.34.1-15 +- Type:bugfix +- Id:NA +- SUG:NA +- DESC:add epoch field in spec file + +* Tue Apr 19 2022 jikui - 1:1.34.1-14 +- Type:CVE +- Id:NA +- SUG:NA +- DESC:fix CVE-2022-28391 + +* Mon Nov 29 2021 jikui - 1:1.34.1-13 +- Type:enhancement +- Id:NA +- SUG:NA +- DESC:update busybox to 1.34.1 + * Wed Nov 25 2021 xiechengliang - 1:1.33.1-12 - Type:CVE - Id:NA diff --git a/busybox.yaml b/busybox.yaml deleted file mode 100644 index 0ab836b8be72cc942f6e1635d32aba83d4794cfe..0000000000000000000000000000000000000000 --- a/busybox.yaml +++ /dev/null @@ -1,5 +0,0 @@ -version_control: git -src_repo: https://git.busybox.net/busybox/ -tag_prefix: "^v" -separator: _ -git_url: https://git.busybox.net/busybox/ \ No newline at end of file