summary refs log tree commit diff stats
path: root/meta/recipes-core/busybox/busybox/0001-awk-fix-CVEs.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-core/busybox/busybox/0001-awk-fix-CVEs.patch')
-rw-r--r-- meta/recipes-core/busybox/busybox/0001-awk-fix-CVEs.patch 3266
1 files changed, 3266 insertions, 0 deletions
diff --git a/meta/recipes-core/busybox/busybox/0001-awk-fix-CVEs.patch b/meta/recipes-core/busybox/busybox/0001-awk-fix-CVEs.patch
new file mode 100644
index 0000000000..c07b53ebfd
--- /dev/null
+++ b/meta/recipes-core/busybox/busybox/0001-awk-fix-CVEs.patch
@@ -0,0 +1,3266 @@
1From cf542caeed195af05fa6205341f829ccee53f8c2 Mon Sep 17 00:00:00 2001
2From: Chen Qi <Qi.Chen@windriver.com>
3Date: Tue, 4 Jan 2022 17:48:03 -0800
4Subject: [PATCH] awk: fix CVEs
5
6The awk CVE fixes are hard to separate, so the following method was used
7to generate the current patch:
8git rev-list --reverse 1_33_2..1_34_1 -- editors/awk.c | xargs git cherry-pick
9git reset HEAD~66 && git add . && git commit
10
11CVE: CVE-2021-42378
12CVE: CVE-2021-42379
13CVE: CVE-2021-42380
14CVE: CVE-2021-42381
15CVE: CVE-2021-42382
16CVE: CVE-2021-42383
17CVE: CVE-2021-42384
18CVE: CVE-2021-42385
19CVE: CVE-2021-42386
20
21Upstream-Status: Backport
22
23Signed-off-by: Chen Qi <Qi.Chen@windriver.com>
24---
25 editors/awk.c | 2060 +++++++++++++++++++++++-----------------
26 testsuite/awk.tests | 62 +-
27 testsuite/printf.tests | 5 +
28 3 files changed, 1264 insertions(+), 863 deletions(-)
29
30diff --git a/editors/awk.c b/editors/awk.c
31index 2c15f9e4e..f6314ac72 100644
32--- a/editors/awk.c
33+++ b/editors/awk.c
34@@ -66,6 +66,8 @@
35 #endif
36 #ifndef debug_printf_parse
37 # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
38+#else
39+# define debug_parse_print_tc(...) ((void)0)
40 #endif
41
42
43@@ -91,7 +93,6 @@ enum {
44 };
45
46 #define MAXVARFMT 240
47-#define MINNVBLOCK 64
48
49 /* variable flags */
50 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
51@@ -101,7 +102,7 @@ enum {
52 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
53 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
54 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
55-#define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
56+#define VF_FSTR 0x1000 /* 1 = don't free() var::string (not malloced, or is owned by something else) */
57 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
58 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
59
60@@ -118,8 +119,8 @@ typedef struct walker_list {
61 /* Variable */
62 typedef struct var_s {
63 unsigned type; /* flags */
64- double number;
65 char *string;
66+ double number;
67 union {
68 int aidx; /* func arg idx (for compilation stage) */
69 struct xhash_s *array; /* array ptr */
70@@ -138,6 +139,7 @@ typedef struct chain_s {
71 /* Function */
72 typedef struct func_s {
73 unsigned nargs;
74+ smallint defined;
75 struct chain_s body;
76 } func;
77
78@@ -177,7 +179,7 @@ typedef struct node_s {
79 struct node_s *n;
80 var *v;
81 int aidx;
82- char *new_progname;
83+ const char *new_progname;
84 regex_t *re;
85 } l;
86 union {
87@@ -190,91 +192,120 @@ typedef struct node_s {
88 } a;
89 } node;
90
91-/* Block of temporary variables */
92-typedef struct nvblock_s {
93- int size;
94- var *pos;
95- struct nvblock_s *prev;
96- struct nvblock_s *next;
97- var nv[];
98-} nvblock;
99-
100 typedef struct tsplitter_s {
101 node n;
102 regex_t re[2];
103 } tsplitter;
104
105 /* simple token classes */
106-/* Order and hex values are very important!!! See next_token() */
107-#define TC_SEQSTART (1 << 0) /* ( */
108-#define TC_SEQTERM (1 << 1) /* ) */
109-#define TC_REGEXP (1 << 2) /* /.../ */
110-#define TC_OUTRDR (1 << 3) /* | > >> */
111-#define TC_UOPPOST (1 << 4) /* unary postfix operator */
112-#define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
113-#define TC_BINOPX (1 << 6) /* two-opnd operator */
114-#define TC_IN (1 << 7)
115-#define TC_COMMA (1 << 8)
116-#define TC_PIPE (1 << 9) /* input redirection pipe */
117-#define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
118-#define TC_ARRTERM (1 << 11) /* ] */
119-#define TC_GRPSTART (1 << 12) /* { */
120-#define TC_GRPTERM (1 << 13) /* } */
121-#define TC_SEMICOL (1 << 14)
122-#define TC_NEWLINE (1 << 15)
123-#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
124-#define TC_WHILE (1 << 17)
125-#define TC_ELSE (1 << 18)
126-#define TC_BUILTIN (1 << 19)
127+/* order and hex values are very important!!! See next_token() */
128+#define TC_LPAREN (1 << 0) /* ( */
129+#define TC_RPAREN (1 << 1) /* ) */
130+#define TC_REGEXP (1 << 2) /* /.../ */
131+#define TC_OUTRDR (1 << 3) /* | > >> */
132+#define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */
133+#define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */
134+#define TC_BINOPX (1 << 6) /* two-opnd operator */
135+#define TC_IN (1 << 7) /* 'in' */
136+#define TC_COMMA (1 << 8) /* , */
137+#define TC_PIPE (1 << 9) /* input redirection pipe | */
138+#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */
139+#define TC_ARRTERM (1 << 11) /* ] */
140+#define TC_LBRACE (1 << 12) /* { */
141+#define TC_RBRACE (1 << 13) /* } */
142+#define TC_SEMICOL (1 << 14) /* ; */
143+#define TC_NEWLINE (1 << 15)
144+#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
145+#define TC_WHILE (1 << 17) /* 'while' */
146+#define TC_ELSE (1 << 18) /* 'else' */
147+#define TC_BUILTIN (1 << 19)
148 /* This costs ~50 bytes of code.
149 * A separate class to support deprecated "length" form. If we don't need that
150 * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
151 * can be merged with TC_BUILTIN:
152 */
153-#define TC_LENGTH (1 << 20)
154-#define TC_GETLINE (1 << 21)
155-#define TC_FUNCDECL (1 << 22) /* 'function' 'func' */
156-#define TC_BEGIN (1 << 23)
157-#define TC_END (1 << 24)
158-#define TC_EOF (1 << 25)
159-#define TC_VARIABLE (1 << 26)
160-#define TC_ARRAY (1 << 27)
161-#define TC_FUNCTION (1 << 28)
162-#define TC_STRING (1 << 29)
163-#define TC_NUMBER (1 << 30)
164-
165-#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
166-
167-/* combined token classes */
168-#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
169-//#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
170-#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
171- | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
172- | TC_SEQSTART | TC_STRING | TC_NUMBER)
173-
174-#define TC_STATEMNT (TC_STATX | TC_WHILE)
175-#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
176+#define TC_LENGTH (1 << 20) /* 'length' */
177+#define TC_GETLINE (1 << 21) /* 'getline' */
178+#define TC_FUNCDECL (1 << 22) /* 'function' 'func' */
179+#define TC_BEGIN (1 << 23) /* 'BEGIN' */
180+#define TC_END (1 << 24) /* 'END' */
181+#define TC_EOF (1 << 25)
182+#define TC_VARIABLE (1 << 26) /* name */
183+#define TC_ARRAY (1 << 27) /* name[ */
184+#define TC_FUNCTION (1 << 28) /* name( */
185+#define TC_STRING (1 << 29) /* "..." */
186+#define TC_NUMBER (1 << 30)
187+
188+#ifndef debug_parse_print_tc
189+static void debug_parse_print_tc(uint32_t n)
190+{
191+ if (n & TC_LPAREN ) debug_printf_parse(" LPAREN" );
192+ if (n & TC_RPAREN ) debug_printf_parse(" RPAREN" );
193+ if (n & TC_REGEXP ) debug_printf_parse(" REGEXP" );
194+ if (n & TC_OUTRDR ) debug_printf_parse(" OUTRDR" );
195+ if (n & TC_UOPPOST ) debug_printf_parse(" UOPPOST" );
196+ if (n & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" );
197+ if (n & TC_BINOPX ) debug_printf_parse(" BINOPX" );
198+ if (n & TC_IN ) debug_printf_parse(" IN" );
199+ if (n & TC_COMMA ) debug_printf_parse(" COMMA" );
200+ if (n & TC_PIPE ) debug_printf_parse(" PIPE" );
201+ if (n & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" );
202+ if (n & TC_ARRTERM ) debug_printf_parse(" ARRTERM" );
203+ if (n & TC_LBRACE ) debug_printf_parse(" LBRACE" );
204+ if (n & TC_RBRACE ) debug_printf_parse(" RBRACE" );
205+ if (n & TC_SEMICOL ) debug_printf_parse(" SEMICOL" );
206+ if (n & TC_NEWLINE ) debug_printf_parse(" NEWLINE" );
207+ if (n & TC_STATX ) debug_printf_parse(" STATX" );
208+ if (n & TC_WHILE ) debug_printf_parse(" WHILE" );
209+ if (n & TC_ELSE ) debug_printf_parse(" ELSE" );
210+ if (n & TC_BUILTIN ) debug_printf_parse(" BUILTIN" );
211+ if (n & TC_LENGTH ) debug_printf_parse(" LENGTH" );
212+ if (n & TC_GETLINE ) debug_printf_parse(" GETLINE" );
213+ if (n & TC_FUNCDECL) debug_printf_parse(" FUNCDECL");
214+ if (n & TC_BEGIN ) debug_printf_parse(" BEGIN" );
215+ if (n & TC_END ) debug_printf_parse(" END" );
216+ if (n & TC_EOF ) debug_printf_parse(" EOF" );
217+ if (n & TC_VARIABLE) debug_printf_parse(" VARIABLE");
218+ if (n & TC_ARRAY ) debug_printf_parse(" ARRAY" );
219+ if (n & TC_FUNCTION) debug_printf_parse(" FUNCTION");
220+ if (n & TC_STRING ) debug_printf_parse(" STRING" );
221+ if (n & TC_NUMBER ) debug_printf_parse(" NUMBER" );
222+}
223+#endif
224+
225+/* combined token classes ("token [class] sets") */
226+#define TS_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
227+
228+#define TS_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
229+//#define TS_UNARYOP (TS_UOPPRE | TC_UOPPOST)
230+#define TS_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
231+ | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
232+ | TC_LPAREN | TC_STRING | TC_NUMBER)
233+
234+#define TS_LVALUE (TC_VARIABLE | TC_ARRAY)
235+#define TS_STATEMNT (TC_STATX | TC_WHILE)
236
237 /* word tokens, cannot mean something else if not expected */
238-#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE \
239- | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
240- | TC_FUNCDECL | TC_BEGIN | TC_END)
241+#define TS_WORD (TC_IN | TS_STATEMNT | TC_ELSE \
242+ | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
243+ | TC_FUNCDECL | TC_BEGIN | TC_END)
244
245 /* discard newlines after these */
246-#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
247- | TC_BINOP | TC_OPTERM)
248+#define TS_NOTERM (TS_BINOP | TC_COMMA | TC_LBRACE | TC_RBRACE \
249+ | TC_SEMICOL | TC_NEWLINE)
250
251 /* what can expression begin with */
252-#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
253+#define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP)
254 /* what can group begin with */
255-#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
256+#define TS_GRPSEQ (TS_OPSEQ | TS_STATEMNT \
257+ | TC_SEMICOL | TC_NEWLINE | TC_LBRACE)
258
259-/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
260+/* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */
261 /* operator is inserted between them */
262-#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
263+#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_RPAREN \
264 | TC_STRING | TC_NUMBER | TC_UOPPOST \
265 | TC_LENGTH)
266-#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
267+#define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE)
268
269 #define OF_RES1 0x010000
270 #define OF_RES2 0x020000
271@@ -284,13 +315,12 @@ typedef struct tsplitter_s {
272 #define OF_CHECKED 0x200000
273 #define OF_REQUIRED 0x400000
274
275-
276 /* combined operator flags */
277 #define xx 0
278 #define xV OF_RES2
279 #define xS (OF_RES2 | OF_STR2)
280 #define Vx OF_RES1
281-#define Rx (OF_RES1 | OF_NUM1 | OF_REQUIRED)
282+#define Rx OF_REQUIRED
283 #define VV (OF_RES1 | OF_RES2)
284 #define Nx (OF_RES1 | OF_NUM1)
285 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
286@@ -302,8 +332,7 @@ typedef struct tsplitter_s {
287 #define OPNMASK 0x007F
288
289 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
290- * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
291- * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
292+ * (for builtins it has different meaning)
293 */
294 #undef P
295 #undef PRIMASK
296@@ -313,10 +342,8 @@ typedef struct tsplitter_s {
297 #define PRIMASK2 0x7E000000
298
299 /* Operation classes */
300-
301 #define SHIFT_TIL_THIS 0x0600
302 #define RECUR_FROM_THIS 0x1000
303-
304 enum {
305 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
306 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
307@@ -358,8 +385,8 @@ enum {
308 #define NTCC '\377'
309
310 static const char tokenlist[] ALIGN1 =
311- "\1(" NTC /* TC_SEQSTART */
312- "\1)" NTC /* TC_SEQTERM */
313+ "\1(" NTC /* TC_LPAREN */
314+ "\1)" NTC /* TC_RPAREN */
315 "\1/" NTC /* TC_REGEXP */
316 "\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */
317 "\2++" "\2--" NTC /* TC_UOPPOST */
318@@ -376,8 +403,8 @@ static const char tokenlist[] ALIGN1 =
319 "\1|" NTC /* TC_PIPE */
320 "\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */
321 "\1]" NTC /* TC_ARRTERM */
322- "\1{" NTC /* TC_GRPSTART */
323- "\1}" NTC /* TC_GRPTERM */
324+ "\1{" NTC /* TC_LBRACE */
325+ "\1}" NTC /* TC_RBRACE */
326 "\1;" NTC /* TC_SEMICOL */
327 "\1\n" NTC /* TC_NEWLINE */
328 "\2if" "\2do" "\3for" "\5break" /* TC_STATX */
329@@ -391,7 +418,7 @@ static const char tokenlist[] ALIGN1 =
330 "\5close" "\6system" "\6fflush" "\5atan2"
331 "\3cos" "\3exp" "\3int" "\3log"
332 "\4rand" "\3sin" "\4sqrt" "\5srand"
333- "\6gensub" "\4gsub" "\5index" /* "\6length" was here */
334+ "\6gensub" "\4gsub" "\5index" /* "\6length" was here */
335 "\5match" "\5split" "\7sprintf" "\3sub"
336 "\6substr" "\7systime" "\10strftime" "\6mktime"
337 "\7tolower" "\7toupper" NTC
338@@ -403,25 +430,32 @@ static const char tokenlist[] ALIGN1 =
339 /* compiler adds trailing "\0" */
340 ;
341
342-#define OC_B OC_BUILTIN
343-
344 static const uint32_t tokeninfo[] ALIGN4 = {
345 0,
346 0,
347- OC_REGEXP,
348+#define TI_REGEXP OC_REGEXP
349+ TI_REGEXP,
350 xS|'a', xS|'w', xS|'|',
351 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
352- OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
353+#define TI_PREINC (OC_UNARY|xV|P(9)|'P')
354+#define TI_PREDEC (OC_UNARY|xV|P(9)|'M')
355+ TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5),
356 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
357 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
358 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
359 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
360 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
361- OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
362- OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
363- OC_IN|SV|P(49), /* TC_IN */
364- OC_COMMA|SS|P(80),
365- OC_PGETLINE|SV|P(37),
366+#define TI_LESS (OC_COMPARE|VV|P(39)|2)
367+ TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
368+#define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?')
369+#define TI_COLON (OC_COLON|xx|P(67)|':')
370+ OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON,
371+#define TI_IN (OC_IN|SV|P(49))
372+ TI_IN,
373+#define TI_COMMA (OC_COMMA|SS|P(80))
374+ TI_COMMA,
375+#define TI_PGETLINE (OC_PGETLINE|SV|P(37))
376+ TI_PGETLINE,
377 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
378 0, /* ] */
379 0,
380@@ -434,20 +468,45 @@ static const uint32_t tokeninfo[] ALIGN4 = {
381 OC_RETURN|Vx, OC_EXIT|Nx,
382 ST_WHILE,
383 0, /* else */
384- OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
385- OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
386- OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
387- OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
388- OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
389- OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */
390- OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
391- OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
392- OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
393- OC_FBLTIN|Sx|F_le, /* TC_LENGTH */
394- OC_GETLINE|SV|P(0),
395- 0, 0,
396- 0,
397- 0 /* TC_END */
398+// OC_B's are builtins with enforced minimum number of arguments (two upper bits).
399+// Highest byte bit pattern: nn s3s2s1 v3v2v1
400+// nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var
401+// OC_F's are builtins with zero or one argument.
402+// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt
403+// Check for no args is present in builtins' code (not in this table): rand, systime
404+// Have one _optional_ arg: fflush, srand, length
405+#define OC_B OC_BUILTIN
406+#define OC_F OC_FBLTIN
407+#define A1 P(0x40) /*one arg*/
408+#define A2 P(0x80) /*two args*/
409+#define A3 P(0xc0) /*three args*/
410+#define __v P(1)
411+#define _vv P(3)
412+#define __s__v P(9)
413+#define __s_vv P(0x0b)
414+#define __svvv P(0x0f)
415+#define _ss_vv P(0x1b)
416+#define _s_vv_ P(0x16)
417+#define ss_vv_ P(0x36)
418+ OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or
419+ OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor
420+ OC_F|F_cl|Sx|Rx, OC_F|F_sy|Sx|Rx, OC_F|F_ff|Sx, OC_B|B_a2|_vv|A2, // close system fflush atan2
421+ OC_F|F_co|Nx|Rx, OC_F|F_ex|Nx|Rx, OC_F|F_in|Nx|Rx, OC_F|F_lg|Nx|Rx, // cos exp int log
422+ OC_F|F_rn, OC_F|F_si|Nx|Rx, OC_F|F_sq|Nx|Rx, OC_F|F_sr|Nx, // rand sin sqrt srand
423+ OC_B|B_ge|_s_vv_|A3,OC_B|B_gs|ss_vv_|A2,OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/
424+ OC_B|B_ma|__s__v|A2,OC_B|B_sp|__s_vv|A2,OC_SPRINTF, OC_B|B_su|ss_vv_|A2,// match split sprintf sub
425+ OC_B|B_ss|__svvv|A2,OC_F|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime
426+ OC_B|B_lo|__s__v|A1,OC_B|B_up|__s__v|A1, // tolower toupper
427+ OC_F|F_le|Sx, // length
428+ OC_GETLINE|SV, // getline
429+ 0, 0, // func function
430+ 0, // BEGIN
431+ 0 // END
432+#undef A1
433+#undef A2
434+#undef A3
435+#undef OC_B
436+#undef OC_F
437 };
438
439 /* internal variable names and their initial values */
440@@ -488,21 +547,29 @@ struct globals {
441 chain *seq;
442 node *break_ptr, *continue_ptr;
443 rstream *iF;
444- xhash *vhash, *ahash, *fdhash, *fnhash;
445+ xhash *ahash; /* argument names, used only while parsing function bodies */
446+ xhash *fnhash; /* function names, used only in parsing stage */
447+ xhash *vhash; /* variables and arrays */
448+ //xhash *fdhash; /* file objects, used only in execution stage */
449+ //we are reusing ahash as fdhash, via define (see later)
450 const char *g_progname;
451 int g_lineno;
452 int nfields;
453 int maxfields; /* used in fsrealloc() only */
454 var *Fields;
455- nvblock *g_cb;
456 char *g_pos;
457- char *g_buf;
458+ char g_saved_ch;
459 smallint icase;
460 smallint exiting;
461 smallint nextrec;
462 smallint nextfile;
463 smallint is_f0_split;
464 smallint t_rollback;
465+
466+ /* former statics from various functions */
467+ smallint next_token__concat_inserted;
468+ uint32_t next_token__save_tclass;
469+ uint32_t next_token__save_info;
470 };
471 struct globals2 {
472 uint32_t t_info; /* often used */
473@@ -515,32 +582,35 @@ struct globals2 {
474 /* former statics from various functions */
475 char *split_f0__fstrings;
476
477- uint32_t next_token__save_tclass;
478- uint32_t next_token__save_info;
479- uint32_t next_token__ltclass;
480- smallint next_token__concat_inserted;
481-
482- smallint next_input_file__files_happen;
483 rstream next_input_file__rsm;
484+ smallint next_input_file__files_happen;
485+
486+ smalluint exitcode;
487
488- var *evaluate__fnargs;
489 unsigned evaluate__seed;
490+ var *evaluate__fnargs;
491 regex_t evaluate__sreg;
492
493- var ptest__v;
494+ var ptest__tmpvar;
495+ var awk_printf__tmpvar;
496+ var as_regex__tmpvar;
497+ var exit__tmpvar;
498+ var main__tmpvar;
499
500 tsplitter exec_builtin__tspl;
501
502 /* biggest and least used members go last */
503 tsplitter fsplitter, rsplitter;
504+
505+ char g_buf[MAXVARFMT + 1];
506 };
507 #define G1 (ptr_to_globals[-1])
508 #define G (*(struct globals2 *)ptr_to_globals)
509 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
510-/*char G1size[sizeof(G1)]; - 0x74 */
511-/*char Gsize[sizeof(G)]; - 0x1c4 */
512+//char G1size[sizeof(G1)]; // 0x70
513+//char Gsize[sizeof(G)]; // 0x2f8
514 /* Trying to keep most of members accessible with short offsets: */
515-/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
516+//char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; // 0x7c
517 #define t_double (G1.t_double )
518 #define beginseq (G1.beginseq )
519 #define mainseq (G1.mainseq )
520@@ -549,18 +619,20 @@ struct globals2 {
521 #define break_ptr (G1.break_ptr )
522 #define continue_ptr (G1.continue_ptr)
523 #define iF (G1.iF )
524-#define vhash (G1.vhash )
525 #define ahash (G1.ahash )
526-#define fdhash (G1.fdhash )
527 #define fnhash (G1.fnhash )
528+#define vhash (G1.vhash )
529+#define fdhash ahash
530+//^^^^^^^^^^^^^^^^^^ ahash is cleared after every function parsing,
531+// and ends up empty after parsing phase. Thus, we can simply reuse it
532+// for fdhash in execution stage.
533 #define g_progname (G1.g_progname )
534 #define g_lineno (G1.g_lineno )
535 #define nfields (G1.nfields )
536 #define maxfields (G1.maxfields )
537 #define Fields (G1.Fields )
538-#define g_cb (G1.g_cb )
539 #define g_pos (G1.g_pos )
540-#define g_buf (G1.g_buf )
541+#define g_saved_ch (G1.g_saved_ch )
542 #define icase (G1.icase )
543 #define exiting (G1.exiting )
544 #define nextrec (G1.nextrec )
545@@ -574,25 +646,13 @@ struct globals2 {
546 #define intvar (G.intvar )
547 #define fsplitter (G.fsplitter )
548 #define rsplitter (G.rsplitter )
549+#define g_buf (G.g_buf )
550 #define INIT_G() do { \
551 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
552- G.next_token__ltclass = TC_OPTERM; \
553+ t_tclass = TC_NEWLINE; \
554 G.evaluate__seed = 1; \
555 } while (0)
556
557-
558-/* function prototypes */
559-static void handle_special(var *);
560-static node *parse_expr(uint32_t);
561-static void chain_group(void);
562-static var *evaluate(node *, var *);
563-static rstream *next_input_file(void);
564-static int fmt_num(char *, int, const char *, double, int);
565-static int awk_exit(int) NORETURN;
566-
567-/* ---- error handling ---- */
568-
569-static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
570 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
571 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
572 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
573@@ -604,10 +664,7 @@ static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
574 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
575 static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field";
576
577-static void zero_out_var(var *vp)
578-{
579- memset(vp, 0, sizeof(*vp));
580-}
581+static int awk_exit(void) NORETURN;
582
583 static void syntax_error(const char *message) NORETURN;
584 static void syntax_error(const char *message)
585@@ -638,12 +695,40 @@ static xhash *hash_init(void)
586 return newhash;
587 }
588
589+static void hash_clear(xhash *hash)
590+{
591+ unsigned i;
592+ hash_item *hi, *thi;
593+
594+ for (i = 0; i < hash->csize; i++) {
595+ hi = hash->items[i];
596+ while (hi) {
597+ thi = hi;
598+ hi = hi->next;
599+//FIXME: this assumes that it's a hash of *variables*:
600+ free(thi->data.v.string);
601+ free(thi);
602+ }
603+ hash->items[i] = NULL;
604+ }
605+ hash->glen = hash->nel = 0;
606+}
607+
608+#if 0 //UNUSED
609+static void hash_free(xhash *hash)
610+{
611+ hash_clear(hash);
612+ free(hash->items);
613+ free(hash);
614+}
615+#endif
616+
617 /* find item in hash, return ptr to data, NULL if not found */
618-static void *hash_search(xhash *hash, const char *name)
619+static NOINLINE void *hash_search3(xhash *hash, const char *name, unsigned idx)
620 {
621 hash_item *hi;
622
623- hi = hash->items[hashidx(name) % hash->csize];
624+ hi = hash->items[idx % hash->csize];
625 while (hi) {
626 if (strcmp(hi->name, name) == 0)
627 return &hi->data;
628@@ -652,6 +737,11 @@ static void *hash_search(xhash *hash, const char *name)
629 return NULL;
630 }
631
632+static void *hash_search(xhash *hash, const char *name)
633+{
634+ return hash_search3(hash, name, hashidx(name));
635+}
636+
637 /* grow hash if it becomes too big */
638 static void hash_rebuild(xhash *hash)
639 {
640@@ -687,16 +777,17 @@ static void *hash_find(xhash *hash, const char *name)
641 unsigned idx;
642 int l;
643
644- hi = hash_search(hash, name);
645+ idx = hashidx(name);
646+ hi = hash_search3(hash, name, idx);
647 if (!hi) {
648- if (++hash->nel / hash->csize > 10)
649+ if (++hash->nel > hash->csize * 8)
650 hash_rebuild(hash);
651
652 l = strlen(name) + 1;
653 hi = xzalloc(sizeof(*hi) + l);
654 strcpy(hi->name, name);
655
656- idx = hashidx(name) % hash->csize;
657+ idx = idx % hash->csize;
658 hi->next = hash->items[idx];
659 hash->items[idx] = hi;
660 hash->glen += l;
661@@ -731,7 +822,7 @@ static void hash_remove(xhash *hash, const char *name)
662
663 static char *skip_spaces(char *p)
664 {
665- while (1) {
666+ for (;;) {
667 if (*p == '\\' && p[1] == '\n') {
668 p++;
669 t_lineno++;
670@@ -747,8 +838,10 @@ static char *skip_spaces(char *p)
671 static char *nextword(char **s)
672 {
673 char *p = *s;
674- while (*(*s)++ != '\0')
675+ char *q = p;
676+ while (*q++ != '\0')
677 continue;
678+ *s = q;
679 return p;
680 }
681
682@@ -811,10 +904,27 @@ static double my_strtod(char **pp)
683
684 /* -------- working with variables (set/get/copy/etc) -------- */
685
686-static xhash *iamarray(var *v)
687+static void fmt_num(const char *format, double n)
688 {
689- var *a = v;
690+ if (n == (long long)n) {
691+ snprintf(g_buf, MAXVARFMT, "%lld", (long long)n);
692+ } else {
693+ const char *s = format;
694+ char c;
695+
696+ do { c = *s; } while (c && *++s);
697+ if (strchr("diouxX", c)) {
698+ snprintf(g_buf, MAXVARFMT, format, (int)n);
699+ } else if (strchr("eEfFgGaA", c)) {
700+ snprintf(g_buf, MAXVARFMT, format, n);
701+ } else {
702+ syntax_error(EMSG_INV_FMT);
703+ }
704+ }
705+}
706
707+static xhash *iamarray(var *a)
708+{
709 while (a->type & VF_CHILD)
710 a = a->x.parent;
711
712@@ -825,23 +935,7 @@ static xhash *iamarray(var *v)
713 return a->x.array;
714 }
715
716-static void clear_array(xhash *array)
717-{
718- unsigned i;
719- hash_item *hi, *thi;
720-
721- for (i = 0; i < array->csize; i++) {
722- hi = array->items[i];
723- while (hi) {
724- thi = hi;
725- hi = hi->next;
726- free(thi->data.v.string);
727- free(thi);
728- }
729- array->items[i] = NULL;
730- }
731- array->glen = array->nel = 0;
732-}
733+#define clear_array(array) hash_clear(array)
734
735 /* clear a variable */
736 static var *clrvar(var *v)
737@@ -855,6 +949,8 @@ static var *clrvar(var *v)
738 return v;
739 }
740
741+static void handle_special(var *);
742+
743 /* assign string value to variable */
744 static var *setvar_p(var *v, char *value)
745 {
746@@ -901,7 +997,7 @@ static const char *getvar_s(var *v)
747 {
748 /* if v is numeric and has no cached string, convert it to string */
749 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
750- fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
751+ fmt_num(getvar_s(intvar[CONVFMT]), v->number);
752 v->string = xstrdup(g_buf);
753 v->type |= VF_CACHED;
754 }
755@@ -920,6 +1016,7 @@ static double getvar_i(var *v)
756 v->number = my_strtod(&s);
757 debug_printf_eval("%f (s:'%s')\n", v->number, s);
758 if (v->type & VF_USER) {
759+//TODO: skip_spaces() also skips backslash+newline, is it intended here?
760 s = skip_spaces(s);
761 if (*s != '\0')
762 v->type &= ~VF_USER;
763@@ -981,94 +1078,28 @@ static int istrue(var *v)
764 return (v->string && v->string[0]);
765 }
766
767-/* temporary variables allocator. Last allocated should be first freed */
768-static var *nvalloc(int n)
769-{
770- nvblock *pb = NULL;
771- var *v, *r;
772- int size;
773-
774- while (g_cb) {
775- pb = g_cb;
776- if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
777- break;
778- g_cb = g_cb->next;
779- }
780-
781- if (!g_cb) {
782- size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
783- g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
784- g_cb->size = size;
785- g_cb->pos = g_cb->nv;
786- g_cb->prev = pb;
787- /*g_cb->next = NULL; - xzalloc did it */
788- if (pb)
789- pb->next = g_cb;
790- }
791-
792- v = r = g_cb->pos;
793- g_cb->pos += n;
794-
795- while (v < g_cb->pos) {
796- v->type = 0;
797- v->string = NULL;
798- v++;
799- }
800-
801- return r;
802-}
803-
804-static void nvfree(var *v)
805-{
806- var *p;
807-
808- if (v < g_cb->nv || v >= g_cb->pos)
809- syntax_error(EMSG_INTERNAL_ERROR);
810-
811- for (p = v; p < g_cb->pos; p++) {
812- if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
813- clear_array(iamarray(p));
814- free(p->x.array->items);
815- free(p->x.array);
816- }
817- if (p->type & VF_WALK) {
818- walker_list *n;
819- walker_list *w = p->x.walker;
820- debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
821- p->x.walker = NULL;
822- while (w) {
823- n = w->prev;
824- debug_printf_walker(" free(%p)\n", w);
825- free(w);
826- w = n;
827- }
828- }
829- clrvar(p);
830- }
831-
832- g_cb->pos = v;
833- while (g_cb->prev && g_cb->pos == g_cb->nv) {
834- g_cb = g_cb->prev;
835- }
836-}
837-
838 /* ------- awk program text parsing ------- */
839
840-/* Parse next token pointed by global pos, place results into global ttt.
841- * If token isn't expected, give away. Return token class
842+/* Parse next token pointed by global pos, place results into global t_XYZ variables.
843+ * If token isn't expected, print error message and die.
844+ * Return token class (also store it in t_tclass).
845 */
846 static uint32_t next_token(uint32_t expected)
847 {
848-#define concat_inserted (G.next_token__concat_inserted)
849-#define save_tclass (G.next_token__save_tclass)
850-#define save_info (G.next_token__save_info)
851-/* Initialized to TC_OPTERM: */
852-#define ltclass (G.next_token__ltclass)
853+#define concat_inserted (G1.next_token__concat_inserted)
854+#define save_tclass (G1.next_token__save_tclass)
855+#define save_info (G1.next_token__save_info)
856
857- char *p, *s;
858+ char *p;
859 const char *tl;
860- uint32_t tc;
861 const uint32_t *ti;
862+ uint32_t tc, last_token_class;
863+
864+ last_token_class = t_tclass; /* t_tclass is initialized to TC_NEWLINE */
865+
866+ debug_printf_parse("%s() expected(%x):", __func__, expected);
867+ debug_parse_print_tc(expected);
868+ debug_printf_parse("\n");
869
870 if (t_rollback) {
871 debug_printf_parse("%s: using rolled-back token\n", __func__);
872@@ -1080,6 +1111,10 @@ static uint32_t next_token(uint32_t expected)
873 t_info = save_info;
874 } else {
875 p = g_pos;
876+ if (g_saved_ch != '\0') {
877+ *p = g_saved_ch;
878+ g_saved_ch = '\0';
879+ }
880 readnext:
881 p = skip_spaces(p);
882 g_lineno = t_lineno;
883@@ -1087,15 +1122,12 @@ static uint32_t next_token(uint32_t expected)
884 while (*p != '\n' && *p != '\0')
885 p++;
886
887- if (*p == '\n')
888- t_lineno++;
889-
890 if (*p == '\0') {
891 tc = TC_EOF;
892 debug_printf_parse("%s: token found: TC_EOF\n", __func__);
893 } else if (*p == '\"') {
894 /* it's a string */
895- t_string = s = ++p;
896+ char *s = t_string = ++p;
897 while (*p != '\"') {
898 char *pp;
899 if (*p == '\0' || *p == '\n')
900@@ -1110,7 +1142,7 @@ static uint32_t next_token(uint32_t expected)
901 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
902 } else if ((expected & TC_REGEXP) && *p == '/') {
903 /* it's regexp */
904- t_string = s = ++p;
905+ char *s = t_string = ++p;
906 while (*p != '/') {
907 if (*p == '\0' || *p == '\n')
908 syntax_error(EMSG_UNEXP_EOS);
909@@ -1141,6 +1173,11 @@ static uint32_t next_token(uint32_t expected)
910 tc = TC_NUMBER;
911 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
912 } else {
913+ char *end_of_name;
914+
915+ if (*p == '\n')
916+ t_lineno++;
917+
918 /* search for something known */
919 tl = tokenlist;
920 tc = 0x00000001;
921@@ -1155,9 +1192,9 @@ static uint32_t next_token(uint32_t expected)
922 * token matches,
923 * and it's not a longer word,
924 */
925- if ((tc & (expected | TC_WORD | TC_NEWLINE))
926+ if ((tc & (expected | TS_WORD | TC_NEWLINE))
927 && strncmp(p, tl, l) == 0
928- && !((tc & TC_WORD) && isalnum_(p[l]))
929+ && !((tc & TS_WORD) && isalnum_(p[l]))
930 ) {
931 /* then this is what we are looking for */
932 t_info = *ti;
933@@ -1174,67 +1211,94 @@ static uint32_t next_token(uint32_t expected)
934 if (!isalnum_(*p))
935 syntax_error(EMSG_UNEXP_TOKEN); /* no */
936 /* yes */
937- t_string = --p;
938- while (isalnum_(*++p)) {
939- p[-1] = *p;
940- }
941- p[-1] = '\0';
942- tc = TC_VARIABLE;
943- /* also consume whitespace between functionname and bracket */
944- if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
945+ t_string = p;
946+ while (isalnum_(*p))
947+ p++;
948+ end_of_name = p;
949+
950+ if (last_token_class == TC_FUNCDECL)
951+ /* eat space in "function FUNC (...) {...}" declaration */
952 p = skip_spaces(p);
953+ else if (expected & TC_ARRAY) {
954+ /* eat space between array name and [ */
955+ char *s = skip_spaces(p);
956+ if (*s == '[') /* array ref, not just a name? */
957+ p = s;
958+ }
959+ /* else: do NOT consume whitespace after variable name!
960+ * gawk allows definition "function FUNC (p) {...}" - note space,
961+ * but disallows the call "FUNC (p)" because it isn't one -
962+ * expression "v (a)" should NOT be parsed as TC_FUNCTION:
963+ * it is a valid concatenation if "v" is a variable,
964+ * not a function name (and type of name is not known at parse time).
965+ */
966+
967 if (*p == '(') {
968+ p++;
969 tc = TC_FUNCTION;
970 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
971+ } else if (*p == '[') {
972+ p++;
973+ tc = TC_ARRAY;
974+ debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
975 } else {
976- if (*p == '[') {
977- p++;
978- tc = TC_ARRAY;
979- debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
980- } else
981- debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
982+ tc = TC_VARIABLE;
983+ debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
984+ if (end_of_name == p) {
985+ /* there is no space for trailing NUL in t_string!
986+ * We need to save the char we are going to NUL.
987+ * (we'll use it in future call to next_token())
988+ */
989+ g_saved_ch = *end_of_name;
990+// especially pathological example is V="abc"; V.2 - it's V concatenated to .2
991+// (it evaluates to "abc0.2"). Because of this case, we can't simply cache
992+// '.' and analyze it later: we also have to *store it back* in next
993+// next_token(), in order to give my_strtod() the undamaged ".2" string.
994+ }
995 }
996+ *end_of_name = '\0'; /* terminate t_string */
997 }
998 token_found:
999 g_pos = p;
1000
1001 /* skipping newlines in some cases */
1002- if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1003+ if ((last_token_class & TS_NOTERM) && (tc & TC_NEWLINE))
1004 goto readnext;
1005
1006 /* insert concatenation operator when needed */
1007- debug_printf_parse("%s: %x %x %x concat_inserted?\n", __func__,
1008- (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP));
1009- if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)
1010- && !(ltclass == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */
1011+ debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__,
1012+ (last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP),
1013+ !(last_token_class == TC_LENGTH && tc == TC_LPAREN));
1014+ if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP)
1015+ && !(last_token_class == TC_LENGTH && tc == TC_LPAREN) /* but not for "length(..." */
1016 ) {
1017 concat_inserted = TRUE;
1018 save_tclass = tc;
1019 save_info = t_info;
1020- tc = TC_BINOP;
1021+ tc = TC_BINOPX;
1022 t_info = OC_CONCAT | SS | P(35);
1023 }
1024
1025- debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, t_tclass);
1026 t_tclass = tc;
1027+ debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, tc);
1028 }
1029- ltclass = t_tclass;
1030-
1031 /* Are we ready for this? */
1032- if (!(ltclass & expected)) {
1033- syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1034+ if (!(t_tclass & expected)) {
1035+ syntax_error((last_token_class & (TC_NEWLINE | TC_EOF)) ?
1036 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1037 }
1038
1039- debug_printf_parse("%s: returning, ltclass:%x t_double:%f\n", __func__, ltclass, t_double);
1040- return ltclass;
1041+ debug_printf_parse("%s: returning, t_double:%f t_tclass:", __func__, t_double);
1042+ debug_parse_print_tc(t_tclass);
1043+ debug_printf_parse("\n");
1044+
1045+ return t_tclass;
1046 #undef concat_inserted
1047 #undef save_tclass
1048 #undef save_info
1049-#undef ltclass
1050 }
1051
1052-static void rollback_token(void)
1053+static ALWAYS_INLINE void rollback_token(void)
1054 {
1055 t_rollback = TRUE;
1056 }
1057@@ -1251,169 +1315,188 @@ static node *new_node(uint32_t info)
1058
1059 static void mk_re_node(const char *s, node *n, regex_t *re)
1060 {
1061- n->info = OC_REGEXP;
1062+ n->info = TI_REGEXP;
1063 n->l.re = re;
1064 n->r.ire = re + 1;
1065 xregcomp(re, s, REG_EXTENDED);
1066 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1067 }
1068
1069-static node *condition(void)
1070+static node *parse_expr(uint32_t);
1071+
1072+static node *parse_lrparen_list(void)
1073 {
1074- next_token(TC_SEQSTART);
1075- return parse_expr(TC_SEQTERM);
1076+ next_token(TC_LPAREN);
1077+ return parse_expr(TC_RPAREN);
1078 }
1079
1080 /* parse expression terminated by given argument, return ptr
1081 * to built subtree. Terminator is eaten by parse_expr */
1082-static node *parse_expr(uint32_t iexp)
1083+static node *parse_expr(uint32_t term_tc)
1084 {
1085 node sn;
1086 node *cn = &sn;
1087 node *vn, *glptr;
1088- uint32_t tc, xtc;
1089+ uint32_t tc, expected_tc;
1090 var *v;
1091
1092- debug_printf_parse("%s(%x)\n", __func__, iexp);
1093+ debug_printf_parse("%s() term_tc(%x):", __func__, term_tc);
1094+ debug_parse_print_tc(term_tc);
1095+ debug_printf_parse("\n");
1096
1097 sn.info = PRIMASK;
1098 sn.r.n = sn.a.n = glptr = NULL;
1099- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1100+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc;
1101
1102- while (!((tc = next_token(xtc)) & iexp)) {
1103+ while (!((tc = next_token(expected_tc)) & term_tc)) {
1104
1105- if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1106+ if (glptr && (t_info == TI_LESS)) {
1107 /* input redirection (<) attached to glptr node */
1108 debug_printf_parse("%s: input redir\n", __func__);
1109 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1110 cn->a.n = glptr;
1111- xtc = TC_OPERAND | TC_UOPPRE;
1112+ expected_tc = TS_OPERAND | TS_UOPPRE;
1113 glptr = NULL;
1114-
1115- } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1116- debug_printf_parse("%s: TC_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
1117+ continue;
1118+ }
1119+ if (tc & (TS_BINOP | TC_UOPPOST)) {
1120+ debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
1121 /* for binary and postfix-unary operators, jump back over
1122 * previous operators with higher priority */
1123 vn = cn;
1124 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1125- || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1126+ || ((t_info == vn->info) && t_info == TI_COLON)
1127 ) {
1128 vn = vn->a.n;
1129 if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
1130 }
1131- if ((t_info & OPCLSMASK) == OC_TERNARY)
1132+ if (t_info == TI_TERNARY)
1133+//TODO: why?
1134 t_info += P(6);
1135 cn = vn->a.n->r.n = new_node(t_info);
1136 cn->a.n = vn->a.n;
1137- if (tc & TC_BINOP) {
1138+ if (tc & TS_BINOP) {
1139 cn->l.n = vn;
1140- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1141- if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1142+//FIXME: this is the place to detect and reject assignments to non-lvalues.
1143+//Currently we allow "assignments" to consts and temporaries, nonsense like this:
1144+// awk 'BEGIN { "qwe" = 1 }'
1145+// awk 'BEGIN { 7 *= 7 }'
1146+// awk 'BEGIN { length("qwe") = 1 }'
1147+// awk 'BEGIN { (1+1) += 3 }'
1148+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
1149+ if (t_info == TI_PGETLINE) {
1150 /* it's a pipe */
1151 next_token(TC_GETLINE);
1152 /* give maximum priority to this pipe */
1153 cn->info &= ~PRIMASK;
1154- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1155+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
1156 }
1157 } else {
1158 cn->r.n = vn;
1159- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1160+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
1161 }
1162 vn->a.n = cn;
1163+ continue;
1164+ }
1165
1166- } else {
1167- debug_printf_parse("%s: other\n", __func__);
1168- /* for operands and prefix-unary operators, attach them
1169- * to last node */
1170- vn = cn;
1171- cn = vn->r.n = new_node(t_info);
1172- cn->a.n = vn;
1173- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1174- if (tc & (TC_OPERAND | TC_REGEXP)) {
1175- debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1176- xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1177- /* one should be very careful with switch on tclass -
1178- * only simple tclasses should be used! */
1179- switch (tc) {
1180- case TC_VARIABLE:
1181- case TC_ARRAY:
1182- debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1183- cn->info = OC_VAR;
1184- v = hash_search(ahash, t_string);
1185- if (v != NULL) {
1186- cn->info = OC_FNARG;
1187- cn->l.aidx = v->x.aidx;
1188- } else {
1189- cn->l.v = newvar(t_string);
1190- }
1191- if (tc & TC_ARRAY) {
1192- cn->info |= xS;
1193- cn->r.n = parse_expr(TC_ARRTERM);
1194- }
1195- break;
1196+ debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info);
1197+ /* for operands and prefix-unary operators, attach them
1198+ * to last node */
1199+ vn = cn;
1200+ cn = vn->r.n = new_node(t_info);
1201+ cn->a.n = vn;
1202
1203- case TC_NUMBER:
1204- case TC_STRING:
1205- debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
1206- cn->info = OC_VAR;
1207- v = cn->l.v = xzalloc(sizeof(var));
1208- if (tc & TC_NUMBER)
1209- setvar_i(v, t_double);
1210- else {
1211- setvar_s(v, t_string);
1212- xtc &= ~TC_UOPPOST; /* "str"++ is not allowed */
1213- }
1214- break;
1215+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
1216+ if (t_info == TI_PREINC || t_info == TI_PREDEC)
1217+ expected_tc = TS_LVALUE | TC_UOPPRE1;
1218
1219- case TC_REGEXP:
1220- debug_printf_parse("%s: TC_REGEXP\n", __func__);
1221- mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1222- break;
1223+ if (!(tc & (TS_OPERAND | TC_REGEXP)))
1224+ continue;
1225
1226- case TC_FUNCTION:
1227- debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1228- cn->info = OC_FUNC;
1229- cn->r.f = newfunc(t_string);
1230- cn->l.n = condition();
1231- break;
1232+ debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__);
1233+ expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc;
1234+ /* one should be very careful with switch on tclass -
1235+ * only simple tclasses should be used (TC_xyz, not TS_xyz) */
1236+ switch (tc) {
1237+ case TC_VARIABLE:
1238+ case TC_ARRAY:
1239+ debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1240+ cn->info = OC_VAR;
1241+ v = hash_search(ahash, t_string);
1242+ if (v != NULL) {
1243+ cn->info = OC_FNARG;
1244+ cn->l.aidx = v->x.aidx;
1245+ } else {
1246+ cn->l.v = newvar(t_string);
1247+ }
1248+ if (tc & TC_ARRAY) {
1249+ cn->info |= xS;
1250+ cn->r.n = parse_expr(TC_ARRTERM);
1251+ }
1252+ break;
1253
1254- case TC_SEQSTART:
1255- debug_printf_parse("%s: TC_SEQSTART\n", __func__);
1256- cn = vn->r.n = parse_expr(TC_SEQTERM);
1257- if (!cn)
1258- syntax_error("Empty sequence");
1259- cn->a.n = vn;
1260- break;
1261+ case TC_NUMBER:
1262+ case TC_STRING:
1263+ debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
1264+ cn->info = OC_VAR;
1265+ v = cn->l.v = xzalloc(sizeof(var));
1266+ if (tc & TC_NUMBER)
1267+ setvar_i(v, t_double);
1268+ else {
1269+ setvar_s(v, t_string);
1270+ expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */
1271+ }
1272+ break;
1273
1274- case TC_GETLINE:
1275- debug_printf_parse("%s: TC_GETLINE\n", __func__);
1276- glptr = cn;
1277- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1278- break;
1279+ case TC_REGEXP:
1280+ debug_printf_parse("%s: TC_REGEXP\n", __func__);
1281+ mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1282+ break;
1283
1284- case TC_BUILTIN:
1285- debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1286- cn->l.n = condition();
1287- break;
1288+ case TC_FUNCTION:
1289+ debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1290+ cn->info = OC_FUNC;
1291+ cn->r.f = newfunc(t_string);
1292+ cn->l.n = parse_expr(TC_RPAREN);
1293+ break;
1294
1295- case TC_LENGTH:
1296- debug_printf_parse("%s: TC_LENGTH\n", __func__);
1297- next_token(TC_SEQSTART /* length(...) */
1298- | TC_OPTERM /* length; (or newline)*/
1299- | TC_GRPTERM /* length } */
1300- | TC_BINOPX /* length <op> NUM */
1301- | TC_COMMA /* print length, 1 */
1302- );
1303- rollback_token();
1304- if (t_tclass & TC_SEQSTART) {
1305- /* It was a "(" token. Handle just like TC_BUILTIN */
1306- cn->l.n = condition();
1307- }
1308- break;
1309- }
1310+ case TC_LPAREN:
1311+ debug_printf_parse("%s: TC_LPAREN\n", __func__);
1312+ cn = vn->r.n = parse_expr(TC_RPAREN);
1313+ if (!cn)
1314+ syntax_error("Empty sequence");
1315+ cn->a.n = vn;
1316+ break;
1317+
1318+ case TC_GETLINE:
1319+ debug_printf_parse("%s: TC_GETLINE\n", __func__);
1320+ glptr = cn;
1321+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
1322+ break;
1323+
1324+ case TC_BUILTIN:
1325+ debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1326+ cn->l.n = parse_lrparen_list();
1327+ break;
1328+
1329+ case TC_LENGTH:
1330+ debug_printf_parse("%s: TC_LENGTH\n", __func__);
1331+ tc = next_token(TC_LPAREN /* length(...) */
1332+ | TC_SEMICOL /* length; */
1333+ | TC_NEWLINE /* length<newline> */
1334+ | TC_RBRACE /* length } */
1335+ | TC_BINOPX /* length <op> NUM */
1336+ | TC_COMMA /* print length, 1 */
1337+ );
1338+ if (tc != TC_LPAREN)
1339+ rollback_token();
1340+ else {
1341+ /* It was a "(" token. Handle just like TC_BUILTIN */
1342+ cn->l.n = parse_expr(TC_RPAREN);
1343 }
1344+ break;
1345 }
1346- }
1347+ } /* while() */
1348
1349 debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1350 return sn.r.n;
1351@@ -1430,7 +1513,7 @@ static node *chain_node(uint32_t info)
1352 if (seq->programname != g_progname) {
1353 seq->programname = g_progname;
1354 n = chain_node(OC_NEWSOURCE);
1355- n->l.new_progname = xstrdup(g_progname);
1356+ n->l.new_progname = g_progname;
1357 }
1358
1359 n = seq->last;
1360@@ -1446,14 +1529,16 @@ static void chain_expr(uint32_t info)
1361
1362 n = chain_node(info);
1363
1364- n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1365+ n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
1366 if ((info & OF_REQUIRED) && !n->l.n)
1367 syntax_error(EMSG_TOO_FEW_ARGS);
1368
1369- if (t_tclass & TC_GRPTERM)
1370+ if (t_tclass & TC_RBRACE)
1371 rollback_token();
1372 }
1373
1374+static void chain_group(void);
1375+
1376 static node *chain_loop(node *nn)
1377 {
1378 node *n, *n2, *save_brk, *save_cont;
1379@@ -1477,207 +1562,284 @@ static node *chain_loop(node *nn)
1380 return n;
1381 }
1382
1383+static void chain_until_rbrace(void)
1384+{
1385+ uint32_t tc;
1386+ while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
1387+ debug_printf_parse("%s: !TC_RBRACE\n", __func__);
1388+ if (tc == TC_NEWLINE)
1389+ continue;
1390+ rollback_token();
1391+ chain_group();
1392+ }
1393+ debug_printf_parse("%s: TC_RBRACE\n", __func__);
1394+}
1395+
1396 /* parse group and attach it to chain */
1397 static void chain_group(void)
1398 {
1399- uint32_t c;
1400+ uint32_t tc;
1401 node *n, *n2, *n3;
1402
1403 do {
1404- c = next_token(TC_GRPSEQ);
1405- } while (c & TC_NEWLINE);
1406-
1407- if (c & TC_GRPSTART) {
1408- debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1409- while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1410- debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1411- if (t_tclass & TC_NEWLINE)
1412- continue;
1413- rollback_token();
1414- chain_group();
1415- }
1416- debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1417- } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1418- debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1419+ tc = next_token(TS_GRPSEQ);
1420+ } while (tc == TC_NEWLINE);
1421+
1422+ if (tc == TC_LBRACE) {
1423+ debug_printf_parse("%s: TC_LBRACE\n", __func__);
1424+ chain_until_rbrace();
1425+ return;
1426+ }
1427+ if (tc & (TS_OPSEQ | TC_SEMICOL)) {
1428+ debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL\n", __func__);
1429 rollback_token();
1430 chain_expr(OC_EXEC | Vx);
1431- } else {
1432- /* TC_STATEMNT */
1433- debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1434- switch (t_info & OPCLSMASK) {
1435- case ST_IF:
1436- debug_printf_parse("%s: ST_IF\n", __func__);
1437- n = chain_node(OC_BR | Vx);
1438- n->l.n = condition();
1439+ return;
1440+ }
1441+
1442+ /* TS_STATEMNT */
1443+ debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__);
1444+ switch (t_info & OPCLSMASK) {
1445+ case ST_IF:
1446+ debug_printf_parse("%s: ST_IF\n", __func__);
1447+ n = chain_node(OC_BR | Vx);
1448+ n->l.n = parse_lrparen_list();
1449+ chain_group();
1450+ n2 = chain_node(OC_EXEC);
1451+ n->r.n = seq->last;
1452+ if (next_token(TS_GRPSEQ | TC_RBRACE | TC_ELSE) == TC_ELSE) {
1453 chain_group();
1454- n2 = chain_node(OC_EXEC);
1455- n->r.n = seq->last;
1456- if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1457- chain_group();
1458- n2->a.n = seq->last;
1459- } else {
1460- rollback_token();
1461- }
1462- break;
1463+ n2->a.n = seq->last;
1464+ } else {
1465+ rollback_token();
1466+ }
1467+ break;
1468
1469- case ST_WHILE:
1470- debug_printf_parse("%s: ST_WHILE\n", __func__);
1471- n2 = condition();
1472- n = chain_loop(NULL);
1473- n->l.n = n2;
1474- break;
1475+ case ST_WHILE:
1476+ debug_printf_parse("%s: ST_WHILE\n", __func__);
1477+ n2 = parse_lrparen_list();
1478+ n = chain_loop(NULL);
1479+ n->l.n = n2;
1480+ break;
1481
1482- case ST_DO:
1483- debug_printf_parse("%s: ST_DO\n", __func__);
1484- n2 = chain_node(OC_EXEC);
1485- n = chain_loop(NULL);
1486- n2->a.n = n->a.n;
1487- next_token(TC_WHILE);
1488- n->l.n = condition();
1489- break;
1490+ case ST_DO:
1491+ debug_printf_parse("%s: ST_DO\n", __func__);
1492+ n2 = chain_node(OC_EXEC);
1493+ n = chain_loop(NULL);
1494+ n2->a.n = n->a.n;
1495+ next_token(TC_WHILE);
1496+ n->l.n = parse_lrparen_list();
1497+ break;
1498
1499- case ST_FOR:
1500- debug_printf_parse("%s: ST_FOR\n", __func__);
1501- next_token(TC_SEQSTART);
1502- n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1503- if (t_tclass & TC_SEQTERM) { /* for-in */
1504- if (!n2 || (n2->info & OPCLSMASK) != OC_IN)
1505- syntax_error(EMSG_UNEXP_TOKEN);
1506- n = chain_node(OC_WALKINIT | VV);
1507- n->l.n = n2->l.n;
1508- n->r.n = n2->r.n;
1509- n = chain_loop(NULL);
1510- n->info = OC_WALKNEXT | Vx;
1511- n->l.n = n2->l.n;
1512- } else { /* for (;;) */
1513- n = chain_node(OC_EXEC | Vx);
1514- n->l.n = n2;
1515- n2 = parse_expr(TC_SEMICOL);
1516- n3 = parse_expr(TC_SEQTERM);
1517- n = chain_loop(n3);
1518- n->l.n = n2;
1519- if (!n2)
1520- n->info = OC_EXEC;
1521- }
1522- break;
1523+ case ST_FOR:
1524+ debug_printf_parse("%s: ST_FOR\n", __func__);
1525+ next_token(TC_LPAREN);
1526+ n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
1527+ if (t_tclass & TC_RPAREN) { /* for (I in ARRAY) */
1528+ if (!n2 || n2->info != TI_IN)
1529+ syntax_error(EMSG_UNEXP_TOKEN);
1530+ n = chain_node(OC_WALKINIT | VV);
1531+ n->l.n = n2->l.n;
1532+ n->r.n = n2->r.n;
1533+ n = chain_loop(NULL);
1534+ n->info = OC_WALKNEXT | Vx;
1535+ n->l.n = n2->l.n;
1536+ } else { /* for (;;) */
1537+ n = chain_node(OC_EXEC | Vx);
1538+ n->l.n = n2;
1539+ n2 = parse_expr(TC_SEMICOL);
1540+ n3 = parse_expr(TC_RPAREN);
1541+ n = chain_loop(n3);
1542+ n->l.n = n2;
1543+ if (!n2)
1544+ n->info = OC_EXEC;
1545+ }
1546+ break;
1547
1548- case OC_PRINT:
1549- case OC_PRINTF:
1550- debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1551- n = chain_node(t_info);
1552- n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1553- if (t_tclass & TC_OUTRDR) {
1554- n->info |= t_info;
1555- n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1556- }
1557- if (t_tclass & TC_GRPTERM)
1558- rollback_token();
1559- break;
1560+ case OC_PRINT:
1561+ case OC_PRINTF:
1562+ debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1563+ n = chain_node(t_info);
1564+ n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_OUTRDR | TC_RBRACE);
1565+ if (t_tclass & TC_OUTRDR) {
1566+ n->info |= t_info;
1567+ n->r.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
1568+ }
1569+ if (t_tclass & TC_RBRACE)
1570+ rollback_token();
1571+ break;
1572
1573- case OC_BREAK:
1574- debug_printf_parse("%s: OC_BREAK\n", __func__);
1575- n = chain_node(OC_EXEC);
1576- n->a.n = break_ptr;
1577- chain_expr(t_info);
1578- break;
1579+ case OC_BREAK:
1580+ debug_printf_parse("%s: OC_BREAK\n", __func__);
1581+ n = chain_node(OC_EXEC);
1582+ if (!break_ptr)
1583+ syntax_error("'break' not in a loop");
1584+ n->a.n = break_ptr;
1585+ chain_expr(t_info);
1586+ break;
1587
1588- case OC_CONTINUE:
1589- debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1590- n = chain_node(OC_EXEC);
1591- n->a.n = continue_ptr;
1592- chain_expr(t_info);
1593- break;
1594+ case OC_CONTINUE:
1595+ debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1596+ n = chain_node(OC_EXEC);
1597+ if (!continue_ptr)
1598+ syntax_error("'continue' not in a loop");
1599+ n->a.n = continue_ptr;
1600+ chain_expr(t_info);
1601+ break;
1602
1603- /* delete, next, nextfile, return, exit */
1604- default:
1605- debug_printf_parse("%s: default\n", __func__);
1606- chain_expr(t_info);
1607- }
1608+ /* delete, next, nextfile, return, exit */
1609+ default:
1610+ debug_printf_parse("%s: default\n", __func__);
1611+ chain_expr(t_info);
1612 }
1613 }
1614
1615 static void parse_program(char *p)
1616 {
1617- uint32_t tclass;
1618- node *cn;
1619- func *f;
1620- var *v;
1621+ debug_printf_parse("%s()\n", __func__);
1622
1623 g_pos = p;
1624 t_lineno = 1;
1625- while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1626- TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1627+ for (;;) {
1628+ uint32_t tclass;
1629
1630- if (tclass & TC_OPTERM) {
1631- debug_printf_parse("%s: TC_OPTERM\n", __func__);
1632+ tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
1633+ | TC_EOF | TC_NEWLINE /* but not TC_SEMICOL */);
1634+ got_tok:
1635+ if (tclass == TC_EOF) {
1636+ debug_printf_parse("%s: TC_EOF\n", __func__);
1637+ break;
1638+ }
1639+ if (tclass == TC_NEWLINE) {
1640+ debug_printf_parse("%s: TC_NEWLINE\n", __func__);
1641 continue;
1642 }
1643-
1644- seq = &mainseq;
1645- if (tclass & TC_BEGIN) {
1646+ if (tclass == TC_BEGIN) {
1647 debug_printf_parse("%s: TC_BEGIN\n", __func__);
1648 seq = &beginseq;
1649- chain_group();
1650- } else if (tclass & TC_END) {
1651+ /* ensure there is no newline between BEGIN and { */
1652+ next_token(TC_LBRACE);
1653+ chain_until_rbrace();
1654+ goto next_tok;
1655+ }
1656+ if (tclass == TC_END) {
1657 debug_printf_parse("%s: TC_END\n", __func__);
1658 seq = &endseq;
1659- chain_group();
1660- } else if (tclass & TC_FUNCDECL) {
1661+ /* ensure there is no newline between END and { */
1662+ next_token(TC_LBRACE);
1663+ chain_until_rbrace();
1664+ goto next_tok;
1665+ }
1666+ if (tclass == TC_FUNCDECL) {
1667+ func *f;
1668+
1669 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1670 next_token(TC_FUNCTION);
1671- g_pos++;
1672 f = newfunc(t_string);
1673- f->body.first = NULL;
1674- f->nargs = 0;
1675- /* Match func arg list: a comma sep list of >= 0 args, and a close paren */
1676- while (next_token(TC_VARIABLE | TC_SEQTERM | TC_COMMA)) {
1677- /* Either an empty arg list, or trailing comma from prev iter
1678- * must be followed by an arg */
1679- if (f->nargs == 0 && t_tclass == TC_SEQTERM)
1680- break;
1681-
1682- /* TC_SEQSTART/TC_COMMA must be followed by TC_VARIABLE */
1683- if (t_tclass != TC_VARIABLE)
1684+ if (f->defined)
1685+ syntax_error("Duplicate function");
1686+ f->defined = 1;
1687+ //f->body.first = NULL; - already is
1688+ //f->nargs = 0; - already is
1689+ /* func arg list: comma sep list of args, and a close paren */
1690+ for (;;) {
1691+ var *v;
1692+ if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
1693+ if (f->nargs == 0)
1694+ break; /* func() is ok */
1695+ /* func(a,) is not ok */
1696 syntax_error(EMSG_UNEXP_TOKEN);
1697-
1698+ }
1699 v = findvar(ahash, t_string);
1700 v->x.aidx = f->nargs++;
1701-
1702 /* Arg followed either by end of arg list or 1 comma */
1703- if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1704+ if (next_token(TC_COMMA | TC_RPAREN) == TC_RPAREN)
1705 break;
1706- if (t_tclass != TC_COMMA)
1707- syntax_error(EMSG_UNEXP_TOKEN);
1708+ /* it was a comma, we ate it */
1709 }
1710 seq = &f->body;
1711- chain_group();
1712- clear_array(ahash);
1713- } else if (tclass & TC_OPSEQ) {
1714- debug_printf_parse("%s: TC_OPSEQ\n", __func__);
1715+ /* ensure there is { after "func F(...)" - but newlines are allowed */
1716+ while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE)
1717+ continue;
1718+ chain_until_rbrace();
1719+ hash_clear(ahash);
1720+ goto next_tok;
1721+ }
1722+ seq = &mainseq;
1723+ if (tclass & TS_OPSEQ) {
1724+ node *cn;
1725+
1726+ debug_printf_parse("%s: TS_OPSEQ\n", __func__);
1727 rollback_token();
1728 cn = chain_node(OC_TEST);
1729- cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1730- if (t_tclass & TC_GRPSTART) {
1731- debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1732- rollback_token();
1733- chain_group();
1734+ cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE);
1735+ if (t_tclass == TC_LBRACE) {
1736+ debug_printf_parse("%s: TC_LBRACE\n", __func__);
1737+ chain_until_rbrace();
1738 } else {
1739- debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1740+ /* no action, assume default "{ print }" */
1741+ debug_printf_parse("%s: !TC_LBRACE\n", __func__);
1742 chain_node(OC_PRINT);
1743 }
1744 cn->r.n = mainseq.last;
1745- } else /* if (tclass & TC_GRPSTART) */ {
1746- debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1747- rollback_token();
1748- chain_group();
1749+ goto next_tok;
1750 }
1751- }
1752- debug_printf_parse("%s: TC_EOF\n", __func__);
1753+ /* tclass == TC_LBRACE */
1754+ debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
1755+ chain_until_rbrace();
1756+ next_tok:
1757+ /* Same as next_token() at the top of the loop, + TC_SEMICOL */
1758+ tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
1759+ | TC_EOF | TC_NEWLINE | TC_SEMICOL);
1760+ /* gawk allows many newlines, but does not allow more than one semicolon:
1761+ * BEGIN {...}<newline>;<newline>;
1762+ * would complain "each rule must have a pattern or an action part".
1763+ * Same message for
1764+ * ; BEGIN {...}
1765+ */
1766+ if (tclass != TC_SEMICOL)
1767+ goto got_tok; /* use this token */
1768+ /* else: loop back - ate the semicolon, get and use _next_ token */
1769+ } /* for (;;) */
1770 }
1771
1772-
1773 /* -------- program execution part -------- */
1774
1775+/* temporary variables allocator */
1776+static var *nvalloc(int sz)
1777+{
1778+ return xzalloc(sz * sizeof(var));
1779+}
1780+
1781+static void nvfree(var *v, int sz)
1782+{
1783+ var *p = v;
1784+
1785+ while (--sz >= 0) {
1786+ if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1787+ clear_array(iamarray(p));
1788+ free(p->x.array->items);
1789+ free(p->x.array);
1790+ }
1791+ if (p->type & VF_WALK) {
1792+ walker_list *n;
1793+ walker_list *w = p->x.walker;
1794+ debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1795+ p->x.walker = NULL;
1796+ while (w) {
1797+ n = w->prev;
1798+ debug_printf_walker(" free(%p)\n", w);
1799+ free(w);
1800+ w = n;
1801+ }
1802+ }
1803+ clrvar(p);
1804+ p++;
1805+ }
1806+
1807+ free(v);
1808+}
1809+
1810 static node *mk_splitter(const char *s, tsplitter *spl)
1811 {
1812 regex_t *re, *ire;
1813@@ -1686,7 +1848,7 @@ static node *mk_splitter(const char *s, tsplitter *spl)
1814 re = &spl->re[0];
1815 ire = &spl->re[1];
1816 n = &spl->n;
1817- if ((n->info & OPCLSMASK) == OC_REGEXP) {
1818+ if (n->info == TI_REGEXP) {
1819 regfree(re);
1820 regfree(ire); // TODO: nuke ire, use re+1?
1821 }
1822@@ -1699,21 +1861,28 @@ static node *mk_splitter(const char *s, tsplitter *spl)
1823 return n;
1824 }
1825
1826-/* use node as a regular expression. Supplied with node ptr and regex_t
1827+static var *evaluate(node *, var *);
1828+
1829+/* Use node as a regular expression. Supplied with node ptr and regex_t
1830 * storage space. Return ptr to regex (if result points to preg, it should
1831- * be later regfree'd manually
1832+ * be later regfree'd manually).
1833 */
1834 static regex_t *as_regex(node *op, regex_t *preg)
1835 {
1836 int cflags;
1837- var *v;
1838 const char *s;
1839
1840- if ((op->info & OPCLSMASK) == OC_REGEXP) {
1841+ if (op->info == TI_REGEXP) {
1842 return icase ? op->r.ire : op->l.re;
1843 }
1844- v = nvalloc(1);
1845- s = getvar_s(evaluate(op, v));
1846+
1847+ //tmpvar = nvalloc(1);
1848+#define TMPVAR (&G.as_regex__tmpvar)
1849+ // We use a single "static" tmpvar (instead of on-stack or malloced one)
1850+ // to decrease memory consumption in deeply-recursive awk programs.
1851+ // The rule to work safely is to never call evaluate() while our static
1852+ // TMPVAR's value is still needed.
1853+ s = getvar_s(evaluate(op, TMPVAR));
1854
1855 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1856 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1857@@ -1725,7 +1894,8 @@ static regex_t *as_regex(node *op, regex_t *preg)
1858 cflags &= ~REG_EXTENDED;
1859 xregcomp(preg, s, cflags);
1860 }
1861- nvfree(v);
1862+ //nvfree(tmpvar, 1);
1863+#undef TMPVAR
1864 return preg;
1865 }
1866
1867@@ -1745,12 +1915,22 @@ static char* qrealloc(char *b, int n, int *size)
1868 /* resize field storage space */
1869 static void fsrealloc(int size)
1870 {
1871- int i;
1872+ int i, newsize;
1873
1874 if (size >= maxfields) {
1875+ /* Sanity cap, easier than catering for overflows */
1876+ if (size > 0xffffff)
1877+ bb_die_memory_exhausted();
1878+
1879 i = maxfields;
1880 maxfields = size + 16;
1881- Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1882+
1883+ newsize = maxfields * sizeof(Fields[0]);
1884+ debug_printf_eval("fsrealloc: xrealloc(%p, %u)\n", Fields, newsize);
1885+ Fields = xrealloc(Fields, newsize);
1886+ debug_printf_eval("fsrealloc: Fields=%p..%p\n", Fields, (char*)Fields + newsize - 1);
1887+ /* ^^^ did Fields[] move? debug aid for L.v getting "upstaged" by R.v in evaluate() */
1888+
1889 for (; i < maxfields; i++) {
1890 Fields[i].type = VF_SPECIAL;
1891 Fields[i].string = NULL;
1892@@ -1802,13 +1982,13 @@ static int awk_split(const char *s, node *spl, char **slist)
1893 c[2] = '\n';
1894
1895 n = 0;
1896- if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1897+ if (spl->info == TI_REGEXP) { /* regex split */
1898 if (!*s)
1899 return n; /* "": zero fields */
1900 n++; /* at least one field will be there */
1901 do {
1902 int l;
1903- regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1904+ regmatch_t pmatch[1];
1905
1906 l = strcspn(s, c+2); /* len till next NUL or \n */
1907 if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0
1908@@ -1969,7 +2149,7 @@ static node *nextarg(node **pn)
1909 node *n;
1910
1911 n = *pn;
1912- if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1913+ if (n && n->info == TI_COMMA) {
1914 *pn = n->r.n;
1915 n = n->l.n;
1916 } else {
1917@@ -2000,8 +2180,7 @@ static void hashwalk_init(var *v, xhash *array)
1918 for (i = 0; i < array->csize; i++) {
1919 hi = array->items[i];
1920 while (hi) {
1921- strcpy(w->end, hi->name);
1922- nextword(&w->end);
1923+ w->end = stpcpy(w->end, hi->name) + 1;
1924 hi = hi->next;
1925 }
1926 }
1927@@ -2027,15 +2206,18 @@ static int hashwalk_next(var *v)
1928 /* evaluate node, return 1 when result is true, 0 otherwise */
1929 static int ptest(node *pattern)
1930 {
1931- /* ptest__v is "static": to save stack space? */
1932- return istrue(evaluate(pattern, &G.ptest__v));
1933+ // We use a single "static" tmpvar (instead of on-stack or malloced one)
1934+ // to decrease memory consumption in deeply-recursive awk programs.
1935+ // The rule to work safely is to never call evaluate() while our static
1936+ // TMPVAR's value is still needed.
1937+ return istrue(evaluate(pattern, &G.ptest__tmpvar));
1938 }
1939
1940 /* read next record from stream rsm into a variable v */
1941 static int awk_getline(rstream *rsm, var *v)
1942 {
1943 char *b;
1944- regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1945+ regmatch_t pmatch[1];
1946 int size, a, p, pp = 0;
1947 int fd, so, eo, r, rp;
1948 char c, *m, *s;
1949@@ -2061,7 +2243,7 @@ static int awk_getline(rstream *rsm, var *v)
1950 so = eo = p;
1951 r = 1;
1952 if (p > 0) {
1953- if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1954+ if (rsplitter.n.info == TI_REGEXP) {
1955 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1956 b, 1, pmatch, 0) == 0) {
1957 so = pmatch[0].rm_so;
1958@@ -2133,82 +2315,126 @@ static int awk_getline(rstream *rsm, var *v)
1959 return r;
1960 }
1961
1962-static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1963-{
1964- int r = 0;
1965- char c;
1966- const char *s = format;
1967-
1968- if (int_as_int && n == (long long)n) {
1969- r = snprintf(b, size, "%lld", (long long)n);
1970- } else {
1971- do { c = *s; } while (c && *++s);
1972- if (strchr("diouxX", c)) {
1973- r = snprintf(b, size, format, (int)n);
1974- } else if (strchr("eEfgG", c)) {
1975- r = snprintf(b, size, format, n);
1976- } else {
1977- syntax_error(EMSG_INV_FMT);
1978- }
1979- }
1980- return r;
1981-}
1982-
1983 /* formatted output into an allocated buffer, return ptr to buffer */
1984-static char *awk_printf(node *n)
1985+#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS
1986+# define awk_printf(a, b) awk_printf(a)
1987+#endif
1988+static char *awk_printf(node *n, size_t *len)
1989 {
1990- char *b = NULL;
1991- char *fmt, *s, *f;
1992- const char *s1;
1993- int i, j, incr, bsize;
1994- char c, c1;
1995- var *v, *arg;
1996-
1997- v = nvalloc(1);
1998- fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1999-
2000+ char *b;
2001+ char *fmt, *f;
2002+ size_t i;
2003+
2004+ //tmpvar = nvalloc(1);
2005+#define TMPVAR (&G.awk_printf__tmpvar)
2006+ // We use a single "static" tmpvar (instead of on-stack or malloced one)
2007+ // to decrease memory consumption in deeply-recursive awk programs.
2008+ // The rule to work safely is to never call evaluate() while our static
2009+ // TMPVAR's value is still needed.
2010+ fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), TMPVAR)));
2011+ // ^^^^^^^^^ here we immediately strdup() the value, so the later call
2012+ // to evaluate() potentially recursing into another awk_printf() can't
2013+ // mangle the value.
2014+
2015+ b = NULL;
2016 i = 0;
2017- while (*f) {
2018+ while (1) { /* "print one format spec" loop */
2019+ char *s;
2020+ char c;
2021+ char sv;
2022+ var *arg;
2023+ size_t slen;
2024+
2025+ /* Find end of the next format spec, or end of line */
2026 s = f;
2027- while (*f && (*f != '%' || *++f == '%'))
2028- f++;
2029- while (*f && !isalpha(*f)) {
2030- if (*f == '*')
2031- syntax_error("%*x formats are not supported");
2032+ while (1) {
2033+ c = *f;
2034+ if (!c) /* no percent chars found at all */
2035+ goto nul;
2036 f++;
2037+ if (c == '%')
2038+ break;
2039 }
2040-
2041- incr = (f - s) + MAXVARFMT;
2042- b = qrealloc(b, incr + i, &bsize);
2043+ /* we are past % in "....%..." */
2044 c = *f;
2045- if (c != '\0')
2046+ if (!c) /* "....%" */
2047+ goto nul;
2048+ if (c == '%') { /* "....%%...." */
2049+ slen = f - s;
2050+ s = xstrndup(s, slen);
2051 f++;
2052- c1 = *f;
2053+ goto append; /* print "....%" part verbatim */
2054+ }
2055+ while (1) {
2056+ if (isalpha(c))
2057+ break;
2058+ if (c == '*')
2059+ syntax_error("%*x formats are not supported");
2060+ c = *++f;
2061+ if (!c) { /* "....%...." and no letter found after % */
2062+ /* Example: awk 'BEGIN { printf "^^^%^^^\n"; }' */
2063+ nul:
2064+ slen = f - s;
2065+ goto tail; /* print remaining string, exit loop */
2066+ }
2067+ }
2068+ /* we are at A in "....%...A..." */
2069+
2070+ arg = evaluate(nextarg(&n), TMPVAR);
2071+
2072+ /* Result can be arbitrarily long. Example:
2073+ * printf "%99999s", "BOOM"
2074+ */
2075+ sv = *++f;
2076 *f = '\0';
2077- arg = evaluate(nextarg(&n), v);
2078-
2079- j = i;
2080- if (c == 'c' || !c) {
2081- i += sprintf(b+i, s, is_numeric(arg) ?
2082- (char)getvar_i(arg) : *getvar_s(arg));
2083- } else if (c == 's') {
2084- s1 = getvar_s(arg);
2085- b = qrealloc(b, incr+i+strlen(s1), &bsize);
2086- i += sprintf(b+i, s, s1);
2087+ if (c == 'c') {
2088+ char cc = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg);
2089+ char *r = xasprintf(s, cc ? cc : '^' /* else strlen will be wrong */);
2090+ slen = strlen(r);
2091+ if (cc == '\0') /* if cc is NUL, re-format the string with it */
2092+ sprintf(r, s, cc);
2093+ s = r;
2094 } else {
2095- i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2096+ if (c == 's') {
2097+ s = xasprintf(s, getvar_s(arg));
2098+ } else {
2099+ double d = getvar_i(arg);
2100+ if (strchr("diouxX", c)) {
2101+//TODO: make it wider here (%x -> %llx etc)?
2102+ s = xasprintf(s, (int)d);
2103+ } else if (strchr("eEfFgGaA", c)) {
2104+ s = xasprintf(s, d);
2105+ } else {
2106+//TODO: GNU Awk 5.0.1: printf "%W" prints "%W", does not error out
2107+ syntax_error(EMSG_INV_FMT);
2108+ }
2109+ }
2110+ slen = strlen(s);
2111 }
2112- *f = c1;
2113-
2114- /* if there was an error while sprintf, return value is negative */
2115- if (i < j)
2116- i = j;
2117+ *f = sv;
2118+ append:
2119+ if (i == 0) {
2120+ b = s;
2121+ i = slen;
2122+ continue;
2123+ }
2124+ tail:
2125+ b = xrealloc(b, i + slen + 1);
2126+ strcpy(b + i, s);
2127+ i += slen;
2128+ if (!c) /* s is NOT allocated and this is the last part of string? */
2129+ break;
2130+ free(s);
2131 }
2132
2133 free(fmt);
2134- nvfree(v);
2135- b = xrealloc(b, i + 1);
2136- b[i] = '\0';
2137+ //nvfree(tmpvar, 1);
2138+#undef TMPVAR
2139+
2140+#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
2141+ if (len)
2142+ *len = i;
2143+#endif
2144 return b;
2145 }
2146
2147@@ -2338,33 +2564,59 @@ static NOINLINE int do_mktime(const char *ds)
2148 return mktime(&then);
2149 }
2150
2151+/* Reduce stack usage in exec_builtin() by keeping match() code separate */
2152+static NOINLINE var *do_match(node *an1, const char *as0)
2153+{
2154+ regmatch_t pmatch[1];
2155+ regex_t sreg, *re;
2156+ int n, start, len;
2157+
2158+ re = as_regex(an1, &sreg);
2159+ n = regexec(re, as0, 1, pmatch, 0);
2160+ if (re == &sreg)
2161+ regfree(re);
2162+ start = 0;
2163+ len = -1;
2164+ if (n == 0) {
2165+ start = pmatch[0].rm_so + 1;
2166+ len = pmatch[0].rm_eo - pmatch[0].rm_so;
2167+ }
2168+ setvar_i(newvar("RLENGTH"), len);
2169+ return setvar_i(newvar("RSTART"), start);
2170+}
2171+
2172+/* Reduce stack usage in evaluate() by keeping builtins' code separate */
2173 static NOINLINE var *exec_builtin(node *op, var *res)
2174 {
2175 #define tspl (G.exec_builtin__tspl)
2176
2177- var *tv;
2178+ var *tmpvars;
2179 node *an[4];
2180 var *av[4];
2181 const char *as[4];
2182- regmatch_t pmatch[2];
2183- regex_t sreg, *re;
2184 node *spl;
2185 uint32_t isr, info;
2186 int nargs;
2187 time_t tt;
2188 int i, l, ll, n;
2189
2190- tv = nvalloc(4);
2191+ tmpvars = nvalloc(4);
2192+#define TMPVAR0 (tmpvars)
2193+#define TMPVAR1 (tmpvars + 1)
2194+#define TMPVAR2 (tmpvars + 2)
2195+#define TMPVAR3 (tmpvars + 3)
2196+#define TMPVAR(i) (tmpvars + (i))
2197 isr = info = op->info;
2198 op = op->l.n;
2199
2200 av[2] = av[3] = NULL;
2201 for (i = 0; i < 4 && op; i++) {
2202 an[i] = nextarg(&op);
2203- if (isr & 0x09000000)
2204- av[i] = evaluate(an[i], &tv[i]);
2205- if (isr & 0x08000000)
2206- as[i] = getvar_s(av[i]);
2207+ if (isr & 0x09000000) {
2208+ av[i] = evaluate(an[i], TMPVAR(i));
2209+ if (isr & 0x08000000)
2210+ as[i] = getvar_s(av[i]);
2211+ }
2212 isr >>= 1;
2213 }
2214
2215@@ -2386,8 +2638,8 @@ static NOINLINE var *exec_builtin(node *op, var *res)
2216 char *s, *s1;
2217
2218 if (nargs > 2) {
2219- spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2220- an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2221+ spl = (an[2]->info == TI_REGEXP) ? an[2]
2222+ : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl);
2223 } else {
2224 spl = &fsplitter.n;
2225 }
2226@@ -2501,20 +2753,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
2227 break;
2228
2229 case B_ma:
2230- re = as_regex(an[1], &sreg);
2231- n = regexec(re, as[0], 1, pmatch, 0);
2232- if (n == 0) {
2233- pmatch[0].rm_so++;
2234- pmatch[0].rm_eo++;
2235- } else {
2236- pmatch[0].rm_so = 0;
2237- pmatch[0].rm_eo = -1;
2238- }
2239- setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2240- setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2241- setvar_i(res, pmatch[0].rm_so);
2242- if (re == &sreg)
2243- regfree(re);
2244+ res = do_match(an[1], as[0]);
2245 break;
2246
2247 case B_ge:
2248@@ -2530,14 +2769,79 @@ static NOINLINE var *exec_builtin(node *op, var *res)
2249 break;
2250 }
2251
2252- nvfree(tv);
2253+ nvfree(tmpvars, 4);
2254+#undef TMPVAR0
2255+#undef TMPVAR1
2256+#undef TMPVAR2
2257+#undef TMPVAR3
2258+#undef TMPVAR
2259+
2260 return res;
2261 #undef tspl
2262 }
2263
2264+/* if expr looks like "var=value", perform assignment and return 1,
2265+ * otherwise return 0 */
2266+static int is_assignment(const char *expr)
2267+{
2268+ char *exprc, *val;
2269+
2270+ val = (char*)endofname(expr);
2271+ if (val == (char*)expr || *val != '=') {
2272+ return FALSE;
2273+ }
2274+
2275+ exprc = xstrdup(expr);
2276+ val = exprc + (val - expr);
2277+ *val++ = '\0';
2278+
2279+ unescape_string_in_place(val);
2280+ setvar_u(newvar(exprc), val);
2281+ free(exprc);
2282+ return TRUE;
2283+}
2284+
2285+/* switch to next input file */
2286+static rstream *next_input_file(void)
2287+{
2288+#define rsm (G.next_input_file__rsm)
2289+#define files_happen (G.next_input_file__files_happen)
2290+
2291+ const char *fname, *ind;
2292+
2293+ if (rsm.F)
2294+ fclose(rsm.F);
2295+ rsm.F = NULL;
2296+ rsm.pos = rsm.adv = 0;
2297+
2298+ for (;;) {
2299+ if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2300+ if (files_happen)
2301+ return NULL;
2302+ fname = "-";
2303+ rsm.F = stdin;
2304+ break;
2305+ }
2306+ ind = getvar_s(incvar(intvar[ARGIND]));
2307+ fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2308+ if (fname && *fname && !is_assignment(fname)) {
2309+ rsm.F = xfopen_stdin(fname);
2310+ break;
2311+ }
2312+ }
2313+
2314+ files_happen = TRUE;
2315+ setvar_s(intvar[FILENAME], fname);
2316+ return &rsm;
2317+#undef rsm
2318+#undef files_happen
2319+}
2320+
2321 /*
2322 * Evaluate node - the heart of the program. Supplied with subtree
2323- * and place where to store result. returns ptr to result.
2324+ * and "res" variable to assign the result to if we evaluate an expression.
2325+ * If node refers to e.g. a variable or a field, no assignment happens.
2326+ * Return ptr to the result (which may or may not be the "res" variable!)
2327 */
2328 #define XC(n) ((n) >> 8)
2329
2330@@ -2549,14 +2853,16 @@ static var *evaluate(node *op, var *res)
2331 #define seed (G.evaluate__seed)
2332 #define sreg (G.evaluate__sreg)
2333
2334- var *v1;
2335+ var *tmpvars;
2336
2337 if (!op)
2338 return setvar_s(res, NULL);
2339
2340 debug_printf_eval("entered %s()\n", __func__);
2341
2342- v1 = nvalloc(2);
2343+ tmpvars = nvalloc(2);
2344+#define TMPVAR0 (tmpvars)
2345+#define TMPVAR1 (tmpvars + 1)
2346
2347 while (op) {
2348 struct {
2349@@ -2578,48 +2884,35 @@ static var *evaluate(node *op, var *res)
2350 op1 = op->l.n;
2351 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2352
2353- /* "delete" is special:
2354- * "delete array[var--]" must evaluate index expr only once,
2355- * must not evaluate it in "execute inevitable things" part.
2356- */
2357- if (XC(opinfo & OPCLSMASK) == XC(OC_DELETE)) {
2358- uint32_t info = op1->info & OPCLSMASK;
2359- var *v;
2360-
2361- debug_printf_eval("DELETE\n");
2362- if (info == OC_VAR) {
2363- v = op1->l.v;
2364- } else if (info == OC_FNARG) {
2365- v = &fnargs[op1->l.aidx];
2366- } else {
2367- syntax_error(EMSG_NOT_ARRAY);
2368+ /* execute inevitable things */
2369+ if (opinfo & OF_RES1) {
2370+ if ((opinfo & OF_REQUIRED) && !op1)
2371+ syntax_error(EMSG_TOO_FEW_ARGS);
2372+ L.v = evaluate(op1, TMPVAR0);
2373+ if (opinfo & OF_STR1) {
2374+ L.s = getvar_s(L.v);
2375+ debug_printf_eval("L.s:'%s'\n", L.s);
2376 }
2377- if (op1->r.n) { /* array ref? */
2378- const char *s;
2379- s = getvar_s(evaluate(op1->r.n, v1));
2380- hash_remove(iamarray(v), s);
2381- } else {
2382- clear_array(iamarray(v));
2383+ if (opinfo & OF_NUM1) {
2384+ L_d = getvar_i(L.v);
2385+ debug_printf_eval("L_d:%f\n", L_d);
2386 }
2387- goto next;
2388 }
2389-
2390- /* execute inevitable things */
2391- if (opinfo & OF_RES1)
2392- L.v = evaluate(op1, v1);
2393- if (opinfo & OF_RES2)
2394- R.v = evaluate(op->r.n, v1+1);
2395- if (opinfo & OF_STR1) {
2396- L.s = getvar_s(L.v);
2397- debug_printf_eval("L.s:'%s'\n", L.s);
2398- }
2399- if (opinfo & OF_STR2) {
2400- R.s = getvar_s(R.v);
2401- debug_printf_eval("R.s:'%s'\n", R.s);
2402- }
2403- if (opinfo & OF_NUM1) {
2404- L_d = getvar_i(L.v);
2405- debug_printf_eval("L_d:%f\n", L_d);
2406+ /* NB: Must get string/numeric values of L (done above)
2407+ * _before_ evaluate()'ing R.v: if both L and R are $NNNs,
2408+ * and right one is large, then L.v points to Fields[NNN1],
2409+ * second evaluate() reallocates and moves (!) Fields[],
2410+ * R.v points to Fields[NNN2] but L.v now points to freed mem!
2411+ * (Seen trying to evaluate "$444 $44444")
2412+ */
2413+ if (opinfo & OF_RES2) {
2414+ R.v = evaluate(op->r.n, TMPVAR1);
2415+ //TODO: L.v may be invalid now, set L.v to NULL to catch bugs?
2416+ //L.v = NULL;
2417+ if (opinfo & OF_STR2) {
2418+ R.s = getvar_s(R.v);
2419+ debug_printf_eval("R.s:'%s'\n", R.s);
2420+ }
2421 }
2422
2423 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2424@@ -2629,7 +2922,8 @@ static var *evaluate(node *op, var *res)
2425
2426 /* test pattern */
2427 case XC( OC_TEST ):
2428- if ((op1->info & OPCLSMASK) == OC_COMMA) {
2429+ debug_printf_eval("TEST\n");
2430+ if (op1->info == TI_COMMA) {
2431 /* it's range pattern */
2432 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2433 op->info |= OF_CHECKED;
2434@@ -2646,25 +2940,32 @@ static var *evaluate(node *op, var *res)
2435
2436 /* just evaluate an expression, also used as unconditional jump */
2437 case XC( OC_EXEC ):
2438+ debug_printf_eval("EXEC\n");
2439 break;
2440
2441 /* branch, used in if-else and various loops */
2442 case XC( OC_BR ):
2443+ debug_printf_eval("BR\n");
2444 op = istrue(L.v) ? op->a.n : op->r.n;
2445 break;
2446
2447 /* initialize for-in loop */
2448 case XC( OC_WALKINIT ):
2449+ debug_printf_eval("WALKINIT\n");
2450 hashwalk_init(L.v, iamarray(R.v));
2451 break;
2452
2453 /* get next array item */
2454 case XC( OC_WALKNEXT ):
2455+ debug_printf_eval("WALKNEXT\n");
2456 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2457 break;
2458
2459 case XC( OC_PRINT ):
2460- case XC( OC_PRINTF ): {
2461+ debug_printf_eval("PRINT /\n");
2462+ case XC( OC_PRINTF ):
2463+ debug_printf_eval("PRINTF\n");
2464+ {
2465 FILE *F = stdout;
2466
2467 if (op->r.n) {
2468@@ -2682,55 +2983,94 @@ static var *evaluate(node *op, var *res)
2469 F = rsm->F;
2470 }
2471
2472+ /* Can't just check 'opinfo == OC_PRINT' here, parser ORs
2473+ * additional bits to opinfos of print/printf with redirects
2474+ */
2475 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2476 if (!op1) {
2477 fputs(getvar_s(intvar[F0]), F);
2478 } else {
2479- while (op1) {
2480- var *v = evaluate(nextarg(&op1), v1);
2481+ for (;;) {
2482+ var *v = evaluate(nextarg(&op1), TMPVAR0);
2483 if (v->type & VF_NUMBER) {
2484- fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2485- getvar_i(v), TRUE);
2486+ fmt_num(getvar_s(intvar[OFMT]),
2487+ getvar_i(v));
2488 fputs(g_buf, F);
2489 } else {
2490 fputs(getvar_s(v), F);
2491 }
2492-
2493- if (op1)
2494- fputs(getvar_s(intvar[OFS]), F);
2495+ if (!op1)
2496+ break;
2497+ fputs(getvar_s(intvar[OFS]), F);
2498 }
2499 }
2500 fputs(getvar_s(intvar[ORS]), F);
2501-
2502- } else { /* OC_PRINTF */
2503- char *s = awk_printf(op1);
2504+ } else { /* PRINTF */
2505+ IF_FEATURE_AWK_GNU_EXTENSIONS(size_t len;)
2506+ char *s = awk_printf(op1, &len);
2507+#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
2508+ fwrite(s, len, 1, F);
2509+#else
2510 fputs(s, F);
2511+#endif
2512 free(s);
2513 }
2514 fflush(F);
2515 break;
2516 }
2517
2518- /* case XC( OC_DELETE ): - moved to happen before arg evaluation */
2519+ case XC( OC_DELETE ):
2520+ debug_printf_eval("DELETE\n");
2521+ {
2522+ /* "delete" is special:
2523+ * "delete array[var--]" must evaluate index expr only once.
2524+ */
2525+ uint32_t info = op1->info & OPCLSMASK;
2526+ var *v;
2527+
2528+ if (info == OC_VAR) {
2529+ v = op1->l.v;
2530+ } else if (info == OC_FNARG) {
2531+ v = &fnargs[op1->l.aidx];
2532+ } else {
2533+ syntax_error(EMSG_NOT_ARRAY);
2534+ }
2535+ if (op1->r.n) { /* array ref? */
2536+ const char *s;
2537+ s = getvar_s(evaluate(op1->r.n, TMPVAR0));
2538+ hash_remove(iamarray(v), s);
2539+ } else {
2540+ clear_array(iamarray(v));
2541+ }
2542+ break;
2543+ }
2544
2545 case XC( OC_NEWSOURCE ):
2546+ debug_printf_eval("NEWSOURCE\n");
2547 g_progname = op->l.new_progname;
2548 break;
2549
2550 case XC( OC_RETURN ):
2551+ debug_printf_eval("RETURN\n");
2552 copyvar(res, L.v);
2553 break;
2554
2555 case XC( OC_NEXTFILE ):
2556+ debug_printf_eval("NEXTFILE\n");
2557 nextfile = TRUE;
2558 case XC( OC_NEXT ):
2559+ debug_printf_eval("NEXT\n");
2560 nextrec = TRUE;
2561 case XC( OC_DONE ):
2562+ debug_printf_eval("DONE\n");
2563 clrvar(res);
2564 break;
2565
2566 case XC( OC_EXIT ):
2567- awk_exit(L_d);
2568+ debug_printf_eval("EXIT\n");
2569+ if (op1)
2570+ G.exitcode = (int)L_d;
2571+ awk_exit();
2572
2573 /* -- recursive node type -- */
2574
2575@@ -2749,15 +3089,18 @@ static var *evaluate(node *op, var *res)
2576 break;
2577
2578 case XC( OC_IN ):
2579+ debug_printf_eval("IN\n");
2580 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2581 break;
2582
2583 case XC( OC_REGEXP ):
2584+ debug_printf_eval("REGEXP\n");
2585 op1 = op;
2586 L.s = getvar_s(intvar[F0]);
2587 goto re_cont;
2588
2589 case XC( OC_MATCH ):
2590+ debug_printf_eval("MATCH\n");
2591 op1 = op->r.n;
2592 re_cont:
2593 {
2594@@ -2772,61 +3115,80 @@ static var *evaluate(node *op, var *res)
2595 case XC( OC_MOVE ):
2596 debug_printf_eval("MOVE\n");
2597 /* if source is a temporary string, jusk relink it to dest */
2598-//Disabled: if R.v is numeric but happens to have cached R.v->string,
2599-//then L.v ends up being a string, which is wrong
2600-// if (R.v == v1+1 && R.v->string) {
2601-// res = setvar_p(L.v, R.v->string);
2602-// R.v->string = NULL;
2603-// } else {
2604+ if (R.v == TMPVAR1
2605+ && !(R.v->type & VF_NUMBER)
2606+ /* Why check !NUMBER? if R.v is a number but has cached R.v->string,
2607+ * L.v ends up a string, which is wrong */
2608+ /*&& R.v->string - always not NULL (right?) */
2609+ ) {
2610+ res = setvar_p(L.v, R.v->string); /* avoids strdup */
2611+ R.v->string = NULL;
2612+ } else {
2613 res = copyvar(L.v, R.v);
2614-// }
2615+ }
2616 break;
2617
2618 case XC( OC_TERNARY ):
2619- if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2620+ debug_printf_eval("TERNARY\n");
2621+ if (op->r.n->info != TI_COLON)
2622 syntax_error(EMSG_POSSIBLE_ERROR);
2623 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2624 break;
2625
2626 case XC( OC_FUNC ): {
2627- var *vbeg, *v;
2628+ var *argvars, *sv_fnargs;
2629 const char *sv_progname;
2630+ int nargs, i;
2631
2632- /* The body might be empty, still has to eval the args */
2633- if (!op->r.n->info && !op->r.f->body.first)
2634+ debug_printf_eval("FUNC\n");
2635+
2636+ if (!op->r.f->defined)
2637 syntax_error(EMSG_UNDEF_FUNC);
2638
2639- vbeg = v = nvalloc(op->r.f->nargs + 1);
2640+ /* The body might be empty, still has to eval the args */
2641+ nargs = op->r.f->nargs;
2642+ argvars = nvalloc(nargs);
2643+ i = 0;
2644 while (op1) {
2645- var *arg = evaluate(nextarg(&op1), v1);
2646- copyvar(v, arg);
2647- v->type |= VF_CHILD;
2648- v->x.parent = arg;
2649- if (++v - vbeg >= op->r.f->nargs)
2650- break;
2651+ var *arg = evaluate(nextarg(&op1), TMPVAR0);
2652+ if (i == nargs) {
2653+ /* call with more arguments than function takes.
2654+ * (gawk warns: "warning: function 'f' called with more arguments than declared").
2655+ * They are still evaluated, but discarded: */
2656+ clrvar(arg);
2657+ continue;
2658+ }
2659+ copyvar(&argvars[i], arg);
2660+ argvars[i].type |= VF_CHILD;
2661+ argvars[i].x.parent = arg;
2662+ i++;
2663 }
2664
2665- v = fnargs;
2666- fnargs = vbeg;
2667+ sv_fnargs = fnargs;
2668 sv_progname = g_progname;
2669
2670+ fnargs = argvars;
2671 res = evaluate(op->r.f->body.first, res);
2672+ nvfree(argvars, nargs);
2673
2674 g_progname = sv_progname;
2675- nvfree(fnargs);
2676- fnargs = v;
2677+ fnargs = sv_fnargs;
2678
2679 break;
2680 }
2681
2682 case XC( OC_GETLINE ):
2683- case XC( OC_PGETLINE ): {
2684+ debug_printf_eval("GETLINE /\n");
2685+ case XC( OC_PGETLINE ):
2686+ debug_printf_eval("PGETLINE\n");
2687+ {
2688 rstream *rsm;
2689 int i;
2690
2691 if (op1) {
2692 rsm = newfile(L.s);
2693 if (!rsm->F) {
2694+ /* NB: can't use "opinfo == TI_PGETLINE", would break "cmd" | getline */
2695 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2696 rsm->F = popen(L.s, "r");
2697 rsm->is_pipe = TRUE;
2698@@ -2861,16 +3223,34 @@ static var *evaluate(node *op, var *res)
2699 /* simple builtins */
2700 case XC( OC_FBLTIN ): {
2701 double R_d = R_d; /* for compiler */
2702+ debug_printf_eval("FBLTIN\n");
2703+
2704+ if (op1 && op1->info == TI_COMMA)
2705+ /* Simple builtins take one arg maximum */
2706+ syntax_error("Too many arguments");
2707
2708 switch (opn) {
2709 case F_in:
2710 R_d = (long long)L_d;
2711 break;
2712
2713- case F_rn:
2714- R_d = (double)rand() / (double)RAND_MAX;
2715+ case F_rn: /*rand*/
2716+ if (op1)
2717+ syntax_error("Too many arguments");
2718+ {
2719+#if RAND_MAX >= 0x7fffffff
2720+ uint32_t u = ((uint32_t)rand() << 16) ^ rand();
2721+ uint64_t v = ((uint64_t)rand() << 32) | u;
2722+ /* the above shift+or is optimized out on 32-bit arches */
2723+# if RAND_MAX > 0x7fffffff
2724+ v &= 0x7fffffffffffffffULL;
2725+# endif
2726+ R_d = (double)v / 0x8000000000000000ULL;
2727+#else
2728+# error Not implemented for this value of RAND_MAX
2729+#endif
2730 break;
2731-
2732+ }
2733 case F_co:
2734 if (ENABLE_FEATURE_AWK_LIBM) {
2735 R_d = cos(L_d);
2736@@ -2910,7 +3290,9 @@ static var *evaluate(node *op, var *res)
2737 srand(seed);
2738 break;
2739
2740- case F_ti:
2741+ case F_ti: /*systime*/
2742+ if (op1)
2743+ syntax_error("Too many arguments");
2744 R_d = time(NULL);
2745 break;
2746
2747@@ -2949,7 +3331,7 @@ static var *evaluate(node *op, var *res)
2748 rstream *rsm;
2749 int err = 0;
2750 rsm = (rstream *)hash_search(fdhash, L.s);
2751- debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2752+ debug_printf_eval("OC_FBLTIN close: op1:%p s:'%s' rsm:%p\n", op1, L.s, rsm);
2753 if (rsm) {
2754 debug_printf_eval("OC_FBLTIN F_cl "
2755 "rsm->is_pipe:%d, ->F:%p\n",
2756@@ -2960,6 +3342,11 @@ static var *evaluate(node *op, var *res)
2757 */
2758 if (rsm->F)
2759 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2760+//TODO: fix this case:
2761+// $ awk 'BEGIN { print close(""); print ERRNO }'
2762+// -1
2763+// close of redirection that was never opened
2764+// (we print 0, 0)
2765 free(rsm->buffer);
2766 hash_remove(fdhash, L.s);
2767 }
2768@@ -2974,14 +3361,18 @@ static var *evaluate(node *op, var *res)
2769 }
2770
2771 case XC( OC_BUILTIN ):
2772+ debug_printf_eval("BUILTIN\n");
2773 res = exec_builtin(op, res);
2774 break;
2775
2776 case XC( OC_SPRINTF ):
2777- setvar_p(res, awk_printf(op1));
2778+ debug_printf_eval("SPRINTF\n");
2779+ setvar_p(res, awk_printf(op1, NULL));
2780 break;
2781
2782- case XC( OC_UNARY ): {
2783+ case XC( OC_UNARY ):
2784+ debug_printf_eval("UNARY\n");
2785+ {
2786 double Ld, R_d;
2787
2788 Ld = R_d = getvar_i(R.v);
2789@@ -3011,7 +3402,9 @@ static var *evaluate(node *op, var *res)
2790 break;
2791 }
2792
2793- case XC( OC_FIELD ): {
2794+ case XC( OC_FIELD ):
2795+ debug_printf_eval("FIELD\n");
2796+ {
2797 int i = (int)getvar_i(R.v);
2798 if (i < 0)
2799 syntax_error(EMSG_NEGATIVE_FIELD);
2800@@ -3028,26 +3421,33 @@ static var *evaluate(node *op, var *res)
2801
2802 /* concatenation (" ") and index joining (",") */
2803 case XC( OC_CONCAT ):
2804+ debug_printf_eval("CONCAT /\n");
2805 case XC( OC_COMMA ): {
2806 const char *sep = "";
2807- if ((opinfo & OPCLSMASK) == OC_COMMA)
2808+ debug_printf_eval("COMMA\n");
2809+ if (opinfo == TI_COMMA)
2810 sep = getvar_s(intvar[SUBSEP]);
2811 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2812 break;
2813 }
2814
2815 case XC( OC_LAND ):
2816+ debug_printf_eval("LAND\n");
2817 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2818 break;
2819
2820 case XC( OC_LOR ):
2821+ debug_printf_eval("LOR\n");
2822 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2823 break;
2824
2825 case XC( OC_BINARY ):
2826- case XC( OC_REPLACE ): {
2827+ debug_printf_eval("BINARY /\n");
2828+ case XC( OC_REPLACE ):
2829+ debug_printf_eval("REPLACE\n");
2830+ {
2831 double R_d = getvar_i(R.v);
2832- debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2833+ debug_printf_eval("R_d:%f opn:%c\n", R_d, opn);
2834 switch (opn) {
2835 case '+':
2836 L_d += R_d;
2837@@ -3083,6 +3483,7 @@ static var *evaluate(node *op, var *res)
2838 case XC( OC_COMPARE ): {
2839 int i = i; /* for compiler */
2840 double Ld;
2841+ debug_printf_eval("COMPARE\n");
2842
2843 if (is_numeric(L.v) && is_numeric(R.v)) {
2844 Ld = getvar_i(L.v) - getvar_i(R.v);
2845@@ -3109,7 +3510,7 @@ static var *evaluate(node *op, var *res)
2846 default:
2847 syntax_error(EMSG_POSSIBLE_ERROR);
2848 } /* switch */
2849- next:
2850+
2851 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2852 op = op->a.n;
2853 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2854@@ -3118,7 +3519,10 @@ static var *evaluate(node *op, var *res)
2855 break;
2856 } /* while (op) */
2857
2858- nvfree(v1);
2859+ nvfree(tmpvars, 2);
2860+#undef TMPVAR0
2861+#undef TMPVAR1
2862+
2863 debug_printf_eval("returning from %s(): %p\n", __func__, res);
2864 return res;
2865 #undef fnargs
2866@@ -3126,25 +3530,21 @@ static var *evaluate(node *op, var *res)
2867 #undef sreg
2868 }
2869
2870-
2871 /* -------- main & co. -------- */
2872
2873-static int awk_exit(int r)
2874+static int awk_exit(void)
2875 {
2876- var tv;
2877 unsigned i;
2878- hash_item *hi;
2879-
2880- zero_out_var(&tv);
2881
2882 if (!exiting) {
2883 exiting = TRUE;
2884 nextrec = FALSE;
2885- evaluate(endseq.first, &tv);
2886+ evaluate(endseq.first, &G.exit__tmpvar);
2887 }
2888
2889 /* waiting for children */
2890 for (i = 0; i < fdhash->csize; i++) {
2891+ hash_item *hi;
2892 hi = fdhash->items[i];
2893 while (hi) {
2894 if (hi->data.rs.F && hi->data.rs.is_pipe)
2895@@ -3153,65 +3553,7 @@ static int awk_exit(int r)
2896 }
2897 }
2898
2899- exit(r);
2900-}
2901-
2902-/* if expr looks like "var=value", perform assignment and return 1,
2903- * otherwise return 0 */
2904-static int is_assignment(const char *expr)
2905-{
2906- char *exprc, *val;
2907-
2908- if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
2909- return FALSE;
2910- }
2911-
2912- exprc = xstrdup(expr);
2913- val = exprc + (val - expr);
2914- *val++ = '\0';
2915-
2916- unescape_string_in_place(val);
2917- setvar_u(newvar(exprc), val);
2918- free(exprc);
2919- return TRUE;
2920-}
2921-
2922-/* switch to next input file */
2923-static rstream *next_input_file(void)
2924-{
2925-#define rsm (G.next_input_file__rsm)
2926-#define files_happen (G.next_input_file__files_happen)
2927-
2928- FILE *F;
2929- const char *fname, *ind;
2930-
2931- if (rsm.F)
2932- fclose(rsm.F);
2933- rsm.F = NULL;
2934- rsm.pos = rsm.adv = 0;
2935-
2936- for (;;) {
2937- if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2938- if (files_happen)
2939- return NULL;
2940- fname = "-";
2941- F = stdin;
2942- break;
2943- }
2944- ind = getvar_s(incvar(intvar[ARGIND]));
2945- fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2946- if (fname && *fname && !is_assignment(fname)) {
2947- F = xfopen_stdin(fname);
2948- break;
2949- }
2950- }
2951-
2952- files_happen = TRUE;
2953- setvar_s(intvar[FILENAME], fname);
2954- rsm.F = F;
2955- return &rsm;
2956-#undef rsm
2957-#undef files_happen
2958+ exit(G.exitcode);
2959 }
2960
2961 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2962@@ -3224,12 +3566,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
2963 #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
2964 llist_t *list_e = NULL;
2965 #endif
2966- int i, j;
2967- var *v;
2968- var tv;
2969- char **envp;
2970- char *vnames = (char *)vNames; /* cheat */
2971- char *vvalues = (char *)vValues;
2972+ int i;
2973
2974 INIT_G();
2975
2976@@ -3238,48 +3575,43 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
2977 if (ENABLE_LOCALE_SUPPORT)
2978 setlocale(LC_NUMERIC, "C");
2979
2980- zero_out_var(&tv);
2981-
2982- /* allocate global buffer */
2983- g_buf = xmalloc(MAXVARFMT + 1);
2984-
2985- vhash = hash_init();
2986- ahash = hash_init();
2987- fdhash = hash_init();
2988- fnhash = hash_init();
2989-
2990 /* initialize variables */
2991- for (i = 0; *vnames; i++) {
2992- intvar[i] = v = newvar(nextword(&vnames));
2993- if (*vvalues != '\377')
2994- setvar_s(v, nextword(&vvalues));
2995- else
2996- setvar_i(v, 0);
2997-
2998- if (*vnames == '*') {
2999- v->type |= VF_SPECIAL;
3000- vnames++;
3001+ vhash = hash_init();
3002+ {
3003+ char *vnames = (char *)vNames; /* cheat */
3004+ char *vvalues = (char *)vValues;
3005+ for (i = 0; *vnames; i++) {
3006+ var *v;
3007+ intvar[i] = v = newvar(nextword(&vnames));
3008+ if (*vvalues != '\377')
3009+ setvar_s(v, nextword(&vvalues));
3010+ else
3011+ setvar_i(v, 0);
3012+
3013+ if (*vnames == '*') {
3014+ v->type |= VF_SPECIAL;
3015+ vnames++;
3016+ }
3017 }
3018 }
3019
3020 handle_special(intvar[FS]);
3021 handle_special(intvar[RS]);
3022
3023- newfile("/dev/stdin")->F = stdin;
3024- newfile("/dev/stdout")->F = stdout;
3025- newfile("/dev/stderr")->F = stderr;
3026-
3027 /* Huh, people report that sometimes environ is NULL. Oh well. */
3028- if (environ) for (envp = environ; *envp; envp++) {
3029- /* environ is writable, thus we don't strdup it needlessly */
3030- char *s = *envp;
3031- char *s1 = strchr(s, '=');
3032- if (s1) {
3033- *s1 = '\0';
3034- /* Both findvar and setvar_u take const char*
3035- * as 2nd arg -> environment is not trashed */
3036- setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3037- *s1 = '=';
3038+ if (environ) {
3039+ char **envp;
3040+ for (envp = environ; *envp; envp++) {
3041+ /* environ is writable, thus we don't strdup it needlessly */
3042+ char *s = *envp;
3043+ char *s1 = strchr(s, '=');
3044+ if (s1) {
3045+ *s1 = '\0';
3046+ /* Both findvar and setvar_u take const char*
3047+ * as 2nd arg -> environment is not trashed */
3048+ setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3049+ *s1 = '=';
3050+ }
3051 }
3052 }
3053 opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3054@@ -3295,20 +3627,19 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3055 if (!is_assignment(llist_pop(&list_v)))
3056 bb_show_usage();
3057 }
3058+
3059+ /* Parse all supplied programs */
3060+ fnhash = hash_init();
3061+ ahash = hash_init();
3062 while (list_f) {
3063- char *s = NULL;
3064- FILE *from_file;
3065+ int fd;
3066+ char *s;
3067
3068 g_progname = llist_pop(&list_f);
3069- from_file = xfopen_stdin(g_progname);
3070- /* one byte is reserved for some trick in next_token */
3071- for (i = j = 1; j > 0; i += j) {
3072- s = xrealloc(s, i + 4096);
3073- j = fread(s + i, 1, 4094, from_file);
3074- }
3075- s[i] = '\0';
3076- fclose(from_file);
3077- parse_program(s + 1);
3078+ fd = xopen_stdin(g_progname);
3079+ s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
3080+ close(fd);
3081+ parse_program(s);
3082 free(s);
3083 }
3084 g_progname = "cmd. line";
3085@@ -3317,11 +3648,23 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3086 parse_program(llist_pop(&list_e));
3087 }
3088 #endif
3089+//FIXME: preserve order of -e and -f
3090+//TODO: implement -i LIBRARY and -E FILE too, they are easy-ish
3091 if (!(opt & (OPT_f | OPT_e))) {
3092 if (!*argv)
3093 bb_show_usage();
3094 parse_program(*argv++);
3095 }
3096+ /* Free unused parse structures */
3097+ //hash_free(fnhash); // ~250 bytes when empty, used only for function names
3098+ //^^^^^^^^^^^^^^^^^ does not work, hash_clear() inside SEGVs
3099+ // (IOW: hash_clear() assumes it's a hash of variables. fnhash is not).
3100+ free(fnhash->items);
3101+ free(fnhash);
3102+ fnhash = NULL; // debug
3103+ //hash_free(ahash); // empty after parsing, will reuse as fdhash instead of freeing
3104+
3105+ /* Parsing done, on to executing */
3106
3107 /* fill in ARGV array */
3108 setari_u(intvar[ARGV], 0, "awk");
3109@@ -3330,9 +3673,14 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3110 setari_u(intvar[ARGV], ++i, *argv++);
3111 setvar_i(intvar[ARGC], i + 1);
3112
3113- evaluate(beginseq.first, &tv);
3114+ //fdhash = ahash; // done via define
3115+ newfile("/dev/stdin")->F = stdin;
3116+ newfile("/dev/stdout")->F = stdout;
3117+ newfile("/dev/stderr")->F = stderr;
3118+
3119+ evaluate(beginseq.first, &G.main__tmpvar);
3120 if (!mainseq.first && !endseq.first)
3121- awk_exit(EXIT_SUCCESS);
3122+ awk_exit();
3123
3124 /* input file could already be opened in BEGIN block */
3125 if (!iF)
3126@@ -3347,7 +3695,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3127 nextrec = FALSE;
3128 incvar(intvar[NR]);
3129 incvar(intvar[FNR]);
3130- evaluate(mainseq.first, &tv);
3131+ evaluate(mainseq.first, &G.main__tmpvar);
3132
3133 if (nextfile)
3134 break;
3135@@ -3359,6 +3707,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3136 iF = next_input_file();
3137 }
3138
3139- awk_exit(EXIT_SUCCESS);
3140+ awk_exit();
3141 /*return 0;*/
3142 }
3143diff --git a/testsuite/awk.tests b/testsuite/awk.tests
3144index 92c83d719..4a7a01245 100755
3145--- a/testsuite/awk.tests
3146+++ b/testsuite/awk.tests
3147@@ -44,6 +44,16 @@ testing "awk handles empty function f(arg){}" \
3148 "L1\n\nL2\n\n" \
3149 "" ""
3150
3151+prg='
3152+function empty_fun(){}
3153+END {empty_fun()
3154+ print "Ok"
3155+}'
3156+testing "awk handles empty function f(){}" \
3157+ "awk '$prg'" \
3158+ "Ok\n" \
3159+ "" ""
3160+
3161 prg='
3162 function outer_fun() {
3163 return 1
3164@@ -71,6 +81,23 @@ testing "awk properly handles undefined function" \
3165 "L1\n\nawk: cmd. line:5: Call to undefined function\n" \
3166 "" ""
3167
3168+prg='
3169+BEGIN {
3170+ v=1
3171+ a=2
3172+ print v (a)
3173+}'
3174+testing "awk 'v (a)' is not a function call, it is a concatenation" \
3175+ "awk '$prg' 2>&1" \
3176+ "12\n" \
3177+ "" ""
3178+
3179+prg='func f(){print"F"};func g(){print"G"};BEGIN{f(g(),g())}'
3180+testing "awk unused function args are evaluated" \
3181+ "awk '$prg' 2>&1" \
3182+ "G\nG\nF\n" \
3183+ "" ""
3184+
3185
3186 optional DESKTOP
3187 testing "awk hex const 1" "awk '{ print or(0xffffffff,1) }'" "4294967295\n" "" "\n"
3188@@ -352,19 +379,14 @@ testing "awk -e and ARGC" \
3189 ""
3190 SKIP=
3191
3192-# The examples are in fact not valid awk programs (break/continue
3193-# can only be used inside loops).
3194-# But we do accept them outside of loops.
3195-# We had a bug with misparsing "break ; else" sequence.
3196-# Test that *that* bug is fixed, using simplest possible scripts:
3197 testing "awk break" \
3198 "awk -f - 2>&1; echo \$?" \
3199- "0\n" \
3200+ "awk: -:1: 'break' not in a loop\n1\n" \
3201 "" \
3202 'BEGIN { if (1) break; else a = 1 }'
3203 testing "awk continue" \
3204 "awk -f - 2>&1; echo \$?" \
3205- "0\n" \
3206+ "awk: -:1: 'continue' not in a loop\n1\n" \
3207 "" \
3208 'BEGIN { if (1) continue; else a = 1 }'
3209
3210@@ -383,6 +405,11 @@ testing "awk errors on missing delete arg" \
3211 "awk -e '{delete}' 2>&1" "awk: cmd. line:1: Too few arguments\n" "" ""
3212 SKIP=
3213
3214+optional FEATURE_AWK_GNU_EXTENSIONS
3215+testing "awk printf('%c') can output NUL" \
3216+ "awk '{printf(\"hello%c null\n\", 0)}'" "hello\0 null\n" "" "\n"
3217+SKIP=
3218+
3219 # testing "description" "command" "result" "infile" "stdin"
3220 testing 'awk negative field access' \
3221 'awk 2>&1 -- '\''{ $(-1) }'\' \
3222@@ -413,4 +440,25 @@ testing 'awk $NF is empty' \
3223 '' \
3224 'a=====123='
3225
3226+testing "awk exit N propagates through END's exit" \
3227+ "awk 'BEGIN { exit 42 } END { exit }'; echo \$?" \
3228+ "42\n" \
3229+ '' ''
3230+
3231+testing "awk print + redirect" \
3232+ "awk 'BEGIN { print \"STDERR %s\" >\"/dev/stderr\" }' 2>&1" \
3233+ "STDERR %s\n" \
3234+ '' ''
3235+
3236+testing "awk \"cmd\" | getline" \
3237+ "awk 'BEGIN { \"echo HELLO\" | getline; print }'" \
3238+ "HELLO\n" \
3239+ '' ''
3240+
3241+# printf %% should print one % (had a bug where it didn't)
3242+testing 'awk printf %% prints one %' \
3243+ "awk 'BEGIN { printf \"%%\n\" }'" \
3244+ "%\n" \
3245+ '' ''
3246+
3247 exit $FAILCOUNT
3248diff --git a/testsuite/printf.tests b/testsuite/printf.tests
3249index 34a65926e..050edef71 100755
3250--- a/testsuite/printf.tests
3251+++ b/testsuite/printf.tests
3252@@ -79,6 +79,11 @@ testing "printf understands %Ld" \
3253 "-5\n""0\n" \
3254 "" ""
3255
3256+testing "printf understands %%" \
3257+ "${bb}printf '%%\n' 2>&1; echo \$?" \
3258+ "%\n""0\n" \
3259+ "" ""
3260+
3261 testing "printf handles positive numbers for %d" \
3262 "${bb}printf '%d\n' 3 +3 ' 3' ' +3' 2>&1; echo \$?" \
3263 "3\n"\
3264--
32652.33.0
3266