summary | refs | log | tree | commit | diff | stats
path: root/meta/recipes-core
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-core')
-rw-r--r--meta/recipes-core/busybox/busybox-inittab_1.33.2.bb (renamed from meta/recipes-core/busybox/busybox-inittab_1.33.0.bb)0
-rw-r--r--meta/recipes-core/busybox/busybox/0001-awk-fix-CVEs.patch3266
-rw-r--r--meta/recipes-core/busybox/busybox/0002-man-fix-segfault-in-man-1.patch30
-rw-r--r--meta/recipes-core/busybox/busybox_1.33.2.bb (renamed from meta/recipes-core/busybox/busybox_1.33.1.bb)4
-rw-r--r--meta/recipes-core/expat/expat/CVE-2021-45960.patch65
-rw-r--r--meta/recipes-core/expat/expat/CVE-2021-46143.patch49
-rw-r--r--meta/recipes-core/expat/expat/CVE-2022-22822-27.patch257
-rw-r--r--meta/recipes-core/expat/expat/CVE-2022-23852.patch33
-rw-r--r--meta/recipes-core/expat/expat/CVE-2022-23990.patch49
-rw-r--r--meta/recipes-core/expat/expat/CVE-2022-25235.patch261
-rw-r--r--meta/recipes-core/expat/expat/CVE-2022-25236-1.patch116
-rw-r--r--meta/recipes-core/expat/expat/CVE-2022-25236-2.patch232
-rw-r--r--meta/recipes-core/expat/expat_2.2.10.bb14
-rw-r--r--meta/recipes-core/glibc/glibc-version.inc2
-rw-r--r--meta/recipes-core/glibc/glibc_2.33.bb2
-rw-r--r--meta/recipes-core/images/build-appliance-image_15.0.0.bb2
-rwxr-xr-xmeta/recipes-core/initrdscripts/initramfs-framework/finish12
-rw-r--r--meta/recipes-core/libxml/libxml2/CVE-2022-23308-fix-regression.patch99
-rw-r--r--meta/recipes-core/libxml/libxml2/CVE-2022-23308.patch209
-rw-r--r--meta/recipes-core/libxml/libxml2_2.9.10.bb2
-rw-r--r--meta/recipes-core/os-release/os-release.bb4
-rw-r--r--meta/recipes-core/zlib/zlib/CVE-2018-25032.patch347
-rw-r--r--meta/recipes-core/zlib/zlib_1.2.11.bb1
23 files changed, 5048 insertions, 8 deletions
diff --git a/meta/recipes-core/busybox/busybox-inittab_1.33.0.bb b/meta/recipes-core/busybox/busybox-inittab_1.33.2.bb
index 3804f4f7b2..3804f4f7b2 100644
--- a/meta/recipes-core/busybox/busybox-inittab_1.33.0.bb
+++ b/meta/recipes-core/busybox/busybox-inittab_1.33.2.bb
diff --git a/meta/recipes-core/busybox/busybox/0001-awk-fix-CVEs.patch b/meta/recipes-core/busybox/busybox/0001-awk-fix-CVEs.patch
new file mode 100644
index 0000000000..c07b53ebfd
--- /dev/null
+++ b/meta/recipes-core/busybox/busybox/0001-awk-fix-CVEs.patch
@@ -0,0 +1,3266 @@
1From cf542caeed195af05fa6205341f829ccee53f8c2 Mon Sep 17 00:00:00 2001
2From: Chen Qi <Qi.Chen@windriver.com>
3Date: Tue, 4 Jan 2022 17:48:03 -0800
4Subject: [PATCH] awk: fix CVEs
5
6The awk CVE fixes are hard to separate from one another, so this patch was
7produced by squashing all upstream awk.c commits with the following commands:
8git rev-list --reverse 1_33_2..1_34_1 -- editors/awk.c | xargs git cherry-pick
9git reset HEAD~66 && git add . && git commit
10
11CVE: CVE-2021-42378
12CVE: CVE-2021-42379
13CVE: CVE-2021-42380
14CVE: CVE-2021-42381
15CVE: CVE-2021-42382
16CVE: CVE-2021-42383
17CVE: CVE-2021-42384
18CVE: CVE-2021-42385
19CVE: CVE-2021-42386
20
21Upstream-Status: Backport
22
23Signed-off-by: Chen Qi <Qi.Chen@windriver.com>
24---
25 editors/awk.c | 2060 +++++++++++++++++++++++-----------------
26 testsuite/awk.tests | 62 +-
27 testsuite/printf.tests | 5 +
28 3 files changed, 1264 insertions(+), 863 deletions(-)
29
30diff --git a/editors/awk.c b/editors/awk.c
31index 2c15f9e4e..f6314ac72 100644
32--- a/editors/awk.c
33+++ b/editors/awk.c
34@@ -66,6 +66,8 @@
35 #endif
36 #ifndef debug_printf_parse
37 # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
38+#else
39+# define debug_parse_print_tc(...) ((void)0)
40 #endif
41
42
43@@ -91,7 +93,6 @@ enum {
44 };
45
46 #define MAXVARFMT 240
47-#define MINNVBLOCK 64
48
49 /* variable flags */
50 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
51@@ -101,7 +102,7 @@ enum {
52 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
53 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
54 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
55-#define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
56+#define VF_FSTR 0x1000 /* 1 = don't free() var::string (not malloced, or is owned by something else) */
57 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
58 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
59
60@@ -118,8 +119,8 @@ typedef struct walker_list {
61 /* Variable */
62 typedef struct var_s {
63 unsigned type; /* flags */
64- double number;
65 char *string;
66+ double number;
67 union {
68 int aidx; /* func arg idx (for compilation stage) */
69 struct xhash_s *array; /* array ptr */
70@@ -138,6 +139,7 @@ typedef struct chain_s {
71 /* Function */
72 typedef struct func_s {
73 unsigned nargs;
74+ smallint defined;
75 struct chain_s body;
76 } func;
77
78@@ -177,7 +179,7 @@ typedef struct node_s {
79 struct node_s *n;
80 var *v;
81 int aidx;
82- char *new_progname;
83+ const char *new_progname;
84 regex_t *re;
85 } l;
86 union {
87@@ -190,91 +192,120 @@ typedef struct node_s {
88 } a;
89 } node;
90
91-/* Block of temporary variables */
92-typedef struct nvblock_s {
93- int size;
94- var *pos;
95- struct nvblock_s *prev;
96- struct nvblock_s *next;
97- var nv[];
98-} nvblock;
99-
100 typedef struct tsplitter_s {
101 node n;
102 regex_t re[2];
103 } tsplitter;
104
105 /* simple token classes */
106-/* Order and hex values are very important!!! See next_token() */
107-#define TC_SEQSTART (1 << 0) /* ( */
108-#define TC_SEQTERM (1 << 1) /* ) */
109-#define TC_REGEXP (1 << 2) /* /.../ */
110-#define TC_OUTRDR (1 << 3) /* | > >> */
111-#define TC_UOPPOST (1 << 4) /* unary postfix operator */
112-#define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
113-#define TC_BINOPX (1 << 6) /* two-opnd operator */
114-#define TC_IN (1 << 7)
115-#define TC_COMMA (1 << 8)
116-#define TC_PIPE (1 << 9) /* input redirection pipe */
117-#define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
118-#define TC_ARRTERM (1 << 11) /* ] */
119-#define TC_GRPSTART (1 << 12) /* { */
120-#define TC_GRPTERM (1 << 13) /* } */
121-#define TC_SEMICOL (1 << 14)
122-#define TC_NEWLINE (1 << 15)
123-#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
124-#define TC_WHILE (1 << 17)
125-#define TC_ELSE (1 << 18)
126-#define TC_BUILTIN (1 << 19)
127+/* order and hex values are very important!!! See next_token() */
128+#define TC_LPAREN (1 << 0) /* ( */
129+#define TC_RPAREN (1 << 1) /* ) */
130+#define TC_REGEXP (1 << 2) /* /.../ */
131+#define TC_OUTRDR (1 << 3) /* | > >> */
132+#define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */
133+#define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */
134+#define TC_BINOPX (1 << 6) /* two-opnd operator */
135+#define TC_IN (1 << 7) /* 'in' */
136+#define TC_COMMA (1 << 8) /* , */
137+#define TC_PIPE (1 << 9) /* input redirection pipe | */
138+#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */
139+#define TC_ARRTERM (1 << 11) /* ] */
140+#define TC_LBRACE (1 << 12) /* { */
141+#define TC_RBRACE (1 << 13) /* } */
142+#define TC_SEMICOL (1 << 14) /* ; */
143+#define TC_NEWLINE (1 << 15)
144+#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
145+#define TC_WHILE (1 << 17) /* 'while' */
146+#define TC_ELSE (1 << 18) /* 'else' */
147+#define TC_BUILTIN (1 << 19)
148 /* This costs ~50 bytes of code.
149 * A separate class to support deprecated "length" form. If we don't need that
150 * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
151 * can be merged with TC_BUILTIN:
152 */
153-#define TC_LENGTH (1 << 20)
154-#define TC_GETLINE (1 << 21)
155-#define TC_FUNCDECL (1 << 22) /* 'function' 'func' */
156-#define TC_BEGIN (1 << 23)
157-#define TC_END (1 << 24)
158-#define TC_EOF (1 << 25)
159-#define TC_VARIABLE (1 << 26)
160-#define TC_ARRAY (1 << 27)
161-#define TC_FUNCTION (1 << 28)
162-#define TC_STRING (1 << 29)
163-#define TC_NUMBER (1 << 30)
164-
165-#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
166-
167-/* combined token classes */
168-#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
169-//#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
170-#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
171- | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
172- | TC_SEQSTART | TC_STRING | TC_NUMBER)
173-
174-#define TC_STATEMNT (TC_STATX | TC_WHILE)
175-#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
176+#define TC_LENGTH (1 << 20) /* 'length' */
177+#define TC_GETLINE (1 << 21) /* 'getline' */
178+#define TC_FUNCDECL (1 << 22) /* 'function' 'func' */
179+#define TC_BEGIN (1 << 23) /* 'BEGIN' */
180+#define TC_END (1 << 24) /* 'END' */
181+#define TC_EOF (1 << 25)
182+#define TC_VARIABLE (1 << 26) /* name */
183+#define TC_ARRAY (1 << 27) /* name[ */
184+#define TC_FUNCTION (1 << 28) /* name( */
185+#define TC_STRING (1 << 29) /* "..." */
186+#define TC_NUMBER (1 << 30)
187+
188+#ifndef debug_parse_print_tc
189+static void debug_parse_print_tc(uint32_t n)
190+{
191+ if (n & TC_LPAREN ) debug_printf_parse(" LPAREN" );
192+ if (n & TC_RPAREN ) debug_printf_parse(" RPAREN" );
193+ if (n & TC_REGEXP ) debug_printf_parse(" REGEXP" );
194+ if (n & TC_OUTRDR ) debug_printf_parse(" OUTRDR" );
195+ if (n & TC_UOPPOST ) debug_printf_parse(" UOPPOST" );
196+ if (n & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" );
197+ if (n & TC_BINOPX ) debug_printf_parse(" BINOPX" );
198+ if (n & TC_IN ) debug_printf_parse(" IN" );
199+ if (n & TC_COMMA ) debug_printf_parse(" COMMA" );
200+ if (n & TC_PIPE ) debug_printf_parse(" PIPE" );
201+ if (n & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" );
202+ if (n & TC_ARRTERM ) debug_printf_parse(" ARRTERM" );
203+ if (n & TC_LBRACE ) debug_printf_parse(" LBRACE" );
204+ if (n & TC_RBRACE ) debug_printf_parse(" RBRACE" );
205+ if (n & TC_SEMICOL ) debug_printf_parse(" SEMICOL" );
206+ if (n & TC_NEWLINE ) debug_printf_parse(" NEWLINE" );
207+ if (n & TC_STATX ) debug_printf_parse(" STATX" );
208+ if (n & TC_WHILE ) debug_printf_parse(" WHILE" );
209+ if (n & TC_ELSE ) debug_printf_parse(" ELSE" );
210+ if (n & TC_BUILTIN ) debug_printf_parse(" BUILTIN" );
211+ if (n & TC_LENGTH ) debug_printf_parse(" LENGTH" );
212+ if (n & TC_GETLINE ) debug_printf_parse(" GETLINE" );
213+ if (n & TC_FUNCDECL) debug_printf_parse(" FUNCDECL");
214+ if (n & TC_BEGIN ) debug_printf_parse(" BEGIN" );
215+ if (n & TC_END ) debug_printf_parse(" END" );
216+ if (n & TC_EOF ) debug_printf_parse(" EOF" );
217+ if (n & TC_VARIABLE) debug_printf_parse(" VARIABLE");
218+ if (n & TC_ARRAY ) debug_printf_parse(" ARRAY" );
219+ if (n & TC_FUNCTION) debug_printf_parse(" FUNCTION");
220+ if (n & TC_STRING ) debug_printf_parse(" STRING" );
221+ if (n & TC_NUMBER ) debug_printf_parse(" NUMBER" );
222+}
223+#endif
224+
225+/* combined token classes ("token [class] sets") */
226+#define TS_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
227+
228+#define TS_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
229+//#define TS_UNARYOP (TS_UOPPRE | TC_UOPPOST)
230+#define TS_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
231+ | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
232+ | TC_LPAREN | TC_STRING | TC_NUMBER)
233+
234+#define TS_LVALUE (TC_VARIABLE | TC_ARRAY)
235+#define TS_STATEMNT (TC_STATX | TC_WHILE)
236
237 /* word tokens, cannot mean something else if not expected */
238-#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE \
239- | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
240- | TC_FUNCDECL | TC_BEGIN | TC_END)
241+#define TS_WORD (TC_IN | TS_STATEMNT | TC_ELSE \
242+ | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
243+ | TC_FUNCDECL | TC_BEGIN | TC_END)
244
245 /* discard newlines after these */
246-#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
247- | TC_BINOP | TC_OPTERM)
248+#define TS_NOTERM (TS_BINOP | TC_COMMA | TC_LBRACE | TC_RBRACE \
249+ | TC_SEMICOL | TC_NEWLINE)
250
251 /* what can expression begin with */
252-#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
253+#define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP)
254 /* what can group begin with */
255-#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
256+#define TS_GRPSEQ (TS_OPSEQ | TS_STATEMNT \
257+ | TC_SEMICOL | TC_NEWLINE | TC_LBRACE)
258
259-/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
260+/* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */
261 /* operator is inserted between them */
262-#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
263+#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_RPAREN \
264 | TC_STRING | TC_NUMBER | TC_UOPPOST \
265 | TC_LENGTH)
266-#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
267+#define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE)
268
269 #define OF_RES1 0x010000
270 #define OF_RES2 0x020000
271@@ -284,13 +315,12 @@ typedef struct tsplitter_s {
272 #define OF_CHECKED 0x200000
273 #define OF_REQUIRED 0x400000
274
275-
276 /* combined operator flags */
277 #define xx 0
278 #define xV OF_RES2
279 #define xS (OF_RES2 | OF_STR2)
280 #define Vx OF_RES1
281-#define Rx (OF_RES1 | OF_NUM1 | OF_REQUIRED)
282+#define Rx OF_REQUIRED
283 #define VV (OF_RES1 | OF_RES2)
284 #define Nx (OF_RES1 | OF_NUM1)
285 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
286@@ -302,8 +332,7 @@ typedef struct tsplitter_s {
287 #define OPNMASK 0x007F
288
289 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
290- * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
291- * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
292+ * (for builtins it has different meaning)
293 */
294 #undef P
295 #undef PRIMASK
296@@ -313,10 +342,8 @@ typedef struct tsplitter_s {
297 #define PRIMASK2 0x7E000000
298
299 /* Operation classes */
300-
301 #define SHIFT_TIL_THIS 0x0600
302 #define RECUR_FROM_THIS 0x1000
303-
304 enum {
305 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
306 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
307@@ -358,8 +385,8 @@ enum {
308 #define NTCC '\377'
309
310 static const char tokenlist[] ALIGN1 =
311- "\1(" NTC /* TC_SEQSTART */
312- "\1)" NTC /* TC_SEQTERM */
313+ "\1(" NTC /* TC_LPAREN */
314+ "\1)" NTC /* TC_RPAREN */
315 "\1/" NTC /* TC_REGEXP */
316 "\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */
317 "\2++" "\2--" NTC /* TC_UOPPOST */
318@@ -376,8 +403,8 @@ static const char tokenlist[] ALIGN1 =
319 "\1|" NTC /* TC_PIPE */
320 "\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */
321 "\1]" NTC /* TC_ARRTERM */
322- "\1{" NTC /* TC_GRPSTART */
323- "\1}" NTC /* TC_GRPTERM */
324+ "\1{" NTC /* TC_LBRACE */
325+ "\1}" NTC /* TC_RBRACE */
326 "\1;" NTC /* TC_SEMICOL */
327 "\1\n" NTC /* TC_NEWLINE */
328 "\2if" "\2do" "\3for" "\5break" /* TC_STATX */
329@@ -391,7 +418,7 @@ static const char tokenlist[] ALIGN1 =
330 "\5close" "\6system" "\6fflush" "\5atan2"
331 "\3cos" "\3exp" "\3int" "\3log"
332 "\4rand" "\3sin" "\4sqrt" "\5srand"
333- "\6gensub" "\4gsub" "\5index" /* "\6length" was here */
334+ "\6gensub" "\4gsub" "\5index" /* "\6length" was here */
335 "\5match" "\5split" "\7sprintf" "\3sub"
336 "\6substr" "\7systime" "\10strftime" "\6mktime"
337 "\7tolower" "\7toupper" NTC
338@@ -403,25 +430,32 @@ static const char tokenlist[] ALIGN1 =
339 /* compiler adds trailing "\0" */
340 ;
341
342-#define OC_B OC_BUILTIN
343-
344 static const uint32_t tokeninfo[] ALIGN4 = {
345 0,
346 0,
347- OC_REGEXP,
348+#define TI_REGEXP OC_REGEXP
349+ TI_REGEXP,
350 xS|'a', xS|'w', xS|'|',
351 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
352- OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
353+#define TI_PREINC (OC_UNARY|xV|P(9)|'P')
354+#define TI_PREDEC (OC_UNARY|xV|P(9)|'M')
355+ TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5),
356 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
357 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
358 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
359 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
360 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
361- OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
362- OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
363- OC_IN|SV|P(49), /* TC_IN */
364- OC_COMMA|SS|P(80),
365- OC_PGETLINE|SV|P(37),
366+#define TI_LESS (OC_COMPARE|VV|P(39)|2)
367+ TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
368+#define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?')
369+#define TI_COLON (OC_COLON|xx|P(67)|':')
370+ OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON,
371+#define TI_IN (OC_IN|SV|P(49))
372+ TI_IN,
373+#define TI_COMMA (OC_COMMA|SS|P(80))
374+ TI_COMMA,
375+#define TI_PGETLINE (OC_PGETLINE|SV|P(37))
376+ TI_PGETLINE,
377 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
378 0, /* ] */
379 0,
380@@ -434,20 +468,45 @@ static const uint32_t tokeninfo[] ALIGN4 = {
381 OC_RETURN|Vx, OC_EXIT|Nx,
382 ST_WHILE,
383 0, /* else */
384- OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
385- OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
386- OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
387- OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
388- OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
389- OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */
390- OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
391- OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
392- OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
393- OC_FBLTIN|Sx|F_le, /* TC_LENGTH */
394- OC_GETLINE|SV|P(0),
395- 0, 0,
396- 0,
397- 0 /* TC_END */
398+// OC_B's are builtins with enforced minimum number of arguments (two upper bits).
399+// Highest byte bit pattern: nn s3s2s1 v3v2v1
400+// nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var
401+// OC_F's are builtins with zero or one argument.
402+// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt
403+// Check for no args is present in builtins' code (not in this table): rand, systime
404+// Have one _optional_ arg: fflush, srand, length
405+#define OC_B OC_BUILTIN
406+#define OC_F OC_FBLTIN
407+#define A1 P(0x40) /*one arg*/
408+#define A2 P(0x80) /*two args*/
409+#define A3 P(0xc0) /*three args*/
410+#define __v P(1)
411+#define _vv P(3)
412+#define __s__v P(9)
413+#define __s_vv P(0x0b)
414+#define __svvv P(0x0f)
415+#define _ss_vv P(0x1b)
416+#define _s_vv_ P(0x16)
417+#define ss_vv_ P(0x36)
418+ OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or
419+ OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor
420+ OC_F|F_cl|Sx|Rx, OC_F|F_sy|Sx|Rx, OC_F|F_ff|Sx, OC_B|B_a2|_vv|A2, // close system fflush atan2
421+ OC_F|F_co|Nx|Rx, OC_F|F_ex|Nx|Rx, OC_F|F_in|Nx|Rx, OC_F|F_lg|Nx|Rx, // cos exp int log
422+ OC_F|F_rn, OC_F|F_si|Nx|Rx, OC_F|F_sq|Nx|Rx, OC_F|F_sr|Nx, // rand sin sqrt srand
423+ OC_B|B_ge|_s_vv_|A3,OC_B|B_gs|ss_vv_|A2,OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/
424+ OC_B|B_ma|__s__v|A2,OC_B|B_sp|__s_vv|A2,OC_SPRINTF, OC_B|B_su|ss_vv_|A2,// match split sprintf sub
425+ OC_B|B_ss|__svvv|A2,OC_F|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime
426+ OC_B|B_lo|__s__v|A1,OC_B|B_up|__s__v|A1, // tolower toupper
427+ OC_F|F_le|Sx, // length
428+ OC_GETLINE|SV, // getline
429+ 0, 0, // func function
430+ 0, // BEGIN
431+ 0 // END
432+#undef A1
433+#undef A2
434+#undef A3
435+#undef OC_B
436+#undef OC_F
437 };
438
439 /* internal variable names and their initial values */
440@@ -488,21 +547,29 @@ struct globals {
441 chain *seq;
442 node *break_ptr, *continue_ptr;
443 rstream *iF;
444- xhash *vhash, *ahash, *fdhash, *fnhash;
445+ xhash *ahash; /* argument names, used only while parsing function bodies */
446+ xhash *fnhash; /* function names, used only in parsing stage */
447+ xhash *vhash; /* variables and arrays */
448+ //xhash *fdhash; /* file objects, used only in execution stage */
449+ //we are reusing ahash as fdhash, via define (see later)
450 const char *g_progname;
451 int g_lineno;
452 int nfields;
453 int maxfields; /* used in fsrealloc() only */
454 var *Fields;
455- nvblock *g_cb;
456 char *g_pos;
457- char *g_buf;
458+ char g_saved_ch;
459 smallint icase;
460 smallint exiting;
461 smallint nextrec;
462 smallint nextfile;
463 smallint is_f0_split;
464 smallint t_rollback;
465+
466+ /* former statics from various functions */
467+ smallint next_token__concat_inserted;
468+ uint32_t next_token__save_tclass;
469+ uint32_t next_token__save_info;
470 };
471 struct globals2 {
472 uint32_t t_info; /* often used */
473@@ -515,32 +582,35 @@ struct globals2 {
474 /* former statics from various functions */
475 char *split_f0__fstrings;
476
477- uint32_t next_token__save_tclass;
478- uint32_t next_token__save_info;
479- uint32_t next_token__ltclass;
480- smallint next_token__concat_inserted;
481-
482- smallint next_input_file__files_happen;
483 rstream next_input_file__rsm;
484+ smallint next_input_file__files_happen;
485+
486+ smalluint exitcode;
487
488- var *evaluate__fnargs;
489 unsigned evaluate__seed;
490+ var *evaluate__fnargs;
491 regex_t evaluate__sreg;
492
493- var ptest__v;
494+ var ptest__tmpvar;
495+ var awk_printf__tmpvar;
496+ var as_regex__tmpvar;
497+ var exit__tmpvar;
498+ var main__tmpvar;
499
500 tsplitter exec_builtin__tspl;
501
502 /* biggest and least used members go last */
503 tsplitter fsplitter, rsplitter;
504+
505+ char g_buf[MAXVARFMT + 1];
506 };
507 #define G1 (ptr_to_globals[-1])
508 #define G (*(struct globals2 *)ptr_to_globals)
509 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
510-/*char G1size[sizeof(G1)]; - 0x74 */
511-/*char Gsize[sizeof(G)]; - 0x1c4 */
512+//char G1size[sizeof(G1)]; // 0x70
513+//char Gsize[sizeof(G)]; // 0x2f8
514 /* Trying to keep most of members accessible with short offsets: */
515-/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
516+//char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; // 0x7c
517 #define t_double (G1.t_double )
518 #define beginseq (G1.beginseq )
519 #define mainseq (G1.mainseq )
520@@ -549,18 +619,20 @@ struct globals2 {
521 #define break_ptr (G1.break_ptr )
522 #define continue_ptr (G1.continue_ptr)
523 #define iF (G1.iF )
524-#define vhash (G1.vhash )
525 #define ahash (G1.ahash )
526-#define fdhash (G1.fdhash )
527 #define fnhash (G1.fnhash )
528+#define vhash (G1.vhash )
529+#define fdhash ahash
530+//^^^^^^^^^^^^^^^^^^ ahash is cleared after every function parsing,
531+// and ends up empty after parsing phase. Thus, we can simply reuse it
532+// for fdhash in execution stage.
533 #define g_progname (G1.g_progname )
534 #define g_lineno (G1.g_lineno )
535 #define nfields (G1.nfields )
536 #define maxfields (G1.maxfields )
537 #define Fields (G1.Fields )
538-#define g_cb (G1.g_cb )
539 #define g_pos (G1.g_pos )
540-#define g_buf (G1.g_buf )
541+#define g_saved_ch (G1.g_saved_ch )
542 #define icase (G1.icase )
543 #define exiting (G1.exiting )
544 #define nextrec (G1.nextrec )
545@@ -574,25 +646,13 @@ struct globals2 {
546 #define intvar (G.intvar )
547 #define fsplitter (G.fsplitter )
548 #define rsplitter (G.rsplitter )
549+#define g_buf (G.g_buf )
550 #define INIT_G() do { \
551 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
552- G.next_token__ltclass = TC_OPTERM; \
553+ t_tclass = TC_NEWLINE; \
554 G.evaluate__seed = 1; \
555 } while (0)
556
557-
558-/* function prototypes */
559-static void handle_special(var *);
560-static node *parse_expr(uint32_t);
561-static void chain_group(void);
562-static var *evaluate(node *, var *);
563-static rstream *next_input_file(void);
564-static int fmt_num(char *, int, const char *, double, int);
565-static int awk_exit(int) NORETURN;
566-
567-/* ---- error handling ---- */
568-
569-static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
570 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
571 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
572 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
573@@ -604,10 +664,7 @@ static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
574 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
575 static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field";
576
577-static void zero_out_var(var *vp)
578-{
579- memset(vp, 0, sizeof(*vp));
580-}
581+static int awk_exit(void) NORETURN;
582
583 static void syntax_error(const char *message) NORETURN;
584 static void syntax_error(const char *message)
585@@ -638,12 +695,40 @@ static xhash *hash_init(void)
586 return newhash;
587 }
588
589+static void hash_clear(xhash *hash)
590+{
591+ unsigned i;
592+ hash_item *hi, *thi;
593+
594+ for (i = 0; i < hash->csize; i++) {
595+ hi = hash->items[i];
596+ while (hi) {
597+ thi = hi;
598+ hi = hi->next;
599+//FIXME: this assumes that it's a hash of *variables*:
600+ free(thi->data.v.string);
601+ free(thi);
602+ }
603+ hash->items[i] = NULL;
604+ }
605+ hash->glen = hash->nel = 0;
606+}
607+
608+#if 0 //UNUSED
609+static void hash_free(xhash *hash)
610+{
611+ hash_clear(hash);
612+ free(hash->items);
613+ free(hash);
614+}
615+#endif
616+
617 /* find item in hash, return ptr to data, NULL if not found */
618-static void *hash_search(xhash *hash, const char *name)
619+static NOINLINE void *hash_search3(xhash *hash, const char *name, unsigned idx)
620 {
621 hash_item *hi;
622
623- hi = hash->items[hashidx(name) % hash->csize];
624+ hi = hash->items[idx % hash->csize];
625 while (hi) {
626 if (strcmp(hi->name, name) == 0)
627 return &hi->data;
628@@ -652,6 +737,11 @@ static void *hash_search(xhash *hash, const char *name)
629 return NULL;
630 }
631
632+static void *hash_search(xhash *hash, const char *name)
633+{
634+ return hash_search3(hash, name, hashidx(name));
635+}
636+
637 /* grow hash if it becomes too big */
638 static void hash_rebuild(xhash *hash)
639 {
640@@ -687,16 +777,17 @@ static void *hash_find(xhash *hash, const char *name)
641 unsigned idx;
642 int l;
643
644- hi = hash_search(hash, name);
645+ idx = hashidx(name);
646+ hi = hash_search3(hash, name, idx);
647 if (!hi) {
648- if (++hash->nel / hash->csize > 10)
649+ if (++hash->nel > hash->csize * 8)
650 hash_rebuild(hash);
651
652 l = strlen(name) + 1;
653 hi = xzalloc(sizeof(*hi) + l);
654 strcpy(hi->name, name);
655
656- idx = hashidx(name) % hash->csize;
657+ idx = idx % hash->csize;
658 hi->next = hash->items[idx];
659 hash->items[idx] = hi;
660 hash->glen += l;
661@@ -731,7 +822,7 @@ static void hash_remove(xhash *hash, const char *name)
662
663 static char *skip_spaces(char *p)
664 {
665- while (1) {
666+ for (;;) {
667 if (*p == '\\' && p[1] == '\n') {
668 p++;
669 t_lineno++;
670@@ -747,8 +838,10 @@ static char *skip_spaces(char *p)
671 static char *nextword(char **s)
672 {
673 char *p = *s;
674- while (*(*s)++ != '\0')
675+ char *q = p;
676+ while (*q++ != '\0')
677 continue;
678+ *s = q;
679 return p;
680 }
681
682@@ -811,10 +904,27 @@ static double my_strtod(char **pp)
683
684 /* -------- working with variables (set/get/copy/etc) -------- */
685
686-static xhash *iamarray(var *v)
687+static void fmt_num(const char *format, double n)
688 {
689- var *a = v;
690+ if (n == (long long)n) {
691+ snprintf(g_buf, MAXVARFMT, "%lld", (long long)n);
692+ } else {
693+ const char *s = format;
694+ char c;
695+
696+ do { c = *s; } while (c && *++s);
697+ if (strchr("diouxX", c)) {
698+ snprintf(g_buf, MAXVARFMT, format, (int)n);
699+ } else if (strchr("eEfFgGaA", c)) {
700+ snprintf(g_buf, MAXVARFMT, format, n);
701+ } else {
702+ syntax_error(EMSG_INV_FMT);
703+ }
704+ }
705+}
706
707+static xhash *iamarray(var *a)
708+{
709 while (a->type & VF_CHILD)
710 a = a->x.parent;
711
712@@ -825,23 +935,7 @@ static xhash *iamarray(var *v)
713 return a->x.array;
714 }
715
716-static void clear_array(xhash *array)
717-{
718- unsigned i;
719- hash_item *hi, *thi;
720-
721- for (i = 0; i < array->csize; i++) {
722- hi = array->items[i];
723- while (hi) {
724- thi = hi;
725- hi = hi->next;
726- free(thi->data.v.string);
727- free(thi);
728- }
729- array->items[i] = NULL;
730- }
731- array->glen = array->nel = 0;
732-}
733+#define clear_array(array) hash_clear(array)
734
735 /* clear a variable */
736 static var *clrvar(var *v)
737@@ -855,6 +949,8 @@ static var *clrvar(var *v)
738 return v;
739 }
740
741+static void handle_special(var *);
742+
743 /* assign string value to variable */
744 static var *setvar_p(var *v, char *value)
745 {
746@@ -901,7 +997,7 @@ static const char *getvar_s(var *v)
747 {
748 /* if v is numeric and has no cached string, convert it to string */
749 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
750- fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
751+ fmt_num(getvar_s(intvar[CONVFMT]), v->number);
752 v->string = xstrdup(g_buf);
753 v->type |= VF_CACHED;
754 }
755@@ -920,6 +1016,7 @@ static double getvar_i(var *v)
756 v->number = my_strtod(&s);
757 debug_printf_eval("%f (s:'%s')\n", v->number, s);
758 if (v->type & VF_USER) {
759+//TODO: skip_spaces() also skips backslash+newline, is it intended here?
760 s = skip_spaces(s);
761 if (*s != '\0')
762 v->type &= ~VF_USER;
763@@ -981,94 +1078,28 @@ static int istrue(var *v)
764 return (v->string && v->string[0]);
765 }
766
767-/* temporary variables allocator. Last allocated should be first freed */
768-static var *nvalloc(int n)
769-{
770- nvblock *pb = NULL;
771- var *v, *r;
772- int size;
773-
774- while (g_cb) {
775- pb = g_cb;
776- if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
777- break;
778- g_cb = g_cb->next;
779- }
780-
781- if (!g_cb) {
782- size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
783- g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
784- g_cb->size = size;
785- g_cb->pos = g_cb->nv;
786- g_cb->prev = pb;
787- /*g_cb->next = NULL; - xzalloc did it */
788- if (pb)
789- pb->next = g_cb;
790- }
791-
792- v = r = g_cb->pos;
793- g_cb->pos += n;
794-
795- while (v < g_cb->pos) {
796- v->type = 0;
797- v->string = NULL;
798- v++;
799- }
800-
801- return r;
802-}
803-
804-static void nvfree(var *v)
805-{
806- var *p;
807-
808- if (v < g_cb->nv || v >= g_cb->pos)
809- syntax_error(EMSG_INTERNAL_ERROR);
810-
811- for (p = v; p < g_cb->pos; p++) {
812- if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
813- clear_array(iamarray(p));
814- free(p->x.array->items);
815- free(p->x.array);
816- }
817- if (p->type & VF_WALK) {
818- walker_list *n;
819- walker_list *w = p->x.walker;
820- debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
821- p->x.walker = NULL;
822- while (w) {
823- n = w->prev;
824- debug_printf_walker(" free(%p)\n", w);
825- free(w);
826- w = n;
827- }
828- }
829- clrvar(p);
830- }
831-
832- g_cb->pos = v;
833- while (g_cb->prev && g_cb->pos == g_cb->nv) {
834- g_cb = g_cb->prev;
835- }
836-}
837-
838 /* ------- awk program text parsing ------- */
839
840-/* Parse next token pointed by global pos, place results into global ttt.
841- * If token isn't expected, give away. Return token class
842+/* Parse next token pointed by global pos, place results into global t_XYZ variables.
843+ * If token isn't expected, print error message and die.
844+ * Return token class (also store it in t_tclass).
845 */
846 static uint32_t next_token(uint32_t expected)
847 {
848-#define concat_inserted (G.next_token__concat_inserted)
849-#define save_tclass (G.next_token__save_tclass)
850-#define save_info (G.next_token__save_info)
851-/* Initialized to TC_OPTERM: */
852-#define ltclass (G.next_token__ltclass)
853+#define concat_inserted (G1.next_token__concat_inserted)
854+#define save_tclass (G1.next_token__save_tclass)
855+#define save_info (G1.next_token__save_info)
856
857- char *p, *s;
858+ char *p;
859 const char *tl;
860- uint32_t tc;
861 const uint32_t *ti;
862+ uint32_t tc, last_token_class;
863+
864+ last_token_class = t_tclass; /* t_tclass is initialized to TC_NEWLINE */
865+
866+ debug_printf_parse("%s() expected(%x):", __func__, expected);
867+ debug_parse_print_tc(expected);
868+ debug_printf_parse("\n");
869
870 if (t_rollback) {
871 debug_printf_parse("%s: using rolled-back token\n", __func__);
872@@ -1080,6 +1111,10 @@ static uint32_t next_token(uint32_t expected)
873 t_info = save_info;
874 } else {
875 p = g_pos;
876+ if (g_saved_ch != '\0') {
877+ *p = g_saved_ch;
878+ g_saved_ch = '\0';
879+ }
880 readnext:
881 p = skip_spaces(p);
882 g_lineno = t_lineno;
883@@ -1087,15 +1122,12 @@ static uint32_t next_token(uint32_t expected)
884 while (*p != '\n' && *p != '\0')
885 p++;
886
887- if (*p == '\n')
888- t_lineno++;
889-
890 if (*p == '\0') {
891 tc = TC_EOF;
892 debug_printf_parse("%s: token found: TC_EOF\n", __func__);
893 } else if (*p == '\"') {
894 /* it's a string */
895- t_string = s = ++p;
896+ char *s = t_string = ++p;
897 while (*p != '\"') {
898 char *pp;
899 if (*p == '\0' || *p == '\n')
900@@ -1110,7 +1142,7 @@ static uint32_t next_token(uint32_t expected)
901 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
902 } else if ((expected & TC_REGEXP) && *p == '/') {
903 /* it's regexp */
904- t_string = s = ++p;
905+ char *s = t_string = ++p;
906 while (*p != '/') {
907 if (*p == '\0' || *p == '\n')
908 syntax_error(EMSG_UNEXP_EOS);
909@@ -1141,6 +1173,11 @@ static uint32_t next_token(uint32_t expected)
910 tc = TC_NUMBER;
911 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
912 } else {
913+ char *end_of_name;
914+
915+ if (*p == '\n')
916+ t_lineno++;
917+
918 /* search for something known */
919 tl = tokenlist;
920 tc = 0x00000001;
921@@ -1155,9 +1192,9 @@ static uint32_t next_token(uint32_t expected)
922 * token matches,
923 * and it's not a longer word,
924 */
925- if ((tc & (expected | TC_WORD | TC_NEWLINE))
926+ if ((tc & (expected | TS_WORD | TC_NEWLINE))
927 && strncmp(p, tl, l) == 0
928- && !((tc & TC_WORD) && isalnum_(p[l]))
929+ && !((tc & TS_WORD) && isalnum_(p[l]))
930 ) {
931 /* then this is what we are looking for */
932 t_info = *ti;
933@@ -1174,67 +1211,94 @@ static uint32_t next_token(uint32_t expected)
934 if (!isalnum_(*p))
935 syntax_error(EMSG_UNEXP_TOKEN); /* no */
936 /* yes */
937- t_string = --p;
938- while (isalnum_(*++p)) {
939- p[-1] = *p;
940- }
941- p[-1] = '\0';
942- tc = TC_VARIABLE;
943- /* also consume whitespace between functionname and bracket */
944- if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
945+ t_string = p;
946+ while (isalnum_(*p))
947+ p++;
948+ end_of_name = p;
949+
950+ if (last_token_class == TC_FUNCDECL)
951+ /* eat space in "function FUNC (...) {...}" declaration */
952 p = skip_spaces(p);
953+ else if (expected & TC_ARRAY) {
954+ /* eat space between array name and [ */
955+ char *s = skip_spaces(p);
956+ if (*s == '[') /* array ref, not just a name? */
957+ p = s;
958+ }
959+ /* else: do NOT consume whitespace after variable name!
960+ * gawk allows definition "function FUNC (p) {...}" - note space,
961+ * but disallows the call "FUNC (p)" because it isn't one -
962+ * expression "v (a)" should NOT be parsed as TC_FUNCTION:
963+ * it is a valid concatenation if "v" is a variable,
964+ * not a function name (and type of name is not known at parse time).
965+ */
966+
967 if (*p == '(') {
968+ p++;
969 tc = TC_FUNCTION;
970 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
971+ } else if (*p == '[') {
972+ p++;
973+ tc = TC_ARRAY;
974+ debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
975 } else {
976- if (*p == '[') {
977- p++;
978- tc = TC_ARRAY;
979- debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
980- } else
981- debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
982+ tc = TC_VARIABLE;
983+ debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
984+ if (end_of_name == p) {
985+ /* there is no space for trailing NUL in t_string!
986+ * We need to save the char we are going to NUL.
987+ * (we'll use it in future call to next_token())
988+ */
989+ g_saved_ch = *end_of_name;
990+// especially pathological example is V="abc"; V.2 - it's V concatenated to .2
991+// (it evaluates to "abc0.2"). Because of this case, we can't simply cache
992+// '.' and analyze it later: we also have to *store it back* in the next
993+// call to next_token(), in order to give my_strtod() the undamaged ".2" string.
994+ }
995 }
996+ *end_of_name = '\0'; /* terminate t_string */
997 }
998 token_found:
999 g_pos = p;
1000
1001 /* skipping newlines in some cases */
1002- if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1003+ if ((last_token_class & TS_NOTERM) && (tc & TC_NEWLINE))
1004 goto readnext;
1005
1006 /* insert concatenation operator when needed */
1007- debug_printf_parse("%s: %x %x %x concat_inserted?\n", __func__,
1008- (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP));
1009- if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)
1010- && !(ltclass == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */
1011+ debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__,
1012+ (last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP),
1013+ !(last_token_class == TC_LENGTH && tc == TC_LPAREN));
1014+ if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP)
1015+ && !(last_token_class == TC_LENGTH && tc == TC_LPAREN) /* but not for "length(..." */
1016 ) {
1017 concat_inserted = TRUE;
1018 save_tclass = tc;
1019 save_info = t_info;
1020- tc = TC_BINOP;
1021+ tc = TC_BINOPX;
1022 t_info = OC_CONCAT | SS | P(35);
1023 }
1024
1025- debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, t_tclass);
1026 t_tclass = tc;
1027+ debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, tc);
1028 }
1029- ltclass = t_tclass;
1030-
1031 /* Are we ready for this? */
1032- if (!(ltclass & expected)) {
1033- syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1034+ if (!(t_tclass & expected)) {
1035+ syntax_error((last_token_class & (TC_NEWLINE | TC_EOF)) ?
1036 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1037 }
1038
1039- debug_printf_parse("%s: returning, ltclass:%x t_double:%f\n", __func__, ltclass, t_double);
1040- return ltclass;
1041+ debug_printf_parse("%s: returning, t_double:%f t_tclass:", __func__, t_double);
1042+ debug_parse_print_tc(t_tclass);
1043+ debug_printf_parse("\n");
1044+
1045+ return t_tclass;
1046 #undef concat_inserted
1047 #undef save_tclass
1048 #undef save_info
1049-#undef ltclass
1050 }
1051
1052-static void rollback_token(void)
1053+static ALWAYS_INLINE void rollback_token(void)
1054 {
1055 t_rollback = TRUE;
1056 }
1057@@ -1251,169 +1315,188 @@ static node *new_node(uint32_t info)
1058
1059 static void mk_re_node(const char *s, node *n, regex_t *re)
1060 {
1061- n->info = OC_REGEXP;
1062+ n->info = TI_REGEXP;
1063 n->l.re = re;
1064 n->r.ire = re + 1;
1065 xregcomp(re, s, REG_EXTENDED);
1066 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1067 }
1068
1069-static node *condition(void)
1070+static node *parse_expr(uint32_t);
1071+
1072+static node *parse_lrparen_list(void)
1073 {
1074- next_token(TC_SEQSTART);
1075- return parse_expr(TC_SEQTERM);
1076+ next_token(TC_LPAREN);
1077+ return parse_expr(TC_RPAREN);
1078 }
1079
1080 /* parse expression terminated by given argument, return ptr
1081 * to built subtree. Terminator is eaten by parse_expr */
1082-static node *parse_expr(uint32_t iexp)
1083+static node *parse_expr(uint32_t term_tc)
1084 {
1085 node sn;
1086 node *cn = &sn;
1087 node *vn, *glptr;
1088- uint32_t tc, xtc;
1089+ uint32_t tc, expected_tc;
1090 var *v;
1091
1092- debug_printf_parse("%s(%x)\n", __func__, iexp);
1093+ debug_printf_parse("%s() term_tc(%x):", __func__, term_tc);
1094+ debug_parse_print_tc(term_tc);
1095+ debug_printf_parse("\n");
1096
1097 sn.info = PRIMASK;
1098 sn.r.n = sn.a.n = glptr = NULL;
1099- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1100+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc;
1101
1102- while (!((tc = next_token(xtc)) & iexp)) {
1103+ while (!((tc = next_token(expected_tc)) & term_tc)) {
1104
1105- if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1106+ if (glptr && (t_info == TI_LESS)) {
1107 /* input redirection (<) attached to glptr node */
1108 debug_printf_parse("%s: input redir\n", __func__);
1109 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1110 cn->a.n = glptr;
1111- xtc = TC_OPERAND | TC_UOPPRE;
1112+ expected_tc = TS_OPERAND | TS_UOPPRE;
1113 glptr = NULL;
1114-
1115- } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1116- debug_printf_parse("%s: TC_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
1117+ continue;
1118+ }
1119+ if (tc & (TS_BINOP | TC_UOPPOST)) {
1120+ debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
1121 /* for binary and postfix-unary operators, jump back over
1122 * previous operators with higher priority */
1123 vn = cn;
1124 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1125- || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1126+ || ((t_info == vn->info) && t_info == TI_COLON)
1127 ) {
1128 vn = vn->a.n;
1129 if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
1130 }
1131- if ((t_info & OPCLSMASK) == OC_TERNARY)
1132+ if (t_info == TI_TERNARY)
1133+//TODO: why?
1134 t_info += P(6);
1135 cn = vn->a.n->r.n = new_node(t_info);
1136 cn->a.n = vn->a.n;
1137- if (tc & TC_BINOP) {
1138+ if (tc & TS_BINOP) {
1139 cn->l.n = vn;
1140- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1141- if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1142+//FIXME: this is the place to detect and reject assignments to non-lvalues.
1143+//Currently we allow "assignments" to consts and temporaries, nonsense like this:
1144+// awk 'BEGIN { "qwe" = 1 }'
1145+// awk 'BEGIN { 7 *= 7 }'
1146+// awk 'BEGIN { length("qwe") = 1 }'
1147+// awk 'BEGIN { (1+1) += 3 }'
1148+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
1149+ if (t_info == TI_PGETLINE) {
1150 /* it's a pipe */
1151 next_token(TC_GETLINE);
1152 /* give maximum priority to this pipe */
1153 cn->info &= ~PRIMASK;
1154- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1155+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
1156 }
1157 } else {
1158 cn->r.n = vn;
1159- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1160+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
1161 }
1162 vn->a.n = cn;
1163+ continue;
1164+ }
1165
1166- } else {
1167- debug_printf_parse("%s: other\n", __func__);
1168- /* for operands and prefix-unary operators, attach them
1169- * to last node */
1170- vn = cn;
1171- cn = vn->r.n = new_node(t_info);
1172- cn->a.n = vn;
1173- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1174- if (tc & (TC_OPERAND | TC_REGEXP)) {
1175- debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1176- xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1177- /* one should be very careful with switch on tclass -
1178- * only simple tclasses should be used! */
1179- switch (tc) {
1180- case TC_VARIABLE:
1181- case TC_ARRAY:
1182- debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1183- cn->info = OC_VAR;
1184- v = hash_search(ahash, t_string);
1185- if (v != NULL) {
1186- cn->info = OC_FNARG;
1187- cn->l.aidx = v->x.aidx;
1188- } else {
1189- cn->l.v = newvar(t_string);
1190- }
1191- if (tc & TC_ARRAY) {
1192- cn->info |= xS;
1193- cn->r.n = parse_expr(TC_ARRTERM);
1194- }
1195- break;
1196+ debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info);
1197+ /* for operands and prefix-unary operators, attach them
1198+ * to last node */
1199+ vn = cn;
1200+ cn = vn->r.n = new_node(t_info);
1201+ cn->a.n = vn;
1202
1203- case TC_NUMBER:
1204- case TC_STRING:
1205- debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
1206- cn->info = OC_VAR;
1207- v = cn->l.v = xzalloc(sizeof(var));
1208- if (tc & TC_NUMBER)
1209- setvar_i(v, t_double);
1210- else {
1211- setvar_s(v, t_string);
1212- xtc &= ~TC_UOPPOST; /* "str"++ is not allowed */
1213- }
1214- break;
1215+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
1216+ if (t_info == TI_PREINC || t_info == TI_PREDEC)
1217+ expected_tc = TS_LVALUE | TC_UOPPRE1;
1218
1219- case TC_REGEXP:
1220- debug_printf_parse("%s: TC_REGEXP\n", __func__);
1221- mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1222- break;
1223+ if (!(tc & (TS_OPERAND | TC_REGEXP)))
1224+ continue;
1225
1226- case TC_FUNCTION:
1227- debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1228- cn->info = OC_FUNC;
1229- cn->r.f = newfunc(t_string);
1230- cn->l.n = condition();
1231- break;
1232+ debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__);
1233+ expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc;
1234+ /* one should be very careful with switch on tclass -
1235+ * only simple tclasses should be used (TC_xyz, not TS_xyz) */
1236+ switch (tc) {
1237+ case TC_VARIABLE:
1238+ case TC_ARRAY:
1239+ debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1240+ cn->info = OC_VAR;
1241+ v = hash_search(ahash, t_string);
1242+ if (v != NULL) {
1243+ cn->info = OC_FNARG;
1244+ cn->l.aidx = v->x.aidx;
1245+ } else {
1246+ cn->l.v = newvar(t_string);
1247+ }
1248+ if (tc & TC_ARRAY) {
1249+ cn->info |= xS;
1250+ cn->r.n = parse_expr(TC_ARRTERM);
1251+ }
1252+ break;
1253
1254- case TC_SEQSTART:
1255- debug_printf_parse("%s: TC_SEQSTART\n", __func__);
1256- cn = vn->r.n = parse_expr(TC_SEQTERM);
1257- if (!cn)
1258- syntax_error("Empty sequence");
1259- cn->a.n = vn;
1260- break;
1261+ case TC_NUMBER:
1262+ case TC_STRING:
1263+ debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
1264+ cn->info = OC_VAR;
1265+ v = cn->l.v = xzalloc(sizeof(var));
1266+ if (tc & TC_NUMBER)
1267+ setvar_i(v, t_double);
1268+ else {
1269+ setvar_s(v, t_string);
1270+ expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */
1271+ }
1272+ break;
1273
1274- case TC_GETLINE:
1275- debug_printf_parse("%s: TC_GETLINE\n", __func__);
1276- glptr = cn;
1277- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1278- break;
1279+ case TC_REGEXP:
1280+ debug_printf_parse("%s: TC_REGEXP\n", __func__);
1281+ mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1282+ break;
1283
1284- case TC_BUILTIN:
1285- debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1286- cn->l.n = condition();
1287- break;
1288+ case TC_FUNCTION:
1289+ debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1290+ cn->info = OC_FUNC;
1291+ cn->r.f = newfunc(t_string);
1292+ cn->l.n = parse_expr(TC_RPAREN);
1293+ break;
1294
1295- case TC_LENGTH:
1296- debug_printf_parse("%s: TC_LENGTH\n", __func__);
1297- next_token(TC_SEQSTART /* length(...) */
1298- | TC_OPTERM /* length; (or newline)*/
1299- | TC_GRPTERM /* length } */
1300- | TC_BINOPX /* length <op> NUM */
1301- | TC_COMMA /* print length, 1 */
1302- );
1303- rollback_token();
1304- if (t_tclass & TC_SEQSTART) {
1305- /* It was a "(" token. Handle just like TC_BUILTIN */
1306- cn->l.n = condition();
1307- }
1308- break;
1309- }
1310+ case TC_LPAREN:
1311+ debug_printf_parse("%s: TC_LPAREN\n", __func__);
1312+ cn = vn->r.n = parse_expr(TC_RPAREN);
1313+ if (!cn)
1314+ syntax_error("Empty sequence");
1315+ cn->a.n = vn;
1316+ break;
1317+
1318+ case TC_GETLINE:
1319+ debug_printf_parse("%s: TC_GETLINE\n", __func__);
1320+ glptr = cn;
1321+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
1322+ break;
1323+
1324+ case TC_BUILTIN:
1325+ debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1326+ cn->l.n = parse_lrparen_list();
1327+ break;
1328+
1329+ case TC_LENGTH:
1330+ debug_printf_parse("%s: TC_LENGTH\n", __func__);
1331+ tc = next_token(TC_LPAREN /* length(...) */
1332+ | TC_SEMICOL /* length; */
1333+ | TC_NEWLINE /* length<newline> */
1334+ | TC_RBRACE /* length } */
1335+ | TC_BINOPX /* length <op> NUM */
1336+ | TC_COMMA /* print length, 1 */
1337+ );
1338+ if (tc != TC_LPAREN)
1339+ rollback_token();
1340+ else {
1341+ /* It was a "(" token. Handle just like TC_BUILTIN */
1342+ cn->l.n = parse_expr(TC_RPAREN);
1343 }
1344+ break;
1345 }
1346- }
1347+ } /* while() */
1348
1349 debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1350 return sn.r.n;
1351@@ -1430,7 +1513,7 @@ static node *chain_node(uint32_t info)
1352 if (seq->programname != g_progname) {
1353 seq->programname = g_progname;
1354 n = chain_node(OC_NEWSOURCE);
1355- n->l.new_progname = xstrdup(g_progname);
1356+ n->l.new_progname = g_progname;
1357 }
1358
1359 n = seq->last;
1360@@ -1446,14 +1529,16 @@ static void chain_expr(uint32_t info)
1361
1362 n = chain_node(info);
1363
1364- n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1365+ n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
1366 if ((info & OF_REQUIRED) && !n->l.n)
1367 syntax_error(EMSG_TOO_FEW_ARGS);
1368
1369- if (t_tclass & TC_GRPTERM)
1370+ if (t_tclass & TC_RBRACE)
1371 rollback_token();
1372 }
1373
1374+static void chain_group(void);
1375+
1376 static node *chain_loop(node *nn)
1377 {
1378 node *n, *n2, *save_brk, *save_cont;
1379@@ -1477,207 +1562,284 @@ static node *chain_loop(node *nn)
1380 return n;
1381 }
1382
1383+static void chain_until_rbrace(void)
1384+{
1385+ uint32_t tc;
1386+ while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
1387+ debug_printf_parse("%s: !TC_RBRACE\n", __func__);
1388+ if (tc == TC_NEWLINE)
1389+ continue;
1390+ rollback_token();
1391+ chain_group();
1392+ }
1393+ debug_printf_parse("%s: TC_RBRACE\n", __func__);
1394+}
1395+
1396 /* parse group and attach it to chain */
1397 static void chain_group(void)
1398 {
1399- uint32_t c;
1400+ uint32_t tc;
1401 node *n, *n2, *n3;
1402
1403 do {
1404- c = next_token(TC_GRPSEQ);
1405- } while (c & TC_NEWLINE);
1406-
1407- if (c & TC_GRPSTART) {
1408- debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1409- while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1410- debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1411- if (t_tclass & TC_NEWLINE)
1412- continue;
1413- rollback_token();
1414- chain_group();
1415- }
1416- debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1417- } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1418- debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1419+ tc = next_token(TS_GRPSEQ);
1420+ } while (tc == TC_NEWLINE);
1421+
1422+ if (tc == TC_LBRACE) {
1423+ debug_printf_parse("%s: TC_LBRACE\n", __func__);
1424+ chain_until_rbrace();
1425+ return;
1426+ }
1427+ if (tc & (TS_OPSEQ | TC_SEMICOL)) {
1428+ debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL\n", __func__);
1429 rollback_token();
1430 chain_expr(OC_EXEC | Vx);
1431- } else {
1432- /* TC_STATEMNT */
1433- debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1434- switch (t_info & OPCLSMASK) {
1435- case ST_IF:
1436- debug_printf_parse("%s: ST_IF\n", __func__);
1437- n = chain_node(OC_BR | Vx);
1438- n->l.n = condition();
1439+ return;
1440+ }
1441+
1442+ /* TS_STATEMNT */
1443+ debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__);
1444+ switch (t_info & OPCLSMASK) {
1445+ case ST_IF:
1446+ debug_printf_parse("%s: ST_IF\n", __func__);
1447+ n = chain_node(OC_BR | Vx);
1448+ n->l.n = parse_lrparen_list();
1449+ chain_group();
1450+ n2 = chain_node(OC_EXEC);
1451+ n->r.n = seq->last;
1452+ if (next_token(TS_GRPSEQ | TC_RBRACE | TC_ELSE) == TC_ELSE) {
1453 chain_group();
1454- n2 = chain_node(OC_EXEC);
1455- n->r.n = seq->last;
1456- if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1457- chain_group();
1458- n2->a.n = seq->last;
1459- } else {
1460- rollback_token();
1461- }
1462- break;
1463+ n2->a.n = seq->last;
1464+ } else {
1465+ rollback_token();
1466+ }
1467+ break;
1468
1469- case ST_WHILE:
1470- debug_printf_parse("%s: ST_WHILE\n", __func__);
1471- n2 = condition();
1472- n = chain_loop(NULL);
1473- n->l.n = n2;
1474- break;
1475+ case ST_WHILE:
1476+ debug_printf_parse("%s: ST_WHILE\n", __func__);
1477+ n2 = parse_lrparen_list();
1478+ n = chain_loop(NULL);
1479+ n->l.n = n2;
1480+ break;
1481
1482- case ST_DO:
1483- debug_printf_parse("%s: ST_DO\n", __func__);
1484- n2 = chain_node(OC_EXEC);
1485- n = chain_loop(NULL);
1486- n2->a.n = n->a.n;
1487- next_token(TC_WHILE);
1488- n->l.n = condition();
1489- break;
1490+ case ST_DO:
1491+ debug_printf_parse("%s: ST_DO\n", __func__);
1492+ n2 = chain_node(OC_EXEC);
1493+ n = chain_loop(NULL);
1494+ n2->a.n = n->a.n;
1495+ next_token(TC_WHILE);
1496+ n->l.n = parse_lrparen_list();
1497+ break;
1498
1499- case ST_FOR:
1500- debug_printf_parse("%s: ST_FOR\n", __func__);
1501- next_token(TC_SEQSTART);
1502- n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1503- if (t_tclass & TC_SEQTERM) { /* for-in */
1504- if (!n2 || (n2->info & OPCLSMASK) != OC_IN)
1505- syntax_error(EMSG_UNEXP_TOKEN);
1506- n = chain_node(OC_WALKINIT | VV);
1507- n->l.n = n2->l.n;
1508- n->r.n = n2->r.n;
1509- n = chain_loop(NULL);
1510- n->info = OC_WALKNEXT | Vx;
1511- n->l.n = n2->l.n;
1512- } else { /* for (;;) */
1513- n = chain_node(OC_EXEC | Vx);
1514- n->l.n = n2;
1515- n2 = parse_expr(TC_SEMICOL);
1516- n3 = parse_expr(TC_SEQTERM);
1517- n = chain_loop(n3);
1518- n->l.n = n2;
1519- if (!n2)
1520- n->info = OC_EXEC;
1521- }
1522- break;
1523+ case ST_FOR:
1524+ debug_printf_parse("%s: ST_FOR\n", __func__);
1525+ next_token(TC_LPAREN);
1526+ n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
1527+ if (t_tclass & TC_RPAREN) { /* for (I in ARRAY) */
1528+ if (!n2 || n2->info != TI_IN)
1529+ syntax_error(EMSG_UNEXP_TOKEN);
1530+ n = chain_node(OC_WALKINIT | VV);
1531+ n->l.n = n2->l.n;
1532+ n->r.n = n2->r.n;
1533+ n = chain_loop(NULL);
1534+ n->info = OC_WALKNEXT | Vx;
1535+ n->l.n = n2->l.n;
1536+ } else { /* for (;;) */
1537+ n = chain_node(OC_EXEC | Vx);
1538+ n->l.n = n2;
1539+ n2 = parse_expr(TC_SEMICOL);
1540+ n3 = parse_expr(TC_RPAREN);
1541+ n = chain_loop(n3);
1542+ n->l.n = n2;
1543+ if (!n2)
1544+ n->info = OC_EXEC;
1545+ }
1546+ break;
1547
1548- case OC_PRINT:
1549- case OC_PRINTF:
1550- debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1551- n = chain_node(t_info);
1552- n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1553- if (t_tclass & TC_OUTRDR) {
1554- n->info |= t_info;
1555- n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1556- }
1557- if (t_tclass & TC_GRPTERM)
1558- rollback_token();
1559- break;
1560+ case OC_PRINT:
1561+ case OC_PRINTF:
1562+ debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1563+ n = chain_node(t_info);
1564+ n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_OUTRDR | TC_RBRACE);
1565+ if (t_tclass & TC_OUTRDR) {
1566+ n->info |= t_info;
1567+ n->r.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
1568+ }
1569+ if (t_tclass & TC_RBRACE)
1570+ rollback_token();
1571+ break;
1572
1573- case OC_BREAK:
1574- debug_printf_parse("%s: OC_BREAK\n", __func__);
1575- n = chain_node(OC_EXEC);
1576- n->a.n = break_ptr;
1577- chain_expr(t_info);
1578- break;
1579+ case OC_BREAK:
1580+ debug_printf_parse("%s: OC_BREAK\n", __func__);
1581+ n = chain_node(OC_EXEC);
1582+ if (!break_ptr)
1583+ syntax_error("'break' not in a loop");
1584+ n->a.n = break_ptr;
1585+ chain_expr(t_info);
1586+ break;
1587
1588- case OC_CONTINUE:
1589- debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1590- n = chain_node(OC_EXEC);
1591- n->a.n = continue_ptr;
1592- chain_expr(t_info);
1593- break;
1594+ case OC_CONTINUE:
1595+ debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1596+ n = chain_node(OC_EXEC);
1597+ if (!continue_ptr)
1598+ syntax_error("'continue' not in a loop");
1599+ n->a.n = continue_ptr;
1600+ chain_expr(t_info);
1601+ break;
1602
1603- /* delete, next, nextfile, return, exit */
1604- default:
1605- debug_printf_parse("%s: default\n", __func__);
1606- chain_expr(t_info);
1607- }
1608+ /* delete, next, nextfile, return, exit */
1609+ default:
1610+ debug_printf_parse("%s: default\n", __func__);
1611+ chain_expr(t_info);
1612 }
1613 }
1614
1615 static void parse_program(char *p)
1616 {
1617- uint32_t tclass;
1618- node *cn;
1619- func *f;
1620- var *v;
1621+ debug_printf_parse("%s()\n", __func__);
1622
1623 g_pos = p;
1624 t_lineno = 1;
1625- while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1626- TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1627+ for (;;) {
1628+ uint32_t tclass;
1629
1630- if (tclass & TC_OPTERM) {
1631- debug_printf_parse("%s: TC_OPTERM\n", __func__);
1632+ tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
1633+ | TC_EOF | TC_NEWLINE /* but not TC_SEMICOL */);
1634+ got_tok:
1635+ if (tclass == TC_EOF) {
1636+ debug_printf_parse("%s: TC_EOF\n", __func__);
1637+ break;
1638+ }
1639+ if (tclass == TC_NEWLINE) {
1640+ debug_printf_parse("%s: TC_NEWLINE\n", __func__);
1641 continue;
1642 }
1643-
1644- seq = &mainseq;
1645- if (tclass & TC_BEGIN) {
1646+ if (tclass == TC_BEGIN) {
1647 debug_printf_parse("%s: TC_BEGIN\n", __func__);
1648 seq = &beginseq;
1649- chain_group();
1650- } else if (tclass & TC_END) {
1651+ /* ensure there is no newline between BEGIN and { */
1652+ next_token(TC_LBRACE);
1653+ chain_until_rbrace();
1654+ goto next_tok;
1655+ }
1656+ if (tclass == TC_END) {
1657 debug_printf_parse("%s: TC_END\n", __func__);
1658 seq = &endseq;
1659- chain_group();
1660- } else if (tclass & TC_FUNCDECL) {
1661+ /* ensure there is no newline between END and { */
1662+ next_token(TC_LBRACE);
1663+ chain_until_rbrace();
1664+ goto next_tok;
1665+ }
1666+ if (tclass == TC_FUNCDECL) {
1667+ func *f;
1668+
1669 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1670 next_token(TC_FUNCTION);
1671- g_pos++;
1672 f = newfunc(t_string);
1673- f->body.first = NULL;
1674- f->nargs = 0;
1675- /* Match func arg list: a comma sep list of >= 0 args, and a close paren */
1676- while (next_token(TC_VARIABLE | TC_SEQTERM | TC_COMMA)) {
1677- /* Either an empty arg list, or trailing comma from prev iter
1678- * must be followed by an arg */
1679- if (f->nargs == 0 && t_tclass == TC_SEQTERM)
1680- break;
1681-
1682- /* TC_SEQSTART/TC_COMMA must be followed by TC_VARIABLE */
1683- if (t_tclass != TC_VARIABLE)
1684+ if (f->defined)
1685+ syntax_error("Duplicate function");
1686+ f->defined = 1;
1687+ //f->body.first = NULL; - already is
1688+ //f->nargs = 0; - already is
1689+ /* func arg list: comma sep list of args, and a close paren */
1690+ for (;;) {
1691+ var *v;
1692+ if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
1693+ if (f->nargs == 0)
1694+ break; /* func() is ok */
1695+ /* func(a,) is not ok */
1696 syntax_error(EMSG_UNEXP_TOKEN);
1697-
1698+ }
1699 v = findvar(ahash, t_string);
1700 v->x.aidx = f->nargs++;
1701-
1702 /* Arg followed either by end of arg list or 1 comma */
1703- if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1704+ if (next_token(TC_COMMA | TC_RPAREN) == TC_RPAREN)
1705 break;
1706- if (t_tclass != TC_COMMA)
1707- syntax_error(EMSG_UNEXP_TOKEN);
1708+ /* it was a comma, we ate it */
1709 }
1710 seq = &f->body;
1711- chain_group();
1712- clear_array(ahash);
1713- } else if (tclass & TC_OPSEQ) {
1714- debug_printf_parse("%s: TC_OPSEQ\n", __func__);
1715+ /* ensure there is { after "func F(...)" - but newlines are allowed */
1716+ while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE)
1717+ continue;
1718+ chain_until_rbrace();
1719+ hash_clear(ahash);
1720+ goto next_tok;
1721+ }
1722+ seq = &mainseq;
1723+ if (tclass & TS_OPSEQ) {
1724+ node *cn;
1725+
1726+ debug_printf_parse("%s: TS_OPSEQ\n", __func__);
1727 rollback_token();
1728 cn = chain_node(OC_TEST);
1729- cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1730- if (t_tclass & TC_GRPSTART) {
1731- debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1732- rollback_token();
1733- chain_group();
1734+ cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE);
1735+ if (t_tclass == TC_LBRACE) {
1736+ debug_printf_parse("%s: TC_LBRACE\n", __func__);
1737+ chain_until_rbrace();
1738 } else {
1739- debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1740+ /* no action, assume default "{ print }" */
1741+ debug_printf_parse("%s: !TC_LBRACE\n", __func__);
1742 chain_node(OC_PRINT);
1743 }
1744 cn->r.n = mainseq.last;
1745- } else /* if (tclass & TC_GRPSTART) */ {
1746- debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1747- rollback_token();
1748- chain_group();
1749+ goto next_tok;
1750 }
1751- }
1752- debug_printf_parse("%s: TC_EOF\n", __func__);
1753+ /* tclass == TC_LBRACE */
1754+ debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
1755+ chain_until_rbrace();
1756+ next_tok:
1757+ /* Same as next_token() at the top of the loop, + TC_SEMICOL */
1758+ tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
1759+ | TC_EOF | TC_NEWLINE | TC_SEMICOL);
1760+ /* gawk allows many newlines, but does not allow more than one semicolon:
1761+ * BEGIN {...}<newline>;<newline>;
1762+ * would complain "each rule must have a pattern or an action part".
1763+ * Same message for
1764+ * ; BEGIN {...}
1765+ */
1766+ if (tclass != TC_SEMICOL)
1767+ goto got_tok; /* use this token */
1768+ /* else: loop back - ate the semicolon, get and use _next_ token */
1769+ } /* for (;;) */
1770 }
1771
1772-
1773 /* -------- program execution part -------- */
1774
1775+/* temporary variables allocator */
1776+static var *nvalloc(int sz)
1777+{
1778+ return xzalloc(sz * sizeof(var));
1779+}
1780+
1781+static void nvfree(var *v, int sz)
1782+{
1783+ var *p = v;
1784+
1785+ while (--sz >= 0) {
1786+ if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1787+ clear_array(iamarray(p));
1788+ free(p->x.array->items);
1789+ free(p->x.array);
1790+ }
1791+ if (p->type & VF_WALK) {
1792+ walker_list *n;
1793+ walker_list *w = p->x.walker;
1794+ debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1795+ p->x.walker = NULL;
1796+ while (w) {
1797+ n = w->prev;
1798+ debug_printf_walker(" free(%p)\n", w);
1799+ free(w);
1800+ w = n;
1801+ }
1802+ }
1803+ clrvar(p);
1804+ p++;
1805+ }
1806+
1807+ free(v);
1808+}
1809+
1810 static node *mk_splitter(const char *s, tsplitter *spl)
1811 {
1812 regex_t *re, *ire;
1813@@ -1686,7 +1848,7 @@ static node *mk_splitter(const char *s, tsplitter *spl)
1814 re = &spl->re[0];
1815 ire = &spl->re[1];
1816 n = &spl->n;
1817- if ((n->info & OPCLSMASK) == OC_REGEXP) {
1818+ if (n->info == TI_REGEXP) {
1819 regfree(re);
1820 regfree(ire); // TODO: nuke ire, use re+1?
1821 }
1822@@ -1699,21 +1861,28 @@ static node *mk_splitter(const char *s, tsplitter *spl)
1823 return n;
1824 }
1825
1826-/* use node as a regular expression. Supplied with node ptr and regex_t
1827+static var *evaluate(node *, var *);
1828+
1829+/* Use node as a regular expression. Supplied with node ptr and regex_t
1830 * storage space. Return ptr to regex (if result points to preg, it should
1831- * be later regfree'd manually
1832+ * be later regfree'd manually).
1833 */
1834 static regex_t *as_regex(node *op, regex_t *preg)
1835 {
1836 int cflags;
1837- var *v;
1838 const char *s;
1839
1840- if ((op->info & OPCLSMASK) == OC_REGEXP) {
1841+ if (op->info == TI_REGEXP) {
1842 return icase ? op->r.ire : op->l.re;
1843 }
1844- v = nvalloc(1);
1845- s = getvar_s(evaluate(op, v));
1846+
1847+ //tmpvar = nvalloc(1);
1848+#define TMPVAR (&G.as_regex__tmpvar)
1849+ // We use a single "static" tmpvar (instead of on-stack or malloced one)
1850+ // to decrease memory consumption in deeply-recursive awk programs.
1851+ // The rule to work safely is to never call evaluate() while our static
1852+ // TMPVAR's value is still needed.
1853+ s = getvar_s(evaluate(op, TMPVAR));
1854
1855 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1856 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1857@@ -1725,7 +1894,8 @@ static regex_t *as_regex(node *op, regex_t *preg)
1858 cflags &= ~REG_EXTENDED;
1859 xregcomp(preg, s, cflags);
1860 }
1861- nvfree(v);
1862+ //nvfree(tmpvar, 1);
1863+#undef TMPVAR
1864 return preg;
1865 }
1866
1867@@ -1745,12 +1915,22 @@ static char* qrealloc(char *b, int n, int *size)
1868 /* resize field storage space */
1869 static void fsrealloc(int size)
1870 {
1871- int i;
1872+ int i, newsize;
1873
1874 if (size >= maxfields) {
1875+ /* Sanity cap, easier than catering for overflows */
1876+ if (size > 0xffffff)
1877+ bb_die_memory_exhausted();
1878+
1879 i = maxfields;
1880 maxfields = size + 16;
1881- Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1882+
1883+ newsize = maxfields * sizeof(Fields[0]);
1884+ debug_printf_eval("fsrealloc: xrealloc(%p, %u)\n", Fields, newsize);
1885+ Fields = xrealloc(Fields, newsize);
1886+ debug_printf_eval("fsrealloc: Fields=%p..%p\n", Fields, (char*)Fields + newsize - 1);
1887+ /* ^^^ did Fields[] move? debug aid for L.v getting "upstaged" by R.v in evaluate() */
1888+
1889 for (; i < maxfields; i++) {
1890 Fields[i].type = VF_SPECIAL;
1891 Fields[i].string = NULL;
1892@@ -1802,13 +1982,13 @@ static int awk_split(const char *s, node *spl, char **slist)
1893 c[2] = '\n';
1894
1895 n = 0;
1896- if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1897+ if (spl->info == TI_REGEXP) { /* regex split */
1898 if (!*s)
1899 return n; /* "": zero fields */
1900 n++; /* at least one field will be there */
1901 do {
1902 int l;
1903- regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1904+ regmatch_t pmatch[1];
1905
1906 l = strcspn(s, c+2); /* len till next NUL or \n */
1907 if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0
1908@@ -1969,7 +2149,7 @@ static node *nextarg(node **pn)
1909 node *n;
1910
1911 n = *pn;
1912- if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1913+ if (n && n->info == TI_COMMA) {
1914 *pn = n->r.n;
1915 n = n->l.n;
1916 } else {
1917@@ -2000,8 +2180,7 @@ static void hashwalk_init(var *v, xhash *array)
1918 for (i = 0; i < array->csize; i++) {
1919 hi = array->items[i];
1920 while (hi) {
1921- strcpy(w->end, hi->name);
1922- nextword(&w->end);
1923+ w->end = stpcpy(w->end, hi->name) + 1;
1924 hi = hi->next;
1925 }
1926 }
1927@@ -2027,15 +2206,18 @@ static int hashwalk_next(var *v)
1928 /* evaluate node, return 1 when result is true, 0 otherwise */
1929 static int ptest(node *pattern)
1930 {
1931- /* ptest__v is "static": to save stack space? */
1932- return istrue(evaluate(pattern, &G.ptest__v));
1933+ // We use a single "static" tmpvar (instead of on-stack or malloced one)
1934+ // to decrease memory consumption in deeply-recursive awk programs.
1935+ // The rule to work safely is to never call evaluate() while our static
1936+ // TMPVAR's value is still needed.
1937+ return istrue(evaluate(pattern, &G.ptest__tmpvar));
1938 }
1939
1940 /* read next record from stream rsm into a variable v */
1941 static int awk_getline(rstream *rsm, var *v)
1942 {
1943 char *b;
1944- regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1945+ regmatch_t pmatch[1];
1946 int size, a, p, pp = 0;
1947 int fd, so, eo, r, rp;
1948 char c, *m, *s;
1949@@ -2061,7 +2243,7 @@ static int awk_getline(rstream *rsm, var *v)
1950 so = eo = p;
1951 r = 1;
1952 if (p > 0) {
1953- if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1954+ if (rsplitter.n.info == TI_REGEXP) {
1955 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1956 b, 1, pmatch, 0) == 0) {
1957 so = pmatch[0].rm_so;
1958@@ -2133,82 +2315,126 @@ static int awk_getline(rstream *rsm, var *v)
1959 return r;
1960 }
1961
1962-static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1963-{
1964- int r = 0;
1965- char c;
1966- const char *s = format;
1967-
1968- if (int_as_int && n == (long long)n) {
1969- r = snprintf(b, size, "%lld", (long long)n);
1970- } else {
1971- do { c = *s; } while (c && *++s);
1972- if (strchr("diouxX", c)) {
1973- r = snprintf(b, size, format, (int)n);
1974- } else if (strchr("eEfgG", c)) {
1975- r = snprintf(b, size, format, n);
1976- } else {
1977- syntax_error(EMSG_INV_FMT);
1978- }
1979- }
1980- return r;
1981-}
1982-
1983 /* formatted output into an allocated buffer, return ptr to buffer */
1984-static char *awk_printf(node *n)
1985+#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS
1986+# define awk_printf(a, b) awk_printf(a)
1987+#endif
1988+static char *awk_printf(node *n, size_t *len)
1989 {
1990- char *b = NULL;
1991- char *fmt, *s, *f;
1992- const char *s1;
1993- int i, j, incr, bsize;
1994- char c, c1;
1995- var *v, *arg;
1996-
1997- v = nvalloc(1);
1998- fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1999-
2000+ char *b;
2001+ char *fmt, *f;
2002+ size_t i;
2003+
2004+ //tmpvar = nvalloc(1);
2005+#define TMPVAR (&G.awk_printf__tmpvar)
2006+ // We use a single "static" tmpvar (instead of on-stack or malloced one)
2007+ // to decrease memory consumption in deeply-recursive awk programs.
2008+ // The rule to work safely is to never call evaluate() while our static
2009+ // TMPVAR's value is still needed.
2010+ fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), TMPVAR)));
2011+ // ^^^^^^^^^ here we immediately strdup() the value, so the later call
2012+ // to evaluate() potentially recursing into another awk_printf() can't
2013+ // mangle the value.
2014+
2015+ b = NULL;
2016 i = 0;
2017- while (*f) {
2018+ while (1) { /* "print one format spec" loop */
2019+ char *s;
2020+ char c;
2021+ char sv;
2022+ var *arg;
2023+ size_t slen;
2024+
2025+ /* Find end of the next format spec, or end of line */
2026 s = f;
2027- while (*f && (*f != '%' || *++f == '%'))
2028- f++;
2029- while (*f && !isalpha(*f)) {
2030- if (*f == '*')
2031- syntax_error("%*x formats are not supported");
2032+ while (1) {
2033+ c = *f;
2034+ if (!c) /* no percent chars found at all */
2035+ goto nul;
2036 f++;
2037+ if (c == '%')
2038+ break;
2039 }
2040-
2041- incr = (f - s) + MAXVARFMT;
2042- b = qrealloc(b, incr + i, &bsize);
2043+ /* we are past % in "....%..." */
2044 c = *f;
2045- if (c != '\0')
2046+ if (!c) /* "....%" */
2047+ goto nul;
2048+ if (c == '%') { /* "....%%...." */
2049+ slen = f - s;
2050+ s = xstrndup(s, slen);
2051 f++;
2052- c1 = *f;
2053+ goto append; /* print "....%" part verbatim */
2054+ }
2055+ while (1) {
2056+ if (isalpha(c))
2057+ break;
2058+ if (c == '*')
2059+ syntax_error("%*x formats are not supported");
2060+ c = *++f;
2061+ if (!c) { /* "....%...." and no letter found after % */
2062+ /* Example: awk 'BEGIN { printf "^^^%^^^\n"; }' */
2063+ nul:
2064+ slen = f - s;
2065+ goto tail; /* print remaining string, exit loop */
2066+ }
2067+ }
2068+ /* we are at A in "....%...A..." */
2069+
2070+ arg = evaluate(nextarg(&n), TMPVAR);
2071+
2072+ /* Result can be arbitrarily long. Example:
2073+ * printf "%99999s", "BOOM"
2074+ */
2075+ sv = *++f;
2076 *f = '\0';
2077- arg = evaluate(nextarg(&n), v);
2078-
2079- j = i;
2080- if (c == 'c' || !c) {
2081- i += sprintf(b+i, s, is_numeric(arg) ?
2082- (char)getvar_i(arg) : *getvar_s(arg));
2083- } else if (c == 's') {
2084- s1 = getvar_s(arg);
2085- b = qrealloc(b, incr+i+strlen(s1), &bsize);
2086- i += sprintf(b+i, s, s1);
2087+ if (c == 'c') {
2088+ char cc = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg);
2089+ char *r = xasprintf(s, cc ? cc : '^' /* else strlen will be wrong */);
2090+ slen = strlen(r);
2091+ if (cc == '\0') /* if cc is NUL, re-format the string with it */
2092+ sprintf(r, s, cc);
2093+ s = r;
2094 } else {
2095- i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2096+ if (c == 's') {
2097+ s = xasprintf(s, getvar_s(arg));
2098+ } else {
2099+ double d = getvar_i(arg);
2100+ if (strchr("diouxX", c)) {
2101+//TODO: make it wider here (%x -> %llx etc)?
2102+ s = xasprintf(s, (int)d);
2103+ } else if (strchr("eEfFgGaA", c)) {
2104+ s = xasprintf(s, d);
2105+ } else {
2106+//TODO: GNU Awk 5.0.1: printf "%W" prints "%W", does not error out
2107+ syntax_error(EMSG_INV_FMT);
2108+ }
2109+ }
2110+ slen = strlen(s);
2111 }
2112- *f = c1;
2113-
2114- /* if there was an error while sprintf, return value is negative */
2115- if (i < j)
2116- i = j;
2117+ *f = sv;
2118+ append:
2119+ if (i == 0) {
2120+ b = s;
2121+ i = slen;
2122+ continue;
2123+ }
2124+ tail:
2125+ b = xrealloc(b, i + slen + 1);
2126+ strcpy(b + i, s);
2127+ i += slen;
2128+ if (!c) /* s is NOT allocated and this is the last part of string? */
2129+ break;
2130+ free(s);
2131 }
2132
2133 free(fmt);
2134- nvfree(v);
2135- b = xrealloc(b, i + 1);
2136- b[i] = '\0';
2137+ //nvfree(tmpvar, 1);
2138+#undef TMPVAR
2139+
2140+#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
2141+ if (len)
2142+ *len = i;
2143+#endif
2144 return b;
2145 }
2146
2147@@ -2338,33 +2564,59 @@ static NOINLINE int do_mktime(const char *ds)
2148 return mktime(&then);
2149 }
2150
2151+/* Reduce stack usage in exec_builtin() by keeping match() code separate */
2152+static NOINLINE var *do_match(node *an1, const char *as0)
2153+{
2154+ regmatch_t pmatch[1];
2155+ regex_t sreg, *re;
2156+ int n, start, len;
2157+
2158+ re = as_regex(an1, &sreg);
2159+ n = regexec(re, as0, 1, pmatch, 0);
2160+ if (re == &sreg)
2161+ regfree(re);
2162+ start = 0;
2163+ len = -1;
2164+ if (n == 0) {
2165+ start = pmatch[0].rm_so + 1;
2166+ len = pmatch[0].rm_eo - pmatch[0].rm_so;
2167+ }
2168+ setvar_i(newvar("RLENGTH"), len);
2169+ return setvar_i(newvar("RSTART"), start);
2170+}
2171+
2172+/* Reduce stack usage in evaluate() by keeping builtins' code separate */
2173 static NOINLINE var *exec_builtin(node *op, var *res)
2174 {
2175 #define tspl (G.exec_builtin__tspl)
2176
2177- var *tv;
2178+ var *tmpvars;
2179 node *an[4];
2180 var *av[4];
2181 const char *as[4];
2182- regmatch_t pmatch[2];
2183- regex_t sreg, *re;
2184 node *spl;
2185 uint32_t isr, info;
2186 int nargs;
2187 time_t tt;
2188 int i, l, ll, n;
2189
2190- tv = nvalloc(4);
2191+ tmpvars = nvalloc(4);
2192+#define TMPVAR0 (tmpvars)
2193+#define TMPVAR1 (tmpvars + 1)
2194+#define TMPVAR2 (tmpvars + 2)
2195+#define TMPVAR3 (tmpvars + 3)
2196+#define TMPVAR(i) (tmpvars + (i))
2197 isr = info = op->info;
2198 op = op->l.n;
2199
2200 av[2] = av[3] = NULL;
2201 for (i = 0; i < 4 && op; i++) {
2202 an[i] = nextarg(&op);
2203- if (isr & 0x09000000)
2204- av[i] = evaluate(an[i], &tv[i]);
2205- if (isr & 0x08000000)
2206- as[i] = getvar_s(av[i]);
2207+ if (isr & 0x09000000) {
2208+ av[i] = evaluate(an[i], TMPVAR(i));
2209+ if (isr & 0x08000000)
2210+ as[i] = getvar_s(av[i]);
2211+ }
2212 isr >>= 1;
2213 }
2214
2215@@ -2386,8 +2638,8 @@ static NOINLINE var *exec_builtin(node *op, var *res)
2216 char *s, *s1;
2217
2218 if (nargs > 2) {
2219- spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2220- an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2221+ spl = (an[2]->info == TI_REGEXP) ? an[2]
2222+ : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl);
2223 } else {
2224 spl = &fsplitter.n;
2225 }
2226@@ -2501,20 +2753,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
2227 break;
2228
2229 case B_ma:
2230- re = as_regex(an[1], &sreg);
2231- n = regexec(re, as[0], 1, pmatch, 0);
2232- if (n == 0) {
2233- pmatch[0].rm_so++;
2234- pmatch[0].rm_eo++;
2235- } else {
2236- pmatch[0].rm_so = 0;
2237- pmatch[0].rm_eo = -1;
2238- }
2239- setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2240- setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2241- setvar_i(res, pmatch[0].rm_so);
2242- if (re == &sreg)
2243- regfree(re);
2244+ res = do_match(an[1], as[0]);
2245 break;
2246
2247 case B_ge:
2248@@ -2530,14 +2769,79 @@ static NOINLINE var *exec_builtin(node *op, var *res)
2249 break;
2250 }
2251
2252- nvfree(tv);
2253+ nvfree(tmpvars, 4);
2254+#undef TMPVAR0
2255+#undef TMPVAR1
2256+#undef TMPVAR2
2257+#undef TMPVAR3
2258+#undef TMPVAR
2259+
2260 return res;
2261 #undef tspl
2262 }
2263
2264+/* if expr looks like "var=value", perform assignment and return 1,
2265+ * otherwise return 0 */
2266+static int is_assignment(const char *expr)
2267+{
2268+ char *exprc, *val;
2269+
2270+ val = (char*)endofname(expr);
2271+ if (val == (char*)expr || *val != '=') {
2272+ return FALSE;
2273+ }
2274+
2275+ exprc = xstrdup(expr);
2276+ val = exprc + (val - expr);
2277+ *val++ = '\0';
2278+
2279+ unescape_string_in_place(val);
2280+ setvar_u(newvar(exprc), val);
2281+ free(exprc);
2282+ return TRUE;
2283+}
2284+
2285+/* switch to next input file */
2286+static rstream *next_input_file(void)
2287+{
2288+#define rsm (G.next_input_file__rsm)
2289+#define files_happen (G.next_input_file__files_happen)
2290+
2291+ const char *fname, *ind;
2292+
2293+ if (rsm.F)
2294+ fclose(rsm.F);
2295+ rsm.F = NULL;
2296+ rsm.pos = rsm.adv = 0;
2297+
2298+ for (;;) {
2299+ if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2300+ if (files_happen)
2301+ return NULL;
2302+ fname = "-";
2303+ rsm.F = stdin;
2304+ break;
2305+ }
2306+ ind = getvar_s(incvar(intvar[ARGIND]));
2307+ fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2308+ if (fname && *fname && !is_assignment(fname)) {
2309+ rsm.F = xfopen_stdin(fname);
2310+ break;
2311+ }
2312+ }
2313+
2314+ files_happen = TRUE;
2315+ setvar_s(intvar[FILENAME], fname);
2316+ return &rsm;
2317+#undef rsm
2318+#undef files_happen
2319+}
2320+
2321 /*
2322 * Evaluate node - the heart of the program. Supplied with subtree
2323- * and place where to store result. returns ptr to result.
2324+ * and "res" variable to assign the result to if we evaluate an expression.
2325+ * If node refers to e.g. a variable or a field, no assignment happens.
2326+ * Return ptr to the result (which may or may not be the "res" variable!)
2327 */
2328 #define XC(n) ((n) >> 8)
2329
2330@@ -2549,14 +2853,16 @@ static var *evaluate(node *op, var *res)
2331 #define seed (G.evaluate__seed)
2332 #define sreg (G.evaluate__sreg)
2333
2334- var *v1;
2335+ var *tmpvars;
2336
2337 if (!op)
2338 return setvar_s(res, NULL);
2339
2340 debug_printf_eval("entered %s()\n", __func__);
2341
2342- v1 = nvalloc(2);
2343+ tmpvars = nvalloc(2);
2344+#define TMPVAR0 (tmpvars)
2345+#define TMPVAR1 (tmpvars + 1)
2346
2347 while (op) {
2348 struct {
2349@@ -2578,48 +2884,35 @@ static var *evaluate(node *op, var *res)
2350 op1 = op->l.n;
2351 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2352
2353- /* "delete" is special:
2354- * "delete array[var--]" must evaluate index expr only once,
2355- * must not evaluate it in "execute inevitable things" part.
2356- */
2357- if (XC(opinfo & OPCLSMASK) == XC(OC_DELETE)) {
2358- uint32_t info = op1->info & OPCLSMASK;
2359- var *v;
2360-
2361- debug_printf_eval("DELETE\n");
2362- if (info == OC_VAR) {
2363- v = op1->l.v;
2364- } else if (info == OC_FNARG) {
2365- v = &fnargs[op1->l.aidx];
2366- } else {
2367- syntax_error(EMSG_NOT_ARRAY);
2368+ /* execute inevitable things */
2369+ if (opinfo & OF_RES1) {
2370+ if ((opinfo & OF_REQUIRED) && !op1)
2371+ syntax_error(EMSG_TOO_FEW_ARGS);
2372+ L.v = evaluate(op1, TMPVAR0);
2373+ if (opinfo & OF_STR1) {
2374+ L.s = getvar_s(L.v);
2375+ debug_printf_eval("L.s:'%s'\n", L.s);
2376 }
2377- if (op1->r.n) { /* array ref? */
2378- const char *s;
2379- s = getvar_s(evaluate(op1->r.n, v1));
2380- hash_remove(iamarray(v), s);
2381- } else {
2382- clear_array(iamarray(v));
2383+ if (opinfo & OF_NUM1) {
2384+ L_d = getvar_i(L.v);
2385+ debug_printf_eval("L_d:%f\n", L_d);
2386 }
2387- goto next;
2388 }
2389-
2390- /* execute inevitable things */
2391- if (opinfo & OF_RES1)
2392- L.v = evaluate(op1, v1);
2393- if (opinfo & OF_RES2)
2394- R.v = evaluate(op->r.n, v1+1);
2395- if (opinfo & OF_STR1) {
2396- L.s = getvar_s(L.v);
2397- debug_printf_eval("L.s:'%s'\n", L.s);
2398- }
2399- if (opinfo & OF_STR2) {
2400- R.s = getvar_s(R.v);
2401- debug_printf_eval("R.s:'%s'\n", R.s);
2402- }
2403- if (opinfo & OF_NUM1) {
2404- L_d = getvar_i(L.v);
2405- debug_printf_eval("L_d:%f\n", L_d);
2406+ /* NB: Must get string/numeric values of L (done above)
2407+ * _before_ evaluate()'ing R.v: if both L and R are $NNNs,
2408+ * and right one is large, then L.v points to Fields[NNN1],
2409+ * second evaluate() reallocates and moves (!) Fields[],
2410+ * R.v points to Fields[NNN2] but L.v now points to freed mem!
2411+ * (Seen trying to evaluate "$444 $44444")
2412+ */
2413+ if (opinfo & OF_RES2) {
2414+ R.v = evaluate(op->r.n, TMPVAR1);
2415+ //TODO: L.v may be invalid now, set L.v to NULL to catch bugs?
2416+ //L.v = NULL;
2417+ if (opinfo & OF_STR2) {
2418+ R.s = getvar_s(R.v);
2419+ debug_printf_eval("R.s:'%s'\n", R.s);
2420+ }
2421 }
2422
2423 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2424@@ -2629,7 +2922,8 @@ static var *evaluate(node *op, var *res)
2425
2426 /* test pattern */
2427 case XC( OC_TEST ):
2428- if ((op1->info & OPCLSMASK) == OC_COMMA) {
2429+ debug_printf_eval("TEST\n");
2430+ if (op1->info == TI_COMMA) {
2431 /* it's range pattern */
2432 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2433 op->info |= OF_CHECKED;
2434@@ -2646,25 +2940,32 @@ static var *evaluate(node *op, var *res)
2435
2436 /* just evaluate an expression, also used as unconditional jump */
2437 case XC( OC_EXEC ):
2438+ debug_printf_eval("EXEC\n");
2439 break;
2440
2441 /* branch, used in if-else and various loops */
2442 case XC( OC_BR ):
2443+ debug_printf_eval("BR\n");
2444 op = istrue(L.v) ? op->a.n : op->r.n;
2445 break;
2446
2447 /* initialize for-in loop */
2448 case XC( OC_WALKINIT ):
2449+ debug_printf_eval("WALKINIT\n");
2450 hashwalk_init(L.v, iamarray(R.v));
2451 break;
2452
2453 /* get next array item */
2454 case XC( OC_WALKNEXT ):
2455+ debug_printf_eval("WALKNEXT\n");
2456 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2457 break;
2458
2459 case XC( OC_PRINT ):
2460- case XC( OC_PRINTF ): {
2461+ debug_printf_eval("PRINT /\n");
2462+ case XC( OC_PRINTF ):
2463+ debug_printf_eval("PRINTF\n");
2464+ {
2465 FILE *F = stdout;
2466
2467 if (op->r.n) {
2468@@ -2682,55 +2983,94 @@ static var *evaluate(node *op, var *res)
2469 F = rsm->F;
2470 }
2471
2472+ /* Can't just check 'opinfo == OC_PRINT' here, parser ORs
2473+ * additional bits to opinfos of print/printf with redirects
2474+ */
2475 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2476 if (!op1) {
2477 fputs(getvar_s(intvar[F0]), F);
2478 } else {
2479- while (op1) {
2480- var *v = evaluate(nextarg(&op1), v1);
2481+ for (;;) {
2482+ var *v = evaluate(nextarg(&op1), TMPVAR0);
2483 if (v->type & VF_NUMBER) {
2484- fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2485- getvar_i(v), TRUE);
2486+ fmt_num(getvar_s(intvar[OFMT]),
2487+ getvar_i(v));
2488 fputs(g_buf, F);
2489 } else {
2490 fputs(getvar_s(v), F);
2491 }
2492-
2493- if (op1)
2494- fputs(getvar_s(intvar[OFS]), F);
2495+ if (!op1)
2496+ break;
2497+ fputs(getvar_s(intvar[OFS]), F);
2498 }
2499 }
2500 fputs(getvar_s(intvar[ORS]), F);
2501-
2502- } else { /* OC_PRINTF */
2503- char *s = awk_printf(op1);
2504+ } else { /* PRINTF */
2505+ IF_FEATURE_AWK_GNU_EXTENSIONS(size_t len;)
2506+ char *s = awk_printf(op1, &len);
2507+#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
2508+ fwrite(s, len, 1, F);
2509+#else
2510 fputs(s, F);
2511+#endif
2512 free(s);
2513 }
2514 fflush(F);
2515 break;
2516 }
2517
2518- /* case XC( OC_DELETE ): - moved to happen before arg evaluation */
2519+ case XC( OC_DELETE ):
2520+ debug_printf_eval("DELETE\n");
2521+ {
2522+ /* "delete" is special:
2523+ * "delete array[var--]" must evaluate index expr only once.
2524+ */
2525+ uint32_t info = op1->info & OPCLSMASK;
2526+ var *v;
2527+
2528+ if (info == OC_VAR) {
2529+ v = op1->l.v;
2530+ } else if (info == OC_FNARG) {
2531+ v = &fnargs[op1->l.aidx];
2532+ } else {
2533+ syntax_error(EMSG_NOT_ARRAY);
2534+ }
2535+ if (op1->r.n) { /* array ref? */
2536+ const char *s;
2537+ s = getvar_s(evaluate(op1->r.n, TMPVAR0));
2538+ hash_remove(iamarray(v), s);
2539+ } else {
2540+ clear_array(iamarray(v));
2541+ }
2542+ break;
2543+ }
2544
2545 case XC( OC_NEWSOURCE ):
2546+ debug_printf_eval("NEWSOURCE\n");
2547 g_progname = op->l.new_progname;
2548 break;
2549
2550 case XC( OC_RETURN ):
2551+ debug_printf_eval("RETURN\n");
2552 copyvar(res, L.v);
2553 break;
2554
2555 case XC( OC_NEXTFILE ):
2556+ debug_printf_eval("NEXTFILE\n");
2557 nextfile = TRUE;
2558 case XC( OC_NEXT ):
2559+ debug_printf_eval("NEXT\n");
2560 nextrec = TRUE;
2561 case XC( OC_DONE ):
2562+ debug_printf_eval("DONE\n");
2563 clrvar(res);
2564 break;
2565
2566 case XC( OC_EXIT ):
2567- awk_exit(L_d);
2568+ debug_printf_eval("EXIT\n");
2569+ if (op1)
2570+ G.exitcode = (int)L_d;
2571+ awk_exit();
2572
2573 /* -- recursive node type -- */
2574
2575@@ -2749,15 +3089,18 @@ static var *evaluate(node *op, var *res)
2576 break;
2577
2578 case XC( OC_IN ):
2579+ debug_printf_eval("IN\n");
2580 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2581 break;
2582
2583 case XC( OC_REGEXP ):
2584+ debug_printf_eval("REGEXP\n");
2585 op1 = op;
2586 L.s = getvar_s(intvar[F0]);
2587 goto re_cont;
2588
2589 case XC( OC_MATCH ):
2590+ debug_printf_eval("MATCH\n");
2591 op1 = op->r.n;
2592 re_cont:
2593 {
2594@@ -2772,61 +3115,80 @@ static var *evaluate(node *op, var *res)
2595 case XC( OC_MOVE ):
2596 debug_printf_eval("MOVE\n");
2597 /* if source is a temporary string, jusk relink it to dest */
2598-//Disabled: if R.v is numeric but happens to have cached R.v->string,
2599-//then L.v ends up being a string, which is wrong
2600-// if (R.v == v1+1 && R.v->string) {
2601-// res = setvar_p(L.v, R.v->string);
2602-// R.v->string = NULL;
2603-// } else {
2604+ if (R.v == TMPVAR1
2605+ && !(R.v->type & VF_NUMBER)
2606+ /* Why check !NUMBER? if R.v is a number but has cached R.v->string,
2607+ * L.v ends up a string, which is wrong */
2608+ /*&& R.v->string - always not NULL (right?) */
2609+ ) {
2610+ res = setvar_p(L.v, R.v->string); /* avoids strdup */
2611+ R.v->string = NULL;
2612+ } else {
2613 res = copyvar(L.v, R.v);
2614-// }
2615+ }
2616 break;
2617
2618 case XC( OC_TERNARY ):
2619- if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2620+ debug_printf_eval("TERNARY\n");
2621+ if (op->r.n->info != TI_COLON)
2622 syntax_error(EMSG_POSSIBLE_ERROR);
2623 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2624 break;
2625
2626 case XC( OC_FUNC ): {
2627- var *vbeg, *v;
2628+ var *argvars, *sv_fnargs;
2629 const char *sv_progname;
2630+ int nargs, i;
2631
2632- /* The body might be empty, still has to eval the args */
2633- if (!op->r.n->info && !op->r.f->body.first)
2634+ debug_printf_eval("FUNC\n");
2635+
2636+ if (!op->r.f->defined)
2637 syntax_error(EMSG_UNDEF_FUNC);
2638
2639- vbeg = v = nvalloc(op->r.f->nargs + 1);
2640+ /* The body might be empty, still has to eval the args */
2641+ nargs = op->r.f->nargs;
2642+ argvars = nvalloc(nargs);
2643+ i = 0;
2644 while (op1) {
2645- var *arg = evaluate(nextarg(&op1), v1);
2646- copyvar(v, arg);
2647- v->type |= VF_CHILD;
2648- v->x.parent = arg;
2649- if (++v - vbeg >= op->r.f->nargs)
2650- break;
2651+ var *arg = evaluate(nextarg(&op1), TMPVAR0);
2652+ if (i == nargs) {
2653+ /* call with more arguments than function takes.
2654+ * (gawk warns: "warning: function 'f' called with more arguments than declared").
2655+ * They are still evaluated, but discarded: */
2656+ clrvar(arg);
2657+ continue;
2658+ }
2659+ copyvar(&argvars[i], arg);
2660+ argvars[i].type |= VF_CHILD;
2661+ argvars[i].x.parent = arg;
2662+ i++;
2663 }
2664
2665- v = fnargs;
2666- fnargs = vbeg;
2667+ sv_fnargs = fnargs;
2668 sv_progname = g_progname;
2669
2670+ fnargs = argvars;
2671 res = evaluate(op->r.f->body.first, res);
2672+ nvfree(argvars, nargs);
2673
2674 g_progname = sv_progname;
2675- nvfree(fnargs);
2676- fnargs = v;
2677+ fnargs = sv_fnargs;
2678
2679 break;
2680 }
2681
2682 case XC( OC_GETLINE ):
2683- case XC( OC_PGETLINE ): {
2684+ debug_printf_eval("GETLINE /\n");
2685+ case XC( OC_PGETLINE ):
2686+ debug_printf_eval("PGETLINE\n");
2687+ {
2688 rstream *rsm;
2689 int i;
2690
2691 if (op1) {
2692 rsm = newfile(L.s);
2693 if (!rsm->F) {
2694+ /* NB: can't use "opinfo == TI_PGETLINE", would break "cmd" | getline */
2695 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2696 rsm->F = popen(L.s, "r");
2697 rsm->is_pipe = TRUE;
2698@@ -2861,16 +3223,34 @@ static var *evaluate(node *op, var *res)
2699 /* simple builtins */
2700 case XC( OC_FBLTIN ): {
2701 double R_d = R_d; /* for compiler */
2702+ debug_printf_eval("FBLTIN\n");
2703+
2704+ if (op1 && op1->info == TI_COMMA)
2705+ /* Simple builtins take one arg maximum */
2706+ syntax_error("Too many arguments");
2707
2708 switch (opn) {
2709 case F_in:
2710 R_d = (long long)L_d;
2711 break;
2712
2713- case F_rn:
2714- R_d = (double)rand() / (double)RAND_MAX;
2715+ case F_rn: /*rand*/
2716+ if (op1)
2717+ syntax_error("Too many arguments");
2718+ {
2719+#if RAND_MAX >= 0x7fffffff
2720+ uint32_t u = ((uint32_t)rand() << 16) ^ rand();
2721+ uint64_t v = ((uint64_t)rand() << 32) | u;
2722+ /* the above shift+or is optimized out on 32-bit arches */
2723+# if RAND_MAX > 0x7fffffff
2724+ v &= 0x7fffffffffffffffULL;
2725+# endif
2726+ R_d = (double)v / 0x8000000000000000ULL;
2727+#else
2728+# error Not implemented for this value of RAND_MAX
2729+#endif
2730 break;
2731-
2732+ }
2733 case F_co:
2734 if (ENABLE_FEATURE_AWK_LIBM) {
2735 R_d = cos(L_d);
2736@@ -2910,7 +3290,9 @@ static var *evaluate(node *op, var *res)
2737 srand(seed);
2738 break;
2739
2740- case F_ti:
2741+ case F_ti: /*systime*/
2742+ if (op1)
2743+ syntax_error("Too many arguments");
2744 R_d = time(NULL);
2745 break;
2746
2747@@ -2949,7 +3331,7 @@ static var *evaluate(node *op, var *res)
2748 rstream *rsm;
2749 int err = 0;
2750 rsm = (rstream *)hash_search(fdhash, L.s);
2751- debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2752+ debug_printf_eval("OC_FBLTIN close: op1:%p s:'%s' rsm:%p\n", op1, L.s, rsm);
2753 if (rsm) {
2754 debug_printf_eval("OC_FBLTIN F_cl "
2755 "rsm->is_pipe:%d, ->F:%p\n",
2756@@ -2960,6 +3342,11 @@ static var *evaluate(node *op, var *res)
2757 */
2758 if (rsm->F)
2759 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2760+//TODO: fix this case:
2761+// $ awk 'BEGIN { print close(""); print ERRNO }'
2762+// -1
2763+// close of redirection that was never opened
2764+// (we print 0, 0)
2765 free(rsm->buffer);
2766 hash_remove(fdhash, L.s);
2767 }
2768@@ -2974,14 +3361,18 @@ static var *evaluate(node *op, var *res)
2769 }
2770
2771 case XC( OC_BUILTIN ):
2772+ debug_printf_eval("BUILTIN\n");
2773 res = exec_builtin(op, res);
2774 break;
2775
2776 case XC( OC_SPRINTF ):
2777- setvar_p(res, awk_printf(op1));
2778+ debug_printf_eval("SPRINTF\n");
2779+ setvar_p(res, awk_printf(op1, NULL));
2780 break;
2781
2782- case XC( OC_UNARY ): {
2783+ case XC( OC_UNARY ):
2784+ debug_printf_eval("UNARY\n");
2785+ {
2786 double Ld, R_d;
2787
2788 Ld = R_d = getvar_i(R.v);
2789@@ -3011,7 +3402,9 @@ static var *evaluate(node *op, var *res)
2790 break;
2791 }
2792
2793- case XC( OC_FIELD ): {
2794+ case XC( OC_FIELD ):
2795+ debug_printf_eval("FIELD\n");
2796+ {
2797 int i = (int)getvar_i(R.v);
2798 if (i < 0)
2799 syntax_error(EMSG_NEGATIVE_FIELD);
2800@@ -3028,26 +3421,33 @@ static var *evaluate(node *op, var *res)
2801
2802 /* concatenation (" ") and index joining (",") */
2803 case XC( OC_CONCAT ):
2804+ debug_printf_eval("CONCAT /\n");
2805 case XC( OC_COMMA ): {
2806 const char *sep = "";
2807- if ((opinfo & OPCLSMASK) == OC_COMMA)
2808+ debug_printf_eval("COMMA\n");
2809+ if (opinfo == TI_COMMA)
2810 sep = getvar_s(intvar[SUBSEP]);
2811 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2812 break;
2813 }
2814
2815 case XC( OC_LAND ):
2816+ debug_printf_eval("LAND\n");
2817 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2818 break;
2819
2820 case XC( OC_LOR ):
2821+ debug_printf_eval("LOR\n");
2822 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2823 break;
2824
2825 case XC( OC_BINARY ):
2826- case XC( OC_REPLACE ): {
2827+ debug_printf_eval("BINARY /\n");
2828+ case XC( OC_REPLACE ):
2829+ debug_printf_eval("REPLACE\n");
2830+ {
2831 double R_d = getvar_i(R.v);
2832- debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2833+ debug_printf_eval("R_d:%f opn:%c\n", R_d, opn);
2834 switch (opn) {
2835 case '+':
2836 L_d += R_d;
2837@@ -3083,6 +3483,7 @@ static var *evaluate(node *op, var *res)
2838 case XC( OC_COMPARE ): {
2839 int i = i; /* for compiler */
2840 double Ld;
2841+ debug_printf_eval("COMPARE\n");
2842
2843 if (is_numeric(L.v) && is_numeric(R.v)) {
2844 Ld = getvar_i(L.v) - getvar_i(R.v);
2845@@ -3109,7 +3510,7 @@ static var *evaluate(node *op, var *res)
2846 default:
2847 syntax_error(EMSG_POSSIBLE_ERROR);
2848 } /* switch */
2849- next:
2850+
2851 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2852 op = op->a.n;
2853 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2854@@ -3118,7 +3519,10 @@ static var *evaluate(node *op, var *res)
2855 break;
2856 } /* while (op) */
2857
2858- nvfree(v1);
2859+ nvfree(tmpvars, 2);
2860+#undef TMPVAR0
2861+#undef TMPVAR1
2862+
2863 debug_printf_eval("returning from %s(): %p\n", __func__, res);
2864 return res;
2865 #undef fnargs
2866@@ -3126,25 +3530,21 @@ static var *evaluate(node *op, var *res)
2867 #undef sreg
2868 }
2869
2870-
2871 /* -------- main & co. -------- */
2872
2873-static int awk_exit(int r)
2874+static int awk_exit(void)
2875 {
2876- var tv;
2877 unsigned i;
2878- hash_item *hi;
2879-
2880- zero_out_var(&tv);
2881
2882 if (!exiting) {
2883 exiting = TRUE;
2884 nextrec = FALSE;
2885- evaluate(endseq.first, &tv);
2886+ evaluate(endseq.first, &G.exit__tmpvar);
2887 }
2888
2889 /* waiting for children */
2890 for (i = 0; i < fdhash->csize; i++) {
2891+ hash_item *hi;
2892 hi = fdhash->items[i];
2893 while (hi) {
2894 if (hi->data.rs.F && hi->data.rs.is_pipe)
2895@@ -3153,65 +3553,7 @@ static int awk_exit(int r)
2896 }
2897 }
2898
2899- exit(r);
2900-}
2901-
2902-/* if expr looks like "var=value", perform assignment and return 1,
2903- * otherwise return 0 */
2904-static int is_assignment(const char *expr)
2905-{
2906- char *exprc, *val;
2907-
2908- if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
2909- return FALSE;
2910- }
2911-
2912- exprc = xstrdup(expr);
2913- val = exprc + (val - expr);
2914- *val++ = '\0';
2915-
2916- unescape_string_in_place(val);
2917- setvar_u(newvar(exprc), val);
2918- free(exprc);
2919- return TRUE;
2920-}
2921-
2922-/* switch to next input file */
2923-static rstream *next_input_file(void)
2924-{
2925-#define rsm (G.next_input_file__rsm)
2926-#define files_happen (G.next_input_file__files_happen)
2927-
2928- FILE *F;
2929- const char *fname, *ind;
2930-
2931- if (rsm.F)
2932- fclose(rsm.F);
2933- rsm.F = NULL;
2934- rsm.pos = rsm.adv = 0;
2935-
2936- for (;;) {
2937- if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2938- if (files_happen)
2939- return NULL;
2940- fname = "-";
2941- F = stdin;
2942- break;
2943- }
2944- ind = getvar_s(incvar(intvar[ARGIND]));
2945- fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2946- if (fname && *fname && !is_assignment(fname)) {
2947- F = xfopen_stdin(fname);
2948- break;
2949- }
2950- }
2951-
2952- files_happen = TRUE;
2953- setvar_s(intvar[FILENAME], fname);
2954- rsm.F = F;
2955- return &rsm;
2956-#undef rsm
2957-#undef files_happen
2958+ exit(G.exitcode);
2959 }
2960
2961 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2962@@ -3224,12 +3566,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
2963 #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
2964 llist_t *list_e = NULL;
2965 #endif
2966- int i, j;
2967- var *v;
2968- var tv;
2969- char **envp;
2970- char *vnames = (char *)vNames; /* cheat */
2971- char *vvalues = (char *)vValues;
2972+ int i;
2973
2974 INIT_G();
2975
2976@@ -3238,48 +3575,43 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
2977 if (ENABLE_LOCALE_SUPPORT)
2978 setlocale(LC_NUMERIC, "C");
2979
2980- zero_out_var(&tv);
2981-
2982- /* allocate global buffer */
2983- g_buf = xmalloc(MAXVARFMT + 1);
2984-
2985- vhash = hash_init();
2986- ahash = hash_init();
2987- fdhash = hash_init();
2988- fnhash = hash_init();
2989-
2990 /* initialize variables */
2991- for (i = 0; *vnames; i++) {
2992- intvar[i] = v = newvar(nextword(&vnames));
2993- if (*vvalues != '\377')
2994- setvar_s(v, nextword(&vvalues));
2995- else
2996- setvar_i(v, 0);
2997-
2998- if (*vnames == '*') {
2999- v->type |= VF_SPECIAL;
3000- vnames++;
3001+ vhash = hash_init();
3002+ {
3003+ char *vnames = (char *)vNames; /* cheat */
3004+ char *vvalues = (char *)vValues;
3005+ for (i = 0; *vnames; i++) {
3006+ var *v;
3007+ intvar[i] = v = newvar(nextword(&vnames));
3008+ if (*vvalues != '\377')
3009+ setvar_s(v, nextword(&vvalues));
3010+ else
3011+ setvar_i(v, 0);
3012+
3013+ if (*vnames == '*') {
3014+ v->type |= VF_SPECIAL;
3015+ vnames++;
3016+ }
3017 }
3018 }
3019
3020 handle_special(intvar[FS]);
3021 handle_special(intvar[RS]);
3022
3023- newfile("/dev/stdin")->F = stdin;
3024- newfile("/dev/stdout")->F = stdout;
3025- newfile("/dev/stderr")->F = stderr;
3026-
3027 /* Huh, people report that sometimes environ is NULL. Oh well. */
3028- if (environ) for (envp = environ; *envp; envp++) {
3029- /* environ is writable, thus we don't strdup it needlessly */
3030- char *s = *envp;
3031- char *s1 = strchr(s, '=');
3032- if (s1) {
3033- *s1 = '\0';
3034- /* Both findvar and setvar_u take const char*
3035- * as 2nd arg -> environment is not trashed */
3036- setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3037- *s1 = '=';
3038+ if (environ) {
3039+ char **envp;
3040+ for (envp = environ; *envp; envp++) {
3041+ /* environ is writable, thus we don't strdup it needlessly */
3042+ char *s = *envp;
3043+ char *s1 = strchr(s, '=');
3044+ if (s1) {
3045+ *s1 = '\0';
3046+ /* Both findvar and setvar_u take const char*
3047+ * as 2nd arg -> environment is not trashed */
3048+ setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3049+ *s1 = '=';
3050+ }
3051 }
3052 }
3053 opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3054@@ -3295,20 +3627,19 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3055 if (!is_assignment(llist_pop(&list_v)))
3056 bb_show_usage();
3057 }
3058+
3059+ /* Parse all supplied programs */
3060+ fnhash = hash_init();
3061+ ahash = hash_init();
3062 while (list_f) {
3063- char *s = NULL;
3064- FILE *from_file;
3065+ int fd;
3066+ char *s;
3067
3068 g_progname = llist_pop(&list_f);
3069- from_file = xfopen_stdin(g_progname);
3070- /* one byte is reserved for some trick in next_token */
3071- for (i = j = 1; j > 0; i += j) {
3072- s = xrealloc(s, i + 4096);
3073- j = fread(s + i, 1, 4094, from_file);
3074- }
3075- s[i] = '\0';
3076- fclose(from_file);
3077- parse_program(s + 1);
3078+ fd = xopen_stdin(g_progname);
3079+ s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
3080+ close(fd);
3081+ parse_program(s);
3082 free(s);
3083 }
3084 g_progname = "cmd. line";
3085@@ -3317,11 +3648,23 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3086 parse_program(llist_pop(&list_e));
3087 }
3088 #endif
3089+//FIXME: preserve order of -e and -f
3090+//TODO: implement -i LIBRARY and -E FILE too, they are easy-ish
3091 if (!(opt & (OPT_f | OPT_e))) {
3092 if (!*argv)
3093 bb_show_usage();
3094 parse_program(*argv++);
3095 }
3096+ /* Free unused parse structures */
3097+ //hash_free(fnhash); // ~250 bytes when empty, used only for function names
3098+ //^^^^^^^^^^^^^^^^^ does not work, hash_clear() inside SEGVs
3099+ // (IOW: hash_clear() assumes it's a hash of variables. fnhash is not).
3100+ free(fnhash->items);
3101+ free(fnhash);
3102+ fnhash = NULL; // debug
3103+ //hash_free(ahash); // empty after parsing, will reuse as fdhash instead of freeing
3104+
3105+ /* Parsing done, on to executing */
3106
3107 /* fill in ARGV array */
3108 setari_u(intvar[ARGV], 0, "awk");
3109@@ -3330,9 +3673,14 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3110 setari_u(intvar[ARGV], ++i, *argv++);
3111 setvar_i(intvar[ARGC], i + 1);
3112
3113- evaluate(beginseq.first, &tv);
3114+ //fdhash = ahash; // done via define
3115+ newfile("/dev/stdin")->F = stdin;
3116+ newfile("/dev/stdout")->F = stdout;
3117+ newfile("/dev/stderr")->F = stderr;
3118+
3119+ evaluate(beginseq.first, &G.main__tmpvar);
3120 if (!mainseq.first && !endseq.first)
3121- awk_exit(EXIT_SUCCESS);
3122+ awk_exit();
3123
3124 /* input file could already be opened in BEGIN block */
3125 if (!iF)
3126@@ -3347,7 +3695,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3127 nextrec = FALSE;
3128 incvar(intvar[NR]);
3129 incvar(intvar[FNR]);
3130- evaluate(mainseq.first, &tv);
3131+ evaluate(mainseq.first, &G.main__tmpvar);
3132
3133 if (nextfile)
3134 break;
3135@@ -3359,6 +3707,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3136 iF = next_input_file();
3137 }
3138
3139- awk_exit(EXIT_SUCCESS);
3140+ awk_exit();
3141 /*return 0;*/
3142 }
3143diff --git a/testsuite/awk.tests b/testsuite/awk.tests
3144index 92c83d719..4a7a01245 100755
3145--- a/testsuite/awk.tests
3146+++ b/testsuite/awk.tests
3147@@ -44,6 +44,16 @@ testing "awk handles empty function f(arg){}" \
3148 "L1\n\nL2\n\n" \
3149 "" ""
3150
3151+prg='
3152+function empty_fun(){}
3153+END {empty_fun()
3154+ print "Ok"
3155+}'
3156+testing "awk handles empty function f(){}" \
3157+ "awk '$prg'" \
3158+ "Ok\n" \
3159+ "" ""
3160+
3161 prg='
3162 function outer_fun() {
3163 return 1
3164@@ -71,6 +81,23 @@ testing "awk properly handles undefined function" \
3165 "L1\n\nawk: cmd. line:5: Call to undefined function\n" \
3166 "" ""
3167
3168+prg='
3169+BEGIN {
3170+ v=1
3171+ a=2
3172+ print v (a)
3173+}'
3174+testing "awk 'v (a)' is not a function call, it is a concatenation" \
3175+ "awk '$prg' 2>&1" \
3176+ "12\n" \
3177+ "" ""
3178+
3179+prg='func f(){print"F"};func g(){print"G"};BEGIN{f(g(),g())}'
3180+testing "awk unused function args are evaluated" \
3181+ "awk '$prg' 2>&1" \
3182+ "G\nG\nF\n" \
3183+ "" ""
3184+
3185
3186 optional DESKTOP
3187 testing "awk hex const 1" "awk '{ print or(0xffffffff,1) }'" "4294967295\n" "" "\n"
3188@@ -352,19 +379,14 @@ testing "awk -e and ARGC" \
3189 ""
3190 SKIP=
3191
3192-# The examples are in fact not valid awk programs (break/continue
3193-# can only be used inside loops).
3194-# But we do accept them outside of loops.
3195-# We had a bug with misparsing "break ; else" sequence.
3196-# Test that *that* bug is fixed, using simplest possible scripts:
3197 testing "awk break" \
3198 "awk -f - 2>&1; echo \$?" \
3199- "0\n" \
3200+ "awk: -:1: 'break' not in a loop\n1\n" \
3201 "" \
3202 'BEGIN { if (1) break; else a = 1 }'
3203 testing "awk continue" \
3204 "awk -f - 2>&1; echo \$?" \
3205- "0\n" \
3206+ "awk: -:1: 'continue' not in a loop\n1\n" \
3207 "" \
3208 'BEGIN { if (1) continue; else a = 1 }'
3209
3210@@ -383,6 +405,11 @@ testing "awk errors on missing delete arg" \
3211 "awk -e '{delete}' 2>&1" "awk: cmd. line:1: Too few arguments\n" "" ""
3212 SKIP=
3213
3214+optional FEATURE_AWK_GNU_EXTENSIONS
3215+testing "awk printf('%c') can output NUL" \
3216+ "awk '{printf(\"hello%c null\n\", 0)}'" "hello\0 null\n" "" "\n"
3217+SKIP=
3218+
3219 # testing "description" "command" "result" "infile" "stdin"
3220 testing 'awk negative field access' \
3221 'awk 2>&1 -- '\''{ $(-1) }'\' \
3222@@ -413,4 +440,25 @@ testing 'awk $NF is empty' \
3223 '' \
3224 'a=====123='
3225
3226+testing "awk exit N propagates through END's exit" \
3227+ "awk 'BEGIN { exit 42 } END { exit }'; echo \$?" \
3228+ "42\n" \
3229+ '' ''
3230+
3231+testing "awk print + redirect" \
3232+ "awk 'BEGIN { print \"STDERR %s\" >\"/dev/stderr\" }' 2>&1" \
3233+ "STDERR %s\n" \
3234+ '' ''
3235+
3236+testing "awk \"cmd\" | getline" \
3237+ "awk 'BEGIN { \"echo HELLO\" | getline; print }'" \
3238+ "HELLO\n" \
3239+ '' ''
3240+
3241+# printf %% should print one % (had a bug where it didn't)
3242+testing 'awk printf %% prints one %' \
3243+ "awk 'BEGIN { printf \"%%\n\" }'" \
3244+ "%\n" \
3245+ '' ''
3246+
3247 exit $FAILCOUNT
3248diff --git a/testsuite/printf.tests b/testsuite/printf.tests
3249index 34a65926e..050edef71 100755
3250--- a/testsuite/printf.tests
3251+++ b/testsuite/printf.tests
3252@@ -79,6 +79,11 @@ testing "printf understands %Ld" \
3253 "-5\n""0\n" \
3254 "" ""
3255
3256+testing "printf understands %%" \
3257+ "${bb}printf '%%\n' 2>&1; echo \$?" \
3258+ "%\n""0\n" \
3259+ "" ""
3260+
3261 testing "printf handles positive numbers for %d" \
3262 "${bb}printf '%d\n' 3 +3 ' 3' ' +3' 2>&1; echo \$?" \
3263 "3\n"\
3264--
32652.33.0
3266
diff --git a/meta/recipes-core/busybox/busybox/0002-man-fix-segfault-in-man-1.patch b/meta/recipes-core/busybox/busybox/0002-man-fix-segfault-in-man-1.patch
new file mode 100644
index 0000000000..4a930b7b6f
--- /dev/null
+++ b/meta/recipes-core/busybox/busybox/0002-man-fix-segfault-in-man-1.patch
@@ -0,0 +1,30 @@
1From 4975cace9bf96bfde174f8bb5cc4068d2ea294d4 Mon Sep 17 00:00:00 2001
2From: Denys Vlasenko <vda.linux@googlemail.com>
3Date: Tue, 15 Jun 2021 14:47:46 +0200
4Subject: [PATCH] man: fix segfault in "man 1"
5
6function old new delta
7man_main 942 954 +12
8
9Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
10
11Upstream-Status: Backport [4d4fc5ca5ee4f]
12CVE: CVE-2021-42373
13Signed-off-by: Chen Qi <Qi.Chen@windriver.com>
14---
15 miscutils/man.c | 2 +-
16 1 file changed, 1 insertion(+), 1 deletion(-)
17
18diff --git a/miscutils/man.c b/miscutils/man.c
19index 722f6641e..d319e8bba 100644
20--- a/miscutils/man.c
21+++ b/miscutils/man.c
22@@ -324,7 +324,7 @@ int man_main(int argc UNUSED_PARAM, char **argv)
23
24 /* is 1st ARG a SECTION? */
25 sec_list = conf_sec_list;
26- if (is_section_name(conf_sec_list, *argv)) {
27+ if (is_section_name(conf_sec_list, *argv) && argv[1]) {
28 /* yes */
29 sec_list = *argv++;
30 }
diff --git a/meta/recipes-core/busybox/busybox_1.33.1.bb b/meta/recipes-core/busybox/busybox_1.33.2.bb
index 4002d6a5c6..4a0d3b4556 100644
--- a/meta/recipes-core/busybox/busybox_1.33.1.bb
+++ b/meta/recipes-core/busybox/busybox_1.33.2.bb
@@ -48,7 +48,9 @@ SRC_URI = "https://busybox.net/downloads/busybox-${PV}.tar.bz2;name=tarball \
48 file://0001-sysctl-ignore-EIO-of-stable_secret-below-proc-sys-ne.patch \ 48 file://0001-sysctl-ignore-EIO-of-stable_secret-below-proc-sys-ne.patch \
49 file://0001-gen_build_files-Use-C-locale-when-calling-sed-on-glo.patch \ 49 file://0001-gen_build_files-Use-C-locale-when-calling-sed-on-glo.patch \
50 file://0001-mktemp-add-tmpdir-option.patch \ 50 file://0001-mktemp-add-tmpdir-option.patch \
51 file://0001-awk-fix-CVEs.patch \
52 file://0002-man-fix-segfault-in-man-1.patch \
51 " 53 "
52SRC_URI_append_libc-musl = " file://musl.cfg " 54SRC_URI_append_libc-musl = " file://musl.cfg "
53 55
54SRC_URI[tarball.sha256sum] = "12cec6bd2b16d8a9446dd16130f2b92982f1819f6e1c5f5887b6db03f5660d28" 56SRC_URI[tarball.sha256sum] = "6843ba7977081e735fa0fdb05893e3c002c8c5ad7c9c80da206e603cc0ac47e7"
diff --git a/meta/recipes-core/expat/expat/CVE-2021-45960.patch b/meta/recipes-core/expat/expat/CVE-2021-45960.patch
new file mode 100644
index 0000000000..523449e22c
--- /dev/null
+++ b/meta/recipes-core/expat/expat/CVE-2021-45960.patch
@@ -0,0 +1,65 @@
1From 0adcb34c49bee5b19bd29b16a578c510c23597ea Mon Sep 17 00:00:00 2001
2From: Sebastian Pipping <sebastian@pipping.org>
3Date: Mon, 27 Dec 2021 20:15:02 +0100
4Subject: [PATCH] lib: Detect and prevent troublesome left shifts in function
5 storeAtts (CVE-2021-45960)
6
7Upstream-Status: Backport:
8https://github.com/libexpat/libexpat/pull/534/commits/0adcb34c49bee5b19bd29b16a578c510c23597ea
9
10CVE: CVE-2021-45960
11Signed-off-by: Steve Sakoman <steve@sakoman.com>
12
13---
14 expat/lib/xmlparse.c | 31 +++++++++++++++++++++++++++++--
15 1 file changed, 29 insertions(+), 2 deletions(-)
16
17diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c
18index d730f41c3..b47c31b05 100644
19--- a/lib/xmlparse.c
20+++ b/lib/xmlparse.c
21@@ -3414,7 +3414,13 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
22 if (nPrefixes) {
23 int j; /* hash table index */
24 unsigned long version = parser->m_nsAttsVersion;
25- int nsAttsSize = (int)1 << parser->m_nsAttsPower;
26+
27+ /* Detect and prevent invalid shift */
28+ if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
29+ return XML_ERROR_NO_MEMORY;
30+ }
31+
32+ unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
33 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
34 /* size of hash table must be at least 2 * (# of prefixed attributes) */
35 if ((nPrefixes << 1)
36@@ -3425,7 +3431,28 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
37 ;
38 if (parser->m_nsAttsPower < 3)
39 parser->m_nsAttsPower = 3;
40- nsAttsSize = (int)1 << parser->m_nsAttsPower;
41+
42+ /* Detect and prevent invalid shift */
43+ if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
44+ /* Restore actual size of memory in m_nsAtts */
45+ parser->m_nsAttsPower = oldNsAttsPower;
46+ return XML_ERROR_NO_MEMORY;
47+ }
48+
49+ nsAttsSize = 1u << parser->m_nsAttsPower;
50+
51+ /* Detect and prevent integer overflow.
52+ * The preprocessor guard addresses the "always false" warning
53+ * from -Wtype-limits on platforms where
54+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
55+#if UINT_MAX >= SIZE_MAX
56+ if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
57+ /* Restore actual size of memory in m_nsAtts */
58+ parser->m_nsAttsPower = oldNsAttsPower;
59+ return XML_ERROR_NO_MEMORY;
60+ }
61+#endif
62+
63 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
64 nsAttsSize * sizeof(NS_ATT));
65 if (! temp) {
diff --git a/meta/recipes-core/expat/expat/CVE-2021-46143.patch b/meta/recipes-core/expat/expat/CVE-2021-46143.patch
new file mode 100644
index 0000000000..b1a726d9a8
--- /dev/null
+++ b/meta/recipes-core/expat/expat/CVE-2021-46143.patch
@@ -0,0 +1,49 @@
1From 85ae9a2d7d0e9358f356b33977b842df8ebaec2b Mon Sep 17 00:00:00 2001
2From: Sebastian Pipping <sebastian@pipping.org>
3Date: Sat, 25 Dec 2021 20:52:08 +0100
4Subject: [PATCH] lib: Prevent integer overflow on m_groupSize in function
5 doProlog (CVE-2021-46143)
6
7Upstream-Status: Backport:
8https://github.com/libexpat/libexpat/pull/538/commits/85ae9a2d7d0e9358f356b33977b842df8ebaec2b
9
10CVE: CVE-2021-46143
11
12Signed-off-by: Steve Sakoman <steve@sakoman.com>
13---
14 expat/lib/xmlparse.c | 15 +++++++++++++++
15 1 file changed, 15 insertions(+)
16
17diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c
18index b47c31b0..8f243126 100644
19--- a/lib/xmlparse.c
20+++ b/lib/xmlparse.c
21@@ -5046,6 +5046,11 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
22 if (parser->m_prologState.level >= parser->m_groupSize) {
23 if (parser->m_groupSize) {
24 {
25+ /* Detect and prevent integer overflow */
26+ if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
27+ return XML_ERROR_NO_MEMORY;
28+ }
29+
30 char *const new_connector = (char *)REALLOC(
31 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
32 if (new_connector == NULL) {
33@@ -5056,6 +5061,16 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
34 }
35
36 if (dtd->scaffIndex) {
37+ /* Detect and prevent integer overflow.
38+ * The preprocessor guard addresses the "always false" warning
39+ * from -Wtype-limits on platforms where
40+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
41+#if UINT_MAX >= SIZE_MAX
42+ if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
43+ return XML_ERROR_NO_MEMORY;
44+ }
45+#endif
46+
47 int *const new_scaff_index = (int *)REALLOC(
48 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
49 if (new_scaff_index == NULL)
diff --git a/meta/recipes-core/expat/expat/CVE-2022-22822-27.patch b/meta/recipes-core/expat/expat/CVE-2022-22822-27.patch
new file mode 100644
index 0000000000..e569fbc7ab
--- /dev/null
+++ b/meta/recipes-core/expat/expat/CVE-2022-22822-27.patch
@@ -0,0 +1,257 @@
1From 9f93e8036e842329863bf20395b8fb8f73834d9e Mon Sep 17 00:00:00 2001
2From: Sebastian Pipping <sebastian@pipping.org>
3Date: Thu, 30 Dec 2021 22:46:03 +0100
4Subject: [PATCH] lib: Prevent integer overflow at multiple places
5 (CVE-2022-22822 to CVE-2022-22827)
6
7The involved functions are:
8- addBinding (CVE-2022-22822)
9- build_model (CVE-2022-22823)
10- defineAttribute (CVE-2022-22824)
11- lookup (CVE-2022-22825)
12- nextScaffoldPart (CVE-2022-22826)
13- storeAtts (CVE-2022-22827)
14
15Upstream-Status: Backport:
16https://github.com/libexpat/libexpat/pull/539/commits/9f93e8036e842329863bf20395b8fb8f73834d9e
17
18CVE: CVE-2022-22822 CVE-2022-22823 CVE-2022-22824 CVE-2022-22825 CVE-2022-22826 CVE-2022-22827
19Signed-off-by: Steve Sakoman <steve@sakoman.com>
20
21---
22 expat/lib/xmlparse.c | 153 ++++++++++++++++++++++++++++++++++++++++++-
23 1 file changed, 151 insertions(+), 2 deletions(-)
24
25diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c
26index 8f243126..575e73ee 100644
27--- a/lib/xmlparse.c
28+++ b/lib/xmlparse.c
29@@ -3261,13 +3261,38 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
30
31 /* get the attributes from the tokenizer */
32 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
33+
34+ /* Detect and prevent integer overflow */
35+ if (n > INT_MAX - nDefaultAtts) {
36+ return XML_ERROR_NO_MEMORY;
37+ }
38+
39 if (n + nDefaultAtts > parser->m_attsSize) {
40 int oldAttsSize = parser->m_attsSize;
41 ATTRIBUTE *temp;
42 #ifdef XML_ATTR_INFO
43 XML_AttrInfo *temp2;
44 #endif
45+
46+ /* Detect and prevent integer overflow */
47+ if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
48+ || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
49+ return XML_ERROR_NO_MEMORY;
50+ }
51+
52 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
53+
54+ /* Detect and prevent integer overflow.
55+ * The preprocessor guard addresses the "always false" warning
56+ * from -Wtype-limits on platforms where
57+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
58+#if UINT_MAX >= SIZE_MAX
59+ if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
60+ parser->m_attsSize = oldAttsSize;
61+ return XML_ERROR_NO_MEMORY;
62+ }
63+#endif
64+
65 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
66 parser->m_attsSize * sizeof(ATTRIBUTE));
67 if (temp == NULL) {
68@@ -3276,6 +3301,17 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
69 }
70 parser->m_atts = temp;
71 #ifdef XML_ATTR_INFO
72+ /* Detect and prevent integer overflow.
73+ * The preprocessor guard addresses the "always false" warning
74+ * from -Wtype-limits on platforms where
75+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
76+# if UINT_MAX >= SIZE_MAX
77+ if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
78+ parser->m_attsSize = oldAttsSize;
79+ return XML_ERROR_NO_MEMORY;
80+ }
81+# endif
82+
83 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
84 parser->m_attsSize * sizeof(XML_AttrInfo));
85 if (temp2 == NULL) {
86@@ -3610,9 +3646,31 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
87 tagNamePtr->prefixLen = prefixLen;
88 for (i = 0; localPart[i++];)
89 ; /* i includes null terminator */
90+
91+ /* Detect and prevent integer overflow */
92+ if (binding->uriLen > INT_MAX - prefixLen
93+ || i > INT_MAX - (binding->uriLen + prefixLen)) {
94+ return XML_ERROR_NO_MEMORY;
95+ }
96+
97 n = i + binding->uriLen + prefixLen;
98 if (n > binding->uriAlloc) {
99 TAG *p;
100+
101+ /* Detect and prevent integer overflow */
102+ if (n > INT_MAX - EXPAND_SPARE) {
103+ return XML_ERROR_NO_MEMORY;
104+ }
105+ /* Detect and prevent integer overflow.
106+ * The preprocessor guard addresses the "always false" warning
107+ * from -Wtype-limits on platforms where
108+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
109+#if UINT_MAX >= SIZE_MAX
110+ if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
111+ return XML_ERROR_NO_MEMORY;
112+ }
113+#endif
114+
115 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
116 if (! uri)
117 return XML_ERROR_NO_MEMORY;
118@@ -3708,6 +3766,21 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
119 if (parser->m_freeBindingList) {
120 b = parser->m_freeBindingList;
121 if (len > b->uriAlloc) {
122+ /* Detect and prevent integer overflow */
123+ if (len > INT_MAX - EXPAND_SPARE) {
124+ return XML_ERROR_NO_MEMORY;
125+ }
126+
127+ /* Detect and prevent integer overflow.
128+ * The preprocessor guard addresses the "always false" warning
129+ * from -Wtype-limits on platforms where
130+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
131+#if UINT_MAX >= SIZE_MAX
132+ if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
133+ return XML_ERROR_NO_MEMORY;
134+ }
135+#endif
136+
137 XML_Char *temp = (XML_Char *)REALLOC(
138 parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
139 if (temp == NULL)
140@@ -3720,6 +3793,21 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
141 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
142 if (! b)
143 return XML_ERROR_NO_MEMORY;
144+
145+ /* Detect and prevent integer overflow */
146+ if (len > INT_MAX - EXPAND_SPARE) {
147+ return XML_ERROR_NO_MEMORY;
148+ }
149+ /* Detect and prevent integer overflow.
150+ * The preprocessor guard addresses the "always false" warning
151+ * from -Wtype-limits on platforms where
152+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
153+#if UINT_MAX >= SIZE_MAX
154+ if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
155+ return XML_ERROR_NO_MEMORY;
156+ }
157+#endif
158+
159 b->uri
160 = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
161 if (! b->uri) {
162@@ -6141,7 +6229,24 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
163 }
164 } else {
165 DEFAULT_ATTRIBUTE *temp;
166+
167+ /* Detect and prevent integer overflow */
168+ if (type->allocDefaultAtts > INT_MAX / 2) {
169+ return 0;
170+ }
171+
172 int count = type->allocDefaultAtts * 2;
173+
174+ /* Detect and prevent integer overflow.
175+ * The preprocessor guard addresses the "always false" warning
176+ * from -Wtype-limits on platforms where
177+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
178+#if UINT_MAX >= SIZE_MAX
179+ if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
180+ return 0;
181+ }
182+#endif
183+
184 temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
185 (count * sizeof(DEFAULT_ATTRIBUTE)));
186 if (temp == NULL)
187@@ -6792,8 +6897,20 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
188 /* check for overflow (table is half full) */
189 if (table->used >> (table->power - 1)) {
190 unsigned char newPower = table->power + 1;
191+
192+ /* Detect and prevent invalid shift */
193+ if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
194+ return NULL;
195+ }
196+
197 size_t newSize = (size_t)1 << newPower;
198 unsigned long newMask = (unsigned long)newSize - 1;
199+
200+ /* Detect and prevent integer overflow */
201+ if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
202+ return NULL;
203+ }
204+
205 size_t tsize = newSize * sizeof(NAMED *);
206 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
207 if (! newV)
208@@ -7143,6 +7260,20 @@ nextScaffoldPart(XML_Parser parser) {
209 if (dtd->scaffCount >= dtd->scaffSize) {
210 CONTENT_SCAFFOLD *temp;
211 if (dtd->scaffold) {
212+ /* Detect and prevent integer overflow */
213+ if (dtd->scaffSize > UINT_MAX / 2u) {
214+ return -1;
215+ }
216+ /* Detect and prevent integer overflow.
217+ * The preprocessor guard addresses the "always false" warning
218+ * from -Wtype-limits on platforms where
219+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
220+#if UINT_MAX >= SIZE_MAX
221+ if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
222+ return -1;
223+ }
224+#endif
225+
226 temp = (CONTENT_SCAFFOLD *)REALLOC(
227 parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
228 if (temp == NULL)
229@@ -7212,8 +7343,26 @@ build_model(XML_Parser parser) {
230 XML_Content *ret;
231 XML_Content *cpos;
232 XML_Char *str;
233- int allocsize = (dtd->scaffCount * sizeof(XML_Content)
234- + (dtd->contentStringLen * sizeof(XML_Char)));
235+
236+ /* Detect and prevent integer overflow.
237+ * The preprocessor guard addresses the "always false" warning
238+ * from -Wtype-limits on platforms where
239+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
240+#if UINT_MAX >= SIZE_MAX
241+ if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
242+ return NULL;
243+ }
244+ if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
245+ return NULL;
246+ }
247+#endif
248+ if (dtd->scaffCount * sizeof(XML_Content)
249+ > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
250+ return NULL;
251+ }
252+
253+ const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
254+ + (dtd->contentStringLen * sizeof(XML_Char)));
255
256 ret = (XML_Content *)MALLOC(parser, allocsize);
257 if (! ret)
diff --git a/meta/recipes-core/expat/expat/CVE-2022-23852.patch b/meta/recipes-core/expat/expat/CVE-2022-23852.patch
new file mode 100644
index 0000000000..41425c108b
--- /dev/null
+++ b/meta/recipes-core/expat/expat/CVE-2022-23852.patch
@@ -0,0 +1,33 @@
1From 847a645152f5ebc10ac63b74b604d0c1a79fae40 Mon Sep 17 00:00:00 2001
2From: Samanta Navarro <ferivoz@riseup.net>
3Date: Sat, 22 Jan 2022 17:48:00 +0100
4Subject: [PATCH] lib: Detect and prevent integer overflow in XML_GetBuffer
5 (CVE-2022-23852)
6
7Upstream-Status: Backport:
8https://github.com/libexpat/libexpat/commit/847a645152f5ebc10ac63b74b604d0c1a79fae40
9
10CVE: CVE-2022-23852
11
12Signed-off-by: Steve Sakoman <steve@sakoman.com>
13
14---
15 expat/lib/xmlparse.c | 5 +++++
16 1 file changed, 5 insertions(+)
17
18diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c
19index d54af683..5ce31402 100644
20--- a/lib/xmlparse.c
21+++ b/lib/xmlparse.c
22@@ -2067,6 +2067,11 @@ XML_GetBuffer(XML_Parser parser, int len) {
23 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
24 if (keep > XML_CONTEXT_BYTES)
25 keep = XML_CONTEXT_BYTES;
26+ /* Detect and prevent integer overflow */
27+ if (keep > INT_MAX - neededSize) {
28+ parser->m_errorCode = XML_ERROR_NO_MEMORY;
29+ return NULL;
30+ }
31 neededSize += keep;
32 #endif /* defined XML_CONTEXT_BYTES */
33 if (neededSize
diff --git a/meta/recipes-core/expat/expat/CVE-2022-23990.patch b/meta/recipes-core/expat/expat/CVE-2022-23990.patch
new file mode 100644
index 0000000000..c599517b3e
--- /dev/null
+++ b/meta/recipes-core/expat/expat/CVE-2022-23990.patch
@@ -0,0 +1,49 @@
1From ede41d1e186ed2aba88a06e84cac839b770af3a1 Mon Sep 17 00:00:00 2001
2From: Sebastian Pipping <sebastian@pipping.org>
3Date: Wed, 26 Jan 2022 02:36:43 +0100
4Subject: [PATCH] lib: Prevent integer overflow in doProlog (CVE-2022-23990)
5
6The change from "int nameLen" to "size_t nameLen"
7addresses the overflow on "nameLen++" in code
8"for (; name[nameLen++];)" right above the second
9change in the patch.
10
11Upstream-Status: Backport:
12https://github.com/libexpat/libexpat/pull/551/commits/ede41d1e186ed2aba88a06e84cac839b770af3a1
13
14CVE: CVE-2022-23990
15
16Signed-off-by: Steve Sakoman <steve@sakoman.com>
17
18---
19 lib/xmlparse.c | 10 ++++++++--
20 1 file changed, 8 insertions(+), 2 deletions(-)
21
22diff --git a/lib/xmlparse.c b/expat/lib/xmlparse.c
23index 5ce31402..d1d17005 100644
24--- a/lib/xmlparse.c
25+++ b/lib/xmlparse.c
26@@ -5372,7 +5372,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
27 if (dtd->in_eldecl) {
28 ELEMENT_TYPE *el;
29 const XML_Char *name;
30- int nameLen;
31+ size_t nameLen;
32 const char *nxt
33 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
34 int myindex = nextScaffoldPart(parser);
35@@ -5388,7 +5388,13 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
36 nameLen = 0;
37 for (; name[nameLen++];)
38 ;
39- dtd->contentStringLen += nameLen;
40+
41+ /* Detect and prevent integer overflow */
42+ if (nameLen > UINT_MAX - dtd->contentStringLen) {
43+ return XML_ERROR_NO_MEMORY;
44+ }
45+
46+ dtd->contentStringLen += (unsigned)nameLen;
47 if (parser->m_elementDeclHandler)
48 handleDefault = XML_FALSE;
49 }
diff --git a/meta/recipes-core/expat/expat/CVE-2022-25235.patch b/meta/recipes-core/expat/expat/CVE-2022-25235.patch
new file mode 100644
index 0000000000..9febeae609
--- /dev/null
+++ b/meta/recipes-core/expat/expat/CVE-2022-25235.patch
@@ -0,0 +1,261 @@
1Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/306b721]
2CVE: CVE-2022-25235
3
4The commit is a merge commit, and this patch is created by:
5
6$ git show -m -p --stat 306b72134f157bbfd1637b20a22cabf4acfa136a
7
8Remove modification for expat/Changes which fails to be applied.
9
10Signed-off-by: Kai Kang <kai.kang@windriver.com>
11
12commit 306b72134f157bbfd1637b20a22cabf4acfa136a (from 2cc97e875ef84da4bcf55156c83599116f7523b4)
13Merge: 2cc97e87 c16300f0
14Author: Sebastian Pipping <sebastian@pipping.org>
15Date: Fri Feb 18 20:12:32 2022 +0100
16
17 Merge pull request #562 from libexpat/utf8-security
18
19 [CVE-2022-25235] lib: Protect against malformed encoding (e.g. malformed UTF-8)
20---
21 expat/Changes | 7 ++++
22 expat/lib/xmltok.c | 5 ---
23 expat/lib/xmltok_impl.c | 18 ++++----
24 expat/tests/runtests.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++
25 4 files changed, 127 insertions(+), 12 deletions(-)
26
27diff --git a/lib/xmltok.c b/lib/xmltok.c
28index a72200e8..3bddf125 100644
29--- a/lib/xmltok.c
30+++ b/lib/xmltok.c
31@@ -98,11 +98,6 @@
32 + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \
33 & (1u << (((byte)[2]) & 0x1F)))
34
35-#define UTF8_GET_NAMING(pages, p, n) \
36- ((n) == 2 \
37- ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
38- : ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0))
39-
40 /* Detection of invalid UTF-8 sequences is based on Table 3.1B
41 of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
42 with the additional restriction of not allowing the Unicode
43diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
44index 0430591b..84ff35f9 100644
45--- a/lib/xmltok_impl.c
46+++ b/lib/xmltok_impl.c
47@@ -69,7 +69,7 @@
48 case BT_LEAD##n: \
49 if (end - ptr < n) \
50 return XML_TOK_PARTIAL_CHAR; \
51- if (! IS_NAME_CHAR(enc, ptr, n)) { \
52+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \
53 *nextTokPtr = ptr; \
54 return XML_TOK_INVALID; \
55 } \
56@@ -98,7 +98,7 @@
57 case BT_LEAD##n: \
58 if (end - ptr < n) \
59 return XML_TOK_PARTIAL_CHAR; \
60- if (! IS_NMSTRT_CHAR(enc, ptr, n)) { \
61+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \
62 *nextTokPtr = ptr; \
63 return XML_TOK_INVALID; \
64 } \
65@@ -1142,6 +1142,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
66 case BT_LEAD##n: \
67 if (end - ptr < n) \
68 return XML_TOK_PARTIAL_CHAR; \
69+ if (IS_INVALID_CHAR(enc, ptr, n)) { \
70+ *nextTokPtr = ptr; \
71+ return XML_TOK_INVALID; \
72+ } \
73 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
74 ptr += n; \
75 tok = XML_TOK_NAME; \
76@@ -1270,7 +1274,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
77 switch (BYTE_TYPE(enc, ptr)) {
78 # define LEAD_CASE(n) \
79 case BT_LEAD##n: \
80- ptr += n; \
81+ ptr += n; /* NOTE: The encoding has already been validated. */ \
82 break;
83 LEAD_CASE(2)
84 LEAD_CASE(3)
85@@ -1339,7 +1343,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
86 switch (BYTE_TYPE(enc, ptr)) {
87 # define LEAD_CASE(n) \
88 case BT_LEAD##n: \
89- ptr += n; \
90+ ptr += n; /* NOTE: The encoding has already been validated. */ \
91 break;
92 LEAD_CASE(2)
93 LEAD_CASE(3)
94@@ -1518,7 +1522,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
95 state = inName; \
96 }
97 # define LEAD_CASE(n) \
98- case BT_LEAD##n: \
99+ case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \
100 START_NAME ptr += (n - MINBPC(enc)); \
101 break;
102 LEAD_CASE(2)
103@@ -1730,7 +1734,7 @@ PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
104 switch (BYTE_TYPE(enc, ptr)) {
105 # define LEAD_CASE(n) \
106 case BT_LEAD##n: \
107- ptr += n; \
108+ ptr += n; /* NOTE: The encoding has already been validated. */ \
109 break;
110 LEAD_CASE(2)
111 LEAD_CASE(3)
112@@ -1775,7 +1779,7 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
113 switch (BYTE_TYPE(enc, ptr)) {
114 # define LEAD_CASE(n) \
115 case BT_LEAD##n: \
116- ptr += n; \
117+ ptr += n; /* NOTE: The encoding has already been validated. */ \
118 pos->columnNumber++; \
119 break;
120 LEAD_CASE(2)
121diff --git a/tests/runtests.c b/tests/runtests.c
122index bc5344b1..9b155b82 100644
123--- a/tests/runtests.c
124+++ b/tests/runtests.c
125@@ -5998,6 +5998,105 @@ START_TEST(test_utf8_in_cdata_section_2) {
126 }
127 END_TEST
128
129+START_TEST(test_utf8_in_start_tags) {
130+ struct test_case {
131+ bool goodName;
132+ bool goodNameStart;
133+ const char *tagName;
134+ };
135+
136+ // The idea with the tests below is this:
137+ // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
138+ // go to isNever and are hence not a concern.
139+ //
140+ // We start with a character that is a valid name character
141+ // (or even name-start character, see XML 1.0r4 spec) and then we flip
142+ // single bits at places where (1) the result leaves the UTF-8 encoding space
143+ // and (2) we stay in the same n-byte sequence family.
144+ //
145+ // The flipped bits are highlighted in angle brackets in comments,
146+ // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
147+ // the most significant bit to 1 to leave UTF-8 encoding space.
148+ struct test_case cases[] = {
149+ // 1-byte UTF-8: [0xxx xxxx]
150+ {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'
151+ {false, false, "\xBA"}, // [<1>011 1010]
152+ {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'
153+ {false, false, "\xB9"}, // [<1>011 1001]
154+
155+ // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
156+ {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =
157+ // Arabic small waw U+06E5
158+ {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
159+ {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
160+ {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
161+ {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =
162+ // combining char U+0301
163+ {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
164+ {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
165+ {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
166+
167+ // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
168+ {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =
169+ // Devanagari Letter A U+0905
170+ {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
171+ {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
172+ {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
173+ {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
174+ {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
175+ {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =
176+ // combining char U+0901
177+ {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
178+ {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
179+ {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
180+ {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
181+ {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
182+ };
183+ const bool atNameStart[] = {true, false};
184+
185+ size_t i = 0;
186+ char doc[1024];
187+ size_t failCount = 0;
188+
189+ for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
190+ size_t j = 0;
191+ for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
192+ const bool expectedSuccess
193+ = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
194+ sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName);
195+ XML_Parser parser = XML_ParserCreate(NULL);
196+
197+ const enum XML_Status status
198+ = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
199+
200+ bool success = true;
201+ if ((status == XML_STATUS_OK) != expectedSuccess) {
202+ success = false;
203+ }
204+ if ((status == XML_STATUS_ERROR)
205+ && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
206+ success = false;
207+ }
208+
209+ if (! success) {
210+ fprintf(
211+ stderr,
212+ "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
213+ (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
214+ (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
215+ failCount++;
216+ }
217+
218+ XML_ParserFree(parser);
219+ }
220+ }
221+
222+ if (failCount > 0) {
223+ fail("UTF-8 regression detected");
224+ }
225+}
226+END_TEST
227+
228 /* Test trailing spaces in elements are accepted */
229 static void XMLCALL
230 record_element_end_handler(void *userData, const XML_Char *name) {
231@@ -6175,6 +6274,14 @@ START_TEST(test_bad_doctype) {
232 }
233 END_TEST
234
235+START_TEST(test_bad_doctype_utf8) {
236+ const char *text = "<!DOCTYPE \xDB\x25"
237+ "doc><doc/>"; // [1101 1011] [<0>010 0101]
238+ expect_failure(text, XML_ERROR_INVALID_TOKEN,
239+ "Invalid UTF-8 in DOCTYPE not faulted");
240+}
241+END_TEST
242+
243 START_TEST(test_bad_doctype_utf16) {
244 const char text[] =
245 /* <!DOCTYPE doc [ \x06f2 ]><doc/>
246@@ -11870,6 +11977,7 @@ make_suite(void) {
247 tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom);
248 tcase_add_test(tc_basic, test_utf8_in_cdata_section);
249 tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
250+ tcase_add_test(tc_basic, test_utf8_in_start_tags);
251 tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
252 tcase_add_test(tc_basic, test_utf16_attribute);
253 tcase_add_test(tc_basic, test_utf16_second_attr);
254@@ -11878,6 +11986,7 @@ make_suite(void) {
255 tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
256 tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
257 tcase_add_test(tc_basic, test_bad_doctype);
258+ tcase_add_test(tc_basic, test_bad_doctype_utf8);
259 tcase_add_test(tc_basic, test_bad_doctype_utf16);
260 tcase_add_test(tc_basic, test_bad_doctype_plus);
261 tcase_add_test(tc_basic, test_bad_doctype_star);
diff --git a/meta/recipes-core/expat/expat/CVE-2022-25236-1.patch b/meta/recipes-core/expat/expat/CVE-2022-25236-1.patch
new file mode 100644
index 0000000000..ab53d99c8f
--- /dev/null
+++ b/meta/recipes-core/expat/expat/CVE-2022-25236-1.patch
@@ -0,0 +1,116 @@
1Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/2cc97e87]
2CVE: CVE-2022-25236
3
4The commit is a merge commit, and this patch is created by:
5
6$ git diff -p --stat 2cc97e87~ 2cc97e87
7
8Remove modification for expat/Changes which fails to be applied.
9
10Signed-off-by: Kai Kang <kai.kang@windriver.com>
11
12commit 2cc97e875ef84da4bcf55156c83599116f7523b4 (from d477fdd284468f2ab822024e75702f2c1b254f42)
13Merge: d477fdd2 e4d7e497
14Author: Sebastian Pipping <sebastian@pipping.org>
15Date: Fri Feb 18 18:01:27 2022 +0100
16
17 Merge pull request #561 from libexpat/namesep-security
18
19 [CVE-2022-25236] lib: Protect against insertion of namesep characters into namespace URIs
20
21---
22 expat/Changes | 16 ++++++++++++++++
23 expat/lib/xmlparse.c | 17 +++++++++++++----
24 expat/tests/runtests.c | 30 ++++++++++++++++++++++++++++++
25 3 files changed, 59 insertions(+), 4 deletions(-)
26
27diff --git a/lib/xmlparse.c b/lib/xmlparse.c
28index 7376aab1..c98e2e9f 100644
29--- a/lib/xmlparse.c
30+++ b/lib/xmlparse.c
31@@ -718,8 +718,7 @@ XML_ParserCreate(const XML_Char *encodingName) {
32
33 XML_Parser XMLCALL
34 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
35- XML_Char tmp[2];
36- *tmp = nsSep;
37+ XML_Char tmp[2] = {nsSep, 0};
38 return XML_ParserCreate_MM(encodingName, NULL, tmp);
39 }
40
41@@ -1344,8 +1343,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
42 would be otherwise.
43 */
44 if (parser->m_ns) {
45- XML_Char tmp[2];
46- *tmp = parser->m_namespaceSeparator;
47+ XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
48 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
49 } else {
50 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
51@@ -3761,6 +3759,17 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
52 if (! mustBeXML && isXMLNS
53 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
54 isXMLNS = XML_FALSE;
55+
56+ // NOTE: While Expat does not validate namespace URIs against RFC 3986,
57+ // we have to at least make sure that the XML processor on top of
58+ // Expat (that is splitting tag names by namespace separator into
59+ // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
60+ // by an attacker putting additional namespace separator characters
61+ // into namespace declarations. That would be ambiguous and not to
62+ // be expected.
63+ if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
64+ return XML_ERROR_SYNTAX;
65+ }
66 }
67 isXML = isXML && len == xmlLen;
68 isXMLNS = isXMLNS && len == xmlnsLen;
69diff --git a/tests/runtests.c b/tests/runtests.c
70index d07203f2..bc5344b1 100644
71--- a/tests/runtests.c
72+++ b/tests/runtests.c
73@@ -7220,6 +7220,35 @@ START_TEST(test_ns_double_colon_doctype) {
74 }
75 END_TEST
76
77+START_TEST(test_ns_separator_in_uri) {
78+ struct test_case {
79+ enum XML_Status expectedStatus;
80+ const char *doc;
81+ };
82+ struct test_case cases[] = {
83+ {XML_STATUS_OK, "<doc xmlns='one_two' />"},
84+ {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />"},
85+ };
86+
87+ size_t i = 0;
88+ size_t failCount = 0;
89+ for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
90+ XML_Parser parser = XML_ParserCreateNS(NULL, '\n');
91+ XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
92+ if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),
93+ /*isFinal*/ XML_TRUE)
94+ != cases[i].expectedStatus) {
95+ failCount++;
96+ }
97+ XML_ParserFree(parser);
98+ }
99+
100+ if (failCount) {
101+ fail("Namespace separator handling is broken");
102+ }
103+}
104+END_TEST
105+
106 /* Control variable; the number of times duff_allocator() will successfully
107 * allocate */
108 #define ALLOC_ALWAYS_SUCCEED (-1)
109@@ -11905,6 +11934,7 @@ make_suite(void) {
110 tcase_add_test(tc_namespace, test_ns_utf16_doctype);
111 tcase_add_test(tc_namespace, test_ns_invalid_doctype);
112 tcase_add_test(tc_namespace, test_ns_double_colon_doctype);
113+ tcase_add_test(tc_namespace, test_ns_separator_in_uri);
114
115 suite_add_tcase(s, tc_misc);
116 tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);
diff --git a/meta/recipes-core/expat/expat/CVE-2022-25236-2.patch b/meta/recipes-core/expat/expat/CVE-2022-25236-2.patch
new file mode 100644
index 0000000000..0f14c9631b
--- /dev/null
+++ b/meta/recipes-core/expat/expat/CVE-2022-25236-2.patch
@@ -0,0 +1,232 @@
1Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/f178826b]
2CVE: CVE-2022-25236
3
4The commit is a merge commit, and this patch is created by:
5
6$ git show -m -p --stat f178826b
7
8Remove changes for expat/Changes and reference.html which fail to be applied.
9
10Signed-off-by: Kai Kang <kai.kang@windriver.com>
11
12commit f178826bb1e9c8ee23202f1be55ad4ac7b649e84 (from c99e0e7f2b15b48848038992ecbb4480f957cfe9)
13Merge: c99e0e7f 9579f7ea
14Author: Sebastian Pipping <sebastian@pipping.org>
15Date: Fri Mar 4 18:43:39 2022 +0100
16
17 Merge pull request #577 from libexpat/namesep
18
19 lib: Relax fix to CVE-2022-25236 with regard to RFC 3986 URI characters (fixes #572)
20---
21 expat/Changes | 16 ++++++
22 expat/doc/reference.html | 8 +++
23 expat/lib/expat.h | 11 ++++
24 expat/lib/xmlparse.c | 139 ++++++++++++++++++++++++++++++++++++++++++++---
25 expat/tests/runtests.c | 8 ++-
26 5 files changed, 171 insertions(+), 11 deletions(-)
27
28diff --git a/lib/expat.h b/lib/expat.h
29index 5ab493f7..181fc960 100644
30--- a/lib/expat.h
31+++ b/lib/expat.h
32@@ -239,6 +239,17 @@ XML_ParserCreate(const XML_Char *encoding);
33 and the local part will be concatenated without any separator.
34 It is a programming error to use the separator '\0' with namespace
35 triplets (see XML_SetReturnNSTriplet).
36+ If a namespace separator is chosen that can be part of a URI or
37+ part of an XML name, splitting an expanded name back into its
38+ 1, 2 or 3 original parts on application level in the element handler
39+ may end up vulnerable, so these are advised against; sane choices for
40+ a namespace separator are e.g. '\n' (line feed) and '|' (pipe).
41+
42+ Note that Expat does not validate namespace URIs (beyond encoding)
43+ against RFC 3986 today (and is not required to do so with regard to
44+ the XML 1.0 namespaces specification) but it may start doing that
45+ in future releases. Before that, an application using Expat must
46+ be ready to receive namespace URIs containing non-URI characters.
47 */
48 XMLPARSEAPI(XML_Parser)
49 XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
50diff --git a/lib/xmlparse.c b/lib/xmlparse.c
51index 59da19c8..6fe2cf1e 100644
52--- a/lib/xmlparse.c
53+++ b/lib/xmlparse.c
54@@ -3705,6 +3705,117 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
55 return XML_ERROR_NONE;
56 }
57
58+static XML_Bool
59+is_rfc3986_uri_char(XML_Char candidate) {
60+ // For the RFC 3986 ABNF grammar see
61+ // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
62+
63+ switch (candidate) {
64+ // From rule "ALPHA" (uppercase half)
65+ case 'A':
66+ case 'B':
67+ case 'C':
68+ case 'D':
69+ case 'E':
70+ case 'F':
71+ case 'G':
72+ case 'H':
73+ case 'I':
74+ case 'J':
75+ case 'K':
76+ case 'L':
77+ case 'M':
78+ case 'N':
79+ case 'O':
80+ case 'P':
81+ case 'Q':
82+ case 'R':
83+ case 'S':
84+ case 'T':
85+ case 'U':
86+ case 'V':
87+ case 'W':
88+ case 'X':
89+ case 'Y':
90+ case 'Z':
91+
92+ // From rule "ALPHA" (lowercase half)
93+ case 'a':
94+ case 'b':
95+ case 'c':
96+ case 'd':
97+ case 'e':
98+ case 'f':
99+ case 'g':
100+ case 'h':
101+ case 'i':
102+ case 'j':
103+ case 'k':
104+ case 'l':
105+ case 'm':
106+ case 'n':
107+ case 'o':
108+ case 'p':
109+ case 'q':
110+ case 'r':
111+ case 's':
112+ case 't':
113+ case 'u':
114+ case 'v':
115+ case 'w':
116+ case 'x':
117+ case 'y':
118+ case 'z':
119+
120+ // From rule "DIGIT"
121+ case '0':
122+ case '1':
123+ case '2':
124+ case '3':
125+ case '4':
126+ case '5':
127+ case '6':
128+ case '7':
129+ case '8':
130+ case '9':
131+
132+ // From rule "pct-encoded"
133+ case '%':
134+
135+ // From rule "unreserved"
136+ case '-':
137+ case '.':
138+ case '_':
139+ case '~':
140+
141+ // From rule "gen-delims"
142+ case ':':
143+ case '/':
144+ case '?':
145+ case '#':
146+ case '[':
147+ case ']':
148+ case '@':
149+
150+ // From rule "sub-delims"
151+ case '!':
152+ case '$':
153+ case '&':
154+ case '\'':
155+ case '(':
156+ case ')':
157+ case '*':
158+ case '+':
159+ case ',':
160+ case ';':
161+ case '=':
162+ return XML_TRUE;
163+
164+ default:
165+ return XML_FALSE;
166+ }
167+}
168+
169 /* addBinding() overwrites the value of prefix->binding without checking.
170 Therefore one must keep track of the old value outside of addBinding().
171 */
172@@ -3763,14 +3874,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
173 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
174 isXMLNS = XML_FALSE;
175
176- // NOTE: While Expat does not validate namespace URIs against RFC 3986,
177- // we have to at least make sure that the XML processor on top of
178- // Expat (that is splitting tag names by namespace separator into
179- // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
180- // by an attacker putting additional namespace separator characters
181- // into namespace declarations. That would be ambiguous and not to
182- // be expected.
183- if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
184+ // NOTE: While Expat does not validate namespace URIs against RFC 3986
185+ // today (and is not REQUIRED to do so with regard to the XML 1.0
186+ // namespaces specification) we have to at least make sure, that
187+ // the application on top of Expat (that is likely splitting expanded
188+ // element names ("qualified names") of form
189+ // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
190+ // in its element handler code) cannot be confused by an attacker
191+ // putting additional namespace separator characters into namespace
192+ // declarations. That would be ambiguous and not to be expected.
193+ //
194+ // While the HTML API docs of function XML_ParserCreateNS have been
195+ // advising against use of a namespace separator character that can
196+ // appear in a URI for >20 years now, some widespread applications
197+ // are using URI characters (':' (colon) in particular) for a
198+ // namespace separator, in practice. To keep these applications
199+ // functional, we only reject namespace URIs containing the
200+ // application-chosen namespace separator if the chosen separator
201+ // is a non-URI character with regard to RFC 3986.
202+ if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
203+ && ! is_rfc3986_uri_char(uri[len])) {
204 return XML_ERROR_SYNTAX;
205 }
206 }
207diff --git a/tests/runtests.c b/tests/runtests.c
208index 60da868e..712706c4 100644
209--- a/tests/runtests.c
210+++ b/tests/runtests.c
211@@ -7406,16 +7406,18 @@ START_TEST(test_ns_separator_in_uri) {
212 struct test_case {
213 enum XML_Status expectedStatus;
214 const char *doc;
215+ XML_Char namesep;
216 };
217 struct test_case cases[] = {
218- {XML_STATUS_OK, "<doc xmlns='one_two' />"},
219- {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />"},
220+ {XML_STATUS_OK, "<doc xmlns='one_two' />", XCS('\n')},
221+ {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />", XCS('\n')},
222+ {XML_STATUS_OK, "<doc xmlns='one:two' />", XCS(':')},
223 };
224
225 size_t i = 0;
226 size_t failCount = 0;
227 for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
228- XML_Parser parser = XML_ParserCreateNS(NULL, '\n');
229+ XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep);
230 XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
231 if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),
232 /*isFinal*/ XML_TRUE)
diff --git a/meta/recipes-core/expat/expat_2.2.10.bb b/meta/recipes-core/expat/expat_2.2.10.bb
index 08e8ff1cea..f99fa7edb6 100644
--- a/meta/recipes-core/expat/expat_2.2.10.bb
+++ b/meta/recipes-core/expat/expat_2.2.10.bb
@@ -10,9 +10,17 @@ VERSION_TAG = "${@d.getVar('PV').replace('.', '_')}"
10 10
11SRC_URI = "https://github.com/libexpat/libexpat/releases/download/R_${VERSION_TAG}/expat-${PV}.tar.bz2 \ 11SRC_URI = "https://github.com/libexpat/libexpat/releases/download/R_${VERSION_TAG}/expat-${PV}.tar.bz2 \
12 file://libtool-tag.patch \ 12 file://libtool-tag.patch \
13 file://run-ptest \ 13 file://run-ptest \
14 file://0001-Add-output-of-tests-result.patch \ 14 file://0001-Add-output-of-tests-result.patch \
15 " 15 file://CVE-2022-22822-27.patch \
16 file://CVE-2021-45960.patch \
17 file://CVE-2021-46143.patch \
18 file://CVE-2022-23852.patch \
19 file://CVE-2022-23990.patch \
20 file://CVE-2022-25235.patch \
21 file://CVE-2022-25236-1.patch \
22 file://CVE-2022-25236-2.patch \
23 "
16 24
17UPSTREAM_CHECK_URI = "https://github.com/libexpat/libexpat/releases/" 25UPSTREAM_CHECK_URI = "https://github.com/libexpat/libexpat/releases/"
18 26
diff --git a/meta/recipes-core/glibc/glibc-version.inc b/meta/recipes-core/glibc/glibc-version.inc
index 4d69187961..e1eefdee49 100644
--- a/meta/recipes-core/glibc/glibc-version.inc
+++ b/meta/recipes-core/glibc/glibc-version.inc
@@ -1,6 +1,6 @@
1SRCBRANCH ?= "release/2.33/master" 1SRCBRANCH ?= "release/2.33/master"
2PV = "2.33" 2PV = "2.33"
3SRCREV_glibc ?= "6090cf1330faf2deb17285758f327cb23b89ebf1" 3SRCREV_glibc ?= "3e2a15c666e40e5ee740e5079c56d83469280323"
4SRCREV_localedef ?= "bd644c9e6f3e20c5504da1488448173c69c56c28" 4SRCREV_localedef ?= "bd644c9e6f3e20c5504da1488448173c69c56c28"
5 5
6GLIBC_GIT_URI ?= "git://sourceware.org/git/glibc.git" 6GLIBC_GIT_URI ?= "git://sourceware.org/git/glibc.git"
diff --git a/meta/recipes-core/glibc/glibc_2.33.bb b/meta/recipes-core/glibc/glibc_2.33.bb
index ad5e2b8eb1..a1e9eb3a16 100644
--- a/meta/recipes-core/glibc/glibc_2.33.bb
+++ b/meta/recipes-core/glibc/glibc_2.33.bb
@@ -88,7 +88,7 @@ EXTRA_OECONF = "--enable-kernel=${OLDEST_KERNEL} \
88 88
89EXTRA_OECONF += "${@get_libc_fpu_setting(bb, d)}" 89EXTRA_OECONF += "${@get_libc_fpu_setting(bb, d)}"
90 90
91EXTRA_OECONF_append_x86 = " --enable-cet" 91EXTRA_OECONF_append_x86 = " ${@bb.utils.contains_any('TUNE_FEATURES', 'i586 c3', '--disable-cet', '--enable-cet', d)}"
92EXTRA_OECONF_append_x86-64 = " --enable-cet" 92EXTRA_OECONF_append_x86-64 = " --enable-cet"
93 93
94PACKAGECONFIG ??= "nscd" 94PACKAGECONFIG ??= "nscd"
diff --git a/meta/recipes-core/images/build-appliance-image_15.0.0.bb b/meta/recipes-core/images/build-appliance-image_15.0.0.bb
index 455fe825c8..5631cd8ae6 100644
--- a/meta/recipes-core/images/build-appliance-image_15.0.0.bb
+++ b/meta/recipes-core/images/build-appliance-image_15.0.0.bb
@@ -24,7 +24,7 @@ IMAGE_FSTYPES = "wic.vmdk wic.vhd wic.vhdx"
24 24
25inherit core-image setuptools3 25inherit core-image setuptools3
26 26
27SRCREV ?= "ec3ac9d883d53ebbf3c6b9a80694df69c9e9ccc7" 27SRCREV ?= "2954fa87a4d325f1a3c722d6fb8bf13b17f9e7a0"
28SRC_URI = "git://git.yoctoproject.org/poky;branch=hardknott \ 28SRC_URI = "git://git.yoctoproject.org/poky;branch=hardknott \
29 file://Yocto_Build_Appliance.vmx \ 29 file://Yocto_Build_Appliance.vmx \
30 file://Yocto_Build_Appliance.vmxf \ 30 file://Yocto_Build_Appliance.vmxf \
diff --git a/meta/recipes-core/initrdscripts/initramfs-framework/finish b/meta/recipes-core/initrdscripts/initramfs-framework/finish
index 717383ebac..f08a920867 100755
--- a/meta/recipes-core/initrdscripts/initramfs-framework/finish
+++ b/meta/recipes-core/initrdscripts/initramfs-framework/finish
@@ -12,6 +12,18 @@ finish_run() {
12 fatal "ERROR: There's no '/dev' on rootfs." 12 fatal "ERROR: There's no '/dev' on rootfs."
13 fi 13 fi
14 14
15 # Unmount anything that was automounted by busybox via mdev-mount.sh.
16 # We're about to switch_root, and leaving anything mounted will prevent
17 # the next rootfs from modifying the block device. Ignore ROOT_DISK,
18 # if it was set by setup-live, because it'll be mounted over loopback
19 # to ROOTFS_DIR.
20 local dev
21 for dev in /run/media/*; do
22 if mountpoint -q "${dev}" && [ "${dev##*/}" != "${ROOT_DISK}" ]; then
23 umount -f "${dev}" || debug "Failed to unmount ${dev}"
24 fi
25 done
26
15 info "Switching root to '$ROOTFS_DIR'..." 27 info "Switching root to '$ROOTFS_DIR'..."
16 28
17 debug "Moving /dev, /proc and /sys onto rootfs..." 29 debug "Moving /dev, /proc and /sys onto rootfs..."
diff --git a/meta/recipes-core/libxml/libxml2/CVE-2022-23308-fix-regression.patch b/meta/recipes-core/libxml/libxml2/CVE-2022-23308-fix-regression.patch
new file mode 100644
index 0000000000..eefecb9adb
--- /dev/null
+++ b/meta/recipes-core/libxml/libxml2/CVE-2022-23308-fix-regression.patch
@@ -0,0 +1,99 @@
1From 646fe48d1c8a74310c409ddf81fe7df6700052af Mon Sep 17 00:00:00 2001
2From: Nick Wellnhofer <wellnhofer@aevum.de>
3Date: Tue, 22 Feb 2022 11:51:08 +0100
4Subject: [PATCH] Fix --without-valid build
5
6Regressed in commit 652dd12a.
7---
8 valid.c | 58 ++++++++++++++++++++++++++++-----------------------------
9 1 file changed, 29 insertions(+), 29 deletions(-)
10---
11
12From https://github.com/GNOME/libxml2.git
13 commit 646fe48d1c8a74310c409ddf81fe7df6700052af
14
15CVE: CVE-2022-23308
16Upstream-Status: Backport
17
18Signed-off-by: Joe Slater <joe.slater@windriver.com>
19
20
21diff --git a/valid.c b/valid.c
22index 8e596f1d..9684683a 100644
23--- a/valid.c
24+++ b/valid.c
25@@ -479,35 +479,6 @@ nodeVPop(xmlValidCtxtPtr ctxt)
26 return (ret);
27 }
28
29-/**
30- * xmlValidNormalizeString:
31- * @str: a string
32- *
33- * Normalize a string in-place.
34- */
35-static void
36-xmlValidNormalizeString(xmlChar *str) {
37- xmlChar *dst;
38- const xmlChar *src;
39-
40- if (str == NULL)
41- return;
42- src = str;
43- dst = str;
44-
45- while (*src == 0x20) src++;
46- while (*src != 0) {
47- if (*src == 0x20) {
48- while (*src == 0x20) src++;
49- if (*src != 0)
50- *dst++ = 0x20;
51- } else {
52- *dst++ = *src++;
53- }
54- }
55- *dst = 0;
56-}
57-
58 #ifdef DEBUG_VALID_ALGO
59 static void
60 xmlValidPrintNode(xmlNodePtr cur) {
61@@ -2636,6 +2607,35 @@ xmlDumpNotationTable(xmlBufferPtr buf, xmlNotationTablePtr table) {
62 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
63 xmlFree((char *)(str));
64
65+/**
66+ * xmlValidNormalizeString:
67+ * @str: a string
68+ *
69+ * Normalize a string in-place.
70+ */
71+static void
72+xmlValidNormalizeString(xmlChar *str) {
73+ xmlChar *dst;
74+ const xmlChar *src;
75+
76+ if (str == NULL)
77+ return;
78+ src = str;
79+ dst = str;
80+
81+ while (*src == 0x20) src++;
82+ while (*src != 0) {
83+ if (*src == 0x20) {
84+ while (*src == 0x20) src++;
85+ if (*src != 0)
86+ *dst++ = 0x20;
87+ } else {
88+ *dst++ = *src++;
89+ }
90+ }
91+ *dst = 0;
92+}
93+
94 static int
95 xmlIsStreaming(xmlValidCtxtPtr ctxt) {
96 xmlParserCtxtPtr pctxt;
97--
982.35.1
99
diff --git a/meta/recipes-core/libxml/libxml2/CVE-2022-23308.patch b/meta/recipes-core/libxml/libxml2/CVE-2022-23308.patch
new file mode 100644
index 0000000000..708a98b45a
--- /dev/null
+++ b/meta/recipes-core/libxml/libxml2/CVE-2022-23308.patch
@@ -0,0 +1,209 @@
1From 652dd12a858989b14eed4e84e453059cd3ba340e Mon Sep 17 00:00:00 2001
2From: Nick Wellnhofer <wellnhofer@aevum.de>
3Date: Tue, 8 Feb 2022 03:29:24 +0100
4Subject: [PATCH] [CVE-2022-23308] Use-after-free of ID and IDREF attributes
5
6If a document is parsed with XML_PARSE_DTDVALID and without
7XML_PARSE_NOENT, the value of ID attributes has to be normalized after
8potentially expanding entities in xmlRemoveID. Otherwise, later calls
9to xmlGetID can return a pointer to previously freed memory.
10
11ID attributes which are empty or contain only whitespace after
12entity expansion are affected in a similar way. This is fixed by
13not storing such attributes in the ID table.
14
15The test to detect streaming mode when validating against a DTD was
16broken. In connection with the defects above, this could result in a
17use-after-free when using the xmlReader interface with validation.
18Fix detection of streaming mode to avoid similar issues. (This changes
19the expected result of a test case. But as far as I can tell, using the
20XML reader with XIncludes referencing the root document never worked
21properly, anyway.)
22
23All of these issues can result in denial of service. Using xmlReader
24with validation could result in disclosure of memory via the error
25channel, typically stderr. The security impact of xmlGetID returning
26a pointer to freed memory depends on the application. The typical use
27case of calling xmlGetID on an unmodified document is not affected.
28---
29 result/XInclude/ns1.xml.rdr | 2 +-
30 valid.c | 88 +++++++++++++++++++++++--------------
31 2 files changed, 56 insertions(+), 34 deletions(-)
32 ---
33
34From https://github.com/GNOME/libxml2.git
35 commit 652dd12a858989b14eed4e84e453059cd3ba340e
36
37Remove patch to ns1.xml.rdr which does not exist in version 2.9.10.
38
39CVE: CVE-2022-23308
40Upstream-Status: Backport
41
42Signed-off-by: Joe Slater <joe.slater@windriver.com>
43
44
45diff --git a/valid.c b/valid.c
46index 5ee391c0..8e596f1d 100644
47--- a/valid.c
48+++ b/valid.c
49@@ -479,6 +479,35 @@ nodeVPop(xmlValidCtxtPtr ctxt)
50 return (ret);
51 }
52
53+/**
54+ * xmlValidNormalizeString:
55+ * @str: a string
56+ *
57+ * Normalize a string in-place.
58+ */
59+static void
60+xmlValidNormalizeString(xmlChar *str) {
61+ xmlChar *dst;
62+ const xmlChar *src;
63+
64+ if (str == NULL)
65+ return;
66+ src = str;
67+ dst = str;
68+
69+ while (*src == 0x20) src++;
70+ while (*src != 0) {
71+ if (*src == 0x20) {
72+ while (*src == 0x20) src++;
73+ if (*src != 0)
74+ *dst++ = 0x20;
75+ } else {
76+ *dst++ = *src++;
77+ }
78+ }
79+ *dst = 0;
80+}
81+
82 #ifdef DEBUG_VALID_ALGO
83 static void
84 xmlValidPrintNode(xmlNodePtr cur) {
85@@ -2607,6 +2636,24 @@ xmlDumpNotationTable(xmlBufferPtr buf, xmlNotationTablePtr table) {
86 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
87 xmlFree((char *)(str));
88
89+static int
90+xmlIsStreaming(xmlValidCtxtPtr ctxt) {
91+ xmlParserCtxtPtr pctxt;
92+
93+ if (ctxt == NULL)
94+ return(0);
95+ /*
96+ * These magic values are also abused to detect whether we're validating
97+ * while parsing a document. In this case, userData points to the parser
98+ * context.
99+ */
100+ if ((ctxt->finishDtd != XML_CTXT_FINISH_DTD_0) &&
101+ (ctxt->finishDtd != XML_CTXT_FINISH_DTD_1))
102+ return(0);
103+ pctxt = ctxt->userData;
104+ return(pctxt->parseMode == XML_PARSE_READER);
105+}
106+
107 /**
108 * xmlFreeID:
109 * @not: A id
110@@ -2650,7 +2697,7 @@ xmlAddID(xmlValidCtxtPtr ctxt, xmlDocPtr doc, const xmlChar *value,
111 if (doc == NULL) {
112 return(NULL);
113 }
114- if (value == NULL) {
115+ if ((value == NULL) || (value[0] == 0)) {
116 return(NULL);
117 }
118 if (attr == NULL) {
119@@ -2681,7 +2728,7 @@ xmlAddID(xmlValidCtxtPtr ctxt, xmlDocPtr doc, const xmlChar *value,
120 */
121 ret->value = xmlStrdup(value);
122 ret->doc = doc;
123- if ((ctxt != NULL) && (ctxt->vstateNr != 0)) {
124+ if (xmlIsStreaming(ctxt)) {
125 /*
126 * Operating in streaming mode, attr is gonna disappear
127 */
128@@ -2820,6 +2867,7 @@ xmlRemoveID(xmlDocPtr doc, xmlAttrPtr attr) {
129 ID = xmlNodeListGetString(doc, attr->children, 1);
130 if (ID == NULL)
131 return(-1);
132+ xmlValidNormalizeString(ID);
133
134 id = xmlHashLookup(table, ID);
135 if (id == NULL || id->attr != attr) {
136@@ -3009,7 +3057,7 @@ xmlAddRef(xmlValidCtxtPtr ctxt, xmlDocPtr doc, const xmlChar *value,
137 * fill the structure.
138 */
139 ret->value = xmlStrdup(value);
140- if ((ctxt != NULL) && (ctxt->vstateNr != 0)) {
141+ if (xmlIsStreaming(ctxt)) {
142 /*
143 * Operating in streaming mode, attr is gonna disappear
144 */
145@@ -4028,8 +4076,7 @@ xmlValidateAttributeValue2(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
146 xmlChar *
147 xmlValidCtxtNormalizeAttributeValue(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
148 xmlNodePtr elem, const xmlChar *name, const xmlChar *value) {
149- xmlChar *ret, *dst;
150- const xmlChar *src;
151+ xmlChar *ret;
152 xmlAttributePtr attrDecl = NULL;
153 int extsubset = 0;
154
155@@ -4070,19 +4117,7 @@ xmlValidCtxtNormalizeAttributeValue(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
156 ret = xmlStrdup(value);
157 if (ret == NULL)
158 return(NULL);
159- src = value;
160- dst = ret;
161- while (*src == 0x20) src++;
162- while (*src != 0) {
163- if (*src == 0x20) {
164- while (*src == 0x20) src++;
165- if (*src != 0)
166- *dst++ = 0x20;
167- } else {
168- *dst++ = *src++;
169- }
170- }
171- *dst = 0;
172+ xmlValidNormalizeString(ret);
173 if ((doc->standalone) && (extsubset == 1) && (!xmlStrEqual(value, ret))) {
174 xmlErrValidNode(ctxt, elem, XML_DTD_NOT_STANDALONE,
175 "standalone: %s on %s value had to be normalized based on external subset declaration\n",
176@@ -4114,8 +4149,7 @@ xmlValidCtxtNormalizeAttributeValue(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
177 xmlChar *
178 xmlValidNormalizeAttributeValue(xmlDocPtr doc, xmlNodePtr elem,
179 const xmlChar *name, const xmlChar *value) {
180- xmlChar *ret, *dst;
181- const xmlChar *src;
182+ xmlChar *ret;
183 xmlAttributePtr attrDecl = NULL;
184
185 if (doc == NULL) return(NULL);
186@@ -4145,19 +4179,7 @@ xmlValidNormalizeAttributeValue(xmlDocPtr doc, xmlNodePtr elem,
187 ret = xmlStrdup(value);
188 if (ret == NULL)
189 return(NULL);
190- src = value;
191- dst = ret;
192- while (*src == 0x20) src++;
193- while (*src != 0) {
194- if (*src == 0x20) {
195- while (*src == 0x20) src++;
196- if (*src != 0)
197- *dst++ = 0x20;
198- } else {
199- *dst++ = *src++;
200- }
201- }
202- *dst = 0;
203+ xmlValidNormalizeString(ret);
204 return(ret);
205 }
206
207--
2082.25.1
209
diff --git a/meta/recipes-core/libxml/libxml2_2.9.10.bb b/meta/recipes-core/libxml/libxml2_2.9.10.bb
index cabf911816..778312f662 100644
--- a/meta/recipes-core/libxml/libxml2_2.9.10.bb
+++ b/meta/recipes-core/libxml/libxml2_2.9.10.bb
@@ -30,6 +30,8 @@ SRC_URI = "http://www.xmlsoft.org/sources/libxml2-${PV}.tar.gz;name=libtar \
30 file://CVE-2021-3518-0002.patch \ 30 file://CVE-2021-3518-0002.patch \
31 file://CVE-2021-3537.patch \ 31 file://CVE-2021-3537.patch \
32 file://CVE-2021-3541.patch \ 32 file://CVE-2021-3541.patch \
33 file://CVE-2022-23308.patch \
34 file://CVE-2022-23308-fix-regression.patch \
33 " 35 "
34 36
35SRC_URI[libtar.md5sum] = "10942a1dc23137a8aa07f0639cbfece5" 37SRC_URI[libtar.md5sum] = "10942a1dc23137a8aa07f0639cbfece5"
diff --git a/meta/recipes-core/os-release/os-release.bb b/meta/recipes-core/os-release/os-release.bb
index a29d678125..33f75e39b8 100644
--- a/meta/recipes-core/os-release/os-release.bb
+++ b/meta/recipes-core/os-release/os-release.bb
@@ -12,7 +12,9 @@ do_configure[noexec] = "1"
12 12
13# Other valid fields: BUILD_ID ID_LIKE ANSI_COLOR CPE_NAME 13# Other valid fields: BUILD_ID ID_LIKE ANSI_COLOR CPE_NAME
14# HOME_URL SUPPORT_URL BUG_REPORT_URL 14# HOME_URL SUPPORT_URL BUG_REPORT_URL
15OS_RELEASE_FIELDS = "ID ID_LIKE NAME VERSION VERSION_ID PRETTY_NAME" 15OS_RELEASE_FIELDS = "\
16 ID ID_LIKE NAME VERSION VERSION_ID PRETTY_NAME DISTRO_CODENAME \
17"
16OS_RELEASE_UNQUOTED_FIELDS = "ID VERSION_ID VARIANT_ID" 18OS_RELEASE_UNQUOTED_FIELDS = "ID VERSION_ID VARIANT_ID"
17 19
18ID = "${DISTRO}" 20ID = "${DISTRO}"
diff --git a/meta/recipes-core/zlib/zlib/CVE-2018-25032.patch b/meta/recipes-core/zlib/zlib/CVE-2018-25032.patch
new file mode 100644
index 0000000000..5cb6183641
--- /dev/null
+++ b/meta/recipes-core/zlib/zlib/CVE-2018-25032.patch
@@ -0,0 +1,347 @@
1CVE: CVE-2018-25032
2Upstream-Status: Backport
3Signed-off-by: Ross Burton <ross.burton@arm.com>
4
5From 5c44459c3b28a9bd3283aaceab7c615f8020c531 Mon Sep 17 00:00:00 2001
6From: Mark Adler <madler@alumni.caltech.edu>
7Date: Tue, 17 Apr 2018 22:09:22 -0700
8Subject: [PATCH] Fix a bug that can crash deflate on some input when using
9 Z_FIXED.
10
11This bug was reported by Danilo Ramos of Eideticom, Inc. It has
12lain in wait 13 years before being found! The bug was introduced
13in zlib 1.2.2.2, with the addition of the Z_FIXED option. That
14option forces the use of fixed Huffman codes. For rare inputs with
15a large number of distant matches, the pending buffer into which
16the compressed data is written can overwrite the distance symbol
17table which it overlays. That results in corrupted output due to
18invalid distances, and can result in out-of-bound accesses,
19crashing the application.
20
21The fix here combines the distance buffer and literal/length
22buffers into a single symbol buffer. Now three bytes of pending
23buffer space are opened up for each literal or length/distance
24pair consumed, instead of the previous two bytes. This assures
25that the pending buffer cannot overwrite the symbol table, since
26the maximum fixed code compressed length/distance is 31 bits, and
27since there are four bytes of pending space for every three bytes
28of symbol space.
29---
30 deflate.c | 74 ++++++++++++++++++++++++++++++++++++++++---------------
31 deflate.h | 25 +++++++++----------
32 trees.c | 50 +++++++++++--------------------------
33 3 files changed, 79 insertions(+), 70 deletions(-)
34
35diff --git a/deflate.c b/deflate.c
36index 425babc00..19cba873a 100644
37--- a/deflate.c
38+++ b/deflate.c
39@@ -255,11 +255,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
40 int wrap = 1;
41 static const char my_version[] = ZLIB_VERSION;
42
43- ushf *overlay;
44- /* We overlay pending_buf and d_buf+l_buf. This works since the average
45- * output size for (length,distance) codes is <= 24 bits.
46- */
47-
48 if (version == Z_NULL || version[0] != my_version[0] ||
49 stream_size != sizeof(z_stream)) {
50 return Z_VERSION_ERROR;
51@@ -329,9 +324,47 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
52
53 s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
54
55- overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
56- s->pending_buf = (uchf *) overlay;
57- s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
58+ /* We overlay pending_buf and sym_buf. This works since the average size
59+ * for length/distance pairs over any compressed block is assured to be 31
60+ * bits or less.
61+ *
62+ * Analysis: The longest fixed codes are a length code of 8 bits plus 5
63+ * extra bits, for lengths 131 to 257. The longest fixed distance codes are
64+ * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest
65+ * possible fixed-codes length/distance pair is then 31 bits total.
66+ *
67+ * sym_buf starts one-fourth of the way into pending_buf. So there are
68+ * three bytes in sym_buf for every four bytes in pending_buf. Each symbol
69+ * in sym_buf is three bytes -- two for the distance and one for the
70+ * literal/length. As each symbol is consumed, the pointer to the next
71+ * sym_buf value to read moves forward three bytes. From that symbol, up to
72+ * 31 bits are written to pending_buf. The closest the written pending_buf
73+ * bits gets to the next sym_buf symbol to read is just before the last
74+ * code is written. At that time, 31*(n-2) bits have been written, just
75+ * after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at
76+ * 8*n bits into pending_buf. (Note that the symbol buffer fills when n-1
77+ * symbols are written.) The closest the writing gets to what is unread is
78+ * then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and
79+ * can range from 128 to 32768.
80+ *
81+ * Therefore, at a minimum, there are 142 bits of space between what is
82+ * written and what is read in the overlain buffers, so the symbols cannot
83+ * be overwritten by the compressed data. That space is actually 139 bits,
84+ * due to the three-bit fixed-code block header.
85+ *
86+ * That covers the case where either Z_FIXED is specified, forcing fixed
87+ * codes, or when the use of fixed codes is chosen, because that choice
88+ * results in a smaller compressed block than dynamic codes. That latter
89+ * condition then assures that the above analysis also covers all dynamic
90+ * blocks. A dynamic-code block will only be chosen to be emitted if it has
91+ * fewer bits than a fixed-code block would for the same set of symbols.
92+ * Therefore its average symbol length is assured to be less than 31. So
93+ * the compressed data for a dynamic block also cannot overwrite the
94+ * symbols from which it is being constructed.
95+ */
96+
97+ s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4);
98+ s->pending_buf_size = (ulg)s->lit_bufsize * 4;
99
100 if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
101 s->pending_buf == Z_NULL) {
102@@ -340,8 +373,12 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
103 deflateEnd (strm);
104 return Z_MEM_ERROR;
105 }
106- s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
107- s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;
108+ s->sym_buf = s->pending_buf + s->lit_bufsize;
109+ s->sym_end = (s->lit_bufsize - 1) * 3;
110+ /* We avoid equality with lit_bufsize*3 because of wraparound at 64K
111+ * on 16 bit machines and because stored blocks are restricted to
112+ * 64K-1 bytes.
113+ */
114
115 s->level = level;
116 s->strategy = strategy;
117@@ -552,7 +589,7 @@ int ZEXPORT deflatePrime (strm, bits, value)
118
119 if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
120 s = strm->state;
121- if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3))
122+ if (s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3))
123 return Z_BUF_ERROR;
124 do {
125 put = Buf_size - s->bi_valid;
126@@ -1113,7 +1150,6 @@ int ZEXPORT deflateCopy (dest, source)
127 #else
128 deflate_state *ds;
129 deflate_state *ss;
130- ushf *overlay;
131
132
133 if (deflateStateCheck(source) || dest == Z_NULL) {
134@@ -1133,8 +1169,7 @@ int ZEXPORT deflateCopy (dest, source)
135 ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
136 ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos));
137 ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos));
138- overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
139- ds->pending_buf = (uchf *) overlay;
140+ ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4);
141
142 if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
143 ds->pending_buf == Z_NULL) {
144@@ -1148,8 +1183,7 @@ int ZEXPORT deflateCopy (dest, source)
145 zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
146
147 ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
148- ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
149- ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
150+ ds->sym_buf = ds->pending_buf + ds->lit_bufsize;
151
152 ds->l_desc.dyn_tree = ds->dyn_ltree;
153 ds->d_desc.dyn_tree = ds->dyn_dtree;
154@@ -1925,7 +1959,7 @@ local block_state deflate_fast(s, flush)
155 FLUSH_BLOCK(s, 1);
156 return finish_done;
157 }
158- if (s->last_lit)
159+ if (s->sym_next)
160 FLUSH_BLOCK(s, 0);
161 return block_done;
162 }
163@@ -2056,7 +2090,7 @@ local block_state deflate_slow(s, flush)
164 FLUSH_BLOCK(s, 1);
165 return finish_done;
166 }
167- if (s->last_lit)
168+ if (s->sym_next)
169 FLUSH_BLOCK(s, 0);
170 return block_done;
171 }
172@@ -2131,7 +2165,7 @@ local block_state deflate_rle(s, flush)
173 FLUSH_BLOCK(s, 1);
174 return finish_done;
175 }
176- if (s->last_lit)
177+ if (s->sym_next)
178 FLUSH_BLOCK(s, 0);
179 return block_done;
180 }
181@@ -2170,7 +2204,7 @@ local block_state deflate_huff(s, flush)
182 FLUSH_BLOCK(s, 1);
183 return finish_done;
184 }
185- if (s->last_lit)
186+ if (s->sym_next)
187 FLUSH_BLOCK(s, 0);
188 return block_done;
189 }
190diff --git a/deflate.h b/deflate.h
191index 23ecdd312..d4cf1a98b 100644
192--- a/deflate.h
193+++ b/deflate.h
194@@ -217,7 +217,7 @@ typedef struct internal_state {
195 /* Depth of each subtree used as tie breaker for trees of equal frequency
196 */
197
198- uchf *l_buf; /* buffer for literals or lengths */
199+ uchf *sym_buf; /* buffer for distances and literals/lengths */
200
201 uInt lit_bufsize;
202 /* Size of match buffer for literals/lengths. There are 4 reasons for
203@@ -239,13 +239,8 @@ typedef struct internal_state {
204 * - I can't count above 4
205 */
206
207- uInt last_lit; /* running index in l_buf */
208-
209- ushf *d_buf;
210- /* Buffer for distances. To simplify the code, d_buf and l_buf have
211- * the same number of elements. To use different lengths, an extra flag
212- * array would be necessary.
213- */
214+ uInt sym_next; /* running index in sym_buf */
215+ uInt sym_end; /* symbol table full when sym_next reaches this */
216
217 ulg opt_len; /* bit length of current block with optimal trees */
218 ulg static_len; /* bit length of current block with static trees */
219@@ -325,20 +320,22 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
220
221 # define _tr_tally_lit(s, c, flush) \
222 { uch cc = (c); \
223- s->d_buf[s->last_lit] = 0; \
224- s->l_buf[s->last_lit++] = cc; \
225+ s->sym_buf[s->sym_next++] = 0; \
226+ s->sym_buf[s->sym_next++] = 0; \
227+ s->sym_buf[s->sym_next++] = cc; \
228 s->dyn_ltree[cc].Freq++; \
229- flush = (s->last_lit == s->lit_bufsize-1); \
230+ flush = (s->sym_next == s->sym_end); \
231 }
232 # define _tr_tally_dist(s, distance, length, flush) \
233 { uch len = (uch)(length); \
234 ush dist = (ush)(distance); \
235- s->d_buf[s->last_lit] = dist; \
236- s->l_buf[s->last_lit++] = len; \
237+ s->sym_buf[s->sym_next++] = dist; \
238+ s->sym_buf[s->sym_next++] = dist >> 8; \
239+ s->sym_buf[s->sym_next++] = len; \
240 dist--; \
241 s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
242 s->dyn_dtree[d_code(dist)].Freq++; \
243- flush = (s->last_lit == s->lit_bufsize-1); \
244+ flush = (s->sym_next == s->sym_end); \
245 }
246 #else
247 # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
248diff --git a/trees.c b/trees.c
249index 4f4a65011..decaeb7c3 100644
250--- a/trees.c
251+++ b/trees.c
252@@ -416,7 +416,7 @@ local void init_block(s)
253
254 s->dyn_ltree[END_BLOCK].Freq = 1;
255 s->opt_len = s->static_len = 0L;
256- s->last_lit = s->matches = 0;
257+ s->sym_next = s->matches = 0;
258 }
259
260 #define SMALLEST 1
261@@ -948,7 +948,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
262
263 Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
264 opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
265- s->last_lit));
266+ s->sym_next / 3));
267
268 if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
269
270@@ -1017,8 +1017,9 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc)
271 unsigned dist; /* distance of matched string */
272 unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */
273 {
274- s->d_buf[s->last_lit] = (ush)dist;
275- s->l_buf[s->last_lit++] = (uch)lc;
276+ s->sym_buf[s->sym_next++] = dist;
277+ s->sym_buf[s->sym_next++] = dist >> 8;
278+ s->sym_buf[s->sym_next++] = lc;
279 if (dist == 0) {
280 /* lc is the unmatched char */
281 s->dyn_ltree[lc].Freq++;
282@@ -1033,30 +1034,7 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc)
283 s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
284 s->dyn_dtree[d_code(dist)].Freq++;
285 }
286-
287-#ifdef TRUNCATE_BLOCK
288- /* Try to guess if it is profitable to stop the current block here */
289- if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
290- /* Compute an upper bound for the compressed length */
291- ulg out_length = (ulg)s->last_lit*8L;
292- ulg in_length = (ulg)((long)s->strstart - s->block_start);
293- int dcode;
294- for (dcode = 0; dcode < D_CODES; dcode++) {
295- out_length += (ulg)s->dyn_dtree[dcode].Freq *
296- (5L+extra_dbits[dcode]);
297- }
298- out_length >>= 3;
299- Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
300- s->last_lit, in_length, out_length,
301- 100L - out_length*100L/in_length));
302- if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
303- }
304-#endif
305- return (s->last_lit == s->lit_bufsize-1);
306- /* We avoid equality with lit_bufsize because of wraparound at 64K
307- * on 16 bit machines and because stored blocks are restricted to
308- * 64K-1 bytes.
309- */
310+ return (s->sym_next == s->sym_end);
311 }
312
313 /* ===========================================================================
314@@ -1069,13 +1047,14 @@ local void compress_block(s, ltree, dtree)
315 {
316 unsigned dist; /* distance of matched string */
317 int lc; /* match length or unmatched char (if dist == 0) */
318- unsigned lx = 0; /* running index in l_buf */
319+ unsigned sx = 0; /* running index in sym_buf */
320 unsigned code; /* the code to send */
321 int extra; /* number of extra bits to send */
322
323- if (s->last_lit != 0) do {
324- dist = s->d_buf[lx];
325- lc = s->l_buf[lx++];
326+ if (s->sym_next != 0) do {
327+ dist = s->sym_buf[sx++] & 0xff;
328+ dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
329+ lc = s->sym_buf[sx++];
330 if (dist == 0) {
331 send_code(s, lc, ltree); /* send a literal byte */
332 Tracecv(isgraph(lc), (stderr," '%c' ", lc));
333@@ -1100,11 +1079,10 @@ local void compress_block(s, ltree, dtree)
334 }
335 } /* literal or match pair ? */
336
337- /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
338- Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
339- "pendingBuf overflow");
340+ /* Check that the overlay between pending_buf and sym_buf is ok: */
341+ Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");
342
343- } while (lx < s->last_lit);
344+ } while (sx < s->sym_next);
345
346 send_code(s, END_BLOCK, ltree);
347 }
diff --git a/meta/recipes-core/zlib/zlib_1.2.11.bb b/meta/recipes-core/zlib/zlib_1.2.11.bb
index ef9431ae47..bc42cd64e9 100644
--- a/meta/recipes-core/zlib/zlib_1.2.11.bb
+++ b/meta/recipes-core/zlib/zlib_1.2.11.bb
@@ -8,6 +8,7 @@ LIC_FILES_CHKSUM = "file://zlib.h;beginline=6;endline=23;md5=5377232268e952e9ef6
8 8
9SRC_URI = "${SOURCEFORGE_MIRROR}/libpng/${BPN}/${PV}/${BPN}-${PV}.tar.xz \ 9SRC_URI = "${SOURCEFORGE_MIRROR}/libpng/${BPN}/${PV}/${BPN}-${PV}.tar.xz \
10 file://ldflags-tests.patch \ 10 file://ldflags-tests.patch \
11 file://CVE-2018-25032.patch \
11 file://run-ptest \ 12 file://run-ptest \
12 " 13 "
13UPSTREAM_CHECK_URI = "http://zlib.net/" 14UPSTREAM_CHECK_URI = "http://zlib.net/"