diff options
author | Davide Gardenal <davidegarde2000@gmail.com> | 2022-03-14 16:21:13 +0100 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2022-03-23 23:16:12 +0000 |
commit | 9d155cbf956024e6ade0f10486ed8fe427652ad0 (patch) | |
tree | 005ac83a90490daeb3d77c080b06069fc19602c0 /meta/recipes-support/re2c/re2c | |
parent | cb78d34faf3ecf8e6bb7961bd388170f5f56e829 (diff) | |
download | poky-9d155cbf956024e6ade0f10486ed8fe427652ad0.tar.gz |
re2c: backport fix for CVE-2018-21232
Backport commits from the following issue:
https://github.com/skvadrik/re2c/issues/219
CVE: CVE-2018-21232
(From OE-Core rev: 8c5ee47d446b36d6832acc8452687f50101f3e65)
Signed-off-by: Davide Gardenal <davide.gardenal@huawei.com>
Signed-off-by: Steve Sakoman <steve@sakoman.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'meta/recipes-support/re2c/re2c')
-rw-r--r-- | meta/recipes-support/re2c/re2c/CVE-2018-21232-1.patch | 347 | ||||
-rw-r--r-- | meta/recipes-support/re2c/re2c/CVE-2018-21232-2.patch | 243 | ||||
-rw-r--r-- | meta/recipes-support/re2c/re2c/CVE-2018-21232-3.patch | 156 | ||||
-rw-r--r-- | meta/recipes-support/re2c/re2c/CVE-2018-21232-4.patch | 166 |
4 files changed, 912 insertions, 0 deletions
diff --git a/meta/recipes-support/re2c/re2c/CVE-2018-21232-1.patch b/meta/recipes-support/re2c/re2c/CVE-2018-21232-1.patch new file mode 100644 index 0000000000..b7dcaefad3 --- /dev/null +++ b/meta/recipes-support/re2c/re2c/CVE-2018-21232-1.patch | |||
@@ -0,0 +1,347 @@ | |||
1 | From fd634998f813340768c333cdad638498602856e5 Mon Sep 17 00:00:00 2001 | ||
2 | From: Ulya Trofimovich <skvadrik@gmail.com> | ||
3 | Date: Tue, 21 Apr 2020 21:28:32 +0100 | ||
4 | Subject: [PATCH] Rewrite recursion into iteration (Tarjan's SCC algorithm and | ||
5 | YYFILL states). | ||
6 | |||
7 | This is to avoid stack overflow on large RE (especially on instrumented | ||
8 | builds that have larger stack frames, like AddressSanitizer). | ||
9 | |||
10 | Stack overflow reported by Agostino Sarubbo. | ||
11 | Related to #219 "overflow-1.re test fails on system with small stack". | ||
12 | |||
13 | Upstram-Status: Backport: | ||
14 | https://github.com/skvadrik/re2c/commit/fd634998f813340768c333cdad638498602856e5 | ||
15 | |||
16 | CVE: CVE-2018-21232 | ||
17 | |||
18 | Signed-off-by: Davide Gardenal <davide.gardenal@huawei.com> | ||
19 | --- | ||
20 | diff --git a/src/dfa/fillpoints.cc b/src/dfa/fillpoints.cc | ||
21 | --- a/src/dfa/fillpoints.cc (revision e58939b34bb4c37cd990f82dc286f21cb405743e) | ||
22 | +++ b/src/dfa/fillpoints.cc (date 1646929180243) | ||
23 | @@ -5,151 +5,186 @@ | ||
24 | |||
25 | #include "src/dfa/dfa.h" | ||
26 | |||
27 | -namespace re2c | ||
28 | -{ | ||
29 | + | ||
30 | +/* | ||
31 | + * note [finding strongly connected components of DFA] | ||
32 | + * | ||
33 | + * A slight modification of Tarjan's algorithm. | ||
34 | + * | ||
35 | + * The algorithm traverses the DFA in depth-first order. It maintains a stack | ||
36 | + * of states that have already been visited but haven't been assigned to an SCC | ||
37 | + * yet. For each state the algorithm calculates 'lowlink': index of the highest | ||
38 | + * ancestor state reachable in one step from a descendant of this state. | ||
39 | + * Lowlink is used to determine when a set of states should be popped off stack | ||
40 | + * into a new SCC. | ||
41 | + * | ||
42 | + * We use lowlink to hold different kinds of information: | ||
43 | + * - values in range [0 .. stack size] mean that the state is on stack (a | ||
44 | + * link to a state with the smallest index reachable from this one) | ||
45 | + * - SCC_UND means that this state has not been visited yet | ||
46 | + * - SCC_INF means that this state has already been popped off stack | ||
47 | + * | ||
48 | + * We use stack size (rather than topological sort index) as a unique index of | ||
49 | + * the state on stack. This is safe because the indices of states on stack are | ||
50 | + * unique and less than the indices of states that have been popped off stack | ||
51 | + * (SCC_INF). | ||
52 | + */ | ||
53 | + | ||
54 | +namespace re2c { | ||
55 | + namespace { | ||
56 | |||
57 | -static const size_t SCC_INF = std::numeric_limits<size_t>::max(); | ||
58 | -static const size_t SCC_UND = SCC_INF - 1; | ||
59 | + static const size_t SCC_INF = std::numeric_limits<size_t>::max(); | ||
60 | + static const size_t SCC_UND = SCC_INF - 1; | ||
61 | |||
62 | -static bool loopback(size_t node, size_t narcs, const size_t *arcs) | ||
63 | -{ | ||
64 | - for (size_t i = 0; i < narcs; ++i) | ||
65 | - { | ||
66 | - if (arcs[i] == node) | ||
67 | - { | ||
68 | - return true; | ||
69 | - } | ||
70 | - } | ||
71 | - return false; | ||
72 | -} | ||
73 | + static bool loopback(size_t state, size_t narcs, const size_t *arcs) | ||
74 | + { | ||
75 | + for (size_t i = 0; i < narcs; ++i) { | ||
76 | + if (arcs[i] == state) return true; | ||
77 | + } | ||
78 | + return false; | ||
79 | + } | ||
80 | |||
81 | -/* | ||
82 | - * node [finding strongly connected components of DFA] | ||
83 | - * | ||
84 | - * A slight modification of Tarjan's algorithm. | ||
85 | - * | ||
86 | - * The algorithm walks graph in deep-first order. It maintains a stack | ||
87 | - * of nodes that have already been visited but haven't been assigned to | ||
88 | - * SCC yet. For each node the algorithm calculates 'lowlink': index of | ||
89 | - * the highest ancestor node reachable in one step from a descendant of | ||
90 | - * the node. Lowlink is used to determine when a set of nodes should be | ||
91 | - * popped off the stack into a new SCC. | ||
92 | - * | ||
93 | - * We use lowlink to hold different kinds of information: | ||
94 | - * - values in range [0 .. stack size] mean that this node is on stack | ||
95 | - * (link to a node with the smallest index reachable from this one) | ||
96 | - * - SCC_UND means that this node has not been visited yet | ||
97 | - * - SCC_INF means that this node has already been popped off stack | ||
98 | - * | ||
99 | - * We use stack size (rather than topological sort index) as unique index | ||
100 | - * of a node on stack. This is safe because indices of nodes on stack are | ||
101 | - * still unique and less than indices of nodes that have been popped off | ||
102 | - * stack (SCC_INF). | ||
103 | - * | ||
104 | - */ | ||
105 | -static void scc( | ||
106 | - const dfa_t &dfa, | ||
107 | - std::stack<size_t> &stack, | ||
108 | - std::vector<size_t> &lowlink, | ||
109 | - std::vector<bool> &trivial, | ||
110 | - size_t i) | ||
111 | -{ | ||
112 | - const size_t link = stack.size(); | ||
113 | - lowlink[i] = link; | ||
114 | - stack.push(i); | ||
115 | + struct StackItem { | ||
116 | + size_t state; // current state | ||
117 | + size_t symbol; // next arc to be visited in this state | ||
118 | + size_t link; // Tarjan's "lowlink" | ||
119 | + }; | ||
120 | + | ||
121 | +// Tarjan's algorithm | ||
122 | + static void scc(const dfa_t &dfa, std::vector<bool> &trivial, | ||
123 | + std::vector<StackItem> &stack_dfs) | ||
124 | + { | ||
125 | + std::vector<size_t> lowlink(dfa.states.size(), SCC_UND); | ||
126 | + std::stack<size_t> stack; | ||
127 | + | ||
128 | + StackItem x0 = {0, 0, 0}; | ||
129 | + stack_dfs.push_back(x0); | ||
130 | + | ||
131 | + while (!stack_dfs.empty()) { | ||
132 | + const size_t i = stack_dfs.back().state; | ||
133 | + size_t c = stack_dfs.back().symbol; | ||
134 | + size_t link = stack_dfs.back().link; | ||
135 | + stack_dfs.pop_back(); | ||
136 | + | ||
137 | + const size_t *arcs = dfa.states[i]->arcs; | ||
138 | + | ||
139 | + if (c == 0) { | ||
140 | + // DFS recursive enter | ||
141 | + //DASSERT(lowlink[i] == SCC_UND); | ||
142 | + link = lowlink[i] = stack.size(); | ||
143 | + stack.push(i); | ||
144 | + } | ||
145 | + else { | ||
146 | + // DFS recursive return (from one of successor states) | ||
147 | + const size_t j = arcs[c - 1]; | ||
148 | + //DASSERT(lowlink[j] != SCC_UND); | ||
149 | + lowlink[i] = std::min(lowlink[i], lowlink[j]); | ||
150 | + } | ||
151 | |||
152 | - const size_t *arcs = dfa.states[i]->arcs; | ||
153 | - for (size_t c = 0; c < dfa.nchars; ++c) | ||
154 | - { | ||
155 | - const size_t j = arcs[c]; | ||
156 | - if (j != dfa_t::NIL) | ||
157 | - { | ||
158 | - if (lowlink[j] == SCC_UND) | ||
159 | - { | ||
160 | - scc(dfa, stack, lowlink, trivial, j); | ||
161 | - } | ||
162 | - if (lowlink[j] < lowlink[i]) | ||
163 | - { | ||
164 | - lowlink[i] = lowlink[j]; | ||
165 | - } | ||
166 | - } | ||
167 | - } | ||
168 | + // find the next successor state that hasn't been visited yet | ||
169 | + for (; c < dfa.nchars; ++c) { | ||
170 | + const size_t j = arcs[c]; | ||
171 | + if (j != dfa_t::NIL) { | ||
172 | + if (lowlink[j] == SCC_UND) { | ||
173 | + break; | ||
174 | + } | ||
175 | + lowlink[i] = std::min(lowlink[i], lowlink[j]); | ||
176 | + } | ||
177 | + } | ||
178 | |||
179 | - if (lowlink[i] == link) | ||
180 | - { | ||
181 | - // SCC is non-trivial (has loops) iff it either: | ||
182 | - // - consists of multiple nodes (they all must be interconnected) | ||
183 | - // - consists of single node which loops back to itself | ||
184 | - trivial[i] = i == stack.top() | ||
185 | - && !loopback(i, dfa.nchars, arcs); | ||
186 | + if (c < dfa.nchars) { | ||
187 | + // recurse into the next successor state | ||
188 | + StackItem x1 = {i, c + 1, link}; | ||
189 | + stack_dfs.push_back(x1); | ||
190 | + StackItem x2 = {arcs[c], 0, SCC_UND}; | ||
191 | + stack_dfs.push_back(x2); | ||
192 | + } | ||
193 | + else if (lowlink[i] == link) { | ||
194 | + // all successors have been visited | ||
195 | + // SCC is non-trivial (has loops) if either: | ||
196 | + // - it contains multiple interconnected states | ||
197 | + // - it contains a single self-looping state | ||
198 | + trivial[i] = i == stack.top() && !loopback(i, dfa.nchars, arcs); | ||
199 | |||
200 | - size_t j; | ||
201 | - do | ||
202 | - { | ||
203 | - j = stack.top(); | ||
204 | - stack.pop(); | ||
205 | - lowlink[j] = SCC_INF; | ||
206 | - } | ||
207 | - while (j != i); | ||
208 | - } | ||
209 | -} | ||
210 | + for (;;) { | ||
211 | + const size_t j = stack.top(); | ||
212 | + stack.pop(); | ||
213 | + lowlink[j] = SCC_INF; | ||
214 | + if (i == j) break; | ||
215 | + } | ||
216 | + } | ||
217 | + } | ||
218 | + } | ||
219 | |||
220 | -static void calc_fill( | ||
221 | - const dfa_t &dfa, | ||
222 | - const std::vector<bool> &trivial, | ||
223 | - std::vector<size_t> &fill, | ||
224 | - size_t i) | ||
225 | -{ | ||
226 | - if (fill[i] == SCC_UND) | ||
227 | - { | ||
228 | - fill[i] = 0; | ||
229 | - const size_t *arcs = dfa.states[i]->arcs; | ||
230 | - for (size_t c = 0; c < dfa.nchars; ++c) | ||
231 | - { | ||
232 | - const size_t j = arcs[c]; | ||
233 | - if (j != dfa_t::NIL) | ||
234 | - { | ||
235 | - calc_fill(dfa, trivial, fill, j); | ||
236 | - size_t max = 1; | ||
237 | - if (trivial[j]) | ||
238 | - { | ||
239 | - max += fill[j]; | ||
240 | - } | ||
241 | - if (max > fill[i]) | ||
242 | - { | ||
243 | - fill[i] = max; | ||
244 | - } | ||
245 | - } | ||
246 | - } | ||
247 | - } | ||
248 | -} | ||
249 | - | ||
250 | -void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill) | ||
251 | -{ | ||
252 | - const size_t size = dfa.states.size(); | ||
253 | - | ||
254 | - // find DFA states that belong to non-trivial SCC | ||
255 | - std::stack<size_t> stack; | ||
256 | - std::vector<size_t> lowlink(size, SCC_UND); | ||
257 | - std::vector<bool> trivial(size, false); | ||
258 | - scc(dfa, stack, lowlink, trivial, 0); | ||
259 | - | ||
260 | - // for each DFA state, calculate YYFILL argument: | ||
261 | - // maximal path length to the next YYFILL state | ||
262 | - fill.resize(size, SCC_UND); | ||
263 | - calc_fill(dfa, trivial, fill, 0); | ||
264 | + static void calc_fill(const dfa_t &dfa, const std::vector<bool> &trivial, | ||
265 | + std::vector<StackItem> &stack_dfs, std::vector<size_t> &fill) | ||
266 | + { | ||
267 | + const size_t nstates = dfa.states.size(); | ||
268 | + fill.resize(nstates, SCC_UND); | ||
269 | + | ||
270 | + StackItem x0 = {0, 0, SCC_INF}; | ||
271 | + stack_dfs.push_back(x0); | ||
272 | + | ||
273 | + while (!stack_dfs.empty()) { | ||
274 | + const size_t i = stack_dfs.back().state; | ||
275 | + size_t c = stack_dfs.back().symbol; | ||
276 | + stack_dfs.pop_back(); | ||
277 | + | ||
278 | + const size_t *arcs = dfa.states[i]->arcs; | ||
279 | + | ||
280 | + if (c == 0) { | ||
281 | + // DFS recursive enter | ||
282 | + if (fill[i] != SCC_UND) continue; | ||
283 | + fill[i] = 0; | ||
284 | + } | ||
285 | + else { | ||
286 | + // DFS recursive return (from one of successor states) | ||
287 | + const size_t j = arcs[c - 1]; | ||
288 | + //DASSERT(fill[i] != SCC_UND && fill[j] != SCC_UND); | ||
289 | + fill[i] = std::max(fill[i], 1 + (trivial[j] ? fill[j] : 0)); | ||
290 | + } | ||
291 | + | ||
292 | + // find the next successor state that hasn't been visited yet | ||
293 | + for (; c < dfa.nchars; ++c) { | ||
294 | + const size_t j = arcs[c]; | ||
295 | + if (j != dfa_t::NIL) break; | ||
296 | + } | ||
297 | + | ||
298 | + if (c < dfa.nchars) { | ||
299 | + // recurse into the next successor state | ||
300 | + StackItem x1 = {i, c + 1, SCC_INF}; | ||
301 | + stack_dfs.push_back(x1); | ||
302 | + StackItem x2 = {arcs[c], 0, SCC_INF}; | ||
303 | + stack_dfs.push_back(x2); | ||
304 | + } | ||
305 | + } | ||
306 | |||
307 | - // The following states must trigger YYFILL: | ||
308 | - // - inital state | ||
309 | - // - all states in non-trivial SCCs | ||
310 | - // for other states, reset YYFILL argument to zero | ||
311 | - for (size_t i = 1; i < size; ++i) | ||
312 | - { | ||
313 | - if (trivial[i]) | ||
314 | - { | ||
315 | - fill[i] = 0; | ||
316 | - } | ||
317 | - } | ||
318 | -} | ||
319 | + // The following states must trigger YYFILL: | ||
320 | + // - inital state | ||
321 | + // - all states in non-trivial SCCs | ||
322 | + // for other states, reset YYFILL argument to zero | ||
323 | + for (size_t i = 1; i < nstates; ++i) { | ||
324 | + if (trivial[i]) { | ||
325 | + fill[i] = 0; | ||
326 | + } | ||
327 | + } | ||
328 | + } | ||
329 | |||
330 | + } // anonymous namespace | ||
331 | + | ||
332 | + void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill) | ||
333 | + { | ||
334 | + const size_t nstates = dfa.states.size(); | ||
335 | + std::vector<bool> trivial(nstates, false); | ||
336 | + std::vector<StackItem> stack_dfs; | ||
337 | + stack_dfs.reserve(nstates); | ||
338 | + | ||
339 | + // find DFA states that belong to non-trivial SCC | ||
340 | + scc(dfa, trivial, stack_dfs); | ||
341 | + | ||
342 | + // for each DFA state, calculate YYFILL argument: | ||
343 | + // maximal path length to the next YYFILL state | ||
344 | + calc_fill(dfa, trivial, stack_dfs, fill); | ||
345 | + } | ||
346 | + | ||
347 | } // namespace re2c | ||
diff --git a/meta/recipes-support/re2c/re2c/CVE-2018-21232-2.patch b/meta/recipes-support/re2c/re2c/CVE-2018-21232-2.patch new file mode 100644 index 0000000000..820a6decbc --- /dev/null +++ b/meta/recipes-support/re2c/re2c/CVE-2018-21232-2.patch | |||
@@ -0,0 +1,243 @@ | |||
1 | From 7b5643476bd99c994c4f51b8143f942982d85521 Mon Sep 17 00:00:00 2001 | ||
2 | From: Ulya Trofimovich <skvadrik@gmail.com> | ||
3 | Date: Wed, 22 Apr 2020 22:37:24 +0100 | ||
4 | Subject: [PATCH] Rewrite recursion into iteration (fixed tags computation). | ||
5 | |||
6 | This is to avoid stack overflow on large RE (especially on instrumented | ||
7 | builds that have larger stack frames, like AddressSanitizer). | ||
8 | |||
9 | Partial fix for #219 "overflow-1.re test fails on system with small stack". | ||
10 | |||
11 | Upstream-Stauts: Backport: | ||
12 | https://github.com/skvadrik/re2c/commit/7b5643476bd99c994c4f51b8143f942982d85521 | ||
13 | |||
14 | CVE: CVE-2018-21232 | ||
15 | |||
16 | Signed-off-by: Davide Gardenal <davide.gardenal@huawei.com> | ||
17 | --- | ||
18 | diff --git a/src/re/tag.cc b/src/re/tag.cc | ||
19 | --- a/src/re/tag.cc (revision e58939b34bb4c37cd990f82dc286f21cb405743e) | ||
20 | +++ b/src/re/tag.cc (date 1646986908580) | ||
21 | @@ -6,7 +6,7 @@ | ||
22 | { | ||
23 | |||
24 | const size_t Tag::RIGHTMOST = std::numeric_limits<size_t>::max(); | ||
25 | -const size_t Tag::VARDIST = std::numeric_limits<size_t>::max(); | ||
26 | +const uint32_t Tag::VARDIST = std::numeric_limits<uint32_t>::max(); | ||
27 | const size_t Tag::FICTIVE = Tag::RIGHTMOST - 1; | ||
28 | |||
29 | } // namespace re2c | ||
30 | |||
31 | |||
32 | diff --git a/src/re/tag.h b/src/re/tag.h | ||
33 | --- a/src/re/tag.h (revision e58939b34bb4c37cd990f82dc286f21cb405743e) | ||
34 | +++ b/src/re/tag.h (date 1646986922376) | ||
35 | @@ -19,7 +19,7 @@ | ||
36 | struct Tag | ||
37 | { | ||
38 | static const size_t RIGHTMOST; | ||
39 | - static const size_t VARDIST; | ||
40 | + static const uint32_t VARDIST; | ||
41 | static const size_t FICTIVE; | ||
42 | |||
43 | const std::string *name; | ||
44 | |||
45 | |||
46 | diff --git a/src/re/fixed_tags.cc b/src/re/fixed_tags.cc | ||
47 | --- a/src/re/fixed_tags.cc (revision e58939b34bb4c37cd990f82dc286f21cb405743e) | ||
48 | +++ b/src/re/fixed_tags.cc (date 1646991137317) | ||
49 | @@ -7,78 +7,131 @@ | ||
50 | #include "src/re/tag.h" | ||
51 | |||
52 | namespace re2c { | ||
53 | +namespace { | ||
54 | |||
55 | /* note [fixed and variable tags] | ||
56 | * | ||
57 | - * If distance between two tags is constant (equal for all strings that | ||
58 | - * match the given regexp), then lexer only needs to track one of them: | ||
59 | - * the second tag equals the first tag plus static offset. | ||
60 | + * If distance between two tags is constant (equal for all strings that match | ||
61 | + * the given regexp), then lexer only needs to track one of them: the second | ||
62 | + * tag equals the first tag plus static offset. | ||
63 | * | ||
64 | - * However, this optimization is applied only to tags in top-level | ||
65 | - * concatenation, because other tags may be uninitialized and we don't | ||
66 | - * want to mess with conditional calculation of fixed tags. | ||
67 | - * | ||
68 | + * This optimization is applied only to tags in top-level concatenation, | ||
69 | + * because in other cases the base tag may be NULL, and the calculation of | ||
70 | + * the fixed tag value is not as simple as substracting a fixed offset. | ||
71 | * Furthermore, fixed tags are fobidden with generic API because it cannot | ||
72 | - * express fixed offsets. | ||
73 | - * | ||
74 | - * Tags with history also cannot be fixed. | ||
75 | + * express fixed offsets. M-tags (with history) also cannot be fixed. | ||
76 | * | ||
77 | * Another special case is fictive tags (those that exist only to impose | ||
78 | - * hierarchical laws of POSIX disambiguation). We treat them as fixed | ||
79 | - * in order to suppress code generation. | ||
80 | + * hierarchical laws of POSIX disambiguation). We treat them as fixed in order | ||
81 | + * to suppress code generation. | ||
82 | */ | ||
83 | |||
84 | -static void find_fixed_tags(RE *re, std::vector<Tag> &tags, | ||
85 | - size_t &dist, size_t &base, bool toplevel) | ||
86 | +struct StackItem { | ||
87 | + RE *re; // current sub-RE | ||
88 | + uint32_t dist; // distance backup for alternative, unused for other RE | ||
89 | + uint8_t succ; // index of the next successor to be visited | ||
90 | + bool toplevel; // if this sub-RE is in top-level concatenation | ||
91 | +}; | ||
92 | + | ||
93 | +static void find_fixed_tags(RESpec &spec, std::vector<StackItem> &stack, RE *re0) | ||
94 | { | ||
95 | - switch (re->type) { | ||
96 | - case RE::NIL: break; | ||
97 | - case RE::SYM: | ||
98 | - if (dist != Tag::VARDIST) ++dist; | ||
99 | - break; | ||
100 | - case RE::ALT: { | ||
101 | - size_t d1 = dist, d2 = dist; | ||
102 | - find_fixed_tags(re->alt.re1, tags, d1, base, false); | ||
103 | - find_fixed_tags(re->alt.re2, tags, d2, base, false); | ||
104 | - dist = (d1 == d2) ? d1 : Tag::VARDIST; | ||
105 | - break; | ||
106 | - } | ||
107 | - case RE::CAT: | ||
108 | - find_fixed_tags(re->cat.re2, tags, dist, base, toplevel); | ||
109 | - find_fixed_tags(re->cat.re1, tags, dist, base, toplevel); | ||
110 | - break; | ||
111 | - case RE::ITER: | ||
112 | - find_fixed_tags(re->iter.re, tags, dist, base, false); | ||
113 | - dist = Tag::VARDIST; | ||
114 | - break; | ||
115 | - case RE::TAG: { | ||
116 | - // see note [fixed and variable tags] | ||
117 | - Tag &tag = tags[re->tag.idx]; | ||
118 | - if (fictive(tag)) { | ||
119 | - tag.base = tag.dist = 0; | ||
120 | - } else if (toplevel && dist != Tag::VARDIST && !history(tag)) { | ||
121 | - tag.base = base; | ||
122 | - tag.dist = dist; | ||
123 | - } else if (toplevel) { | ||
124 | - base = re->tag.idx; | ||
125 | - dist = 0; | ||
126 | - } | ||
127 | - if (trailing(tag)) dist = 0; | ||
128 | - break; | ||
129 | - } | ||
130 | - } | ||
131 | + static const uint32_t VARDIST = Tag::VARDIST; | ||
132 | + bool toplevel = spec.opts->input_api != INPUT_CUSTOM; | ||
133 | + | ||
134 | + // base tag, intially the fake "rightmost tag" (the end of RE) | ||
135 | + size_t base = Tag::RIGHTMOST; | ||
136 | + | ||
137 | + // the distance to the nearest top-level tag to the right (base tag) | ||
138 | + uint32_t dist = 0; | ||
139 | + | ||
140 | + const StackItem i0 = {re0, VARDIST, 0, toplevel}; | ||
141 | + stack.push_back(i0); | ||
142 | + | ||
143 | + while (!stack.empty()) { | ||
144 | + const StackItem i = stack.back(); | ||
145 | + stack.pop_back(); | ||
146 | + RE *re = i.re; | ||
147 | + | ||
148 | + if (re->type == RE::SYM) { | ||
149 | + if (dist != VARDIST) ++dist; | ||
150 | + } | ||
151 | + else if (re->type == RE::ALT) { | ||
152 | + if (i.succ == 0) { | ||
153 | + // save the current distance on stack (from the alternative end | ||
154 | + // to base) and recurse into the left sub-RE | ||
155 | + StackItem k = {re, dist, 1, i.toplevel}; | ||
156 | + stack.push_back(k); | ||
157 | + StackItem j = {re->alt.re1, VARDIST, 0, false}; | ||
158 | + stack.push_back(j); | ||
159 | + } | ||
160 | + else if (i.succ == 1) { | ||
161 | + // save the current distance on stack (from the left sub-RE to | ||
162 | + // base), reset distance to the distance popped from stack (from | ||
163 | + // the alternative end to base), recurse into the right sub-RE | ||
164 | + StackItem k = {re, dist, 2, i.toplevel}; | ||
165 | + stack.push_back(k); | ||
166 | + StackItem j = {re->alt.re2, VARDIST, 0, false}; | ||
167 | + stack.push_back(j); | ||
168 | + dist = i.dist; | ||
169 | + } | ||
170 | + else { | ||
171 | + // both sub-RE visited, compare the distance on stack (from the | ||
172 | + // left sub-RE to base) to the current distance (from the right | ||
173 | + // sub-RE to base), if not equal set variable distance | ||
174 | + dist = (i.dist == dist) ? i.dist : VARDIST; | ||
175 | + } | ||
176 | + } | ||
177 | + else if (re->type == RE::ITER) { | ||
178 | + if (i.succ == 0) { | ||
179 | + // recurse into the sub-RE | ||
180 | + StackItem k = {re, VARDIST, 1, i.toplevel}; | ||
181 | + stack.push_back(k); | ||
182 | + StackItem j = {re->iter.re, VARDIST, 0, false}; | ||
183 | + stack.push_back(j); | ||
184 | + } | ||
185 | + else { | ||
186 | + // sub-RE visited, assume unknown number of iterations | ||
187 | + // TODO: find precise distance for fixed repetition counter | ||
188 | + dist = VARDIST; | ||
189 | + } | ||
190 | + } | ||
191 | + else if (re->type == RE::CAT) { | ||
192 | + // the right sub-RE is pushed on stack after the left sub-RE and | ||
193 | + // visited earlier (because distance is computed from right to left) | ||
194 | + StackItem j1 = {re->cat.re1, VARDIST, 0, i.toplevel}; | ||
195 | + stack.push_back(j1); | ||
196 | + StackItem j2 = {re->cat.re2, VARDIST, 0, i.toplevel}; | ||
197 | + stack.push_back(j2); | ||
198 | + } | ||
199 | + else if (re->type == RE::TAG) { | ||
200 | + // see note [fixed and variable tags] | ||
201 | + Tag &tag = spec.tags[re->tag.idx]; | ||
202 | + if (fictive(tag)) { | ||
203 | + tag.base = tag.dist = 0; | ||
204 | + } | ||
205 | + else if (i.toplevel && dist != VARDIST && !history(tag)) { | ||
206 | + tag.base = base; | ||
207 | + tag.dist = dist; | ||
208 | + } | ||
209 | + else if (i.toplevel) { | ||
210 | + base = re->tag.idx; | ||
211 | + dist = 0; | ||
212 | + } | ||
213 | + if (trailing(tag)) { | ||
214 | + dist = 0; | ||
215 | + } | ||
216 | + } | ||
217 | + } | ||
218 | } | ||
219 | + | ||
220 | +} // anonymous namespace | ||
221 | |||
222 | -void find_fixed_tags(RESpec &spec) | ||
223 | -{ | ||
224 | - const bool generic = spec.opts->input_api == INPUT_CUSTOM; | ||
225 | - std::vector<RE*>::iterator | ||
226 | - i = spec.res.begin(), | ||
227 | - e = spec.res.end(); | ||
228 | - for (; i != e; ++i) { | ||
229 | - size_t base = Tag::RIGHTMOST, dist = 0; | ||
230 | - find_fixed_tags(*i, spec.tags, dist, base, !generic); | ||
231 | - } | ||
232 | -} | ||
233 | + void find_fixed_tags(RESpec &spec) | ||
234 | + { | ||
235 | + std::vector<StackItem> stack; | ||
236 | + for (std::vector<RE*>::iterator i = spec.res.begin(); i != spec.res.end(); ++i) { | ||
237 | + find_fixed_tags(spec, stack, *i); | ||
238 | + } | ||
239 | + } | ||
240 | |||
241 | -} // namespace re2c | ||
242 | +} // namespace re2c | ||
243 | \ No newline at end of file | ||
diff --git a/meta/recipes-support/re2c/re2c/CVE-2018-21232-3.patch b/meta/recipes-support/re2c/re2c/CVE-2018-21232-3.patch new file mode 100644 index 0000000000..f942e21cba --- /dev/null +++ b/meta/recipes-support/re2c/re2c/CVE-2018-21232-3.patch | |||
@@ -0,0 +1,156 @@ | |||
1 | From 4d9c809355b574f2a58eac119f5e076c48e4d1e2 Mon Sep 17 00:00:00 2001 | ||
2 | From: Ulya Trofimovich <skvadrik@gmail.com> | ||
3 | Date: Thu, 23 Apr 2020 22:16:51 +0100 | ||
4 | Subject: [PATCH] Rewrite recursion into iteration (nullable RE). | ||
5 | |||
6 | This is to avoid stack overflow on large RE (especially on instrumented | ||
7 | builds that have larger stack frames, like AddressSanitizer). | ||
8 | |||
9 | Partial fix for #219 "overflow-1.re test fails on system with small stack". | ||
10 | |||
11 | Upstream-Status: Backport: | ||
12 | https://github.com/skvadrik/re2c/commit/4d9c809355b574f2a58eac119f5e076c48e4d1e2 | ||
13 | |||
14 | CVE: CVE-2018-21232 | ||
15 | |||
16 | Signed-off-by: Davide Gardenal <davide.gardenal@huawei.com> | ||
17 | --- | ||
18 | diff --git a/src/re/nullable.cc b/src/re/nullable.cc | ||
19 | --- a/src/re/nullable.cc (revision e58939b34bb4c37cd990f82dc286f21cb405743e) | ||
20 | +++ b/src/re/nullable.cc (date 1647253886226) | ||
21 | @@ -9,43 +9,100 @@ | ||
22 | #include "src/re/tag.h" | ||
23 | |||
24 | namespace re2c { | ||
25 | + namespace { | ||
26 | + | ||
27 | + struct StackItem { | ||
28 | + const RE *re; // current sub-RE | ||
29 | + uint8_t succ; // index of the next sucessor to be visited | ||
30 | + }; | ||
31 | |||
32 | -static bool nullable(const RESpec &spec, const RE *re, bool &trail) | ||
33 | -{ | ||
34 | - if (trail) return true; | ||
35 | + static bool nullable(const RESpec &spec, std::vector<StackItem> &stack, const RE *re0) | ||
36 | + { | ||
37 | + // the "nullable" status of the last sub-RE visited by DFS | ||
38 | + bool null = false; | ||
39 | |||
40 | - switch (re->type) { | ||
41 | - case RE::NIL: return true; | ||
42 | - case RE::SYM: return false; | ||
43 | - case RE::ITER: | ||
44 | - return nullable(spec, re->iter.re, trail); | ||
45 | - case RE::TAG: | ||
46 | - trail |= trailing(spec.tags[re->tag.idx]); | ||
47 | - return true; | ||
48 | - case RE::ALT: | ||
49 | - return nullable(spec, re->alt.re1, trail) | ||
50 | - || nullable(spec, re->alt.re2, trail); | ||
51 | - case RE::CAT: | ||
52 | - return nullable(spec, re->cat.re1, trail) | ||
53 | - && nullable(spec, re->cat.re2, trail); | ||
54 | - } | ||
55 | - return false; /* unreachable */ | ||
56 | -} | ||
57 | + const StackItem i0 = {re0, 0}; | ||
58 | + stack.push_back(i0); | ||
59 | + | ||
60 | + while (!stack.empty()) { | ||
61 | + const StackItem i = stack.back(); | ||
62 | + stack.pop_back(); | ||
63 | + | ||
64 | + const RE *re = i.re; | ||
65 | + if (re->type == RE::NIL) { | ||
66 | + null = true; | ||
67 | + } | ||
68 | + else if (re->type == RE::SYM) { | ||
69 | + null = false; | ||
70 | + } | ||
71 | + else if (re->type == RE::TAG) { | ||
72 | + null = true; | ||
73 | |||
74 | -/* | ||
75 | - * warn about rules that match empty string | ||
76 | - * (including rules with nonempty trailing context) | ||
77 | - * false positives on partially self-shadowed rules like [^]? | ||
78 | - */ | ||
79 | -void warn_nullable(const RESpec &spec, const std::string &cond) | ||
80 | -{ | ||
81 | - const size_t nre = spec.res.size(); | ||
82 | - for (size_t i = 0; i < nre; ++i) { | ||
83 | - bool trail = false; | ||
84 | - if (nullable(spec, spec.res[i], trail)) { | ||
85 | - spec.warn.match_empty_string(spec.rules[i].code->fline, cond); | ||
86 | - } | ||
87 | - } | ||
88 | -} | ||
89 | + // Trailing context is always in top-level concatenation, and sub-RE | ||
90 | + // are visited from left to right. Since we are here, sub-RE to the | ||
91 | + // left of the trailing context is nullable (otherwise we would not | ||
92 | + // recurse into the right sub-RE), therefore the whole RE is nullable. | ||
93 | + if (trailing(spec.tags[re->tag.idx])) { | ||
94 | + //DASSERT(stack.size() == 1 && stack.back().re->type == RE::CAT); | ||
95 | + stack.pop_back(); | ||
96 | + break; | ||
97 | + } | ||
98 | + } | ||
99 | + else if (re->type == RE::ALT) { | ||
100 | + if (i.succ == 0) { | ||
101 | + // recurse into the left sub-RE | ||
102 | + StackItem k = {re, 1}; | ||
103 | + stack.push_back(k); | ||
104 | + StackItem j = {re->alt.re1, 0}; | ||
105 | + stack.push_back(j); | ||
106 | + } | ||
107 | + else if (!null) { | ||
108 | + // if the left sub-RE is nullable, so is alternative, so stop | ||
109 | + // recursion; otherwise recurse into the right sub-RE | ||
110 | + StackItem j = {re->alt.re2, 0}; | ||
111 | + stack.push_back(j); | ||
112 | + } | ||
113 | + } | ||
114 | + else if (re->type == RE::CAT) { | ||
115 | + if (i.succ == 0) { | ||
116 | + // recurse into the left sub-RE | ||
117 | + StackItem k = {re, 1}; | ||
118 | + stack.push_back(k); | ||
119 | + StackItem j = {re->cat.re1, 0}; | ||
120 | + stack.push_back(j); | ||
121 | + } | ||
122 | + else if (null) { | ||
123 | + // if the left sub-RE is not nullable, neither is concatenation, | ||
124 | + // so stop recursion; otherwise recurse into the right sub-RE | ||
125 | + StackItem j = {re->cat.re2, 0}; | ||
126 | + stack.push_back(j); | ||
127 | + } | ||
128 | + } | ||
129 | + else if (re->type == RE::ITER) { | ||
130 | + // iteration is nullable if the sub-RE is nullable | ||
131 | + // (zero repetitions is represented with alternative) | ||
132 | + StackItem j = {re->iter.re, 0}; | ||
133 | + stack.push_back(j); | ||
134 | + } | ||
135 | + } | ||
136 | + | ||
137 | + //DASSERT(stack.empty()); | ||
138 | + return null; | ||
139 | + } | ||
140 | + | ||
141 | + } // anonymous namespace | ||
142 | + | ||
143 | +// Warn about rules that match empty string (including rules with nonempty | ||
144 | +// trailing context). False positives on partially self-shadowed rules like [^]? | ||
145 | + void warn_nullable(const RESpec &spec, const std::string &cond) | ||
146 | + { | ||
147 | + std::vector<StackItem> stack; | ||
148 | + const size_t nre = spec.res.size(); | ||
149 | + for (size_t i = 0; i < nre; ++i) { | ||
150 | + if (nullable(spec, stack, spec.res[i])) { | ||
151 | + spec.warn.match_empty_string(spec.rules[i].code->fline, cond); | ||
152 | + } | ||
153 | + } | ||
154 | + } | ||
155 | |||
156 | } // namespace re2c | ||
diff --git a/meta/recipes-support/re2c/re2c/CVE-2018-21232-4.patch b/meta/recipes-support/re2c/re2c/CVE-2018-21232-4.patch new file mode 100644 index 0000000000..ee8d84b1bc --- /dev/null +++ b/meta/recipes-support/re2c/re2c/CVE-2018-21232-4.patch | |||
@@ -0,0 +1,166 @@ | |||
1 | From 89be91f3df00657261870adbc590209fdb2bc405 Mon Sep 17 00:00:00 2001 | ||
2 | From: Ulya Trofimovich <skvadrik@gmail.com> | ||
3 | Date: Thu, 23 Apr 2020 23:02:21 +0100 | ||
4 | Subject: [PATCH] Rewrite recursion into iteration (estimation of NFA size for | ||
5 | RE). | ||
6 | |||
7 | This is to avoid stack overflow on large RE (especially on instrumented | ||
8 | builds that have larger stack frames, like AddressSanitizer). | ||
9 | |||
10 | Partial fix for #219 "overflow-1.re test fails on system with small stack". | ||
11 | |||
12 | Upstram-Status: Backport: | ||
13 | https://github.com/skvadrik/re2c/commit/89be91f3df00657261870adbc590209fdb2bc405 | ||
14 | |||
15 | CVE: CVE-2018-21232 | ||
16 | |||
17 | Signed-off-by: Davide Gardenal <davide.gardenal@huawei.com> | ||
18 | --- | ||
19 | diff --git a/src/nfa/estimate_size.cc b/src/nfa/estimate_size.cc | ||
20 | --- a/src/nfa/estimate_size.cc (revision e58939b34bb4c37cd990f82dc286f21cb405743e) | ||
21 | +++ b/src/nfa/estimate_size.cc (date 1647005399735) | ||
22 | @@ -6,41 +6,113 @@ | ||
23 | #include "src/re/re.h" | ||
24 | |||
25 | namespace re2c { | ||
26 | +namespace { | ||
27 | + | ||
28 | +struct StackItem { | ||
29 | + const RE *re; // current sub-RE | ||
30 | + uint32_t size; // size of the sub-RE (only for alternative and concatenation) | ||
31 | + uint8_t succ; // index of the next sucessor to be visited | ||
32 | +}; | ||
33 | |||
34 | -static size_t estimate(const RE *re) | ||
35 | +static uint32_t estimate_re_size(const RE *re0, std::vector<StackItem> &stack) | ||
36 | { | ||
37 | - switch (re->type) { | ||
38 | - case RE::NIL: return 0; | ||
39 | - case RE::SYM: return 1; | ||
40 | - case RE::TAG: return 1; | ||
41 | - case RE::ALT: | ||
42 | - return estimate(re->alt.re1) | ||
43 | - + estimate(re->alt.re2) | ||
44 | - + 1; | ||
45 | - case RE::CAT: | ||
46 | - return estimate(re->cat.re1) | ||
47 | - + estimate(re->cat.re2); | ||
48 | - case RE::ITER: { | ||
49 | - const size_t | ||
50 | - iter = estimate(re->iter.re), | ||
51 | - min = re->iter.min, | ||
52 | - max = re->iter.max; | ||
53 | - return max == AST::MANY | ||
54 | - ? iter * min + 1 | ||
55 | - : iter * max + (max - min); | ||
56 | - } | ||
57 | - } | ||
58 | - return 0; /* unreachable */ | ||
59 | -} | ||
60 | + // the estimated size of the last sub-RE visited by DFS | ||
61 | + uint32_t size = 0; | ||
62 | + | ||
63 | + const StackItem i0 = {re0, 0, 0}; | ||
64 | + stack.push_back(i0); | ||
65 | + | ||
66 | + while (!stack.empty()) { | ||
67 | + const StackItem i = stack.back(); | ||
68 | + stack.pop_back(); | ||
69 | + | ||
70 | + const RE *re = i.re; | ||
71 | + if (re->type == RE::NIL) { | ||
72 | + size = 0; | ||
73 | + } | ||
74 | + else if (re->type == RE::SYM || re->type == RE::TAG) { | ||
75 | + size = 1; | ||
76 | + } | ||
77 | + else if (re->type == RE::ALT) { | ||
78 | + if (i.succ == 0) { | ||
79 | + // recurse into the left sub-RE | ||
80 | + StackItem k = {re, 0, 1}; | ||
81 | + stack.push_back(k); | ||
82 | + StackItem j = {re->alt.re1, 0, 0}; | ||
83 | + stack.push_back(j); | ||
84 | + } | ||
85 | + else if (i.succ == 1) { | ||
86 | + // recurse into the right sub-RE | ||
87 | + StackItem k = {re, size, 2}; | ||
88 | + stack.push_back(k); | ||
89 | + StackItem j = {re->alt.re2, 0, 0}; | ||
90 | + stack.push_back(j); | ||
91 | + } | ||
92 | + else { | ||
93 | + // both sub-RE visited, recursive return | ||
94 | + size = i.size // left sub-RE (saved on stack) | ||
95 | + + size // right sub-RE (just visited by DFS) | ||
96 | + + 1; // additional state for alternative | ||
97 | + } | ||
98 | + } | ||
99 | + else if (re->type == RE::CAT) { | ||
100 | + if (i.succ == 0) { | ||
101 | + // recurse into the left sub-RE | ||
102 | + StackItem k = {re, 0, 1}; | ||
103 | + stack.push_back(k); | ||
104 | + StackItem j = {re->cat.re1, 0, 0}; | ||
105 | + stack.push_back(j); | ||
106 | + } | ||
107 | + else if (i.succ == 1) { | ||
108 | + // recurse into the right sub-RE | ||
109 | + StackItem k = {re, size, 2}; | ||
110 | + stack.push_back(k); | ||
111 | + StackItem j = {re->cat.re2, 0, 0}; | ||
112 | + stack.push_back(j); | ||
113 | + } | ||
114 | + else { | ||
115 | + // both sub-RE visited, recursive return | ||
116 | + size = i.size // left sub-RE (saved on stack) | ||
117 | + + size; // right sub-RE (just visited by DFS) | ||
118 | + } | ||
119 | + } | ||
120 | + else if (re->type == RE::ITER) { | ||
121 | + if (i.succ == 0) { | ||
122 | + // recurse into the sub-RE | ||
123 | + StackItem k = {re, 0, 1}; | ||
124 | + stack.push_back(k); | ||
125 | + StackItem j = {re->iter.re, 0, 0}; | ||
126 | + stack.push_back(j); | ||
127 | + } | ||
128 | + else { | ||
129 | + // sub-RE visited, recursive return | ||
130 | + const uint32_t min = re->iter.min, max = re->iter.max; | ||
131 | + size = max == AST::MANY | ||
132 | + ? size * min + 1 | ||
133 | + : size * max + (max - min); | ||
134 | + } | ||
135 | + } | ||
136 | + } | ||
137 | + | ||
138 | + //DASSERT(stack.empty()); | ||
139 | + return size; | ||
140 | +} | ||
141 | + | ||
142 | +} // anonymous namespace | ||
143 | |||
144 | size_t estimate_size(const std::vector<RE*> &res) | ||
145 | { | ||
146 | - const size_t nre = res.size(); | ||
147 | - size_t size = nre - 1; | ||
148 | - for (size_t i = 0; i < nre; ++i) { | ||
149 | - size += estimate(res[i]) + 1; | ||
150 | - } | ||
151 | - return size; | ||
152 | + std::vector<StackItem> stack; | ||
153 | + | ||
154 | + const size_t nre = res.size(); | ||
155 | + //DASSERT(nre > 0); | ||
156 | + size_t size = nre - 1; | ||
157 | + | ||
158 | + for (size_t i = 0; i < nre; ++i) { | ||
159 | + size += estimate_re_size(res[i], stack) + 1; | ||
160 | + } | ||
161 | + | ||
162 | + return size; | ||
163 | } | ||
164 | |||
165 | } // namespace re2c | ||
166 | |||