summaryrefslogtreecommitdiffstats
path: root/meta/recipes-support/re2c
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-support/re2c')
-rw-r--r--meta/recipes-support/re2c/re2c/CVE-2018-21232-1.patch347
-rw-r--r--meta/recipes-support/re2c/re2c/CVE-2018-21232-2.patch243
-rw-r--r--meta/recipes-support/re2c/re2c/CVE-2018-21232-3.patch156
-rw-r--r--meta/recipes-support/re2c/re2c/CVE-2018-21232-4.patch166
-rw-r--r--meta/recipes-support/re2c/re2c_1.0.1.bb10
5 files changed, 920 insertions, 2 deletions
diff --git a/meta/recipes-support/re2c/re2c/CVE-2018-21232-1.patch b/meta/recipes-support/re2c/re2c/CVE-2018-21232-1.patch
new file mode 100644
index 0000000000..b7dcaefad3
--- /dev/null
+++ b/meta/recipes-support/re2c/re2c/CVE-2018-21232-1.patch
@@ -0,0 +1,347 @@
1From fd634998f813340768c333cdad638498602856e5 Mon Sep 17 00:00:00 2001
2From: Ulya Trofimovich <skvadrik@gmail.com>
3Date: Tue, 21 Apr 2020 21:28:32 +0100
4Subject: [PATCH] Rewrite recursion into iteration (Tarjan's SCC algorithm and
5 YYFILL states).
6
7This is to avoid stack overflow on large RE (especially on instrumented
8builds that have larger stack frames, like AddressSanitizer).
9
10Stack overflow reported by Agostino Sarubbo.
11Related to #219 "overflow-1.re test fails on system with small stack".
12
13Upstram-Status: Backport:
14https://github.com/skvadrik/re2c/commit/fd634998f813340768c333cdad638498602856e5
15
16CVE: CVE-2018-21232
17
18Signed-off-by: Davide Gardenal <davide.gardenal@huawei.com>
19---
20diff --git a/src/dfa/fillpoints.cc b/src/dfa/fillpoints.cc
21--- a/src/dfa/fillpoints.cc (revision e58939b34bb4c37cd990f82dc286f21cb405743e)
22+++ b/src/dfa/fillpoints.cc (date 1646929180243)
23@@ -5,151 +5,186 @@
24
25 #include "src/dfa/dfa.h"
26
27-namespace re2c
28-{
29+
30+/*
31+ * note [finding strongly connected components of DFA]
32+ *
33+ * A slight modification of Tarjan's algorithm.
34+ *
35+ * The algorithm traverses the DFA in depth-first order. It maintains a stack
36+ * of states that have already been visited but haven't been assigned to an SCC
37+ * yet. For each state the algorithm calculates 'lowlink': index of the highest
38+ * ancestor state reachable in one step from a descendant of this state.
39+ * Lowlink is used to determine when a set of states should be popped off stack
40+ * into a new SCC.
41+ *
42+ * We use lowlink to hold different kinds of information:
43+ * - values in range [0 .. stack size] mean that the state is on stack (a
44+ * link to a state with the smallest index reachable from this one)
45+ * - SCC_UND means that this state has not been visited yet
46+ * - SCC_INF means that this state has already been popped off stack
47+ *
48+ * We use stack size (rather than topological sort index) as a unique index of
49+ * the state on stack. This is safe because the indices of states on stack are
50+ * unique and less than the indices of states that have been popped off stack
51+ * (SCC_INF).
52+ */
53+
54+namespace re2c {
55+ namespace {
56
57-static const size_t SCC_INF = std::numeric_limits<size_t>::max();
58-static const size_t SCC_UND = SCC_INF - 1;
59+ static const size_t SCC_INF = std::numeric_limits<size_t>::max();
60+ static const size_t SCC_UND = SCC_INF - 1;
61
62-static bool loopback(size_t node, size_t narcs, const size_t *arcs)
63-{
64- for (size_t i = 0; i < narcs; ++i)
65- {
66- if (arcs[i] == node)
67- {
68- return true;
69- }
70- }
71- return false;
72-}
73+ static bool loopback(size_t state, size_t narcs, const size_t *arcs)
74+ {
75+ for (size_t i = 0; i < narcs; ++i) {
76+ if (arcs[i] == state) return true;
77+ }
78+ return false;
79+ }
80
81-/*
82- * node [finding strongly connected components of DFA]
83- *
84- * A slight modification of Tarjan's algorithm.
85- *
86- * The algorithm walks graph in deep-first order. It maintains a stack
87- * of nodes that have already been visited but haven't been assigned to
88- * SCC yet. For each node the algorithm calculates 'lowlink': index of
89- * the highest ancestor node reachable in one step from a descendant of
90- * the node. Lowlink is used to determine when a set of nodes should be
91- * popped off the stack into a new SCC.
92- *
93- * We use lowlink to hold different kinds of information:
94- * - values in range [0 .. stack size] mean that this node is on stack
95- * (link to a node with the smallest index reachable from this one)
96- * - SCC_UND means that this node has not been visited yet
97- * - SCC_INF means that this node has already been popped off stack
98- *
99- * We use stack size (rather than topological sort index) as unique index
100- * of a node on stack. This is safe because indices of nodes on stack are
101- * still unique and less than indices of nodes that have been popped off
102- * stack (SCC_INF).
103- *
104- */
105-static void scc(
106- const dfa_t &dfa,
107- std::stack<size_t> &stack,
108- std::vector<size_t> &lowlink,
109- std::vector<bool> &trivial,
110- size_t i)
111-{
112- const size_t link = stack.size();
113- lowlink[i] = link;
114- stack.push(i);
115+ struct StackItem {
116+ size_t state; // current state
117+ size_t symbol; // next arc to be visited in this state
118+ size_t link; // Tarjan's "lowlink"
119+ };
120+
121+// Tarjan's algorithm
122+ static void scc(const dfa_t &dfa, std::vector<bool> &trivial,
123+ std::vector<StackItem> &stack_dfs)
124+ {
125+ std::vector<size_t> lowlink(dfa.states.size(), SCC_UND);
126+ std::stack<size_t> stack;
127+
128+ StackItem x0 = {0, 0, 0};
129+ stack_dfs.push_back(x0);
130+
131+ while (!stack_dfs.empty()) {
132+ const size_t i = stack_dfs.back().state;
133+ size_t c = stack_dfs.back().symbol;
134+ size_t link = stack_dfs.back().link;
135+ stack_dfs.pop_back();
136+
137+ const size_t *arcs = dfa.states[i]->arcs;
138+
139+ if (c == 0) {
140+ // DFS recursive enter
141+ //DASSERT(lowlink[i] == SCC_UND);
142+ link = lowlink[i] = stack.size();
143+ stack.push(i);
144+ }
145+ else {
146+ // DFS recursive return (from one of successor states)
147+ const size_t j = arcs[c - 1];
148+ //DASSERT(lowlink[j] != SCC_UND);
149+ lowlink[i] = std::min(lowlink[i], lowlink[j]);
150+ }
151
152- const size_t *arcs = dfa.states[i]->arcs;
153- for (size_t c = 0; c < dfa.nchars; ++c)
154- {
155- const size_t j = arcs[c];
156- if (j != dfa_t::NIL)
157- {
158- if (lowlink[j] == SCC_UND)
159- {
160- scc(dfa, stack, lowlink, trivial, j);
161- }
162- if (lowlink[j] < lowlink[i])
163- {
164- lowlink[i] = lowlink[j];
165- }
166- }
167- }
168+ // find the next successor state that hasn't been visited yet
169+ for (; c < dfa.nchars; ++c) {
170+ const size_t j = arcs[c];
171+ if (j != dfa_t::NIL) {
172+ if (lowlink[j] == SCC_UND) {
173+ break;
174+ }
175+ lowlink[i] = std::min(lowlink[i], lowlink[j]);
176+ }
177+ }
178
179- if (lowlink[i] == link)
180- {
181- // SCC is non-trivial (has loops) iff it either:
182- // - consists of multiple nodes (they all must be interconnected)
183- // - consists of single node which loops back to itself
184- trivial[i] = i == stack.top()
185- && !loopback(i, dfa.nchars, arcs);
186+ if (c < dfa.nchars) {
187+ // recurse into the next successor state
188+ StackItem x1 = {i, c + 1, link};
189+ stack_dfs.push_back(x1);
190+ StackItem x2 = {arcs[c], 0, SCC_UND};
191+ stack_dfs.push_back(x2);
192+ }
193+ else if (lowlink[i] == link) {
194+ // all successors have been visited
195+ // SCC is non-trivial (has loops) if either:
196+ // - it contains multiple interconnected states
197+ // - it contains a single self-looping state
198+ trivial[i] = i == stack.top() && !loopback(i, dfa.nchars, arcs);
199
200- size_t j;
201- do
202- {
203- j = stack.top();
204- stack.pop();
205- lowlink[j] = SCC_INF;
206- }
207- while (j != i);
208- }
209-}
210+ for (;;) {
211+ const size_t j = stack.top();
212+ stack.pop();
213+ lowlink[j] = SCC_INF;
214+ if (i == j) break;
215+ }
216+ }
217+ }
218+ }
219
220-static void calc_fill(
221- const dfa_t &dfa,
222- const std::vector<bool> &trivial,
223- std::vector<size_t> &fill,
224- size_t i)
225-{
226- if (fill[i] == SCC_UND)
227- {
228- fill[i] = 0;
229- const size_t *arcs = dfa.states[i]->arcs;
230- for (size_t c = 0; c < dfa.nchars; ++c)
231- {
232- const size_t j = arcs[c];
233- if (j != dfa_t::NIL)
234- {
235- calc_fill(dfa, trivial, fill, j);
236- size_t max = 1;
237- if (trivial[j])
238- {
239- max += fill[j];
240- }
241- if (max > fill[i])
242- {
243- fill[i] = max;
244- }
245- }
246- }
247- }
248-}
249-
250-void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill)
251-{
252- const size_t size = dfa.states.size();
253-
254- // find DFA states that belong to non-trivial SCC
255- std::stack<size_t> stack;
256- std::vector<size_t> lowlink(size, SCC_UND);
257- std::vector<bool> trivial(size, false);
258- scc(dfa, stack, lowlink, trivial, 0);
259-
260- // for each DFA state, calculate YYFILL argument:
261- // maximal path length to the next YYFILL state
262- fill.resize(size, SCC_UND);
263- calc_fill(dfa, trivial, fill, 0);
264+ static void calc_fill(const dfa_t &dfa, const std::vector<bool> &trivial,
265+ std::vector<StackItem> &stack_dfs, std::vector<size_t> &fill)
266+ {
267+ const size_t nstates = dfa.states.size();
268+ fill.resize(nstates, SCC_UND);
269+
270+ StackItem x0 = {0, 0, SCC_INF};
271+ stack_dfs.push_back(x0);
272+
273+ while (!stack_dfs.empty()) {
274+ const size_t i = stack_dfs.back().state;
275+ size_t c = stack_dfs.back().symbol;
276+ stack_dfs.pop_back();
277+
278+ const size_t *arcs = dfa.states[i]->arcs;
279+
280+ if (c == 0) {
281+ // DFS recursive enter
282+ if (fill[i] != SCC_UND) continue;
283+ fill[i] = 0;
284+ }
285+ else {
286+ // DFS recursive return (from one of successor states)
287+ const size_t j = arcs[c - 1];
288+ //DASSERT(fill[i] != SCC_UND && fill[j] != SCC_UND);
289+ fill[i] = std::max(fill[i], 1 + (trivial[j] ? fill[j] : 0));
290+ }
291+
292+ // find the next successor state that hasn't been visited yet
293+ for (; c < dfa.nchars; ++c) {
294+ const size_t j = arcs[c];
295+ if (j != dfa_t::NIL) break;
296+ }
297+
298+ if (c < dfa.nchars) {
299+ // recurse into the next successor state
300+ StackItem x1 = {i, c + 1, SCC_INF};
301+ stack_dfs.push_back(x1);
302+ StackItem x2 = {arcs[c], 0, SCC_INF};
303+ stack_dfs.push_back(x2);
304+ }
305+ }
306
307- // The following states must trigger YYFILL:
308- // - inital state
309- // - all states in non-trivial SCCs
310- // for other states, reset YYFILL argument to zero
311- for (size_t i = 1; i < size; ++i)
312- {
313- if (trivial[i])
314- {
315- fill[i] = 0;
316- }
317- }
318-}
319+ // The following states must trigger YYFILL:
320+ // - inital state
321+ // - all states in non-trivial SCCs
322+ // for other states, reset YYFILL argument to zero
323+ for (size_t i = 1; i < nstates; ++i) {
324+ if (trivial[i]) {
325+ fill[i] = 0;
326+ }
327+ }
328+ }
329
330+ } // anonymous namespace
331+
332+ void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill)
333+ {
334+ const size_t nstates = dfa.states.size();
335+ std::vector<bool> trivial(nstates, false);
336+ std::vector<StackItem> stack_dfs;
337+ stack_dfs.reserve(nstates);
338+
339+ // find DFA states that belong to non-trivial SCC
340+ scc(dfa, trivial, stack_dfs);
341+
342+ // for each DFA state, calculate YYFILL argument:
343+ // maximal path length to the next YYFILL state
344+ calc_fill(dfa, trivial, stack_dfs, fill);
345+ }
346+
347 } // namespace re2c
diff --git a/meta/recipes-support/re2c/re2c/CVE-2018-21232-2.patch b/meta/recipes-support/re2c/re2c/CVE-2018-21232-2.patch
new file mode 100644
index 0000000000..820a6decbc
--- /dev/null
+++ b/meta/recipes-support/re2c/re2c/CVE-2018-21232-2.patch
@@ -0,0 +1,243 @@
1From 7b5643476bd99c994c4f51b8143f942982d85521 Mon Sep 17 00:00:00 2001
2From: Ulya Trofimovich <skvadrik@gmail.com>
3Date: Wed, 22 Apr 2020 22:37:24 +0100
4Subject: [PATCH] Rewrite recursion into iteration (fixed tags computation).
5
6This is to avoid stack overflow on large RE (especially on instrumented
7builds that have larger stack frames, like AddressSanitizer).
8
9Partial fix for #219 "overflow-1.re test fails on system with small stack".
10
11Upstream-Stauts: Backport:
12https://github.com/skvadrik/re2c/commit/7b5643476bd99c994c4f51b8143f942982d85521
13
14CVE: CVE-2018-21232
15
16Signed-off-by: Davide Gardenal <davide.gardenal@huawei.com>
17---
18diff --git a/src/re/tag.cc b/src/re/tag.cc
19--- a/src/re/tag.cc (revision e58939b34bb4c37cd990f82dc286f21cb405743e)
20+++ b/src/re/tag.cc (date 1646986908580)
21@@ -6,7 +6,7 @@
22 {
23
24 const size_t Tag::RIGHTMOST = std::numeric_limits<size_t>::max();
25-const size_t Tag::VARDIST = std::numeric_limits<size_t>::max();
26+const uint32_t Tag::VARDIST = std::numeric_limits<uint32_t>::max();
27 const size_t Tag::FICTIVE = Tag::RIGHTMOST - 1;
28
29 } // namespace re2c
30
31
32diff --git a/src/re/tag.h b/src/re/tag.h
33--- a/src/re/tag.h (revision e58939b34bb4c37cd990f82dc286f21cb405743e)
34+++ b/src/re/tag.h (date 1646986922376)
35@@ -19,7 +19,7 @@
36 struct Tag
37 {
38 static const size_t RIGHTMOST;
39- static const size_t VARDIST;
40+ static const uint32_t VARDIST;
41 static const size_t FICTIVE;
42
43 const std::string *name;
44
45
46diff --git a/src/re/fixed_tags.cc b/src/re/fixed_tags.cc
47--- a/src/re/fixed_tags.cc (revision e58939b34bb4c37cd990f82dc286f21cb405743e)
48+++ b/src/re/fixed_tags.cc (date 1646991137317)
49@@ -7,78 +7,131 @@
50 #include "src/re/tag.h"
51
52 namespace re2c {
53+namespace {
54
55 /* note [fixed and variable tags]
56 *
57- * If distance between two tags is constant (equal for all strings that
58- * match the given regexp), then lexer only needs to track one of them:
59- * the second tag equals the first tag plus static offset.
60+ * If distance between two tags is constant (equal for all strings that match
61+ * the given regexp), then lexer only needs to track one of them: the second
62+ * tag equals the first tag plus static offset.
63 *
64- * However, this optimization is applied only to tags in top-level
65- * concatenation, because other tags may be uninitialized and we don't
66- * want to mess with conditional calculation of fixed tags.
67- *
68+ * This optimization is applied only to tags in top-level concatenation,
69+ * because in other cases the base tag may be NULL, and the calculation of
70+ * the fixed tag value is not as simple as substracting a fixed offset.
71 * Furthermore, fixed tags are fobidden with generic API because it cannot
72- * express fixed offsets.
73- *
74- * Tags with history also cannot be fixed.
75+ * express fixed offsets. M-tags (with history) also cannot be fixed.
76 *
77 * Another special case is fictive tags (those that exist only to impose
78- * hierarchical laws of POSIX disambiguation). We treat them as fixed
79- * in order to suppress code generation.
80+ * hierarchical laws of POSIX disambiguation). We treat them as fixed in order
81+ * to suppress code generation.
82 */
83
84-static void find_fixed_tags(RE *re, std::vector<Tag> &tags,
85- size_t &dist, size_t &base, bool toplevel)
86+struct StackItem {
87+ RE *re; // current sub-RE
88+ uint32_t dist; // distance backup for alternative, unused for other RE
89+ uint8_t succ; // index of the next successor to be visited
90+ bool toplevel; // if this sub-RE is in top-level concatenation
91+};
92+
93+static void find_fixed_tags(RESpec &spec, std::vector<StackItem> &stack, RE *re0)
94 {
95- switch (re->type) {
96- case RE::NIL: break;
97- case RE::SYM:
98- if (dist != Tag::VARDIST) ++dist;
99- break;
100- case RE::ALT: {
101- size_t d1 = dist, d2 = dist;
102- find_fixed_tags(re->alt.re1, tags, d1, base, false);
103- find_fixed_tags(re->alt.re2, tags, d2, base, false);
104- dist = (d1 == d2) ? d1 : Tag::VARDIST;
105- break;
106- }
107- case RE::CAT:
108- find_fixed_tags(re->cat.re2, tags, dist, base, toplevel);
109- find_fixed_tags(re->cat.re1, tags, dist, base, toplevel);
110- break;
111- case RE::ITER:
112- find_fixed_tags(re->iter.re, tags, dist, base, false);
113- dist = Tag::VARDIST;
114- break;
115- case RE::TAG: {
116- // see note [fixed and variable tags]
117- Tag &tag = tags[re->tag.idx];
118- if (fictive(tag)) {
119- tag.base = tag.dist = 0;
120- } else if (toplevel && dist != Tag::VARDIST && !history(tag)) {
121- tag.base = base;
122- tag.dist = dist;
123- } else if (toplevel) {
124- base = re->tag.idx;
125- dist = 0;
126- }
127- if (trailing(tag)) dist = 0;
128- break;
129- }
130- }
131+ static const uint32_t VARDIST = Tag::VARDIST;
132+ bool toplevel = spec.opts->input_api != INPUT_CUSTOM;
133+
134+ // base tag, intially the fake "rightmost tag" (the end of RE)
135+ size_t base = Tag::RIGHTMOST;
136+
137+ // the distance to the nearest top-level tag to the right (base tag)
138+ uint32_t dist = 0;
139+
140+ const StackItem i0 = {re0, VARDIST, 0, toplevel};
141+ stack.push_back(i0);
142+
143+ while (!stack.empty()) {
144+ const StackItem i = stack.back();
145+ stack.pop_back();
146+ RE *re = i.re;
147+
148+ if (re->type == RE::SYM) {
149+ if (dist != VARDIST) ++dist;
150+ }
151+ else if (re->type == RE::ALT) {
152+ if (i.succ == 0) {
153+ // save the current distance on stack (from the alternative end
154+ // to base) and recurse into the left sub-RE
155+ StackItem k = {re, dist, 1, i.toplevel};
156+ stack.push_back(k);
157+ StackItem j = {re->alt.re1, VARDIST, 0, false};
158+ stack.push_back(j);
159+ }
160+ else if (i.succ == 1) {
161+ // save the current distance on stack (from the left sub-RE to
162+ // base), reset distance to the distance popped from stack (from
163+ // the alternative end to base), recurse into the right sub-RE
164+ StackItem k = {re, dist, 2, i.toplevel};
165+ stack.push_back(k);
166+ StackItem j = {re->alt.re2, VARDIST, 0, false};
167+ stack.push_back(j);
168+ dist = i.dist;
169+ }
170+ else {
171+ // both sub-RE visited, compare the distance on stack (from the
172+ // left sub-RE to base) to the current distance (from the right
173+ // sub-RE to base), if not equal set variable distance
174+ dist = (i.dist == dist) ? i.dist : VARDIST;
175+ }
176+ }
177+ else if (re->type == RE::ITER) {
178+ if (i.succ == 0) {
179+ // recurse into the sub-RE
180+ StackItem k = {re, VARDIST, 1, i.toplevel};
181+ stack.push_back(k);
182+ StackItem j = {re->iter.re, VARDIST, 0, false};
183+ stack.push_back(j);
184+ }
185+ else {
186+ // sub-RE visited, assume unknown number of iterations
187+ // TODO: find precise distance for fixed repetition counter
188+ dist = VARDIST;
189+ }
190+ }
191+ else if (re->type == RE::CAT) {
192+ // the right sub-RE is pushed on stack after the left sub-RE and
193+ // visited earlier (because distance is computed from right to left)
194+ StackItem j1 = {re->cat.re1, VARDIST, 0, i.toplevel};
195+ stack.push_back(j1);
196+ StackItem j2 = {re->cat.re2, VARDIST, 0, i.toplevel};
197+ stack.push_back(j2);
198+ }
199+ else if (re->type == RE::TAG) {
200+ // see note [fixed and variable tags]
201+ Tag &tag = spec.tags[re->tag.idx];
202+ if (fictive(tag)) {
203+ tag.base = tag.dist = 0;
204+ }
205+ else if (i.toplevel && dist != VARDIST && !history(tag)) {
206+ tag.base = base;
207+ tag.dist = dist;
208+ }
209+ else if (i.toplevel) {
210+ base = re->tag.idx;
211+ dist = 0;
212+ }
213+ if (trailing(tag)) {
214+ dist = 0;
215+ }
216+ }
217+ }
218 }
219+
220+} // anonymous namespace
221
222-void find_fixed_tags(RESpec &spec)
223-{
224- const bool generic = spec.opts->input_api == INPUT_CUSTOM;
225- std::vector<RE*>::iterator
226- i = spec.res.begin(),
227- e = spec.res.end();
228- for (; i != e; ++i) {
229- size_t base = Tag::RIGHTMOST, dist = 0;
230- find_fixed_tags(*i, spec.tags, dist, base, !generic);
231- }
232-}
233+ void find_fixed_tags(RESpec &spec)
234+ {
235+ std::vector<StackItem> stack;
236+ for (std::vector<RE*>::iterator i = spec.res.begin(); i != spec.res.end(); ++i) {
237+ find_fixed_tags(spec, stack, *i);
238+ }
239+ }
240
241-} // namespace re2c
242+} // namespace re2c
243\ No newline at end of file
diff --git a/meta/recipes-support/re2c/re2c/CVE-2018-21232-3.patch b/meta/recipes-support/re2c/re2c/CVE-2018-21232-3.patch
new file mode 100644
index 0000000000..f942e21cba
--- /dev/null
+++ b/meta/recipes-support/re2c/re2c/CVE-2018-21232-3.patch
@@ -0,0 +1,156 @@
1From 4d9c809355b574f2a58eac119f5e076c48e4d1e2 Mon Sep 17 00:00:00 2001
2From: Ulya Trofimovich <skvadrik@gmail.com>
3Date: Thu, 23 Apr 2020 22:16:51 +0100
4Subject: [PATCH] Rewrite recursion into iteration (nullable RE).
5
6This is to avoid stack overflow on large RE (especially on instrumented
7builds that have larger stack frames, like AddressSanitizer).
8
9Partial fix for #219 "overflow-1.re test fails on system with small stack".
10
11Upstream-Status: Backport:
12https://github.com/skvadrik/re2c/commit/4d9c809355b574f2a58eac119f5e076c48e4d1e2
13
14CVE: CVE-2018-21232
15
16Signed-off-by: Davide Gardenal <davide.gardenal@huawei.com>
17---
18diff --git a/src/re/nullable.cc b/src/re/nullable.cc
19--- a/src/re/nullable.cc (revision e58939b34bb4c37cd990f82dc286f21cb405743e)
20+++ b/src/re/nullable.cc (date 1647253886226)
21@@ -9,43 +9,100 @@
22 #include "src/re/tag.h"
23
24 namespace re2c {
25+ namespace {
26+
27+ struct StackItem {
28+ const RE *re; // current sub-RE
29+ uint8_t succ; // index of the next sucessor to be visited
30+ };
31
32-static bool nullable(const RESpec &spec, const RE *re, bool &trail)
33-{
34- if (trail) return true;
35+ static bool nullable(const RESpec &spec, std::vector<StackItem> &stack, const RE *re0)
36+ {
37+ // the "nullable" status of the last sub-RE visited by DFS
38+ bool null = false;
39
40- switch (re->type) {
41- case RE::NIL: return true;
42- case RE::SYM: return false;
43- case RE::ITER:
44- return nullable(spec, re->iter.re, trail);
45- case RE::TAG:
46- trail |= trailing(spec.tags[re->tag.idx]);
47- return true;
48- case RE::ALT:
49- return nullable(spec, re->alt.re1, trail)
50- || nullable(spec, re->alt.re2, trail);
51- case RE::CAT:
52- return nullable(spec, re->cat.re1, trail)
53- && nullable(spec, re->cat.re2, trail);
54- }
55- return false; /* unreachable */
56-}
57+ const StackItem i0 = {re0, 0};
58+ stack.push_back(i0);
59+
60+ while (!stack.empty()) {
61+ const StackItem i = stack.back();
62+ stack.pop_back();
63+
64+ const RE *re = i.re;
65+ if (re->type == RE::NIL) {
66+ null = true;
67+ }
68+ else if (re->type == RE::SYM) {
69+ null = false;
70+ }
71+ else if (re->type == RE::TAG) {
72+ null = true;
73
74-/*
75- * warn about rules that match empty string
76- * (including rules with nonempty trailing context)
77- * false positives on partially self-shadowed rules like [^]?
78- */
79-void warn_nullable(const RESpec &spec, const std::string &cond)
80-{
81- const size_t nre = spec.res.size();
82- for (size_t i = 0; i < nre; ++i) {
83- bool trail = false;
84- if (nullable(spec, spec.res[i], trail)) {
85- spec.warn.match_empty_string(spec.rules[i].code->fline, cond);
86- }
87- }
88-}
89+ // Trailing context is always in top-level concatenation, and sub-RE
90+ // are visited from left to right. Since we are here, sub-RE to the
91+ // left of the trailing context is nullable (otherwise we would not
92+ // recurse into the right sub-RE), therefore the whole RE is nullable.
93+ if (trailing(spec.tags[re->tag.idx])) {
94+ //DASSERT(stack.size() == 1 && stack.back().re->type == RE::CAT);
95+ stack.pop_back();
96+ break;
97+ }
98+ }
99+ else if (re->type == RE::ALT) {
100+ if (i.succ == 0) {
101+ // recurse into the left sub-RE
102+ StackItem k = {re, 1};
103+ stack.push_back(k);
104+ StackItem j = {re->alt.re1, 0};
105+ stack.push_back(j);
106+ }
107+ else if (!null) {
108+ // if the left sub-RE is nullable, so is alternative, so stop
109+ // recursion; otherwise recurse into the right sub-RE
110+ StackItem j = {re->alt.re2, 0};
111+ stack.push_back(j);
112+ }
113+ }
114+ else if (re->type == RE::CAT) {
115+ if (i.succ == 0) {
116+ // recurse into the left sub-RE
117+ StackItem k = {re, 1};
118+ stack.push_back(k);
119+ StackItem j = {re->cat.re1, 0};
120+ stack.push_back(j);
121+ }
122+ else if (null) {
123+ // if the left sub-RE is not nullable, neither is concatenation,
124+ // so stop recursion; otherwise recurse into the right sub-RE
125+ StackItem j = {re->cat.re2, 0};
126+ stack.push_back(j);
127+ }
128+ }
129+ else if (re->type == RE::ITER) {
130+ // iteration is nullable if the sub-RE is nullable
131+ // (zero repetitions is represented with alternative)
132+ StackItem j = {re->iter.re, 0};
133+ stack.push_back(j);
134+ }
135+ }
136+
137+ //DASSERT(stack.empty());
138+ return null;
139+ }
140+
141+ } // anonymous namespace
142+
143+// Warn about rules that match empty string (including rules with nonempty
144+// trailing context). False positives on partially self-shadowed rules like [^]?
145+ void warn_nullable(const RESpec &spec, const std::string &cond)
146+ {
147+ std::vector<StackItem> stack;
148+ const size_t nre = spec.res.size();
149+ for (size_t i = 0; i < nre; ++i) {
150+ if (nullable(spec, stack, spec.res[i])) {
151+ spec.warn.match_empty_string(spec.rules[i].code->fline, cond);
152+ }
153+ }
154+ }
155
156 } // namespace re2c
diff --git a/meta/recipes-support/re2c/re2c/CVE-2018-21232-4.patch b/meta/recipes-support/re2c/re2c/CVE-2018-21232-4.patch
new file mode 100644
index 0000000000..ee8d84b1bc
--- /dev/null
+++ b/meta/recipes-support/re2c/re2c/CVE-2018-21232-4.patch
@@ -0,0 +1,166 @@
1From 89be91f3df00657261870adbc590209fdb2bc405 Mon Sep 17 00:00:00 2001
2From: Ulya Trofimovich <skvadrik@gmail.com>
3Date: Thu, 23 Apr 2020 23:02:21 +0100
4Subject: [PATCH] Rewrite recursion into iteration (estimation of NFA size for
5 RE).
6
7This is to avoid stack overflow on large RE (especially on instrumented
8builds that have larger stack frames, like AddressSanitizer).
9
10Partial fix for #219 "overflow-1.re test fails on system with small stack".
11
12Upstram-Status: Backport:
13https://github.com/skvadrik/re2c/commit/89be91f3df00657261870adbc590209fdb2bc405
14
15CVE: CVE-2018-21232
16
17Signed-off-by: Davide Gardenal <davide.gardenal@huawei.com>
18---
19diff --git a/src/nfa/estimate_size.cc b/src/nfa/estimate_size.cc
20--- a/src/nfa/estimate_size.cc (revision e58939b34bb4c37cd990f82dc286f21cb405743e)
21+++ b/src/nfa/estimate_size.cc (date 1647005399735)
22@@ -6,41 +6,113 @@
23 #include "src/re/re.h"
24
25 namespace re2c {
26+namespace {
27+
28+struct StackItem {
29+ const RE *re; // current sub-RE
30+ uint32_t size; // size of the sub-RE (only for alternative and concatenation)
31+ uint8_t succ; // index of the next sucessor to be visited
32+};
33
34-static size_t estimate(const RE *re)
35+static uint32_t estimate_re_size(const RE *re0, std::vector<StackItem> &stack)
36 {
37- switch (re->type) {
38- case RE::NIL: return 0;
39- case RE::SYM: return 1;
40- case RE::TAG: return 1;
41- case RE::ALT:
42- return estimate(re->alt.re1)
43- + estimate(re->alt.re2)
44- + 1;
45- case RE::CAT:
46- return estimate(re->cat.re1)
47- + estimate(re->cat.re2);
48- case RE::ITER: {
49- const size_t
50- iter = estimate(re->iter.re),
51- min = re->iter.min,
52- max = re->iter.max;
53- return max == AST::MANY
54- ? iter * min + 1
55- : iter * max + (max - min);
56- }
57- }
58- return 0; /* unreachable */
59-}
60+ // the estimated size of the last sub-RE visited by DFS
61+ uint32_t size = 0;
62+
63+ const StackItem i0 = {re0, 0, 0};
64+ stack.push_back(i0);
65+
66+ while (!stack.empty()) {
67+ const StackItem i = stack.back();
68+ stack.pop_back();
69+
70+ const RE *re = i.re;
71+ if (re->type == RE::NIL) {
72+ size = 0;
73+ }
74+ else if (re->type == RE::SYM || re->type == RE::TAG) {
75+ size = 1;
76+ }
77+ else if (re->type == RE::ALT) {
78+ if (i.succ == 0) {
79+ // recurse into the left sub-RE
80+ StackItem k = {re, 0, 1};
81+ stack.push_back(k);
82+ StackItem j = {re->alt.re1, 0, 0};
83+ stack.push_back(j);
84+ }
85+ else if (i.succ == 1) {
86+ // recurse into the right sub-RE
87+ StackItem k = {re, size, 2};
88+ stack.push_back(k);
89+ StackItem j = {re->alt.re2, 0, 0};
90+ stack.push_back(j);
91+ }
92+ else {
93+ // both sub-RE visited, recursive return
94+ size = i.size // left sub-RE (saved on stack)
95+ + size // right sub-RE (just visited by DFS)
96+ + 1; // additional state for alternative
97+ }
98+ }
99+ else if (re->type == RE::CAT) {
100+ if (i.succ == 0) {
101+ // recurse into the left sub-RE
102+ StackItem k = {re, 0, 1};
103+ stack.push_back(k);
104+ StackItem j = {re->cat.re1, 0, 0};
105+ stack.push_back(j);
106+ }
107+ else if (i.succ == 1) {
108+ // recurse into the right sub-RE
109+ StackItem k = {re, size, 2};
110+ stack.push_back(k);
111+ StackItem j = {re->cat.re2, 0, 0};
112+ stack.push_back(j);
113+ }
114+ else {
115+ // both sub-RE visited, recursive return
116+ size = i.size // left sub-RE (saved on stack)
117+ + size; // right sub-RE (just visited by DFS)
118+ }
119+ }
120+ else if (re->type == RE::ITER) {
121+ if (i.succ == 0) {
122+ // recurse into the sub-RE
123+ StackItem k = {re, 0, 1};
124+ stack.push_back(k);
125+ StackItem j = {re->iter.re, 0, 0};
126+ stack.push_back(j);
127+ }
128+ else {
129+ // sub-RE visited, recursive return
130+ const uint32_t min = re->iter.min, max = re->iter.max;
131+ size = max == AST::MANY
132+ ? size * min + 1
133+ : size * max + (max - min);
134+ }
135+ }
136+ }
137+
138+ //DASSERT(stack.empty());
139+ return size;
140+}
141+
142+} // anonymous namespace
143
144 size_t estimate_size(const std::vector<RE*> &res)
145 {
146- const size_t nre = res.size();
147- size_t size = nre - 1;
148- for (size_t i = 0; i < nre; ++i) {
149- size += estimate(res[i]) + 1;
150- }
151- return size;
152+ std::vector<StackItem> stack;
153+
154+ const size_t nre = res.size();
155+ //DASSERT(nre > 0);
156+ size_t size = nre - 1;
157+
158+ for (size_t i = 0; i < nre; ++i) {
159+ size += estimate_re_size(res[i], stack) + 1;
160+ }
161+
162+ return size;
163 }
164
165 } // namespace re2c
166
diff --git a/meta/recipes-support/re2c/re2c_1.0.1.bb b/meta/recipes-support/re2c/re2c_1.0.1.bb
index 35200ecde8..ca5c33f151 100644
--- a/meta/recipes-support/re2c/re2c_1.0.1.bb
+++ b/meta/recipes-support/re2c/re2c_1.0.1.bb
@@ -1,11 +1,17 @@
1SUMMARY = "Tool for writing very fast and very flexible scanners" 1SUMMARY = "Tool for writing very fast and very flexible scanners"
2HOMEPAGE = "http://re2c.sourceforge.net/" 2DESCRIPTION = "A free and open-source lexer generator for C, C++ and Go. It compiles regular expressions to determinisitic finite automata and encodes the automata in the form of a program in the target language. Unlike any other such tool, re2c focuses on generating high efficient code for regular expression matching. As a result this allows a much broader range of use than any traditional lexer."
3HOMEPAGE = "http://re2c.org/"
4BUGTRACKER = "https://github.com/skvadrik/re2c/issues"
3AUTHOR = "Marcus Börger <helly@users.sourceforge.net>" 5AUTHOR = "Marcus Börger <helly@users.sourceforge.net>"
4SECTION = "devel" 6SECTION = "devel"
5LICENSE = "PD" 7LICENSE = "PD"
6LIC_FILES_CHKSUM = "file://README;beginline=146;md5=881056c9add17f8019ccd8c382ba963a" 8LIC_FILES_CHKSUM = "file://README;beginline=146;md5=881056c9add17f8019ccd8c382ba963a"
7 9
8SRC_URI = "https://github.com/skvadrik/re2c/releases/download/${PV}/${BPN}-${PV}.tar.gz" 10SRC_URI = "https://github.com/skvadrik/re2c/releases/download/${PV}/${BPN}-${PV}.tar.gz \
11file://CVE-2018-21232-1.patch \
12file://CVE-2018-21232-2.patch \
13file://CVE-2018-21232-3.patch \
14file://CVE-2018-21232-4.patch"
9SRC_URI[md5sum] = "e2c6cf52fc6a21595f21bc82db5324f8" 15SRC_URI[md5sum] = "e2c6cf52fc6a21595f21bc82db5324f8"
10SRC_URI[sha256sum] = "605058d18a00e01bfc32aebf83af35ed5b13180b4e9f279c90843afab2c66c7c" 16SRC_URI[sha256sum] = "605058d18a00e01bfc32aebf83af35ed5b13180b4e9f279c90843afab2c66c7c"
11UPSTREAM_CHECK_URI = "https://github.com/skvadrik/re2c/releases" 17UPSTREAM_CHECK_URI = "https://github.com/skvadrik/re2c/releases"