summaryrefslogtreecommitdiffstats
path: root/meta/recipes-support/re2c/re2c/CVE-2018-21232-1.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-support/re2c/re2c/CVE-2018-21232-1.patch')
-rw-r--r--meta/recipes-support/re2c/re2c/CVE-2018-21232-1.patch347
1 files changed, 347 insertions, 0 deletions
diff --git a/meta/recipes-support/re2c/re2c/CVE-2018-21232-1.patch b/meta/recipes-support/re2c/re2c/CVE-2018-21232-1.patch
new file mode 100644
index 0000000000..b7dcaefad3
--- /dev/null
+++ b/meta/recipes-support/re2c/re2c/CVE-2018-21232-1.patch
@@ -0,0 +1,347 @@
1From fd634998f813340768c333cdad638498602856e5 Mon Sep 17 00:00:00 2001
2From: Ulya Trofimovich <skvadrik@gmail.com>
3Date: Tue, 21 Apr 2020 21:28:32 +0100
4Subject: [PATCH] Rewrite recursion into iteration (Tarjan's SCC algorithm and
5 YYFILL states).
6
7This is to avoid stack overflow on large RE (especially on instrumented
8builds that have larger stack frames, like AddressSanitizer).
9
10Stack overflow reported by Agostino Sarubbo.
11Related to #219 "overflow-1.re test fails on system with small stack".
12
13Upstream-Status: Backport:
14https://github.com/skvadrik/re2c/commit/fd634998f813340768c333cdad638498602856e5
15
16CVE: CVE-2018-21232
17
18Signed-off-by: Davide Gardenal <davide.gardenal@huawei.com>
19---
20diff --git a/src/dfa/fillpoints.cc b/src/dfa/fillpoints.cc
21--- a/src/dfa/fillpoints.cc (revision e58939b34bb4c37cd990f82dc286f21cb405743e)
22+++ b/src/dfa/fillpoints.cc (date 1646929180243)
23@@ -5,151 +5,186 @@
24
25 #include "src/dfa/dfa.h"
26
27-namespace re2c
28-{
29+
30+/*
31+ * note [finding strongly connected components of DFA]
32+ *
33+ * A slight modification of Tarjan's algorithm.
34+ *
35+ * The algorithm traverses the DFA in depth-first order. It maintains a stack
36+ * of states that have already been visited but haven't been assigned to an SCC
37+ * yet. For each state the algorithm calculates 'lowlink': index of the highest
38+ * ancestor state reachable in one step from a descendant of this state.
39+ * Lowlink is used to determine when a set of states should be popped off stack
40+ * into a new SCC.
41+ *
42+ * We use lowlink to hold different kinds of information:
43+ * - values in range [0 .. stack size] mean that the state is on stack (a
44+ * link to a state with the smallest index reachable from this one)
45+ * - SCC_UND means that this state has not been visited yet
46+ * - SCC_INF means that this state has already been popped off stack
47+ *
48+ * We use stack size (rather than topological sort index) as a unique index of
49+ * the state on stack. This is safe because the indices of states on stack are
50+ * unique and less than the indices of states that have been popped off stack
51+ * (SCC_INF).
52+ */
53+
54+namespace re2c {
55+ namespace {
56
57-static const size_t SCC_INF = std::numeric_limits<size_t>::max();
58-static const size_t SCC_UND = SCC_INF - 1;
59+ static const size_t SCC_INF = std::numeric_limits<size_t>::max();
60+ static const size_t SCC_UND = SCC_INF - 1;
61
62-static bool loopback(size_t node, size_t narcs, const size_t *arcs)
63-{
64- for (size_t i = 0; i < narcs; ++i)
65- {
66- if (arcs[i] == node)
67- {
68- return true;
69- }
70- }
71- return false;
72-}
73+ static bool loopback(size_t state, size_t narcs, const size_t *arcs)
74+ {
75+ for (size_t i = 0; i < narcs; ++i) {
76+ if (arcs[i] == state) return true;
77+ }
78+ return false;
79+ }
80
81-/*
82- * node [finding strongly connected components of DFA]
83- *
84- * A slight modification of Tarjan's algorithm.
85- *
86- * The algorithm walks graph in deep-first order. It maintains a stack
87- * of nodes that have already been visited but haven't been assigned to
88- * SCC yet. For each node the algorithm calculates 'lowlink': index of
89- * the highest ancestor node reachable in one step from a descendant of
90- * the node. Lowlink is used to determine when a set of nodes should be
91- * popped off the stack into a new SCC.
92- *
93- * We use lowlink to hold different kinds of information:
94- * - values in range [0 .. stack size] mean that this node is on stack
95- * (link to a node with the smallest index reachable from this one)
96- * - SCC_UND means that this node has not been visited yet
97- * - SCC_INF means that this node has already been popped off stack
98- *
99- * We use stack size (rather than topological sort index) as unique index
100- * of a node on stack. This is safe because indices of nodes on stack are
101- * still unique and less than indices of nodes that have been popped off
102- * stack (SCC_INF).
103- *
104- */
105-static void scc(
106- const dfa_t &dfa,
107- std::stack<size_t> &stack,
108- std::vector<size_t> &lowlink,
109- std::vector<bool> &trivial,
110- size_t i)
111-{
112- const size_t link = stack.size();
113- lowlink[i] = link;
114- stack.push(i);
115+ struct StackItem {
116+ size_t state; // current state
117+ size_t symbol; // next arc to be visited in this state
118+ size_t link; // Tarjan's "lowlink"
119+ };
120+
121+// Tarjan's algorithm
122+ static void scc(const dfa_t &dfa, std::vector<bool> &trivial,
123+ std::vector<StackItem> &stack_dfs)
124+ {
125+ std::vector<size_t> lowlink(dfa.states.size(), SCC_UND);
126+ std::stack<size_t> stack;
127+
128+ StackItem x0 = {0, 0, 0};
129+ stack_dfs.push_back(x0);
130+
131+ while (!stack_dfs.empty()) {
132+ const size_t i = stack_dfs.back().state;
133+ size_t c = stack_dfs.back().symbol;
134+ size_t link = stack_dfs.back().link;
135+ stack_dfs.pop_back();
136+
137+ const size_t *arcs = dfa.states[i]->arcs;
138+
139+ if (c == 0) {
140+ // DFS recursive enter
141+ //DASSERT(lowlink[i] == SCC_UND);
142+ link = lowlink[i] = stack.size();
143+ stack.push(i);
144+ }
145+ else {
146+ // DFS recursive return (from one of successor states)
147+ const size_t j = arcs[c - 1];
148+ //DASSERT(lowlink[j] != SCC_UND);
149+ lowlink[i] = std::min(lowlink[i], lowlink[j]);
150+ }
151
152- const size_t *arcs = dfa.states[i]->arcs;
153- for (size_t c = 0; c < dfa.nchars; ++c)
154- {
155- const size_t j = arcs[c];
156- if (j != dfa_t::NIL)
157- {
158- if (lowlink[j] == SCC_UND)
159- {
160- scc(dfa, stack, lowlink, trivial, j);
161- }
162- if (lowlink[j] < lowlink[i])
163- {
164- lowlink[i] = lowlink[j];
165- }
166- }
167- }
168+ // find the next successor state that hasn't been visited yet
169+ for (; c < dfa.nchars; ++c) {
170+ const size_t j = arcs[c];
171+ if (j != dfa_t::NIL) {
172+ if (lowlink[j] == SCC_UND) {
173+ break;
174+ }
175+ lowlink[i] = std::min(lowlink[i], lowlink[j]);
176+ }
177+ }
178
179- if (lowlink[i] == link)
180- {
181- // SCC is non-trivial (has loops) iff it either:
182- // - consists of multiple nodes (they all must be interconnected)
183- // - consists of single node which loops back to itself
184- trivial[i] = i == stack.top()
185- && !loopback(i, dfa.nchars, arcs);
186+ if (c < dfa.nchars) {
187+ // recurse into the next successor state
188+ StackItem x1 = {i, c + 1, link};
189+ stack_dfs.push_back(x1);
190+ StackItem x2 = {arcs[c], 0, SCC_UND};
191+ stack_dfs.push_back(x2);
192+ }
193+ else if (lowlink[i] == link) {
194+ // all successors have been visited
195+ // SCC is non-trivial (has loops) if either:
196+ // - it contains multiple interconnected states
197+ // - it contains a single self-looping state
198+ trivial[i] = i == stack.top() && !loopback(i, dfa.nchars, arcs);
199
200- size_t j;
201- do
202- {
203- j = stack.top();
204- stack.pop();
205- lowlink[j] = SCC_INF;
206- }
207- while (j != i);
208- }
209-}
210+ for (;;) {
211+ const size_t j = stack.top();
212+ stack.pop();
213+ lowlink[j] = SCC_INF;
214+ if (i == j) break;
215+ }
216+ }
217+ }
218+ }
219
220-static void calc_fill(
221- const dfa_t &dfa,
222- const std::vector<bool> &trivial,
223- std::vector<size_t> &fill,
224- size_t i)
225-{
226- if (fill[i] == SCC_UND)
227- {
228- fill[i] = 0;
229- const size_t *arcs = dfa.states[i]->arcs;
230- for (size_t c = 0; c < dfa.nchars; ++c)
231- {
232- const size_t j = arcs[c];
233- if (j != dfa_t::NIL)
234- {
235- calc_fill(dfa, trivial, fill, j);
236- size_t max = 1;
237- if (trivial[j])
238- {
239- max += fill[j];
240- }
241- if (max > fill[i])
242- {
243- fill[i] = max;
244- }
245- }
246- }
247- }
248-}
249-
250-void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill)
251-{
252- const size_t size = dfa.states.size();
253-
254- // find DFA states that belong to non-trivial SCC
255- std::stack<size_t> stack;
256- std::vector<size_t> lowlink(size, SCC_UND);
257- std::vector<bool> trivial(size, false);
258- scc(dfa, stack, lowlink, trivial, 0);
259-
260- // for each DFA state, calculate YYFILL argument:
261- // maximal path length to the next YYFILL state
262- fill.resize(size, SCC_UND);
263- calc_fill(dfa, trivial, fill, 0);
264+ static void calc_fill(const dfa_t &dfa, const std::vector<bool> &trivial,
265+ std::vector<StackItem> &stack_dfs, std::vector<size_t> &fill)
266+ {
267+ const size_t nstates = dfa.states.size();
268+ fill.resize(nstates, SCC_UND);
269+
270+ StackItem x0 = {0, 0, SCC_INF};
271+ stack_dfs.push_back(x0);
272+
273+ while (!stack_dfs.empty()) {
274+ const size_t i = stack_dfs.back().state;
275+ size_t c = stack_dfs.back().symbol;
276+ stack_dfs.pop_back();
277+
278+ const size_t *arcs = dfa.states[i]->arcs;
279+
280+ if (c == 0) {
281+ // DFS recursive enter
282+ if (fill[i] != SCC_UND) continue;
283+ fill[i] = 0;
284+ }
285+ else {
286+ // DFS recursive return (from one of successor states)
287+ const size_t j = arcs[c - 1];
288+ //DASSERT(fill[i] != SCC_UND && fill[j] != SCC_UND);
289+ fill[i] = std::max(fill[i], 1 + (trivial[j] ? fill[j] : 0));
290+ }
291+
292+ // find the next successor state (states already visited are skipped on DFS enter)
293+ for (; c < dfa.nchars; ++c) {
294+ const size_t j = arcs[c];
295+ if (j != dfa_t::NIL) break;
296+ }
297+
298+ if (c < dfa.nchars) {
299+ // recurse into the next successor state
300+ StackItem x1 = {i, c + 1, SCC_INF};
301+ stack_dfs.push_back(x1);
302+ StackItem x2 = {arcs[c], 0, SCC_INF};
303+ stack_dfs.push_back(x2);
304+ }
305+ }
306
307- // The following states must trigger YYFILL:
308- // - inital state
309- // - all states in non-trivial SCCs
310- // for other states, reset YYFILL argument to zero
311- for (size_t i = 1; i < size; ++i)
312- {
313- if (trivial[i])
314- {
315- fill[i] = 0;
316- }
317- }
318-}
319+ // The following states must trigger YYFILL:
320+ // - initial state
321+ // - all states in non-trivial SCCs
322+ // for other states, reset YYFILL argument to zero
323+ for (size_t i = 1; i < nstates; ++i) {
324+ if (trivial[i]) {
325+ fill[i] = 0;
326+ }
327+ }
328+ }
329
330+ } // anonymous namespace
331+
332+ void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill)
333+ {
334+ const size_t nstates = dfa.states.size();
335+ std::vector<bool> trivial(nstates, false);
336+ std::vector<StackItem> stack_dfs;
337+ stack_dfs.reserve(nstates);
338+
339+ // find DFA states that belong to non-trivial SCC
340+ scc(dfa, trivial, stack_dfs);
341+
342+ // for each DFA state, calculate YYFILL argument:
343+ // maximal path length to the next YYFILL state
344+ calc_fill(dfa, trivial, stack_dfs, fill);
345+ }
346+
347 } // namespace re2c