From 89be91f3df00657261870adbc590209fdb2bc405 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Thu, 23 Apr 2020 23:02:21 +0100 Subject: [PATCH] Rewrite recursion into iteration (estimation of NFA size for RE). This is to avoid stack overflow on large RE (especially on instrumented builds that have larger stack frames, like AddressSanitizer). Partial fix for #219 "overflow-1.re test fails on system with small stack". Upstream-Status: Backport: https://github.com/skvadrik/re2c/commit/89be91f3df00657261870adbc590209fdb2bc405 CVE: CVE-2018-21232 Signed-off-by: Davide Gardenal --- diff --git a/src/nfa/estimate_size.cc b/src/nfa/estimate_size.cc --- a/src/nfa/estimate_size.cc (revision e58939b34bb4c37cd990f82dc286f21cb405743e) +++ b/src/nfa/estimate_size.cc (date 1647005399735) @@ -6,41 +6,113 @@ #include "src/re/re.h" namespace re2c { +namespace { + +struct StackItem { + const RE *re; // current sub-RE + uint32_t size; // size of the sub-RE (only for alternative and concatenation) + uint8_t succ; // index of the next sucessor to be visited +}; -static size_t estimate(const RE *re) +static uint32_t estimate_re_size(const RE *re0, std::vector<StackItem> &stack) { - switch (re->type) { - case RE::NIL: return 0; - case RE::SYM: return 1; - case RE::TAG: return 1; - case RE::ALT: - return estimate(re->alt.re1) - + estimate(re->alt.re2) - + 1; - case RE::CAT: - return estimate(re->cat.re1) - + estimate(re->cat.re2); - case RE::ITER: { - const size_t - iter = estimate(re->iter.re), - min = re->iter.min, - max = re->iter.max; - return max == AST::MANY - ?
iter * min + 1 - : iter * max + (max - min); - } - } - return 0; /* unreachable */ -} + // the estimated size of the last sub-RE visited by DFS + uint32_t size = 0; + + const StackItem i0 = {re0, 0, 0}; + stack.push_back(i0); + + while (!stack.empty()) { + const StackItem i = stack.back(); + stack.pop_back(); + + const RE *re = i.re; + if (re->type == RE::NIL) { + size = 0; + } + else if (re->type == RE::SYM || re->type == RE::TAG) { + size = 1; + } + else if (re->type == RE::ALT) { + if (i.succ == 0) { + // recurse into the left sub-RE + StackItem k = {re, 0, 1}; + stack.push_back(k); + StackItem j = {re->alt.re1, 0, 0}; + stack.push_back(j); + } + else if (i.succ == 1) { + // recurse into the right sub-RE + StackItem k = {re, size, 2}; + stack.push_back(k); + StackItem j = {re->alt.re2, 0, 0}; + stack.push_back(j); + } + else { + // both sub-RE visited, recursive return + size = i.size // left sub-RE (saved on stack) + + size // right sub-RE (just visited by DFS) + + 1; // additional state for alternative + } + } + else if (re->type == RE::CAT) { + if (i.succ == 0) { + // recurse into the left sub-RE + StackItem k = {re, 0, 1}; + stack.push_back(k); + StackItem j = {re->cat.re1, 0, 0}; + stack.push_back(j); + } + else if (i.succ == 1) { + // recurse into the right sub-RE + StackItem k = {re, size, 2}; + stack.push_back(k); + StackItem j = {re->cat.re2, 0, 0}; + stack.push_back(j); + } + else { + // both sub-RE visited, recursive return + size = i.size // left sub-RE (saved on stack) + + size; // right sub-RE (just visited by DFS) + } + } + else if (re->type == RE::ITER) { + if (i.succ == 0) { + // recurse into the sub-RE + StackItem k = {re, 0, 1}; + stack.push_back(k); + StackItem j = {re->iter.re, 0, 0}; + stack.push_back(j); + } + else { + // sub-RE visited, recursive return + const uint32_t min = re->iter.min, max = re->iter.max; + size = max == AST::MANY + ? 
size * min + 1 + : size * max + (max - min); + } + } + } + + //DASSERT(stack.empty()); + return size; +} + +} // anonymous namespace size_t estimate_size(const std::vector<RE*> &res) { - const size_t nre = res.size(); - size_t size = nre - 1; - for (size_t i = 0; i < nre; ++i) { - size += estimate(res[i]) + 1; - } - return size; + std::vector<StackItem> stack; + + const size_t nre = res.size(); + //DASSERT(nre > 0); + size_t size = nre - 1; + + for (size_t i = 0; i < nre; ++i) { + size += estimate_re_size(res[i], stack) + 1; + } + + return size; } } // namespace re2c