meta-python/recipes-devtools/python/python3-django/CVE-2024-27351.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149

From 072963e4c4d0b3a7a8c5412bc0c7d27d1a9c3521 Mon Sep 17 00:00:00 2001
From: Shai Berger <shai@platonix.com>
Date: Mon, 19 Feb 2024 13:56:37 +0100
Subject: [PATCH] Fixed CVE-2024-27351 -- Prevented potential ReDoS in
 Truncator.words().

Thanks Seokchan Yoon for the report.

CVE: CVE-2024-27351

Upstream-Status: Backport [https://github.com/django/django/commit/072963e4c4d0b3a7a8c5412bc0c7d27d1a9c3521]

Signed-off-by: Shai Berger <shai@platonix.com>
Co-Authored-By: Mariusz Felisiak <felisiak.mariusz@gmail.com>
Signed-off-by: Saravanan <saravanan.kadambathursubramaniyam@windriver.com>

%% original patch: CVE-2024-27351.patch
---
 django/utils/text.py           | 57 ++++++++++++++++++++++++++++++++--
 docs/releases/2.2.28.txt       |  9 ++++++
 tests/utils_tests/test_text.py | 26 ++++++++++++++++
 3 files changed, 90 insertions(+), 2 deletions(-)

diff --git a/django/utils/text.py b/django/utils/text.py
index 06a377b..2c4040e 100644
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -15,8 +15,61 @@ def capfirst(x):
     return x and str(x)[0].upper() + str(x)[1:]
 
 
-# Set up regular expressions
-re_words = re.compile(r'<[^>]+?>|([^<>\s]+)', re.S)
+# ----- Begin security-related performance workaround -----
+
+# re_words used to be re.compile(r'<[^>]+?>|([^<>\s]+)', re.S), which has
+# catastrophic performance on text made only of open brackets ("<<<...").
+# WordsRegex gives the same answers for the use cases here, much faster.
+# Backport: plain re.compile(), like re_chars below, not _lazy_re_compile().
+re_notag = re.compile(r"([^<>\s]+)", re.S)
+re_prt = re.compile(r"<|([^<>\s]+)", re.S)
+
+
+class WordsRegex:
+    """Drop-in for the old re_words pattern; only search() is provided."""
+    @staticmethod
+    def search(text, pos):
+        # Look for "<" or a non-tag word.
+        partial = re_prt.search(text, pos)
+        if partial is None or partial[1] is not None:
+            return partial
+
+        # "<" was found, look for a closing ">".
+        end = text.find(">", partial.end(0))
+        if end < 0:
+            # ">" cannot be found, look for a word.
+            return re_notag.search(text, pos + 1)
+        else:
+            # "<" followed by a ">" was found -- fake a match.
+            end += 1
+            return FakeMatch(text[partial.start(0): end], end)
+
+
+class FakeMatch:
+    """Stand-in for re.Match on a "<...>" span: [0] is its text, [1] None."""
+    __slots__ = ["_text", "_end"]
+
+    def end(self, group=0):
+        assert group == 0, "This specific object takes only group=0"
+        return self._end
+
+    def __getitem__(self, group):
+        if group == 1:
+            return None
+        assert group == 0, "This specific object takes only group in {0,1}"
+        return self._text
+
+    def group(self, group=0):  # re.Match.group() spelling of self[group]
+        return self[group]
+
+    def __init__(self, text, end):
+        self._text, self._end = text, end
+
+
+# ----- End security-related performance workaround -----
+
+# Set up regular expressions.
+re_words = WordsRegex
 re_chars = re.compile(r'<[^>]+?>|(.)', re.S)
 re_tag = re.compile(r'<(/)?(\S+?)(?:(\s*/)|\s.*?)?>', re.S)
 re_newlines = re.compile(r'\r\n|\r')  # Used in normalize_newlines
diff --git a/docs/releases/2.2.28.txt b/docs/releases/2.2.28.txt
index c653cb6..7227452 100644
--- a/docs/releases/2.2.28.txt
+++ b/docs/releases/2.2.28.txt
@@ -90,3 +90,12 @@ large number of Unicode characters.
 In order to avoid the vulnerability, invalid values longer than
 ``UsernameField.max_length`` are no longer normalized, since they cannot pass
 validation anyway.
+
+CVE-2024-27351: Potential regular expression denial-of-service in ``django.utils.text.Truncator.words()``
+=========================================================================================================
+
+``django.utils.text.Truncator.words()`` method (with ``html=True``) and
+:tfilter:`truncatewords_html` template filter were subject to a potential
+regular expression denial-of-service attack using a suitably crafted string
+(follow up to :cve:`2019-14232` and :cve:`2023-43665`).
+
diff --git a/tests/utils_tests/test_text.py b/tests/utils_tests/test_text.py
index cb3063d..7e9f2b3 100644
--- a/tests/utils_tests/test_text.py
+++ b/tests/utils_tests/test_text.py
@@ -156,6 +156,32 @@ class TestUtilsText(SimpleTestCase):
         truncator = text.Truncator('<p>I &lt;3 python, what about you?</p>')
         self.assertEqual('<p>I &lt;3 python,…</p>', truncator.words(3, html=True))
 
+        # Only open brackets.
+        test = "<" * 60_000
+        truncator = text.Truncator(test)
+        self.assertEqual(truncator.words(1, html=True), test)
+
+        # Tags with special chars in attrs.
+        truncator = text.Truncator(
+            """<i style="margin: 5%; font: *;">Hello, my dear lady!</i>"""
+        )
+        self.assertEqual(
+            """<i style="margin: 5%; font: *;">Hello, my dear…</i>""",
+            truncator.words(3, html=True),
+        )
+
+        # Tags with special non-latin chars in attrs.
+        truncator = text.Truncator("""<p data-x="א">Hello, my dear lady!</p>""")
+        self.assertEqual(
+            """<p data-x="א">Hello, my dear…</p>""",
+            truncator.words(3, html=True),
+        )
+
+        # Misplaced brackets.
+        truncator = text.Truncator("hello >< world")
+        self.assertEqual(truncator.words(1, html=True), "hello…")
+        self.assertEqual(truncator.words(2, html=True), "hello >< world")
+
     @patch("django.utils.text.Truncator.MAX_LENGTH_HTML", 10_000)
     def test_truncate_words_html_size_limit(self):
         max_len = text.Truncator.MAX_LENGTH_HTML
-- 
2.40.0