summary | refs | log | tree | commit | diff | stats
path: root/meta/recipes-core/expat/expat/CVE-2023-52425-0010.patch
blob: 3fbf69de080ebdd1735b90cd34c752d5833f352e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
From 60b74209899a67d426d208662674b55a5eed918c Mon Sep 17 00:00:00 2001
From: Snild Dolkow <snild@sony.com>
Date: Wed, 4 Oct 2023 16:00:14 +0200
Subject: [PATCH] Bypass partial token heuristic when close to maximum buffer 
 size

For huge tokens, we may end up in a situation where the partial token
parse deferral heuristic demands more bytes than Expat's maximum buffer
size (currently ~half of INT_MAX) can hold.

INT_MAX/2 is 1024 MiB on most systems. Clearly, a token of 950 MiB could
fit in that buffer, but the reparse threshold might be such that
callProcessor() will defer it, allowing the app to keep filling the
buffer until XML_GetBuffer() eventually returns a memory error.

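By bypassing the heuristic when we're getting close to the maximum
buffer size, it will once again be possible to reliably parse tokens in
the size range INT_MAX/2/ratio < size < INT_MAX/2, where ratio is the
growth factor the heuristic requires before reparsing (the factor 2 in
`have_now >= 2 * had_before`).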

We subtract the last buffer fill size as a way to detect that the next
XML_GetBuffer() call has a risk of returning a memory error -- assuming
that the application is likely to keep using the same (or smaller) fill.

We subtract XML_CONTEXT_BYTES because that's the maximum number of bytes
that could remain at the start of the buffer, preceding the partial
token. Technically, it could be fewer bytes, but XML_CONTEXT_BYTES is
normally small relative to INT_MAX, and is much simpler to use.

Co-authored-by: Sebastian Pipping <sebastian@pipping.org>

CVE: CVE-2023-52425

Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/60b74209899a67d426d208662674b55a5eed918c]

Signed-off-by: Meenali Gupta <meenali.gupta@windriver.com>
---
 lib/xmlparse.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index 6746d70..32c57f6 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -205,6 +205,8 @@ typedef char ICHAR;
 /* Do safe (NULL-aware) pointer arithmetic */
 #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
 
+#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
+
 #include "internal.h"
 #include "xmltok.h"
 #include "xmlrole.h"
@@ -634,6 +636,7 @@ struct XML_ParserStruct {
   const char *m_parseEndPtr;
   size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
   XML_Bool m_reparseDeferralEnabled;
+  int m_lastBufferRequestSize;
   XML_Char *m_dataBuf;
   XML_Char *m_dataBufEnd;
   XML_StartElementHandler m_startElementHandler;
@@ -975,7 +978,18 @@ callProcessor(XML_Parser parser, const char *start, const char *end,
     // Heuristic: don't try to parse a partial token again until the amount of
     // available data has increased significantly.
     const size_t had_before = parser->m_partialTokenBytesBefore;
-    const bool enough = (have_now >= 2 * had_before);
+    // ...but *do* try anyway if we're close to reaching the max buffer size.
+    size_t close_to_maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up
+#if XML_CONTEXT_BYTES > 0
+    // subtract XML_CONTEXT_BYTES, but don't go below zero
+    close_to_maxbuf -= EXPAT_MIN(close_to_maxbuf, XML_CONTEXT_BYTES);
+#endif
+    // subtract the last buffer fill size, but don't go below zero
+    // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
+    close_to_maxbuf
+        -= EXPAT_MIN(close_to_maxbuf, (size_t)parser->m_lastBufferRequestSize);
+    const bool enough
+        = (have_now >= 2 * had_before) || (have_now > close_to_maxbuf);
 
     if (! enough) {
       *endPtr = start; // callers may expect this to be set
@@ -1177,6 +1191,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) {
   parser->m_parseEndPtr = NULL;
   parser->m_partialTokenBytesBefore = 0;
   parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
+  parser->m_lastBufferRequestSize = 0;
   parser->m_declElementType = NULL;
   parser->m_declAttributeId = NULL;
   parser->m_declEntity = NULL;
@@ -1911,6 +1926,9 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
       parser->m_processor = errorProcessor;
       return XML_STATUS_ERROR;
     }
+    // though this isn't a buffer request, we assume that `len` is the app's
+    // preferred buffer fill size, and therefore save it here.
+    parser->m_lastBufferRequestSize = len;
     parser->m_parseEndByteIndex += len;
     parser->m_positionPtr = s;
     parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
@@ -2064,6 +2082,9 @@ XML_GetBuffer(XML_Parser parser, int len) {
   default:;
   }
 
+  // whether or not the request succeeds, `len` seems to be the app's preferred
+  // buffer fill size; remember it.
+  parser->m_lastBufferRequestSize = len;
   if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
       || parser->m_buffer == NULL) {
 #ifdef XML_CONTEXT_BYTES
-- 
2.40.0