summary refs log tree commit diff stats
path: root/meta-python/recipes-devtools/python/python3-cbor2/CVE-2025-68131.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-python/recipes-devtools/python/python3-cbor2/CVE-2025-68131.patch')
-rw-r--r-- meta-python/recipes-devtools/python/python3-cbor2/CVE-2025-68131.patch 517
1 files changed, 517 insertions, 0 deletions
diff --git a/meta-python/recipes-devtools/python/python3-cbor2/CVE-2025-68131.patch b/meta-python/recipes-devtools/python/python3-cbor2/CVE-2025-68131.patch
new file mode 100644
index 0000000000..4a35694d34
--- /dev/null
+++ b/meta-python/recipes-devtools/python/python3-cbor2/CVE-2025-68131.patch
@@ -0,0 +1,517 @@
1From 7be0ee8272a541e291f13ed67d69b951ae42a9da Mon Sep 17 00:00:00 2001
2From: Andreas Eriksen <andreer@vespa.ai>
3Date: Thu, 18 Dec 2025 16:48:26 +0100
4Subject: [PATCH] Merge commit from fork
5
6* track depth of recursive encode/decode, clear shared refs on start
7
8* test that shared refs are cleared on start
9
10* add fix-shared-state-reset to version history
11
12* clear shared state _after_ encode/decode
13
14* use PY_SSIZE_T_MAX to clear shareables list
15
16* use context manager for python decoder depth tracking
17
18* use context manager for python encoder depth tracking
19
20CVE: CVE-2025-68131
21Upstream-Status: Backport [https://github.com/agronholm/cbor2/commit/f1d701cd2c411ee40bb1fe383afe7f365f35abf0]
22
23Dropped changes to the changelog from the original commit.
24
25Signed-off-by: Ankur Tyagi <ankur.tyagi85@gmail.com>
26---
27 cbor2/_decoder.py | 38 ++++++++++++++++++-----
28 cbor2/_encoder.py | 44 ++++++++++++++++++++++-----
29 source/decoder.c | 28 ++++++++++++++++-
30 source/decoder.h | 1 +
31 source/encoder.c | 23 ++++++++++++--
32 source/encoder.h | 1 +
33 tests/test_decoder.py | 61 +++++++++++++++++++++++++++++++++++++
34 tests/test_encoder.py | 70 +++++++++++++++++++++++++++++++++++++++++++
35 8 files changed, 249 insertions(+), 17 deletions(-)
36
37diff --git a/cbor2/_decoder.py b/cbor2/_decoder.py
38index c8f1a8f..4aeadcf 100644
39--- a/cbor2/_decoder.py
40+++ b/cbor2/_decoder.py
41@@ -5,6 +5,7 @@ import struct
42 import sys
43 from codecs import getincrementaldecoder
44 from collections.abc import Callable, Mapping, Sequence
45+from contextlib import contextmanager
46 from datetime import date, datetime, timedelta, timezone
47 from io import BytesIO
48 from typing import IO, TYPE_CHECKING, Any, TypeVar, cast, overload
49@@ -59,6 +60,7 @@ class CBORDecoder:
50 "_immutable",
51 "_str_errors",
52 "_stringref_namespace",
53+ "_decode_depth",
54 )
55
56 _fp: IO[bytes]
57@@ -100,6 +102,7 @@ class CBORDecoder:
58 self._shareables: list[object] = []
59 self._stringref_namespace: list[str | bytes] | None = None
60 self._immutable = False
61+ self._decode_depth = 0
62
63 @property
64 def immutable(self) -> bool:
65@@ -225,13 +228,33 @@ class CBORDecoder:
66 if unshared:
67 self._share_index = old_index
68
69+ @contextmanager
70+ def _decoding_context(self):
71+ """
72+ Context manager for tracking decode depth and clearing shared state.
73+
74+ Shared state is cleared at the end of each top-level decode to prevent
75+ shared references from leaking between independent decode operations.
76+ Nested calls (from hooks) must preserve the state.
77+ """
78+ self._decode_depth += 1
79+ try:
80+ yield
81+ finally:
82+ self._decode_depth -= 1
83+ assert self._decode_depth >= 0
84+ if self._decode_depth == 0:
85+ self._shareables.clear()
86+ self._share_index = None
87+
88 def decode(self) -> object:
89 """
90 Decode the next value from the stream.
91
92 :raises CBORDecodeError: if there is any problem decoding the stream
93 """
94- return self._decode()
95+ with self._decoding_context():
96+ return self._decode()
97
98 def decode_from_bytes(self, buf: bytes) -> object:
99 """
100@@ -242,12 +265,13 @@ class CBORDecoder:
101 object needs to be decoded separately from the rest but while still
102 taking advantage of the shared value registry.
103 """
104- with BytesIO(buf) as fp:
105- old_fp = self.fp
106- self.fp = fp
107- retval = self._decode()
108- self.fp = old_fp
109- return retval
110+ with self._decoding_context():
111+ with BytesIO(buf) as fp:
112+ old_fp = self.fp
113+ self.fp = fp
114+ retval = self._decode()
115+ self.fp = old_fp
116+ return retval
117
118 @overload
119 def _decode_length(self, subtype: int) -> int: ...
120diff --git a/cbor2/_encoder.py b/cbor2/_encoder.py
121index 699c656..a653026 100644
122--- a/cbor2/_encoder.py
123+++ b/cbor2/_encoder.py
124@@ -123,6 +123,7 @@ class CBOREncoder:
125 "string_referencing",
126 "string_namespacing",
127 "_string_references",
128+ "_encode_depth",
129 )
130
131 _fp: IO[bytes]
132@@ -183,6 +184,7 @@ class CBOREncoder:
133 int, tuple[object, int | None]
134 ] = {} # indexes used for value sharing
135 self._string_references: dict[str | bytes, int] = {} # indexes used for string references
136+ self._encode_depth = 0
137 self._encoders = default_encoders.copy()
138 if canonical:
139 self._encoders.update(canonical_encoders)
140@@ -298,6 +300,24 @@ class CBOREncoder:
141 """
142 self._fp_write(data)
143
144+ @contextmanager
145+ def _encoding_context(self):
146+ """
147+ Context manager for tracking encode depth and clearing shared state.
148+
149+ Shared state is cleared at the end of each top-level encode to prevent
150+ shared references from leaking between independent encode operations.
151+ Nested calls (from hooks) must preserve the state.
152+ """
153+ self._encode_depth += 1
154+ try:
155+ yield
156+ finally:
157+ self._encode_depth -= 1
158+ if self._encode_depth == 0:
159+ self._shared_containers.clear()
160+ self._string_references.clear()
161+
162 def encode(self, obj: Any) -> None:
163 """
164 Encode the given object using CBOR.
165@@ -305,6 +325,16 @@ class CBOREncoder:
166 :param obj:
167 the object to encode
168 """
169+ with self._encoding_context():
170+ self._encode_value(obj)
171+
172+ def _encode_value(self, obj: Any) -> None:
173+ """
174+ Internal fast path for encoding - used by built-in encoders.
175+
176+ External code should use encode() instead, which properly manages
177+ shared state between independent encode operations.
178+ """
179 obj_type = obj.__class__
180 encoder = self._encoders.get(obj_type) or self._find_encoder(obj_type) or self._default
181 if not encoder:
182@@ -448,14 +478,14 @@ class CBOREncoder:
183 def encode_array(self, value: Sequence[Any]) -> None:
184 self.encode_length(4, len(value))
185 for item in value:
186- self.encode(item)
187+ self._encode_value(item)
188
189 @container_encoder
190 def encode_map(self, value: Mapping[Any, Any]) -> None:
191 self.encode_length(5, len(value))
192 for key, val in value.items():
193- self.encode(key)
194- self.encode(val)
195+ self._encode_value(key)
196+ self._encode_value(val)
197
198 def encode_sortable_key(self, value: Any) -> tuple[int, bytes]:
199 """
200@@ -477,10 +507,10 @@ class CBOREncoder:
201 # String referencing requires that the order encoded is
202 # the same as the order emitted so string references are
203 # generated after an order is determined
204- self.encode(realkey)
205+ self._encode_value(realkey)
206 else:
207 self._fp_write(sortkey[1])
208- self.encode(value)
209+ self._encode_value(value)
210
211 def encode_semantic(self, value: CBORTag) -> None:
212 # Nested string reference domains are distinct
213@@ -491,7 +521,7 @@ class CBOREncoder:
214 self._string_references = {}
215
216 self.encode_length(6, value.tag)
217- self.encode(value.value)
218+ self._encode_value(value.value)
219
220 self.string_referencing = old_string_referencing
221 self._string_references = old_string_references
222@@ -554,7 +584,7 @@ class CBOREncoder:
223 def encode_stringref(self, value: str | bytes) -> None:
224 # Semantic tag 25
225 if not self._stringref(value):
226- self.encode(value)
227+ self._encode_value(value)
228
229 def encode_rational(self, value: Fraction) -> None:
230 # Semantic tag 30
231diff --git a/source/decoder.c b/source/decoder.c
232index fd4d70c..033b73f 100644
233--- a/source/decoder.c
234+++ b/source/decoder.c
235@@ -142,6 +142,7 @@ CBORDecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
236 self->str_errors = PyBytes_FromString("strict");
237 self->immutable = false;
238 self->shared_index = -1;
239+ self->decode_depth = 0;
240 }
241 return (PyObject *) self;
242 error:
243@@ -2052,11 +2053,30 @@ decode(CBORDecoderObject *self, DecodeOptions options)
244 }
245
246
247+// Reset shared state at the end of each top-level decode to prevent
248+// shared references from leaking between independent decode operations.
249+// Nested calls (from hooks) must preserve the state.
250+static inline void
251+clear_shareable_state(CBORDecoderObject *self)
252+{
253+ PyList_SetSlice(self->shareables, 0, PY_SSIZE_T_MAX, NULL);
254+ self->shared_index = -1;
255+}
256+
257+
258 // CBORDecoder.decode(self) -> obj
259 PyObject *
260 CBORDecoder_decode(CBORDecoderObject *self)
261 {
262- return decode(self, DECODE_NORMAL);
263+ PyObject *ret;
264+ self->decode_depth++;
265+ ret = decode(self, DECODE_NORMAL);
266+ self->decode_depth--;
267+ assert(self->decode_depth >= 0);
268+ if (self->decode_depth == 0) {
269+ clear_shareable_state(self);
270+ }
271+ return ret;
272 }
273
274
275@@ -2069,6 +2089,7 @@ CBORDecoder_decode_from_bytes(CBORDecoderObject *self, PyObject *data)
276 if (!_CBOR2_BytesIO && _CBOR2_init_BytesIO() == -1)
277 return NULL;
278
279+ self->decode_depth++;
280 save_read = self->read;
281 buf = PyObject_CallFunctionObjArgs(_CBOR2_BytesIO, data, NULL);
282 if (buf) {
283@@ -2080,6 +2101,11 @@ CBORDecoder_decode_from_bytes(CBORDecoderObject *self, PyObject *data)
284 Py_DECREF(buf);
285 }
286 self->read = save_read;
287+ self->decode_depth--;
288+ assert(self->decode_depth >= 0);
289+ if (self->decode_depth == 0) {
290+ clear_shareable_state(self);
291+ }
292 return ret;
293 }
294
295diff --git a/source/decoder.h b/source/decoder.h
296index 6bb6d52..a2f1bcb 100644
297--- a/source/decoder.h
298+++ b/source/decoder.h
299@@ -13,6 +13,7 @@ typedef struct {
300 PyObject *str_errors;
301 bool immutable;
302 Py_ssize_t shared_index;
303+ Py_ssize_t decode_depth;
304 } CBORDecoderObject;
305
306 extern PyTypeObject CBORDecoderType;
307diff --git a/source/encoder.c b/source/encoder.c
308index a0670aa..a7738a0 100644
309--- a/source/encoder.c
310+++ b/source/encoder.c
311@@ -113,6 +113,7 @@ CBOREncoder_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
312 self->shared_handler = NULL;
313 self->string_referencing = false;
314 self->string_namespacing = false;
315+ self->encode_depth = 0;
316 }
317 return (PyObject *) self;
318 }
319@@ -2027,17 +2028,35 @@ encode(CBOREncoderObject *self, PyObject *value)
320 }
321
322
323+// Reset shared state at the end of each top-level encode to prevent
324+// shared references from leaking between independent encode operations.
325+// Nested calls (from hooks or recursive encoding) must preserve the state.
326+static inline void
327+clear_shared_state(CBOREncoderObject *self)
328+{
329+ PyDict_Clear(self->shared);
330+ PyDict_Clear(self->string_references);
331+}
332+
333+
334 // CBOREncoder.encode(self, value)
335 PyObject *
336 CBOREncoder_encode(CBOREncoderObject *self, PyObject *value)
337 {
338 PyObject *ret;
339
340- // TODO reset shared dict?
341- if (Py_EnterRecursiveCall(" in CBOREncoder.encode"))
342+ self->encode_depth++;
343+ if (Py_EnterRecursiveCall(" in CBOREncoder.encode")) {
344+ self->encode_depth--;
345 return NULL;
346+ }
347 ret = encode(self, value);
348 Py_LeaveRecursiveCall();
349+ self->encode_depth--;
350+ assert(self->encode_depth >= 0);
351+ if (self->encode_depth == 0) {
352+ clear_shared_state(self);
353+ }
354 return ret;
355 }
356
357diff --git a/source/encoder.h b/source/encoder.h
358index 8b2d696..0dcc46d 100644
359--- a/source/encoder.h
360+++ b/source/encoder.h
361@@ -24,6 +24,7 @@ typedef struct {
362 bool value_sharing;
363 bool string_referencing;
364 bool string_namespacing;
365+ Py_ssize_t encode_depth;
366 } CBOREncoderObject;
367
368 extern PyTypeObject CBOREncoderType;
369diff --git a/tests/test_decoder.py b/tests/test_decoder.py
370index 485c604..253d079 100644
371--- a/tests/test_decoder.py
372+++ b/tests/test_decoder.py
373@@ -961,3 +961,64 @@ def test_oversized_read(impl, payload: bytes, tmp_path: Path) -> None:
374 dummy_path.write_bytes(payload)
375 with dummy_path.open("rb") as f:
376 impl.load(f)
377+
378+class TestDecoderReuse:
379+ """
380+ Tests for correct behavior when reusing CBORDecoder instances.
381+ """
382+
383+ def test_decoder_reuse_resets_shared_refs(self, impl):
384+ """
385+ Shared references should be scoped to a single decode operation,
386+ not persist across multiple decodes on the same decoder instance.
387+ """
388+ # Message with shareable tag (28)
389+ msg1 = impl.dumps(impl.CBORTag(28, "first_value"))
390+
391+ # Message with sharedref tag (29) referencing index 0
392+ msg2 = impl.dumps(impl.CBORTag(29, 0))
393+
394+ # Reuse decoder across messages
395+ decoder = impl.CBORDecoder(BytesIO(msg1))
396+ result1 = decoder.decode()
397+ assert result1 == "first_value"
398+
399+ # Second decode should fail - sharedref(0) doesn't exist in this context
400+ decoder.fp = BytesIO(msg2)
401+ with pytest.raises(impl.CBORDecodeValueError, match="shared reference"):
402+ decoder.decode()
403+
404+ def test_decode_from_bytes_resets_shared_refs(self, impl):
405+ """
406+ decode_from_bytes should also reset shared references between calls.
407+ """
408+ msg1 = impl.dumps(impl.CBORTag(28, "value"))
409+ msg2 = impl.dumps(impl.CBORTag(29, 0))
410+
411+ decoder = impl.CBORDecoder(BytesIO(b""))
412+ decoder.decode_from_bytes(msg1)
413+
414+ with pytest.raises(impl.CBORDecodeValueError, match="shared reference"):
415+ decoder.decode_from_bytes(msg2)
416+
417+ def test_shared_refs_within_single_decode(self, impl):
418+ """
419+ Shared references must work correctly within a single decode operation.
420+
421+ Note: This tests non-cyclic sibling references [shareable(x), sharedref(0)],
422+ which is a different pattern from test_cyclic_array/test_cyclic_map that
423+ test self-referencing structures like shareable([sharedref(0)]).
424+ """
425+ # [shareable("hello"), sharedref(0)] -> ["hello", "hello"]
426+ data = unhexlify(
427+ "82" # array(2)
428+ "d81c" # tag(28) shareable
429+ "65" # text(5)
430+ "68656c6c6f" # "hello"
431+ "d81d" # tag(29) sharedref
432+ "00" # unsigned(0)
433+ )
434+
435+ result = impl.loads(data)
436+ assert result == ["hello", "hello"]
437+ assert result[0] is result[1] # Same object reference
438\ No newline at end of file
439diff --git a/tests/test_encoder.py b/tests/test_encoder.py
440index f2ef248..3ca6a95 100644
441--- a/tests/test_encoder.py
442+++ b/tests/test_encoder.py
443@@ -654,3 +654,73 @@ def test_invariant_encode_decode(impl, val):
444 undergoing an encode and decode)
445 """
446 assert impl.loads(impl.dumps(val)) == val
447+
448+
449+class TestEncoderReuse:
450+ """
451+ Tests for correct behavior when reusing CBOREncoder instances.
452+ """
453+
454+ def test_encoder_reuse_resets_shared_containers(self, impl):
455+ """
456+ Shared container tracking should be scoped to a single encode operation,
457+ not persist across multiple encodes on the same encoder instance.
458+ """
459+ fp = BytesIO()
460+ encoder = impl.CBOREncoder(fp, value_sharing=True)
461+ shared_obj = ["hello"]
462+
463+ # First encode: object is tracked in shared containers
464+ encoder.encode([shared_obj, shared_obj])
465+
466+ # Second encode on new fp: should produce valid standalone CBOR
467+ # (not a sharedref pointing to stale first-encode data)
468+ encoder.fp = BytesIO()
469+ encoder.encode(shared_obj)
470+ second_output = encoder.fp.getvalue()
471+
472+ # The second output must be decodable on its own
473+ result = impl.loads(second_output)
474+ assert result == ["hello"]
475+
476+ def test_encode_to_bytes_resets_shared_containers(self, impl):
477+ """
478+ encode_to_bytes should also reset shared container tracking between calls.
479+ """
480+ fp = BytesIO()
481+ encoder = impl.CBOREncoder(fp, value_sharing=True)
482+ shared_obj = ["hello"]
483+
484+ # First encode
485+ encoder.encode_to_bytes([shared_obj, shared_obj])
486+
487+ # Second encode should produce valid standalone CBOR
488+ result_bytes = encoder.encode_to_bytes(shared_obj)
489+ result = impl.loads(result_bytes)
490+ assert result == ["hello"]
491+
492+ def test_encoder_hook_does_not_reset_state(self, impl):
493+ """
494+ When a custom encoder hook calls encode(), the shared container
495+ tracking should be preserved (not reset mid-operation).
496+ """
497+
498+ class Custom:
499+ def __init__(self, value):
500+ self.value = value
501+
502+ def custom_encoder(encoder, obj):
503+ # Hook encodes the wrapped value
504+ encoder.encode(obj.value)
505+
506+ # Encode a Custom wrapping a list
507+ data = impl.dumps(Custom(["a", "b"]), default=custom_encoder)
508+
509+ # Verify the output decodes correctly
510+ result = impl.loads(data)
511+ assert result == ["a", "b"]
512+
513+ # Test nested Custom objects - hook should work recursively
514+ data2 = impl.dumps(Custom(Custom(["x"])), default=custom_encoder)
515+ result2 = impl.loads(data2)
516+ assert result2 == ["x"]
517\ No newline at end of file