summaryrefslogtreecommitdiffstats
path: root/meta/recipes-devtools/python/python/CVE-2018-1000030-2.patch
blob: 9b7713be885054fee3cda6e2420327decba5a1d5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
From dbf52e02f18dac6f5f0a64f78932f3dc6efc056b Mon Sep 17 00:00:00 2001
From: Benjamin Peterson <benjamin@python.org>
Date: Tue, 2 Jan 2018 09:25:41 -0800
Subject: [PATCH] bpo-31530: fix crash when multiple threads iterate over a
 file, round 2 (#5060)

Multiple threads iterating over a file can corrupt the file's internal readahead
buffer resulting in crashes. To fix this, cache buffer state thread-locally for
the duration of a file_iternext call and only update the file's internal state
after reading completes.

No attempt is made to define or provide "reasonable" semantics for iterating
over a file on multiple threads. (Non-crashing) races are still
present. Duplicated, corrupt, and missing data will happen.

This was originally fixed by 6401e5671781eb217ee1afb4603cc0d1b0367ae6, which
raised an exception from seek() and next() when concurrent operations were
detected. Alas, this simpler solution breaks legitimate use cases such as
capturing the standard streams when multiple threads are logging.

CVE: CVE-2018-1000030
Upstream-Status: Backport [https://github.com/python/cpython/commit/dbf52e02f18dac6f5f0a64f78932f3dc6efc056b]

Signed-off-by: Jagadeesh Krishnanjanappa <jkrishnanjanappa@mvista.com>

---
 Lib/test/test_file2k.py                            |  27 ++---
 .../2017-09-20-18-28-09.bpo-31530.CdLOM7.rst       |   3 -
 Objects/fileobject.c                               | 118 ++++++++++++---------
 3 files changed, 78 insertions(+), 70 deletions(-)

diff --git a/Lib/test/test_file2k.py b/Lib/test/test_file2k.py
index d8966e034e..c73e8d8dc4 100644
--- a/Lib/test/test_file2k.py
+++ b/Lib/test/test_file2k.py
@@ -653,18 +653,15 @@ class FileThreadingTests(unittest.TestCase):
         self._test_close_open_io(io_func)
 
     def test_iteration_torture(self):
-        # bpo-31530: Crash when concurrently iterate over a file.
+        # bpo-31530
         with open(self.filename, "wb") as fp:
             for i in xrange(2**20):
                 fp.write(b"0"*50 + b"\n")
         with open(self.filename, "rb") as f:
-            def iterate():
-                try:
-                    for l in f:
-                        pass
-                except IOError:
+            def it():
+                for l in f:
                     pass
-            self._run_workers(iterate, 10)
+            self._run_workers(it, 10)
 
     def test_iteration_seek(self):
         # bpo-31530: Crash when concurrently seek and iterate over a file.
@@ -674,17 +671,15 @@ class FileThreadingTests(unittest.TestCase):
         with open(self.filename, "rb") as f:
             it = iter([1] + [0]*10)  # one thread reads, others seek
             def iterate():
-                try:
-                    if next(it):
-                        for l in f:
-                            pass
-                    else:
-                        for i in range(100):
-                            f.seek(i*100, 0)
-                except IOError:
-                    pass
+                if next(it):
+                    for l in f:
+                        pass
+                else:
+                    for i in xrange(100):
+                        f.seek(i*100, 0)
             self._run_workers(iterate, 10)
 
+
 @unittest.skipUnless(os.name == 'posix', 'test requires a posix system.')
 class TestFileSignalEINTR(unittest.TestCase):
     def _test_reading(self, data_to_write, read_and_verify_code, method_name,
diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-09-20-18-28-09.bpo-31530.CdLOM7.rst b/Misc/NEWS.d/next/Core and Builtins/2017-09-20-18-28-09.bpo-31530.CdLOM7.rst
index a6cb6c9e9b..beb09b5ae6 100644
--- a/Misc/NEWS.d/next/Core and Builtins/2017-09-20-18-28-09.bpo-31530.CdLOM7.rst	
+++ b/Misc/NEWS.d/next/Core and Builtins/2017-09-20-18-28-09.bpo-31530.CdLOM7.rst	
@@ -1,4 +1 @@
 Fixed crashes when iterating over a file on multiple threads.
-seek() and next() methods of file objects now raise an exception during
-concurrent operation on the same file object.
-A lock can be used to prevent the error.
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
index 8d1c5812f0..270b28264a 100644
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -609,7 +609,12 @@ err_iterbuffered(void)
     return NULL;
 }
 
-static void drop_readahead(PyFileObject *);
+static void
+drop_file_readahead(PyFileObject *f)
+{
+    PyMem_FREE(f->f_buf);
+    f->f_buf = NULL;
+}
 
 /* Methods */
 
@@ -632,7 +637,7 @@ file_dealloc(PyFileObject *f)
     Py_XDECREF(f->f_mode);
     Py_XDECREF(f->f_encoding);
     Py_XDECREF(f->f_errors);
-    drop_readahead(f);
+    drop_file_readahead(f);
     Py_TYPE(f)->tp_free((PyObject *)f);
 }
 
@@ -767,13 +772,7 @@ file_seek(PyFileObject *f, PyObject *args)
 
     if (f->f_fp == NULL)
         return err_closed();
-    if (f->unlocked_count > 0) {
-        PyErr_SetString(PyExc_IOError,
-            "seek() called during concurrent "
-            "operation on the same file object");
-        return NULL;
-    }
-    drop_readahead(f);
+    drop_file_readahead(f);
     whence = 0;
     if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
         return NULL;
@@ -2242,12 +2241,16 @@ static PyGetSetDef file_getsetlist[] = {
     {0},
 };
 
+typedef struct {
+    char *buf, *bufptr, *bufend;
+} readaheadbuffer;
+
 static void
-drop_readahead(PyFileObject *f)
+drop_readaheadbuffer(readaheadbuffer *rab)
 {
-    if (f->f_buf != NULL) {
-        PyMem_Free(f->f_buf);
-        f->f_buf = NULL;
+    if (rab->buf != NULL) {
+        PyMem_FREE(rab->buf);
+        rab->buf = NULL;
     }
 }
 
@@ -2255,36 +2258,34 @@ drop_readahead(PyFileObject *f)
    (unless at EOF) and no more than bufsize.  Returns negative value on
    error, will set MemoryError if bufsize bytes cannot be allocated. */
 static int
-readahead(PyFileObject *f, Py_ssize_t bufsize)
+readahead(PyFileObject *f, readaheadbuffer *rab, Py_ssize_t bufsize)
 {
     Py_ssize_t chunksize;
 
-    assert(f->unlocked_count == 0);
-    if (f->f_buf != NULL) {
-        if( (f->f_bufend - f->f_bufptr) >= 1)
+    if (rab->buf != NULL) {
+        if ((rab->bufend - rab->bufptr) >= 1)
             return 0;
         else
-            drop_readahead(f);
+            drop_readaheadbuffer(rab);
     }
-    if ((f->f_buf = (char *)PyMem_Malloc(bufsize)) == NULL) {
+    if ((rab->buf = PyMem_MALLOC(bufsize)) == NULL) {
         PyErr_NoMemory();
         return -1;
     }
     FILE_BEGIN_ALLOW_THREADS(f)
     errno = 0;
-    chunksize = Py_UniversalNewlineFread(
-        f->f_buf, bufsize, f->f_fp, (PyObject *)f);
+    chunksize = Py_UniversalNewlineFread(rab->buf, bufsize, f->f_fp, (PyObject *)f);
     FILE_END_ALLOW_THREADS(f)
     if (chunksize == 0) {
         if (ferror(f->f_fp)) {
             PyErr_SetFromErrno(PyExc_IOError);
             clearerr(f->f_fp);
-            drop_readahead(f);
+            drop_readaheadbuffer(rab);
             return -1;
         }
     }
-    f->f_bufptr = f->f_buf;
-    f->f_bufend = f->f_buf + chunksize;
+    rab->bufptr = rab->buf;
+    rab->bufend = rab->buf + chunksize;
     return 0;
 }
 
@@ -2294,51 +2295,43 @@ readahead(PyFileObject *f, Py_ssize_t bufsize)
    logarithmic buffer growth to about 50 even when reading a 1gb line. */
 
 static PyStringObject *
-readahead_get_line_skip(PyFileObject *f, Py_ssize_t skip, Py_ssize_t bufsize)
+readahead_get_line_skip(PyFileObject *f, readaheadbuffer *rab, Py_ssize_t skip, Py_ssize_t bufsize)
 {
     PyStringObject* s;
     char *bufptr;
     char *buf;
     Py_ssize_t len;
 
-    if (f->unlocked_count > 0) {
-        PyErr_SetString(PyExc_IOError,
-            "next() called during concurrent "
-            "operation on the same file object");
-        return NULL;
-    }
-    if (f->f_buf == NULL)
-        if (readahead(f, bufsize) < 0)
+    if (rab->buf == NULL)
+        if (readahead(f, rab, bufsize) < 0)
             return NULL;
 
-    len = f->f_bufend - f->f_bufptr;
+    len = rab->bufend - rab->bufptr;
     if (len == 0)
-        return (PyStringObject *)
-            PyString_FromStringAndSize(NULL, skip);
-    bufptr = (char *)memchr(f->f_bufptr, '\n', len);
+        return (PyStringObject *)PyString_FromStringAndSize(NULL, skip);
+    bufptr = (char *)memchr(rab->bufptr, '\n', len);
     if (bufptr != NULL) {
         bufptr++;                               /* Count the '\n' */
-        len = bufptr - f->f_bufptr;
-        s = (PyStringObject *)
-            PyString_FromStringAndSize(NULL, skip + len);
+        len = bufptr - rab->bufptr;
+        s = (PyStringObject *)PyString_FromStringAndSize(NULL, skip + len);
         if (s == NULL)
             return NULL;
-        memcpy(PyString_AS_STRING(s) + skip, f->f_bufptr, len);
-        f->f_bufptr = bufptr;
-        if (bufptr == f->f_bufend)
-            drop_readahead(f);
+        memcpy(PyString_AS_STRING(s) + skip, rab->bufptr, len);
+        rab->bufptr = bufptr;
+        if (bufptr == rab->bufend)
+            drop_readaheadbuffer(rab);
     } else {
-        bufptr = f->f_bufptr;
-        buf = f->f_buf;
-        f->f_buf = NULL;                /* Force new readahead buffer */
+        bufptr = rab->bufptr;
+        buf = rab->buf;
+        rab->buf = NULL;                /* Force new readahead buffer */
         assert(len <= PY_SSIZE_T_MAX - skip);
-        s = readahead_get_line_skip(f, skip + len, bufsize + (bufsize>>2));
+        s = readahead_get_line_skip(f, rab, skip + len, bufsize + (bufsize>>2));
         if (s == NULL) {
-            PyMem_Free(buf);
+            PyMem_FREE(buf);
             return NULL;
         }
         memcpy(PyString_AS_STRING(s) + skip, bufptr, len);
-        PyMem_Free(buf);
+        PyMem_FREE(buf);
     }
     return s;
 }
@@ -2356,7 +2349,30 @@ file_iternext(PyFileObject *f)
     if (!f->readable)
         return err_mode("reading");
 
-    l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
+    {
+        /*
+          Multiple threads can enter this method while the GIL is released
+          during file read and wreak havoc on the file object's readahead
+          buffer. To avoid dealing with cross-thread coordination issues, we
+          cache the file buffer state locally and only set it back on the file
+          object when we're done.
+        */
+        readaheadbuffer rab = {f->f_buf, f->f_bufptr, f->f_bufend};
+        f->f_buf = NULL;
+        l = readahead_get_line_skip(f, &rab, 0, READAHEAD_BUFSIZE);
+        /*
+          Make sure the file's internal read buffer is cleared out. This will
+          only do anything if some other thread interleaved with us during
+          readahead. We want to drop any changeling buffer, so we don't leak
+          memory. We may lose data, but that's what you get for reading the same
+          file object in multiple threads.
+        */
+        drop_file_readahead(f);
+        f->f_buf = rab.buf;
+        f->f_bufptr = rab.bufptr;
+        f->f_bufend = rab.bufend;
+    }
+
     if (l == NULL || PyString_GET_SIZE(l) == 0) {
         Py_XDECREF(l);
         return NULL;
-- 
2.13.3