# Patch overview (gh-143196): validate the `indent` argument of the C JSON
# encoder and reject a non-zero starting indent level when indent is set.
# Files touched: Lib/test/test_json/test_speedups.py and Modules/_json.c.
#
# Modules/_json.c, encoder_new():
#   * `indent` (argument 4 of make_encoder()) is kept as-is, with a new
#     reference taken, when it is None or passes PyUnicode_Check.
#   * Any other object must pass PyIndex_Check; otherwise TypeError
#     "make_encoder() argument 4 must be str, int, or None, not <type>".
#   * The index is converted with PyNumber_AsSsize_t(..., PyExc_ValueError);
#     a negative value raises ValueError "... must be a non-negative int".
#   * A valid count replaces `indent` with a new str of that many spaces
#     (PyUnicode_New + memset on the 1-byte data).
#   * `indent` is therefore an owned reference from this point on: it is
#     released with Py_DECREF if tp_alloc fails, and stored into s->indent
#     directly instead of through Py_NewRef.
#
# Modules/_json.c, encoder_call():
#   * When self->indent is not None and indent_level != 0, the writer is
#     discarded and ValueError "_current_indent_level must be 0 when indent
#     is set" is raised before create_indent_cache() is called.
#
# Lib/test/test_json/test_speedups.py:
#   * test_indent_argument_to_encoder: int indent 4 is accepted and the
#     output contains the expected spaces; -1 raises ValueError; a list
#     raises TypeError.
#   * test_nonzero_indent_level_with_indent: level 0 works, level 1 raises
#     ValueError, and a str subclass whose __mul__ raises still encodes at
#     level 0.
#
# NOTE(review): this copy of the patch looks whitespace-collapsed (newlines
# and runs of spaces appear as single spaces), so string literals such as
# ' ' may originally have held more spaces (see the "# 4 spaces" comment) --
# confirm against the upstream change before applying or relying on them.
diff --git a/Lib/test/test_json/test_speedups.py b/Lib/test/test_json/test_speedups.py index 682014cfd5b344..8376a55e9a7152 100644 --- a/Lib/test/test_json/test_speedups.py +++ b/Lib/test/test_json/test_speedups.py @@ -80,3 +80,60 @@ def test(name): def test_unsortable_keys(self): with self.assertRaises(TypeError): self.json.encoder.JSONEncoder(sort_keys=True).encode({'a': 1, 1: 'a'}) + + def test_indent_argument_to_encoder(self): + # gh-143196: indent must be str, int, or None + # int is converted to spaces + enc = self.json.encoder.c_make_encoder( + None, lambda obj: obj, lambda obj: obj, + 4, ':', ', ', False, False, False, + ) + result = enc({'a': 1}, 0) + self.assertIn(' ', result[0]) # 4 spaces + + # Negative int should raise ValueError + with self.assertRaisesRegex( + ValueError, + r'make_encoder\(\) argument 4 must be a non-negative int', + ): + self.json.encoder.c_make_encoder(None, None, None, -1, ': ', ', ', + False, False, False) + + # Other types should raise TypeError + with self.assertRaisesRegex( + TypeError, + r'make_encoder\(\) argument 4 must be str, int, or None, not list', + ): + self.json.encoder.c_make_encoder(None, None, None, [' '], + ': ', ', ', False, False, False) + + def test_nonzero_indent_level_with_indent(self): + # gh-143196: _current_indent_level must be 0 when indent is set + # This prevents heap-buffer-overflow from uninitialized cache access + # and also prevents re-entrant __mul__ attacks since PySequence_Repeat + # is only called when indent_level != 0 + enc = self.json.encoder.c_make_encoder( + None, lambda obj: obj, lambda obj: obj, + ' ', ':', ', ', False, False, False, + ) + # indent_level=0 should work + enc([None], 0) + # indent_level!=0 should raise ValueError + with self.assertRaisesRegex( + ValueError, + r'_current_indent_level must be 0 when indent is set', + ): + enc([None], 1) + + # Verify that str subclasses with custom __mul__ are safe because + # __mul__ is never called when indent_level=0 + class 
CustomIndent(str): + def __mul__(self, count): + raise RuntimeError("__mul__ should not be called") + + enc2 = self.json.encoder.c_make_encoder( + None, lambda obj: obj, lambda obj: obj, + CustomIndent(' '), ':', ', ', False, False, False, + ) + # This should work - __mul__ is not called when indent_level=0 + enc2({'a': 1}, 0) diff --git a/Modules/_json.c b/Modules/_json.c index 14714d4b346546..1efc08fcced66f 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1319,14 +1319,47 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } + /* Convert indent to str if it's not None or already a string */ + if (indent != Py_None && !PyUnicode_Check(indent)) { + Py_ssize_t indent_level; + if (!PyIndex_Check(indent)) { + PyErr_Format(PyExc_TypeError, + "make_encoder() argument 4 must be str, int, or None, " + "not %.200s", Py_TYPE(indent)->tp_name); + return NULL; + } + indent_level = PyNumber_AsSsize_t(indent, PyExc_ValueError); + if (indent_level == -1 && PyErr_Occurred()) { + return NULL; + } + if (indent_level < 0) { + PyErr_SetString(PyExc_ValueError, + "make_encoder() argument 4 must be a non-negative int"); + return NULL; + } + /* Create a string of spaces: ' ' * indent_level */ + indent = PyUnicode_New(indent_level, ' '); + if (indent == NULL) { + return NULL; + } + if (indent_level > 0) { + memset(PyUnicode_1BYTE_DATA(indent), ' ', indent_level); + } + } + else { + Py_INCREF(indent); + } + s = (PyEncoderObject *)type->tp_alloc(type, 0); - if (s == NULL) + if (s == NULL) { + Py_DECREF(indent); return NULL; + } s->markers = Py_NewRef(markers); s->defaultfn = Py_NewRef(defaultfn); s->encoder = Py_NewRef(encoder); - s->indent = Py_NewRef(indent); + s->indent = indent; /* Already incref'd or newly created */ s->key_separator = Py_NewRef(key_separator); s->item_separator = Py_NewRef(item_separator); s->sort_keys = sort_keys; @@ -1453,6 +1486,12 @@ encoder_call(PyObject *op, PyObject *args, PyObject *kwds) PyObject *indent_cache = NULL; if 
(self->indent != Py_None) { + if (indent_level != 0) { + PyErr_SetString(PyExc_ValueError, + "_current_indent_level must be 0 when indent is set"); + PyUnicodeWriter_Discard(writer); + return NULL; + } indent_cache = create_indent_cache(self, indent_level); if (indent_cache == NULL) { PyUnicodeWriter_Discard(writer);