Skip to content

Commit cd7ce12

Browse files
committed
Merge remote-tracking branch 'upstream/3.11' into backport-f04bea4-3.11
2 parents 38f9c63 + 1459d1f commit cd7ce12

File tree

3 files changed

+65
-0
lines changed

3 files changed

+65
-0
lines changed

Lib/test/test_pyexpat.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,42 @@ def resolve_entity(context, base, system_id, public_id):
762762
self.assertEqual(handler_call_args, [("bar", "baz")])
763763

764764

765+
class ParentParserLifetimeTest(unittest.TestCase):
766+
"""
767+
Subparsers make use of their parent XML_Parser inside of Expat.
768+
As a result, parent parsers need to outlive subparsers.
769+
770+
See https://github.com/python/cpython/issues/139400.
771+
"""
772+
773+
def test_parent_parser_outlives_its_subparsers__single(self):
774+
parser = expat.ParserCreate()
775+
subparser = parser.ExternalEntityParserCreate(None)
776+
777+
# Now try to cause garbage collection of the parent parser
778+
# while it's still being referenced by a related subparser.
779+
del parser
780+
781+
def test_parent_parser_outlives_its_subparsers__multiple(self):
782+
parser = expat.ParserCreate()
783+
subparser_one = parser.ExternalEntityParserCreate(None)
784+
subparser_two = parser.ExternalEntityParserCreate(None)
785+
786+
# Now try to cause garbage collection of the parent parser
787+
# while it's still being referenced by a related subparser.
788+
del parser
789+
790+
def test_parent_parser_outlives_its_subparsers__chain(self):
791+
parser = expat.ParserCreate()
792+
subparser = parser.ExternalEntityParserCreate(None)
793+
subsubparser = subparser.ExternalEntityParserCreate(None)
794+
795+
# Now try to cause garbage collection of the parent parsers
796+
# while they are still being referenced by a related subparser.
797+
del parser
798+
del subparser
799+
800+
765801
class ReparseDeferralTest(unittest.TestCase):
766802
def test_getter_setter_round_trip(self):
767803
parser = expat.ParserCreate()
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
:mod:`xml.parsers.expat`: Make sure that parent Expat parsers are only
2+
garbage-collected once they are no longer referenced by subparsers created
3+
by :meth:`~xml.parsers.expat.xmlparser.ExternalEntityParserCreate`.
4+
Patch by Sebastian Pipping.

Modules/pyexpat.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,15 @@ typedef struct {
7575
PyObject_HEAD
7676

7777
XML_Parser itself;
78+
/*
79+
* Strong reference to a parent `xmlparseobject` if this parser
80+
* is a child parser. Set to NULL if this parser is a root parser.
81+
* This is needed to keep the parent parser alive as long as it has
82+
* at least one child parser.
83+
*
84+
* See https://github.com/python/cpython/issues/139400 for details.
85+
*/
86+
PyObject *parent;
7887
int ordered_attributes; /* Return attributes as a list. */
7988
int specified_attributes; /* Report only specified attributes. */
8089
int in_callback; /* Is a callback active? */
@@ -1035,6 +1044,11 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10351044
return NULL;
10361045
}
10371046

1047+
// The new subparser will make use of the parent XML_Parser inside of Expat.
1048+
// So we need to take subparsers into account with the reference counting
1049+
// of their parent parser.
1050+
Py_INCREF(self);
1051+
10381052
new_parser->buffer_size = self->buffer_size;
10391053
new_parser->buffer_used = 0;
10401054
new_parser->buffer = NULL;
@@ -1044,6 +1058,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10441058
new_parser->ns_prefixes = self->ns_prefixes;
10451059
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
10461060
encoding);
1061+
new_parser->parent = (PyObject *)self;
10471062
new_parser->handlers = 0;
10481063
new_parser->intern = self->intern;
10491064
Py_XINCREF(new_parser->intern);
@@ -1052,11 +1067,13 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10521067
new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
10531068
if (new_parser->buffer == NULL) {
10541069
Py_DECREF(new_parser);
1070+
Py_DECREF(self);
10551071
return PyErr_NoMemory();
10561072
}
10571073
}
10581074
if (!new_parser->itself) {
10591075
Py_DECREF(new_parser);
1076+
Py_DECREF(self);
10601077
return PyErr_NoMemory();
10611078
}
10621079

@@ -1069,6 +1086,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10691086
new_parser->handlers = PyMem_New(PyObject *, i);
10701087
if (!new_parser->handlers) {
10711088
Py_DECREF(new_parser);
1089+
Py_DECREF(self);
10721090
return PyErr_NoMemory();
10731091
}
10741092
clear_handlers(new_parser, 1);
@@ -1353,6 +1371,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
13531371
/* namespace_separator is either NULL or contains one char + \0 */
13541372
self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
13551373
namespace_separator);
1374+
self->parent = NULL;
13561375
if (self->itself == NULL) {
13571376
PyErr_SetString(PyExc_RuntimeError,
13581377
"XML_ParserCreate failed");
@@ -1388,6 +1407,7 @@ xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
13881407
for (int i = 0; handler_info[i].name != NULL; i++) {
13891408
Py_VISIT(op->handlers[i]);
13901409
}
1410+
Py_VISIT(op->parent);
13911411
Py_VISIT(Py_TYPE(op));
13921412
return 0;
13931413
}
@@ -1397,6 +1417,10 @@ xmlparse_clear(xmlparseobject *op)
13971417
{
13981418
clear_handlers(op, 0);
13991419
Py_CLEAR(op->intern);
1420+
// NOTE: We cannot call Py_CLEAR(op->parent) prior to calling
1421+
// XML_ParserFree(op->itself), or a subparser could lose its parent
1422+
// XML_Parser while still making use of it internally.
1423+
// https://github.com/python/cpython/issues/139400
14001424
return 0;
14011425
}
14021426

@@ -1408,6 +1432,7 @@ xmlparse_dealloc(xmlparseobject *self)
14081432
if (self->itself != NULL)
14091433
XML_ParserFree(self->itself);
14101434
self->itself = NULL;
1435+
Py_CLEAR(self->parent);
14111436

14121437
if (self->handlers != NULL) {
14131438
PyMem_Free(self->handlers);

0 commit comments

Comments
 (0)