Skip to content

Commit 0049b51

Browse files
committed
Merge remote-tracking branch 'upstream/3.10' into backport-f04bea4-3.10
2 parents 21bdc0d + 8ea678d commit 0049b51

File tree

3 files changed

+65
-0
lines changed

3 files changed

+65
-0
lines changed

Lib/test/test_pyexpat.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,42 @@ def resolve_entity(context, base, system_id, public_id):
735735
self.assertEqual(handler_call_args, [("bar", "baz")])
736736

737737

738+
class ParentParserLifetimeTest(unittest.TestCase):
739+
"""
740+
Subparsers make use of their parent XML_Parser inside of Expat.
741+
As a result, parent parsers need to outlive subparsers.
742+
743+
See https://github.com/python/cpython/issues/139400.
744+
"""
745+
746+
def test_parent_parser_outlives_its_subparsers__single(self):
747+
parser = expat.ParserCreate()
748+
subparser = parser.ExternalEntityParserCreate(None)
749+
750+
# Now try to cause garbage collection of the parent parser
751+
# while it's still being referenced by a related subparser.
752+
del parser
753+
754+
def test_parent_parser_outlives_its_subparsers__multiple(self):
755+
parser = expat.ParserCreate()
756+
subparser_one = parser.ExternalEntityParserCreate(None)
757+
subparser_two = parser.ExternalEntityParserCreate(None)
758+
759+
# Now try to cause garbage collection of the parent parser
760+
# while it's still being referenced by a related subparser.
761+
del parser
762+
763+
def test_parent_parser_outlives_its_subparsers__chain(self):
764+
parser = expat.ParserCreate()
765+
subparser = parser.ExternalEntityParserCreate(None)
766+
subsubparser = subparser.ExternalEntityParserCreate(None)
767+
768+
# Now try to cause garbage collection of the parent parsers
769+
# while they are still being referenced by a related subparser.
770+
del parser
771+
del subparser
772+
773+
738774
class ReparseDeferralTest(unittest.TestCase):
739775
def test_getter_setter_round_trip(self):
740776
parser = expat.ParserCreate()
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
:mod:`xml.parsers.expat`: Make sure that parent Expat parsers are only
2+
garbage-collected once they are no longer referenced by subparsers created
3+
by :meth:`~xml.parsers.expat.xmlparser.ExternalEntityParserCreate`.
4+
Patch by Sebastian Pipping.

Modules/pyexpat.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,15 @@ typedef struct {
7171
PyObject_HEAD
7272

7373
XML_Parser itself;
74+
/*
75+
* Strong reference to a parent `xmlparseobject` if this parser
76+
* is a child parser. Set to NULL if this parser is a root parser.
77+
* This is needed to keep the parent parser alive as long as it has
78+
* at least one child parser.
79+
*
80+
* See https://github.com/python/cpython/issues/139400 for details.
81+
*/
82+
PyObject *parent;
7483
int ordered_attributes; /* Return attributes as a list. */
7584
int specified_attributes; /* Report only specified attributes. */
7685
int in_callback; /* Is a callback active? */
@@ -1030,6 +1039,11 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10301039
return NULL;
10311040
}
10321041

1042+
// The new subparser will make use of the parent XML_Parser inside of Expat.
1043+
// So we need to take subparsers into account with the reference counting
1044+
// of their parent parser.
1045+
Py_INCREF(self);
1046+
10331047
new_parser->buffer_size = self->buffer_size;
10341048
new_parser->buffer_used = 0;
10351049
new_parser->buffer = NULL;
@@ -1039,6 +1053,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10391053
new_parser->ns_prefixes = self->ns_prefixes;
10401054
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
10411055
encoding);
1056+
new_parser->parent = (PyObject *)self;
10421057
new_parser->handlers = 0;
10431058
new_parser->intern = self->intern;
10441059
Py_XINCREF(new_parser->intern);
@@ -1047,11 +1062,13 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10471062
new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
10481063
if (new_parser->buffer == NULL) {
10491064
Py_DECREF(new_parser);
1065+
Py_DECREF(self);
10501066
return PyErr_NoMemory();
10511067
}
10521068
}
10531069
if (!new_parser->itself) {
10541070
Py_DECREF(new_parser);
1071+
Py_DECREF(self);
10551072
return PyErr_NoMemory();
10561073
}
10571074

@@ -1064,6 +1081,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
10641081
new_parser->handlers = PyMem_New(PyObject *, i);
10651082
if (!new_parser->handlers) {
10661083
Py_DECREF(new_parser);
1084+
Py_DECREF(self);
10671085
return PyErr_NoMemory();
10681086
}
10691087
clear_handlers(new_parser, 1);
@@ -1348,6 +1366,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
13481366
/* namespace_separator is either NULL or contains one char + \0 */
13491367
self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
13501368
namespace_separator);
1369+
self->parent = NULL;
13511370
if (self->itself == NULL) {
13521371
PyErr_SetString(PyExc_RuntimeError,
13531372
"XML_ParserCreate failed");
@@ -1383,6 +1402,7 @@ xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
13831402
for (int i = 0; handler_info[i].name != NULL; i++) {
13841403
Py_VISIT(op->handlers[i]);
13851404
}
1405+
Py_VISIT(op->parent);
13861406
Py_VISIT(Py_TYPE(op));
13871407
return 0;
13881408
}
@@ -1392,6 +1412,10 @@ xmlparse_clear(xmlparseobject *op)
13921412
{
13931413
clear_handlers(op, 0);
13941414
Py_CLEAR(op->intern);
1415+
// NOTE: We cannot call Py_CLEAR(op->parent) prior to calling
1416+
// XML_ParserFree(op->itself), or a subparser could lose its parent
1417+
// XML_Parser while still making use of it internally.
1418+
// https://github.com/python/cpython/issues/139400
13951419
return 0;
13961420
}
13971421

@@ -1403,6 +1427,7 @@ xmlparse_dealloc(xmlparseobject *self)
14031427
if (self->itself != NULL)
14041428
XML_ParserFree(self->itself);
14051429
self->itself = NULL;
1430+
Py_CLEAR(self->parent);
14061431

14071432
if (self->handlers != NULL) {
14081433
PyMem_Free(self->handlers);

0 commit comments

Comments
 (0)