@@ -96,6 +96,19 @@ _getrecord_ex(Py_UCS4 code)
9696 return & _PyUnicode_Database_Records [index ];
9797}
9898
/* Per-module state for unicodedata.
 *
 * Holds one reference to each of the two type objects that
 * unicodedata_exec() creates with PyType_FromModuleAndSpec():
 *   GraphemeType               - built from Grapheme_spec
 *   GraphemeBreakIteratorType  - built from GraphemeBreakIterator_spec
 * The module definition reserves sizeof(unicodedatastate) bytes for it
 * via m_size.
 */
99+ typedef struct {
100+ PyObject * GraphemeType ;
101+ PyObject * GraphemeBreakIteratorType ;
102+ } unicodedatastate ;
103+
/* Return the unicodedatastate attached to `module`.
 *
 * The pointer comes from _PyModule_GetState() and is cast without any
 * type check.  A NULL state is only caught by assert(), so builds that
 * compile assertions out return NULL to the caller unchanged.
 * NOTE(review): callers in this file dereference the result directly;
 * confirm every call site runs only after the state has been allocated.
 */
104+ static inline unicodedatastate *
105+ get_unicodedata_state (PyObject * module )
106+ {
107+ void * state = _PyModule_GetState (module );
108+ assert (state != NULL );
109+ return (unicodedatastate * )state ;
110+ }
111+
99112/* ------------- Previous-version API ------------------------------------- */
100113typedef struct previous_version {
101114 PyObject_HEAD
@@ -1687,7 +1700,7 @@ typedef struct {
16871700 bool ri_flag ;
16881701} _PyGraphemeBreak ;
16891702
1690- static enum ExtPictState
1703+ static inline enum ExtPictState
16911704update_ext_pict_state (enum ExtPictState state , int gcb , bool ext_pict )
16921705{
16931706 if (ext_pict ) {
@@ -1704,7 +1717,7 @@ update_ext_pict_state(enum ExtPictState state, int gcb, bool ext_pict)
17041717 return ExtPictState_Init ;
17051718}
17061719
1707- static enum InCBState
1720+ static inline enum InCBState
17081721update_incb_state (enum InCBState state , int incb )
17091722{
17101723 if (incb == InCB_Consonant ) {
@@ -1721,7 +1734,7 @@ update_incb_state(enum InCBState state, int incb)
17211734 return InCBState_Init ;
17221735}
17231736
1724- static bool
1737+ static inline bool
17251738update_ri_flag (bool flag , int gcb )
17261739{
17271740 if (gcb == GCB_Regional_Indicator ) {
@@ -1732,7 +1745,7 @@ update_ri_flag(bool flag, int gcb)
17321745 }
17331746}
17341747
1735- static bool
1748+ static inline bool
17361749grapheme_break (int prev_gcb , int curr_gcb , enum ExtPictState ep_state ,
17371750 bool ri_flag , enum InCBState incb_state )
17381751{
@@ -1905,19 +1918,29 @@ static PyMemberDef Grapheme_members[] = {
19051918 {NULL } /* Sentinel */
19061919};
19071920
1908- static PyTypeObject GraphemeType = {
1909- PyVarObject_HEAD_INIT (NULL , 0 )
1910- .tp_name = "unicodedata.Grapheme" ,
1911- .tp_basicsize = sizeof (GraphemeObject ),
1912- .tp_dealloc = Grapheme_dealloc ,
1913- .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC ,
1914- .tp_iter = PyObject_SelfIter ,
1915- .tp_traverse = Grapheme_traverse ,
1916- .tp_clear = Grapheme_clear ,
1917- .tp_str = Grapheme_str ,
1918- .tp_members = Grapheme_members
/* Slot table for the "unicodedata.Grapheme" type, referenced from
 * Grapheme_spec.slots.
 *
 * The entries carry the same handlers as the tp_* fields of the removed
 * static GraphemeType initializer: dealloc, iter (PyObject_SelfIter),
 * traverse, clear, str and members.  The {0, 0} entry terminates the
 * table.
 */
1921+ static PyType_Slot Grapheme_slots [] = {
1922+ {Py_tp_dealloc , Grapheme_dealloc },
1923+ {Py_tp_iter , PyObject_SelfIter },
1924+ {Py_tp_traverse , Grapheme_traverse },
1925+ {Py_tp_clear , Grapheme_clear },
1926+ {Py_tp_str , Grapheme_str },
1927+ {Py_tp_members , Grapheme_members },
1928+ {0 , 0 },
19191929};
19201930
/* Type spec for "unicodedata.Grapheme".
 *
 * unicodedata_exec() passes this to PyType_FromModuleAndSpec() and
 * stores the resulting type in the module state.  Name and basicsize
 * match the removed static type.  The flags keep Py_TPFLAGS_DEFAULT and
 * Py_TPFLAGS_HAVE_GC and add Py_TPFLAGS_DISALLOW_INSTANTIATION and
 * Py_TPFLAGS_IMMUTABLETYPE.  Instances are allocated in C by
 * GBI_iternext() using PyObject_GC_New().
 */
1931+ static PyType_Spec Grapheme_spec = {
1932+ .name = "unicodedata.Grapheme" ,
1933+ .basicsize = sizeof (GraphemeObject ),
1934+ .flags = (
1935+ Py_TPFLAGS_DEFAULT
1936+ | Py_TPFLAGS_HAVE_GC
1937+ | Py_TPFLAGS_DISALLOW_INSTANTIATION
1938+ | Py_TPFLAGS_IMMUTABLETYPE
1939+ ),
1940+ .slots = Grapheme_slots
1941+ };
1942+
1943+
19211944/* Grapheme Cluster iterator */
19221945
19231946typedef struct {
@@ -1957,7 +1980,10 @@ GBI_iternext(PyObject *self)
19571980 if (pos < 0 ) {
19581981 return NULL ;
19591982 }
1960- GraphemeObject * g = PyObject_GC_New (GraphemeObject , & GraphemeType );
1983+ PyObject * module = PyType_GetModule (Py_TYPE (it ));
1984+ PyObject * GraphemeType = get_unicodedata_state (module )-> GraphemeType ;
1985+ GraphemeObject * g = PyObject_GC_New (GraphemeObject ,
1986+ (PyTypeObject * )GraphemeType );
19611987 if (!g ) {
19621988 return NULL ;
19631989 }
@@ -1970,16 +1996,25 @@ GBI_iternext(PyObject *self)
19701996}
19711997
19721998
1973- static PyTypeObject GraphemeBreakIteratorType = {
1974- PyVarObject_HEAD_INIT (NULL , 0 )
1975- .tp_name = "unicodedata.GraphemeBreakIterator" ,
1976- .tp_basicsize = sizeof (GraphemeBreakIterator ),
1977- .tp_dealloc = GBI_dealloc ,
1978- .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC ,
1979- .tp_iter = PyObject_SelfIter ,
1980- .tp_iternext = GBI_iternext ,
1981- .tp_traverse = GBI_traverse ,
1982- .tp_clear = GBI_clear
/* Slot table for the "unicodedata.GraphemeBreakIterator" type,
 * referenced from GraphemeBreakIterator_spec.slots.
 *
 * The entries carry the same handlers as the removed static
 * GraphemeBreakIteratorType initializer: dealloc, iter
 * (PyObject_SelfIter), iternext (GBI_iternext), traverse and clear.
 * The {0, 0} entry terminates the table.
 */
1999+ static PyType_Slot GraphemeBreakIterator_slots [] = {
2000+ {Py_tp_dealloc , GBI_dealloc },
2001+ {Py_tp_iter , PyObject_SelfIter },
2002+ {Py_tp_iternext , GBI_iternext },
2003+ {Py_tp_traverse , GBI_traverse },
2004+ {Py_tp_clear , GBI_clear },
2005+ {0 , 0 },
2006+ };
2007+
/* Type spec for "unicodedata.GraphemeBreakIterator".
 *
 * unicodedata_exec() passes this to PyType_FromModuleAndSpec() and
 * stores the resulting type in the module state.  Name and basicsize
 * match the removed static type.  The flags keep Py_TPFLAGS_DEFAULT and
 * Py_TPFLAGS_HAVE_GC and add Py_TPFLAGS_DISALLOW_INSTANTIATION and
 * Py_TPFLAGS_IMMUTABLETYPE.  Instances are allocated in C by
 * unicodedata_iter_graphemes_impl() using PyObject_GC_New().
 */
2008+ static PyType_Spec GraphemeBreakIterator_spec = {
2009+ .name = "unicodedata.GraphemeBreakIterator" ,
2010+ .basicsize = sizeof (GraphemeBreakIterator ),
2011+ .flags = (
2012+ Py_TPFLAGS_DEFAULT
2013+ | Py_TPFLAGS_HAVE_GC
2014+ | Py_TPFLAGS_DISALLOW_INSTANTIATION
2015+ | Py_TPFLAGS_IMMUTABLETYPE
2016+ ),
2017+ .slots = GraphemeBreakIterator_slots
19832018};
19842019
19852020
@@ -2001,18 +2036,19 @@ unicodedata_iter_graphemes_impl(PyObject *module, PyObject *unistr,
20012036 Py_ssize_t start , Py_ssize_t end )
20022037/*[clinic end generated code: output=b0b831944265d36f input=a1454d9e8135951f]*/
20032038{
2004- GraphemeBreakIterator * gci = PyObject_GC_New (GraphemeBreakIterator ,
2005- & GraphemeBreakIteratorType );
2006- if (!gci ) {
2039+ PyObject * GraphemeBreakIteratorType = get_unicodedata_state (module )-> GraphemeBreakIteratorType ;
2040+ GraphemeBreakIterator * gbi = PyObject_GC_New (GraphemeBreakIterator ,
2041+ (PyTypeObject * )GraphemeBreakIteratorType );
2042+ if (!gbi ) {
20072043 return NULL ;
20082044 }
20092045
20102046 Py_ssize_t len = PyUnicode_GET_LENGTH (unistr );
20112047 ADJUST_INDICES (start , end , len );
20122048 Py_INCREF (unistr );
2013- _Py_InitGraphemeBreak (& gci -> iter , unistr , start , end );
2014- PyObject_GC_Track (gci );
2015- return (PyObject * )gci ;
2049+ _Py_InitGraphemeBreak (& gbi -> iter , unistr , start , end );
2050+ PyObject_GC_Track (gbi );
2051+ return (PyObject * )gbi ;
20162052}
20172053
20182054/*[clinic input]
@@ -2129,6 +2165,7 @@ static PyType_Spec ucd_type_spec = {
21292165 .slots = ucd_type_slots
21302166};
21312167
2168+
21322169PyDoc_STRVAR (unicodedata_docstring ,
21332170"This module provides access to the Unicode Character Database which\n\
21342171defines character properties for all Unicode characters. The data in\n\
@@ -2138,15 +2175,46 @@ this database is based on the UnicodeData.txt file version\n\
21382175The module uses the same names and symbols as defined by the\n\
21392176UnicodeData File Format " UNIDATA_VERSION "." );
21402177
/* GC traverse hook for the module (installed as m_traverse).
 *
 * Visits the two type objects held in the module state so the cycle
 * collector can see the references the module owns.  Always returns 0
 * unless Py_VISIT() returns early with the visitor's non-zero result.
 * NOTE(review): get_unicodedata_state() asserts a non-NULL state, so
 * this relies on the hook not being invoked before the state is
 * allocated - confirm against the PyModuleDef documentation.
 */
2178+ static int
2179+ unicodedata_traverse (PyObject * module , visitproc visit , void * arg )
2180+ {
2181+ unicodedatastate * state = get_unicodedata_state (module );
2182+ Py_VISIT (state -> GraphemeType );
2183+ Py_VISIT (state -> GraphemeBreakIteratorType );
2184+ return 0 ;
2185+ }
2186+
/* GC clear hook for the module (installed as m_clear).
 *
 * Drops the module's references to both type objects with Py_CLEAR(),
 * which also resets the fields to NULL, so calling it again (as
 * unicodedata_free() does) is harmless.  Always returns 0.
 */
2187+ static int
2188+ unicodedata_clear (PyObject * module )
2189+ {
2190+ unicodedatastate * state = get_unicodedata_state (module );
2191+ Py_CLEAR (state -> GraphemeType );
2192+ Py_CLEAR (state -> GraphemeBreakIteratorType );
2193+ return 0 ;
2194+ }
2195+
/* Module deallocation hook (installed as m_free).
 *
 * Receives the module as a void pointer and releases the module state
 * by delegating to unicodedata_clear(); the return value of that call
 * is ignored.
 */
2196+ static void
2197+ unicodedata_free (void * module )
2198+ {
2199+ unicodedata_clear ((PyObject * )module );
2200+ }
2201+
21412202static int
21422203unicodedata_exec (PyObject * module )
21432204{
2144- if (PyType_Ready (& GraphemeType )) {
2205+ unicodedatastate * state = get_unicodedata_state (module );
2206+
2207+ PyObject * GraphemeType = PyType_FromModuleAndSpec (module , & Grapheme_spec , NULL );
2208+ if (GraphemeType == NULL ) {
21452209 return -1 ;
21462210 }
2147- if (PyType_Ready (& GraphemeBreakIteratorType )) {
2211+ state -> GraphemeType = GraphemeType ;
2212+
2213+ PyObject * GraphemeBreakIteratorType = PyType_FromModuleAndSpec (module , & GraphemeBreakIterator_spec , NULL );
2214+ if (GraphemeBreakIteratorType == NULL ) {
21482215 return -1 ;
21492216 }
2217+ state -> GraphemeBreakIteratorType = GraphemeBreakIteratorType ;
21502218
21512219 if (PyModule_AddStringConstant (module , "unidata_version" , UNIDATA_VERSION ) < 0 ) {
21522220 return -1 ;
@@ -2189,9 +2257,12 @@ static struct PyModuleDef unicodedata_module = {
21892257 PyModuleDef_HEAD_INIT ,
21902258 .m_name = "unicodedata" ,
21912259 .m_doc = unicodedata_docstring ,
2192- .m_size = 0 ,
2260+ .m_size = sizeof ( unicodedatastate ) ,
21932261 .m_methods = unicodedata_functions ,
21942262 .m_slots = unicodedata_slots ,
2263+ .m_traverse = unicodedata_traverse ,
2264+ .m_clear = unicodedata_clear ,
2265+ .m_free = unicodedata_free ,
21952266};
21962267
21972268PyMODINIT_FUNC
0 commit comments