1919#include "ucnhash.h"
2020#include "structmember.h"
2121
/* Helper macro to fix up start/end slice values in place.
 *
 * `start` and `end` must be modifiable signed integer lvalues; `len` is the
 * length of the sequence being sliced.
 *
 *   - `end` greater than `len` is clamped to `len`.
 *   - A negative `end` or `start` is offset by `len`; if it is still
 *     negative afterwards it is clamped to 0.
 *
 * `start` is NOT clamped against `len` or `end`, so it may still be larger
 * than `end` afterwards; callers must treat that case as an empty range.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and is safe inside an unbraced if/else.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)

36+
2237/*[clinic input]
2338module unicodedata
2439class unicodedata.UCD 'PreviousDBVersion *' '&UCD_Type'
@@ -1278,6 +1293,7 @@ typedef struct {
12781293 PyObject_HEAD
12791294 PyObject * str ;
12801295 Py_ssize_t pos ;
1296+ Py_ssize_t end ;
12811297} GraphemeClusterIterator ;
12821298
12831299static void
@@ -1308,24 +1324,24 @@ GCI_iternext(GraphemeClusterIterator *self)
13081324{
13091325 int kind = PyUnicode_KIND (self -> str );
13101326 void * pstr = PyUnicode_DATA (self -> str );
1311- if (PyUnicode_READ (kind , pstr , self -> pos )) {
1312- int start = self -> pos ;
1313- GCBState s = STATE_sot ;
1314- while (1 ) {
1315- if (!PyUnicode_READ (kind , pstr , self -> pos )) {
1316- return PyUnicode_Substring (self -> str , start , self -> pos );
1317- }
1318- Py_UCS4 chr = PyUnicode_READ (kind , pstr , self -> pos );
1319- int prop = _getrecord_ex (chr )-> grapheme_cluster_break ;
1320- s = GRAPH_CLUSTER_AUTOMATON [s ][prop ];
1321- if (s == STATE_BREAK ) {
1322- return PyUnicode_Substring (self -> str , start , self -> pos );
1323- }
1324- ++ self -> pos ;
1325- }
1326- } else {
1327+ if (self -> pos == self -> end ) {
13271328 return NULL ;
13281329 }
1330+
1331+ int start = self -> pos ;
1332+ GCBState s = STATE_sot ;
1333+ while (1 ) {
1334+ if (self -> pos == self -> end ) {
1335+ return PyUnicode_Substring (self -> str , start , self -> pos );
1336+ }
1337+ Py_UCS4 chr = PyUnicode_READ (kind , pstr , self -> pos );
1338+ int prop = _getrecord_ex (chr )-> grapheme_cluster_break ;
1339+ s = GRAPH_CLUSTER_AUTOMATON [s ][prop ];
1340+ if (s == STATE_BREAK ) {
1341+ return PyUnicode_Substring (self -> str , start , self -> pos );
1342+ }
1343+ ++ self -> pos ;
1344+ }
13291345}
13301346
13311347static PyTypeObject GraphemeClusterIteratorType = {
@@ -1346,6 +1362,8 @@ unicodedata.UCD.iter_graphemes
13461362
13471363 self: self
13481364 unistr: unicode
1365+ start: int = 0
1366+ end: Py_ssize_t(c_default="PY_SSIZE_T_MAX - 1") = sys.maxsize
13491367 /
13501368
13511369Returns an iterator to iterate over grapheme clusters in unistr.
@@ -1354,19 +1372,23 @@ It uses extended grapheme cluster rules from TR29.
13541372[clinic start generated code]*/
13551373
13561374static PyObject *
1357- unicodedata_UCD_iter_graphemes_impl (PyObject * self , PyObject * unistr )
1358- /*[clinic end generated code: output=92374c1d94db4165 input=59c4794a7f2e6742]*/
1375+ unicodedata_UCD_iter_graphemes_impl (PyObject * self , PyObject * unistr ,
1376+ int start , Py_ssize_t end )
1377+ /*[clinic end generated code: output=96aa5bb59138ea9c input=5667e0efb55be68a]*/
13591378{
13601379 GraphemeClusterIterator * gci = PyObject_GC_New (GraphemeClusterIterator ,
13611380 & GraphemeClusterIteratorType );
13621381
13631382 if (!gci )
13641383 return NULL ;
13651384
1385+ Py_ssize_t len = PyUnicode_GET_LENGTH (unistr );
1386+ ADJUST_INDICES (start , end , len );
13661387 gci -> str = unistr ;
13671388 Py_INCREF (unistr );
13681389 PyObject_GC_Track (gci );
1369- gci -> pos = 0 ;
1390+ gci -> pos = start ;
1391+ gci -> end = end ;
13701392 return (PyObject * )gci ;
13711393}
13721394
0 commit comments