@@ -848,5 +848,52 @@ class MyStr(str):
848848 self .assertIs (type (normalize (form , MyStr (input_str ))), str )
849849
850850
class GraphemeBreakTest(unittest.TestCase):
    """Check grapheme cluster iteration against GraphemeBreakTest.txt data."""

    @staticmethod
    def check_version(testfile):
        """Return True if the first line of *testfile* names our UCD version.

        Used as the ``check`` callback given to ``open_urlresource``; it
        consumes exactly one line from *testfile*.
        """
        hdr = testfile.readline()
        return unicodedata.unidata_version in hdr

    @requires_resource('network')
    def test_grapheme_break(self):
        """Fetch the data file for our UCD version and run every case in it.

        The test is skipped (not failed) when the file cannot be obtained.
        """
        TESTDATAFILE = "auxiliary/GraphemeBreakTest.txt"
        TESTDATAURL = f"https://www.unicode.org/Public/{unicodedata.unidata_version}/ucd/{TESTDATAFILE}"

        # Hit the exception early
        try:
            testdata = open_urlresource(TESTDATAURL, encoding="utf-8",
                                        check=self.check_version)
        except PermissionError:
            self.skipTest(f"Permission error when downloading {TESTDATAURL} "
                          f"into the test data directory")
        except (OSError, HTTPException) as exc:
            self.skipTest(f"Failed to download {TESTDATAURL}: {exc}")

        with testdata:
            self.run_grapheme_break_tests(testdata, unicodedata)

    def run_grapheme_break_tests(self, testdata, ucd):
        """Verify ``ucd.iter_graphemes`` against each data line of *testdata*.

        *testdata* is an iterable of text lines.  Everything after ``#`` on
        a line is a comment; lines that are empty once the comment is
        removed are ignored.  The remaining fields are hexadecimal code
        points separated by ``÷`` (start a new chunk) or ``×`` (stay in the
        current chunk).  A line must begin and end with ``÷``.

        *ucd* is the object whose ``iter_graphemes`` is exercised; its
        result for the joined chunks must equal the list of chunks.
        """
        for line in testdata:
            line, _, comment = line.partition('#')
            line = line.strip()
            if not line:
                continue
            comment = comment.strip()

            chunks = []
            # '×' carries no information for chunk building, so it is
            # dropped and only '÷' and the code points are examined.
            for field in line.replace('×', ' ').split():
                if field == '÷':
                    chunks.append('')
                elif not chunks:
                    # Without this guard a malformed line would surface
                    # as an opaque IndexError from chunks[-1].
                    self.fail(f"data line does not start with '÷': {line}")
                else:
                    chunks[-1] += chr(int(field, 16))
            # The trailing '÷' opens one final chunk that must stay empty.
            self.assertEqual(chunks.pop(), '', line)
            with self.subTest(line):
                result = list(ucd.iter_graphemes(''.join(chunks)))
                self.assertEqual(result, chunks, comment)
897+
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
0 commit comments