@@ -2832,18 +2832,12 @@ def duplicated(self, subset=None, take_last=False):
28322832 duplicated : Series
28332833 """
28342834 from pandas .core .groupby import get_group_index
2835+ from pandas .core .algorithms import factorize
28352836 from pandas .hashtable import duplicated_int64 , _SIZE_HINT_LIMIT
28362837
2837- size_hint = min (len (self ), _SIZE_HINT_LIMIT )
2838-
2839- def factorize (vals ):
2840- (hash_klass , vec_klass ), vals = \
2841- algos ._get_data_algo (vals , algos ._hashtables )
2842-
2843- uniques , table = vec_klass (), hash_klass (size_hint )
2844- labels = table .get_labels (vals , uniques , 0 , - 1 )
2845-
2846- return labels .astype ('i8' , copy = False ), len (uniques )
def f(vals):
    """Factorize one column's values for duplicate detection.

    Delegates to ``factorize`` with a hash-table size hint of the frame
    length capped at ``_SIZE_HINT_LIMIT``.

    Returns a 2-tuple: the labels cast to ``'i8'`` (``copy=False`` asks
    ``astype`` not to copy when it can avoid it) and the number of
    entries in the second value returned by ``factorize`` (the uniques).
    """
    hint = min(len(self), _SIZE_HINT_LIMIT)
    codes, uniques = factorize(vals, size_hint=hint)
    n_uniques = len(uniques)
    return codes.astype('i8', copy=False), n_uniques
28472841
28482842 if subset is None :
28492843 subset = self .columns
@@ -2853,7 +2847,7 @@ def factorize(vals):
28532847 subset = subset ,
28542848
28552849 vals = (self [col ].values for col in subset )
2856- labels , shape = map (list , zip ( * map (factorize , vals )))
2850+ labels , shape = map (list , zip ( * map (f , vals )))
28572851
28582852 ids = get_group_index (labels , shape , sort = False , xnull = False )
28592853 return Series (duplicated_int64 (ids , take_last ), index = self .index )
0 commit comments