@@ -484,3 +484,78 @@ def f(g):
484484
# Aggregate with the Python builtins sum/min/max, grouping first by a single
# key ('jim') and then by a pair of keys ('jim', 'joe').
groupby_agg_builtins1 = Benchmark("df.groupby('jim').agg([sum, min, max])", setup)
groupby_agg_builtins2 = Benchmark("df.groupby(['jim', 'joe']).agg([sum, min, max])", setup)
487+
488+ #----------------------------------------------------------------------
489+ # groupby with a variable value for ngroups
490+
491+
# Values substituted for ``ngroups`` in the generated setup code; one
# benchmark is generated per (ngroups, method name) pair.
ngroups_list = [100, 10000]

# Names of the groupby methods to benchmark.  Each one is called with an
# empty argument list on the grouped column.
no_arg_func_list = [
    'all',
    'any',
    'count',
    'cumcount',
    'cummax',
    'cummin',
    'cumprod',
    'cumsum',
    'describe',
    'diff',
    'first',
    'head',
    'last',
    'mad',
    'max',
    'mean',
    'median',
    'min',
    'nunique',
    'pct_change',
    'prod',
    'rank',
    'sem',
    'size',
    'skew',
    'std',
    'sum',
    'tail',
    'unique',
    'var',
    'value_counts',
]
526+
527+
# Statement under test; ``%s`` is replaced by the method call expression,
# e.g. ``sum()``.
_stmt_template = "df.groupby('value')['timestamp'].%s"

# Setup code executed before the statement; ``%s`` is replaced by the value
# of ``ngroups``.  It builds a frame with ``2 * ngroups`` rows:
#   * 'timestamp' holds integers drawn from ``np.arange(ngroups)``
#   * 'value' (the grouping key) holds integers drawn from ``[0, size)``
# The random seed is fixed so that every run sees the same data.
# NOTE(review): the template is filled with the ``%`` operator, so this
# assumes ``common_setup`` contains no stray '%' characters -- confirm.
_setup_template = common_setup + """
np.random.seed(1234)
ngroups = %s
size = ngroups * 2
rng = np.arange(ngroups)
df = DataFrame(dict(
    timestamp=rng.take(np.random.randint(0, ngroups, size=size)),
    value=np.random.randint(0, size, size=size)
))
"""

# Passed as ``start_date`` to every generated Benchmark.
START_DATE = datetime(2011, 7, 1)
540+
541+
def make_large_ngroups_bmark(ngroups, func_name, func_args=''):
    """Build a named Benchmark for one groupby method and group count.

    Parameters
    ----------
    ngroups : int
        Value substituted for ``ngroups`` in the setup template.
    func_name : str
        Name of the method called on the grouped column.
    func_args : str, optional
        Source text placed between the parentheses of the method call.
        Defaults to the empty string (a call with no arguments).

    Returns
    -------
    Benchmark
        Benchmark whose ``name`` is ``groupby_ngroups_<ngroups>_<func_name>``.

    Notes
    -----
    The name is derived from ``ngroups`` and ``func_name`` only, so two calls
    that differ solely in ``func_args`` produce benchmarks with the same name.
    """
    bmark_name = 'groupby_ngroups_%s_%s' % (ngroups, func_name)
    stmt = _stmt_template % ('%s(%s)' % (func_name, func_args))
    setup = _setup_template % ngroups
    bmark = Benchmark(stmt, setup, start_date=START_DATE)
    # MUST set name: the benchmark is later stored in the module namespace
    # under this name.
    bmark.name = bmark_name
    return bmark
550+
551+
def inject_bmark_into_globals(bmark):
    """Bind ``bmark`` in this module's global namespace under ``bmark.name``.

    An existing global with the same name is overwritten.

    Parameters
    ----------
    bmark : object
        Benchmark-like object exposing a non-empty ``name`` attribute.

    Raises
    ------
    AssertionError
        If ``bmark.name`` is falsy (``None`` or an empty string).
    """
    if not bmark.name:
        raise AssertionError('benchmark must have a name')
    globals()[bmark.name] = bmark
556+
557+
# Generate one benchmark per (ngroups, method) pair and publish each under
# its own name in the module namespace.
for ngroups in ngroups_list:
    for func_name in no_arg_func_list:
        # Pass the benchmark straight through instead of binding it to a
        # temporary module-level name: a leftover ``bmark`` global would
        # alias the last generated benchmark, so anything that collects
        # Benchmark objects from this module's globals would see it twice.
        inject_bmark_into_globals(make_large_ngroups_bmark(ngroups, func_name))
0 commit comments