66
77 import pandas as pd
88 import numpy as np
9- options.display.max_rows= 15
9+ pd. options.display.max_rows= 15
1010
1111 Comparison with R / R libraries
1212*******************************
@@ -51,7 +51,7 @@ Selecting multiple columns by name in ``pandas`` is straightforward
5151
5252.. ipython :: python
5353
54- df = DataFrame(np.random.randn(10 , 3 ), columns = list (' abc' ))
54+ df = pd. DataFrame(np.random.randn(10 , 3 ), columns = list (' abc' ))
5555 df[[' a' , ' c' ]]
5656 df.loc[:, [' a' , ' c' ]]
5757
@@ -63,7 +63,7 @@ with a combination of the ``iloc`` indexer attribute and ``numpy.r_``.
6363 named = list (' abcdefg' )
6464 n = 30
6565 columns = named + np.arange(len (named), n).tolist()
66- df = DataFrame(np.random.randn(n, n), columns = columns)
66+ df = pd. DataFrame(np.random.randn(n, n), columns = columns)
6767
6868 df.iloc[:, np.r_[:10 , 24 :30 ]]
6969
@@ -88,8 +88,7 @@ function.
8888
8989.. ipython :: python
9090
91- from pandas import DataFrame
92- df = DataFrame({
91+ df = pd.DataFrame({
9392 ' v1' : [1 ,3 ,5 ,7 ,8 ,3 ,5 ,np.nan,4 ,5 ,7 ,9 ],
9493 ' v2' : [11 ,33 ,55 ,77 ,88 ,33 ,55 ,np.nan,44 ,55 ,77 ,99 ],
9594 ' by1' : [" red" , " blue" , 1 , 2 , np.nan, " big" , 1 , 2 , " red" , 1 , np.nan, 12 ],
@@ -166,7 +165,7 @@ In ``pandas`` we may use :meth:`~pandas.pivot_table` method to handle this:
166165 import random
167166 import string
168167
169- baseball = DataFrame({
168+ baseball = pd. DataFrame({
170169 ' team' : [" team %d " % (x+ 1 ) for x in range (5 )]* 5 ,
171170 ' player' : random.sample(list (string.ascii_lowercase),25 ),
172171 ' batting avg' : np.random.uniform(.200 , .400 , 25 )
@@ -197,7 +196,7 @@ index/slice as well as standard boolean indexing:
197196
198197.. ipython :: python
199198
200- df = DataFrame({' a' : np.random.randn(10 ), ' b' : np.random.randn(10 )})
199+ df = pd. DataFrame({' a' : np.random.randn(10 ), ' b' : np.random.randn(10 )})
201200 df.query(' a <= b' )
202201 df[df.a <= df.b]
203202 df.loc[df.a <= df.b]
@@ -225,7 +224,7 @@ In ``pandas`` the equivalent expression, using the
225224
226225.. ipython :: python
227226
228- df = DataFrame({' a' : np.random.randn(10 ), ' b' : np.random.randn(10 )})
227+ df = pd. DataFrame({' a' : np.random.randn(10 ), ' b' : np.random.randn(10 )})
229228 df.eval(' a + b' )
230229 df.a + df.b # same as the previous expression
231230
@@ -283,7 +282,7 @@ In ``pandas`` the equivalent expression, using the
283282
284283.. ipython :: python
285284
286- df = DataFrame({
285+ df = pd. DataFrame({
287286 ' x' : np.random.uniform(1 ., 168 ., 120 ),
288287 ' y' : np.random.uniform(7 ., 334 ., 120 ),
289288 ' z' : np.random.uniform(1.7 , 20.7 , 120 ),
@@ -317,7 +316,7 @@ In Python, since ``a`` is a list, you can simply use list comprehension.
317316.. ipython :: python
318317
319318 a = np.array(list (range (1 ,24 ))+ [np.NAN ]).reshape(2 ,3 ,4 )
320- DataFrame([tuple (list (x)+ [val]) for x, val in np.ndenumerate(a)])
319+ pd. DataFrame([tuple (list (x)+ [val]) for x, val in np.ndenumerate(a)])
321320
322321 |meltlist |_
323322~~~~~~~~~~~~
@@ -336,7 +335,7 @@ In Python, this list would be a list of tuples, so
336335.. ipython :: python
337336
338337 a = list (enumerate (list (range (1 ,5 ))+ [np.NAN ]))
339- DataFrame(a)
338+ pd. DataFrame(a)
340339
341340 For more details and examples see :ref: `the Intro to Data Structures
342341documentation <basics.dataframe.from_items>`.
@@ -361,7 +360,7 @@ In Python, the :meth:`~pandas.melt` method is the R equivalent:
361360
362361.. ipython :: python
363362
364- cheese = DataFrame({' first' : [' John' , ' Mary' ],
363+ cheese = pd. DataFrame({' first' : [' John' , ' Mary' ],
365364 ' last' : [' Doe' , ' Bo' ],
366365 ' height' : [5.5 , 6.0 ],
367366 ' weight' : [130 , 150 ]})
@@ -394,7 +393,7 @@ In Python the best way is to make use of :meth:`~pandas.pivot_table`:
394393
395394.. ipython :: python
396395
397- df = DataFrame({
396+ df = pd. DataFrame({
398397 ' x' : np.random.uniform(1 ., 168 ., 12 ),
399398 ' y' : np.random.uniform(7 ., 334 ., 12 ),
400399 ' z' : np.random.uniform(1.7 , 20.7 , 12 ),
@@ -426,7 +425,7 @@ using :meth:`~pandas.pivot_table`:
426425
427426.. ipython :: python
428427
429- df = DataFrame({
428+ df = pd. DataFrame({
430429 ' Animal' : [' Animal1' , ' Animal2' , ' Animal3' , ' Animal2' , ' Animal1' ,
431430 ' Animal2' , ' Animal3' ],
432431 ' FeedType' : [' A' , ' B' , ' A' , ' A' , ' B' , ' B' , ' A' ],
@@ -444,6 +443,30 @@ The second approach is to use the :meth:`~pandas.DataFrame.groupby` method:
444443 For more details and examples see :ref: `the reshaping documentation
445444<reshaping.pivot>` or :ref: `the groupby documentation<groupby.split> `.
446445
446+ |factor |_
447+ ~~~~~~~~
448+
449+ .. versionadded :: 0.15
450+
451+ pandas has a data type for categorical data.
452+
453+ .. code-block :: r
454+
455+ cut(c(1,2,3,4,5,6), 3)
456+ factor(c(1,2,3,2,2,3))
457+
458+ In pandas this is accomplished with ``pd.cut `` and ``astype("category") ``:
459+
460+ .. ipython :: python
461+
462+ pd.cut(pd.Series([1 ,2 ,3 ,4 ,5 ,6 ]), 3 )
463+ pd.Series([1 ,2 ,3 ,2 ,2 ,3 ]).astype(" category" )
464+
465+ For more details and examples see :ref: `categorical introduction <categorical >` and the
466+ :ref: `API documentation <api.categorical >`. There is also documentation regarding the
467+ :ref: `differences to R's factor <categorical.rfactor >`.
468+
469+
447470.. |c | replace :: ``c ``
448471.. _c : http://stat.ethz.ch/R-manual/R-patched/library/base/html/c.html
449472
@@ -477,3 +500,5 @@ For more details and examples see :ref:`the reshaping documentation
477500 .. |cast | replace :: ``cast ``
478501.. _cast: http://www.inside-r.org/packages/cran/reshape2/docs/cast
479502
503+ .. |factor | replace :: ``factor ``
504+ .. _factor : https://stat.ethz.ch/R-manual/R-devel/library/base/html/factor.html
0 commit comments