@@ -206,6 +206,7 @@ class InvalidColumnName(Warning):
206206underscores, no Stata reserved words)
207207"""
208208
209+
209210def _cast_to_stata_types (data ):
210211 """Checks the dtypes of the columns of a pandas DataFrame for
211212 compatibility with the data types and ranges supported by Stata, and
@@ -218,18 +219,44 @@ def _cast_to_stata_types(data):
218219
219220 Notes
220221 -----
221- Numeric columns must be one of int8, int16, int32, float32 or float64, with
222- some additional value restrictions on the integer data types . int8 and
223- int16 columns are checked for violations of the value restrictions and
222+ Numeric columns in Stata must be one of int8, int16, int32, float32 or
223+ float64, with some additional value restrictions. int8 and int16 columns
224+ are checked for violations of the value restrictions and
224225 upcast if needed. int64 data is not usable in Stata, and so it is
225226 downcast to int32 whenever the values are in the int32 range, and
226227 sidecast to float64 when larger than this range. If the int64 values
227228 are outside of the range of those perfectly representable as float64 values,
228229 a warning is raised.
230+
231+ bool columns are cast to int8. uint columns are converted to int of the same
232+ size if there is no loss in precision, otherwise they are upcast to a larger
233+ type. uint64 is currently not supported since it is converted to object in
234+ a DataFrame.
229235 """
230236 ws = ''
237+ # original, if small, if large
238+ conversion_data = ((np .bool , np .int8 , np .int8 ),
239+ (np .uint8 , np .int8 , np .int16 ),
240+ (np .uint16 , np .int16 , np .int32 ),
241+ (np .uint32 , np .int32 , np .int64 ))
242+
231243 for col in data :
232244 dtype = data [col ].dtype
245+ # Cast from unsupported types to supported types
246+ for c_data in conversion_data :
247+ if dtype == c_data [0 ]:
248+ if data [col ].max () <= np .iinfo (c_data [1 ]).max :
249+ dtype = c_data [1 ]
250+ else :
251+ dtype = c_data [2 ]
252+ if c_data [2 ] == np .float64 : # Warn if necessary
253+ if data [col ].max () >= 2 * 53 :
254+ ws = precision_loss_doc % ('uint64' , 'float64' )
255+
256+ data [col ] = data [col ].astype (dtype )
257+
258+
259+ # Check values and upcast if necessary
233260 if dtype == np .int8 :
234261 if data [col ].max () > 100 or data [col ].min () < - 127 :
235262 data [col ] = data [col ].astype (np .int16 )
@@ -241,7 +268,7 @@ def _cast_to_stata_types(data):
241268 data [col ] = data [col ].astype (np .int32 )
242269 else :
243270 data [col ] = data [col ].astype (np .float64 )
244- if data [col ].max () < = 2 * 53 or data [col ].min () > = - 2 ** 53 :
271+ if data [col ].max () > = 2 ** 53 or data [col ].min () < = - 2 ** 53 :
245272 ws = precision_loss_doc % ('int64' , 'float64' )
246273
247274 if ws :
0 commit comments