1+ # -*- coding: utf-8 -*-
12"""
23Collection of query wrappers / abstractions to both facilitate data
34retrieval and to reduce dependency on DB-specific API.
45"""
6+
57from __future__ import print_function , division
6- from datetime import datetime , date , timedelta
8+ from datetime import datetime , date
79
810import warnings
911import traceback
10- import itertools
1112import re
1213import numpy as np
1314
1415import pandas .lib as lib
1516import pandas .core .common as com
1617from pandas .compat import lzip , map , zip , raise_with_traceback , string_types
1718from pandas .core .api import DataFrame , Series
18- from pandas .core .common import notnull , isnull
19+ from pandas .core .common import isnull
1920from pandas .core .base import PandasObject
2021from pandas .tseries .tools import to_datetime
2122
2223from contextlib import contextmanager
2324
25+
2426class SQLAlchemyRequired (ImportError ):
2527 pass
2628
@@ -34,6 +36,7 @@ class DatabaseError(IOError):
3436
3537_SQLALCHEMY_INSTALLED = None
3638
39+
3740def _is_sqlalchemy_engine (con ):
3841 global _SQLALCHEMY_INSTALLED
3942 if _SQLALCHEMY_INSTALLED is None :
@@ -80,7 +83,8 @@ def _handle_date_column(col, format=None):
8083 else :
8184 if format in ['D' , 's' , 'ms' , 'us' , 'ns' ]:
8285 return to_datetime (col , coerce = True , unit = format )
83- elif issubclass (col .dtype .type , np .floating ) or issubclass (col .dtype .type , np .integer ):
86+ elif (issubclass (col .dtype .type , np .floating )
87+ or issubclass (col .dtype .type , np .integer )):
8488 # parse dates as timestamp
8589 format = 's' if format is None else format
8690 return to_datetime (col , coerce = True , unit = format )
@@ -89,8 +93,9 @@ def _handle_date_column(col, format=None):
8993
9094
9195def _parse_date_columns (data_frame , parse_dates ):
92- """ Force non-datetime columns to be read as such.
93- Supports both string formatted and integer timestamp columns
96+ """
97+ Force non-datetime columns to be read as such.
98+ Supports both string formatted and integer timestamp columns
9499 """
95100 # handle non-list entries for parse_dates gracefully
96101 if parse_dates is True or parse_dates is None or parse_dates is False :
@@ -152,6 +157,7 @@ def _safe_fetch(cur):
152157 if excName == 'OperationalError' :
153158 return []
154159
160+
155161def tquery (sql , con = None , cur = None , retry = True ):
156162 """
157163 DEPRECATED. Returns list of tuples corresponding to each row in given sql
@@ -209,8 +215,8 @@ def tquery(sql, con=None, cur=None, retry=True):
209215
210216def uquery (sql , con = None , cur = None , retry = True , params = None ):
211217 """
212- DEPRECATED. Does the same thing as tquery, but instead of returning results, it
213- returns the number of rows affected. Good for update queries.
218+ DEPRECATED. Does the same thing as tquery, but instead of returning
219+ results, it returns the number of rows affected. Good for update queries.
214220
215221 To obtain the same result in the future, you can use the following:
216222
@@ -269,8 +275,8 @@ def read_sql_table(table_name, con, schema=None, index_col=None,
269275 con : SQLAlchemy engine
270276 Sqlite DBAPI connection mode not supported
271277 schema : string, default None
272- Name of SQL schema in database to query (if database flavor supports this).
273- If None, use default schema (default).
278+ Name of SQL schema in database to query (if database flavor
279+ supports this). If None, use default schema (default).
274280 index_col : string, optional
275281 Column to set as index
276282 coerce_float : boolean, default True
@@ -343,7 +349,7 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None,
343349 decimal.Decimal) to floating point, useful for SQL result sets
344350 params : list, tuple or dict, optional
345351 List of parameters to pass to execute method. The syntax used
346- to pass parameters is database driver dependent. Check your
352+ to pass parameters is database driver dependent. Check your
347353 database driver documentation for which of the five syntax styles,
348354 described in PEP 249's paramstyle, is supported.
349355 Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}
@@ -393,7 +399,7 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None,
393399 decimal.Decimal) to floating point, useful for SQL result sets
394400 params : list, tuple or dict, optional
395401 List of parameters to pass to execute method. The syntax used
396- to pass parameters is database driver dependent. Check your
402+ to pass parameters is database driver dependent. Check your
397403 database driver documentation for which of the five syntax styles,
398404 described in PEP 249's paramstyle, is supported.
399405 Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}
@@ -469,8 +475,8 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail',
469475 'mysql' is deprecated and will be removed in future versions, but it
470476 will be further supported through SQLAlchemy engines.
471477 schema : string, default None
472- Name of SQL schema in database to write to (if database flavor supports
473- this). If None, use default schema (default).
478+ Name of SQL schema in database to write to (if database flavor
479+ supports this). If None, use default schema (default).
474480 if_exists : {'fail', 'replace', 'append'}, default 'fail'
475481 - fail: If table exists, do nothing.
476482 - replace: If table exists, drop it, recreate it, and insert data.
@@ -482,7 +488,7 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail',
482488 `index` is True, then the index names are used.
483489 A sequence should be given if the DataFrame uses MultiIndex.
484490 chunksize : int, default None
485- If not None, then rows will be written in batches of this size at a
491+ If not None, then rows will be written in batches of this size at a
486492 time. If None, all rows will be written at once.
487493
488494 """
@@ -535,7 +541,9 @@ def has_table(table_name, con, flavor='sqlite', schema=None):
535541 "and will be removed in future versions. "
536542 "MySQL will be further supported with SQLAlchemy engines." )
537543
538- def pandasSQL_builder (con , flavor = None , schema = None , meta = None , is_cursor = False ):
544+
545+ def pandasSQL_builder (con , flavor = None , schema = None , meta = None ,
546+ is_cursor = False ):
539547 """
540548 Convenience function to return the correct PandasSQL subclass based on the
541549 provided parameters
@@ -622,7 +630,7 @@ def insert_data(self):
622630 "duplicate name in index/columns: {0}" .format (err ))
623631 else :
624632 temp = self .frame
625-
633+
626634 column_names = list (map (str , temp .columns ))
627635 ncols = len (column_names )
628636 data_list = [None ] * ncols
@@ -631,7 +639,8 @@ def insert_data(self):
631639 for i in range (len (blocks )):
632640 b = blocks [i ]
633641 if b .is_datetime :
634- # convert to microsecond resolution so this yields datetime.datetime
642+ # convert to microsecond resolution so this yields
643+ # datetime.datetime
635644 d = b .values .astype ('M8[us]' ).astype (object )
636645 else :
637646 d = np .array (b .values , dtype = object )
@@ -647,7 +656,7 @@ def insert_data(self):
647656 return column_names , data_list
648657
649658 def _execute_insert (self , conn , keys , data_iter ):
650- data = [dict ( (k , v ) for k , v in zip (keys , row ) ) for row in data_iter ]
659+ data = [dict ((k , v ) for k , v in zip (keys , row )) for row in data_iter ]
651660 conn .execute (self .insert_statement (), data )
652661
653662 def insert (self , chunksize = None ):
@@ -658,11 +667,11 @@ def insert(self, chunksize=None):
658667 if nrows == 0 :
659668 return
660669
661- if chunksize is None :
670+ if chunksize is None :
662671 chunksize = nrows
663672 elif chunksize == 0 :
664673 raise ValueError ('chunksize argument should be non-zero' )
665-
674+
666675 chunks = int (nrows / chunksize ) + 1
667676
668677 with self .pd_sql .run_transaction () as conn :
@@ -715,7 +724,8 @@ def _index_name(self, index, index_label):
715724 else :
716725 return index_label
717726 # return the used column labels for the index columns
718- if nlevels == 1 and 'index' not in self .frame .columns and self .frame .index .name is None :
727+ if (nlevels == 1 and 'index' not in self .frame .columns
728+ and self .frame .index .name is None ):
719729 return ['index' ]
720730 else :
721731 return [l if l is not None else "level_{0}" .format (i )
@@ -739,7 +749,7 @@ def _get_column_names_and_types(self, dtype_mapper):
739749
740750 column_names_and_types += [
741751 (str (self .frame .columns [i ]),
742- dtype_mapper (self .frame .iloc [:,i ]),
752+ dtype_mapper (self .frame .iloc [:, i ]),
743753 False )
744754 for i in range (len (self .frame .columns ))
745755 ]
@@ -756,9 +766,8 @@ def _create_table_setup(self):
756766 for name , typ , is_index in column_names_and_types ]
757767
758768 if self .keys is not None :
759- columns .append (PrimaryKeyConstraint (self .keys ,
760- name = self .name + '_pk' ))
761-
769+ pkc = PrimaryKeyConstraint (self .keys , name = self .name + '_pk' )
770+ columns .append (pkc )
762771
763772 schema = self .schema or self .pd_sql .meta .schema
764773
@@ -770,17 +779,16 @@ def _create_table_setup(self):
770779 return Table (self .name , meta , * columns , schema = schema )
771780
772781 def _harmonize_columns (self , parse_dates = None ):
773- """ Make a data_frame's column type align with an sql_table
774- column types
775- Need to work around limited NA value support.
776- Floats are always fine, ints must always
777- be floats if there are Null values.
778- Booleans are hard because converting bool column with None replaces
779- all Nones with false. Therefore only convert bool if there are no
780- NA values.
781- Datetimes should already be converted
782- to np.datetime if supported, but here we also force conversion
783- if required
782+ """
783+ Make the DataFrame's column types align with the SQL table
784+ column types.
785+ Need to work around limited NA value support. Floats are always
786+ fine, ints must always be floats if there are Null values.
787+ Booleans are hard because converting bool column with None replaces
788+ all Nones with false. Therefore only convert bool if there are no
789+ NA values.
790+ Datetimes should already be converted to np.datetime64 if supported,
791+ but here we also force conversion if required
784792 """
785793 # handle non-list entries for parse_dates gracefully
786794 if parse_dates is True or parse_dates is None or parse_dates is False :
@@ -823,7 +831,7 @@ def _harmonize_columns(self, parse_dates=None):
823831
824832 def _sqlalchemy_type (self , col ):
825833 from sqlalchemy .types import (BigInteger , Float , Text , Boolean ,
826- DateTime , Date , Time , Interval )
834+ DateTime , Date , Time )
827835
828836 if com .is_datetime64_dtype (col ):
829837 try :
@@ -874,12 +882,12 @@ class PandasSQL(PandasObject):
874882 """
875883
876884 def read_sql (self , * args , ** kwargs ):
877- raise ValueError (
878- "PandasSQL must be created with an SQLAlchemy engine or connection+sql flavor" )
885+ raise ValueError ("PandasSQL must be created with an SQLAlchemy engine"
886+ " or connection+sql flavor" )
879887
880888 def to_sql (self , * args , ** kwargs ):
881- raise ValueError (
882- "PandasSQL must be created with an SQLAlchemy engine or connection+sql flavor" )
889+ raise ValueError ("PandasSQL must be created with an SQLAlchemy engine"
890+ " or connection+sql flavor" )
883891
884892
885893class PandasSQLAlchemy (PandasSQL ):
@@ -897,7 +905,7 @@ def __init__(self, engine, schema=None, meta=None):
897905 self .meta = meta
898906
899907 def run_transaction (self ):
900- return self .engine .begin ()
908+ return self .engine .begin ()
901909
902910 def execute (self , * args , ** kwargs ):
903911 """Simple passthrough to SQLAlchemy engine"""
@@ -964,8 +972,8 @@ def drop_table(self, table_name, schema=None):
964972 self .meta .clear ()
965973
966974 def _create_sql_schema (self , frame , table_name , keys = None ):
967- table = PandasSQLTable (table_name , self , frame = frame , index = False ,
968- keys = keys )
975+ table = PandasSQLTable (table_name , self , frame = frame , index = False ,
976+ keys = keys )
969977 return str (table .sql_schema ())
970978
971979
@@ -1025,9 +1033,11 @@ def _create_sql_schema(self, frame, table_name, keys=None):
10251033
10261034
10271035class PandasSQLTableLegacy (PandasSQLTable ):
1028- """Patch the PandasSQLTable for legacy support.
1029- Instead of a table variable just use the Create Table
1030- statement"""
1036+ """
1037+ Patch the PandasSQLTable for legacy support.
1038+ Instead of a table variable just use the Create Table statement.
1039+ """
1040+
10311041 def sql_schema (self ):
10321042 return str (";\n " .join (self .table ))
10331043
@@ -1058,11 +1068,11 @@ def _execute_insert(self, conn, keys, data_iter):
10581068 conn .executemany (self .insert_statement (), data_list )
10591069
10601070 def _create_table_setup (self ):
1061- """Return a list of SQL statement that create a table reflecting the
1071+ """
1072+ Return a list of SQL statements that create a table reflecting the
10621073 structure of a DataFrame. The first entry will be a CREATE TABLE
10631074 statement while the rest will be CREATE INDEX statements
10641075 """
1065-
10661076 column_names_and_types = \
10671077 self ._get_column_names_and_types (self ._sql_type_name )
10681078
@@ -1159,15 +1169,15 @@ def execute(self, *args, **kwargs):
11591169 else :
11601170 cur .execute (* args )
11611171 return cur
1162- except Exception as e :
1172+ except Exception as exc :
11631173 try :
11641174 self .con .rollback ()
11651175 except Exception : # pragma: no cover
1166- ex = DatabaseError (
1167- "Execution failed on sql: %s \n %s \n unable to rollback" % (args [0 ], e ))
1176+ ex = DatabaseError ("Execution failed on sql: %s \n %s \n unable"
1177+ " to rollback" % (args [0 ], exc ))
11681178 raise_with_traceback (ex )
11691179
1170- ex = DatabaseError ("Execution failed on sql '%s': %s" % (args [0 ], e ))
1180+ ex = DatabaseError ("Execution failed on sql '%s': %s" % (args [0 ], exc ))
11711181 raise_with_traceback (ex )
11721182
11731183 def read_sql (self , sql , index_col = None , coerce_float = True , params = None ,
@@ -1213,11 +1223,11 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
12131223 `index` is True, then the index names are used.
12141224 A sequence should be given if the DataFrame uses MultiIndex.
12151225 schema : string, default None
1216- Ignored parameter included for compatability with SQLAlchemy version
1217- of `to_sql`.
1226+ Ignored parameter included for compatibility with SQLAlchemy
1227+ version of ``to_sql``.
12181228 chunksize : int, default None
1219- If not None, then rows will be written in batches of this size at a
1220- time. If None, all rows will be written at once.
1229+ If not None, then rows will be written in batches of this
1230+ size at a time. If None, all rows will be written at once.
12211231
12221232 """
12231233 table = PandasSQLTableLegacy (
@@ -1243,8 +1253,8 @@ def drop_table(self, name, schema=None):
12431253 self .execute (drop_sql )
12441254
12451255 def _create_sql_schema (self , frame , table_name , keys = None ):
1246- table = PandasSQLTableLegacy (table_name , self , frame = frame , index = False ,
1247- keys = keys )
1256+ table = PandasSQLTableLegacy (table_name , self , frame = frame ,
1257+ index = False , keys = keys )
12481258 return str (table .sql_schema ())
12491259
12501260
0 commit comments