Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,10 @@ wheelhouse/
*.zip
*.csv

# Pycharm files
.idea
.idea/droste-engarde.iml
.idea/misc.xml
.idea/modules.xml
.idea/workspace.xml
.idea/vcs.xml
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,4 @@ See Also
========

- [assertr](https://github.com/tonyfischetti/assertr)
- [Validada](https://github.com/jnmclarty/validada)
- [Validada](https://github.com/jnmclarty/validada)
44 changes: 39 additions & 5 deletions engarde/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def none_missing(df, columns=None):
raise
return df


def is_monotonic(df, items=None, increasing=None, strict=False):
"""
Asserts that the DataFrame is monotonic.
Expand All @@ -68,7 +69,7 @@ def is_monotonic(df, items=None, increasing=None, strict=False):
if increasing:
good = getattr(s, 'is_monotonic_increasing')
elif increasing is None:
good = getattr(s, 'is_monotonic') | getattr(s, 'is_monotonic_decreasing')
good = getattr(s, 'is_monotonic') | getattr(s, 'is_monotonic_decreasing') # noqa
else:
good = getattr(s, 'is_monotonic_decreasing')
if strict:
Expand All @@ -83,6 +84,7 @@ def is_monotonic(df, items=None, increasing=None, strict=False):
raise AssertionError
return df


def is_shape(df, shape):
"""
Asserts that the DataFrame is of a known shape.
Expand All @@ -101,7 +103,7 @@ def is_shape(df, shape):
"""
try:
check = np.all(np.equal(df.shape, shape) | (np.equal(shape, [-1, -1]) |
np.equal(shape, [None, None])))
np.equal(shape, [None, None]))) # noqa
assert check
except AssertionError as e:
msg = ("Expected shape: {}\n"
Expand All @@ -110,6 +112,7 @@ def is_shape(df, shape):
raise
return df


def unique_index(df):
"""
Assert that the index is unique
Expand Down Expand Up @@ -151,6 +154,7 @@ def within_set(df, items=None):
raise AssertionError('Not in set', bad)
return df


def within_range(df, items=None):
"""
Assert that a DataFrame is within a range.
Expand All @@ -172,6 +176,7 @@ def within_range(df, items=None):
raise AssertionError("Outside range", bad)
return df


def within_n_std(df, n=3):
"""
Assert that every value is within ``n`` standard
Expand All @@ -195,6 +200,7 @@ def within_n_std(df, n=3):
raise AssertionError(msg)
return df


def has_dtypes(df, items):
"""
Assert that a DataFrame has ``dtypes``
Expand Down Expand Up @@ -241,7 +247,8 @@ def one_to_many(df, unitcol, manycol):
subset = df[[manycol, unitcol]].drop_duplicates()
for many in subset[manycol].unique():
if subset[subset[manycol] == many].shape[0] > 1:
msg = "{} in {} has multiple values for {}".format(many, manycol, unitcol)
msg = "{} in {} has multiple values for {}".format(many, manycol,
unitcol)
raise AssertionError(msg)

return df
Expand Down Expand Up @@ -270,7 +277,34 @@ def is_same_as(df, df_to_compare, **kwargs):
return df


def has_columns(df, columns, **kwargs):
    """
    Assert that a pandas DataFrame contains the given columns.

    Parameters
    ==========
    df : pandas DataFrame
    columns : iterable of column labels
        every label must be present in ``df.columns``
    **kwargs : dict
        accepted for signature compatibility with the other checks;
        currently unused

    Returns
    =======
    df : pandas DataFrame
        the same object that was passed in, so checks can be chained

    Raises
    ======
    AssertionError
        if one or more of ``columns`` is absent; the message lists the
        missing labels in the order they were requested
    """
    missing_columns = [col for col in columns if col not in df.columns]

    if missing_columns:
        raise AssertionError("DataFrame does not contain "
                             "required columns: {}".format(missing_columns))
    return df

# Public API of the checks module. Every entry needs its own trailing comma:
# adjacent string literals are silently concatenated by Python.
__all__ = ['is_monotonic', 'is_same_as', 'is_shape', 'none_missing',
           'unique_index', 'within_n_std', 'within_range', 'within_set',
           'has_dtypes', 'has_columns', 'verify', 'verify_all', 'verify_any']
23 changes: 21 additions & 2 deletions engarde/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import engarde.checks as ck


def none_missing(columns=None):
"""Asserts that no missing values (NaN) are found"""
def decorate(func):
Expand Down Expand Up @@ -38,6 +39,7 @@ def wrapper(*args, **kwargs):
return wrapper
return decorate


def is_monotonic(items=None, increasing=None, strict=False):
def decorate(func):
@wraps(func)
Expand All @@ -49,6 +51,7 @@ def wrapper(*args, **kwargs):
return wrapper
return decorate


def within_set(items):
"""
Check that DataFrame values are within set.
Expand Down Expand Up @@ -102,6 +105,7 @@ def wrapper(*args, **kwargs):
return wrapper
return decorate


def has_dtypes(items):
"""
Tests that the dtypes are as specified in items.
Expand All @@ -115,6 +119,18 @@ def wrapper(*args, **kwargs):
return wrapper
return decorate

def has_columns(columns):
    """
    Decorator: check that the wrapped function's return value contains
    every column in ``columns`` (delegates to ``ck.has_columns``).
    """
    def decorate(func):
        @wraps(func)
        def checked(*args, **kwargs):
            frame = func(*args, **kwargs)
            # The check raises on failure; its return value is not needed.
            ck.has_columns(frame, columns)
            return frame
        return checked
    return decorate

def one_to_many(unitcol, manycol):
""" Tests that each value in ``manycol`` only is associated with
Expand All @@ -124,7 +140,7 @@ def decorate(func):
@wraps(func)
def wrapper(*args, **kwargs):
result = func(*args, **kwargs)
ck.one_to_many(results, unitcol, manycol)
ck.one_to_many(result, unitcol, manycol)
return result
return wrapper
return decorate
Expand All @@ -136,18 +152,21 @@ def verify(func, *args, **kwargs):
"""
return _verify(func, None, *args, **kwargs)


def verify_all(func, *args, **kwargs):
    """
    Assert that all of `func(*args, **kwargs)` are true.

    Thin wrapper that selects the ``'all'`` variant of ``_verify``.
    """
    kind = 'all'
    return _verify(func, kind, *args, **kwargs)


def verify_any(func, *args, **kwargs):
    """
    Assert that any of `func(*args, **kwargs)` are true.

    Thin wrapper that selects the ``'any'`` variant of ``_verify``.
    """
    kind = 'any'
    return _verify(func, kind, *args, **kwargs)


def _verify(func, _kind, *args, **kwargs):
d = {None: ck.verify, 'all': ck.verify_all, 'any': ck.verify_any}
vfunc = d[_kind]
Expand Down Expand Up @@ -175,5 +194,5 @@ def wrapper(*args, **kwargs):

__all__ = ['is_monotonic', 'is_same_as', 'is_shape', 'none_missing',
'unique_index', 'within_range', 'within_set', 'has_dtypes',
'verify', 'verify_all', 'verify_any', 'within_n_std']
'has_columns', 'verify', 'verify_all', 'verify_any', 'within_n_std']

5 changes: 4 additions & 1 deletion engarde/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def verify(df, check, *args, **kwargs):
raise
return df


def verify_all(df, check, *args, **kwargs):
"""
Verify that all the entries in ``check(df, *args, **kwargs)``
Expand All @@ -51,6 +52,7 @@ def verify_all(df, check, *args, **kwargs):
raise
return df


def verify_any(df, check, *args, **kwargs):
"""
Verify that any of the entries in ``check(df, *args, **kwargs)``
Expand All @@ -69,9 +71,10 @@ def verify_any(df, check, *args, **kwargs):
# Error reporting
# ---------------


def bad_locations(df):
    """
    List the ``(index label, column label)`` pairs flagged in a boolean frame.

    Parameters
    ==========
    df : pandas DataFrame
        used as a boolean mask; truthy cells mark the locations to report

    Returns
    =======
    msg : numpy object array
        one ``(index, column)`` tuple per flagged cell, ordered column by
        column
    """
    # Enumerate every cell column by column: all rows of the first column,
    # then all rows of the second, and so on.
    all_locs = [(idx, col) for col in df.columns for idx in df.index]
    # Flatten the mask in column-major ('F') order so it lines up with
    # ``all_locs``. The previous ``ravel(1)`` relied on NumPy treating a
    # truthy integer as Fortran order, which current NumPy rejects.
    mask = np.asarray(df).ravel(order='F')
    bad = pd.Series(all_locs)[mask]
    msg = bad.values
    return msg
Expand Down
8 changes: 4 additions & 4 deletions examples/Trains.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,6 @@
{
"ename": "AssertionError",
"evalue": "('rational not true for all', id choiceid choice price1 time1 change1 comfort1 price2 time2 \\\n13 2 3 choice2 2450 121 0 0 2450 93 \n18 2 8 choice2 2975 108 0 0 2450 108 \n27 3 6 choice2 1920 106 0 0 1440 96 \n28 3 7 choice1 1920 106 0 0 1920 96 \n33 4 1 choice2 545 105 1 1 545 85 \n... ... ... ... ... ... ... ... ... ... \n2899 233 10 choice1 1350 110 0 0 1350 95 \n2900 234 1 choice2 4400 85 1 1 3300 85 \n2907 234 8 choice2 3300 95 1 0 3300 85 \n2914 235 1 choice2 3000 75 2 1 3000 65 \n2916 235 3 choice2 2550 75 1 0 2100 55 \n\n change2 comfort2 \n13 0 1 \n18 0 1 \n27 0 1 \n28 0 1 \n33 1 1 \n... ... ... \n2899 0 1 \n2900 0 1 \n2907 0 1 \n2914 1 1 \n2916 1 1 \n\n[467 rows x 11 columns])",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)",
Expand All @@ -236,7 +235,8 @@
"\u001b[0;32m/Users/tom.augspurger/sandbox/engarde/engarde/decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*operation_args, **operation_kwargs)\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0moperation_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moperation_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 148\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moperation_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0moperation_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0moperation_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 149\u001b[0;31m \u001b[0mvfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 150\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 151\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/tom.augspurger/sandbox/engarde/engarde/generic.py\u001b[0m in \u001b[0;36mverify_all\u001b[0;34m(df, check, *args, **kwargs)\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 41\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 42\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 43\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mAssertionError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[0mmsg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"{} not true for all\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheck\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAssertionError\u001b[0m: ('rational not true for all', id choiceid choice price1 time1 change1 comfort1 price2 time2 \\\n13 2 3 choice2 2450 121 0 0 2450 93 \n18 2 8 choice2 2975 108 0 0 2450 108 \n27 3 6 choice2 1920 106 0 0 1440 96 \n28 3 7 choice1 1920 106 0 0 1920 96 \n33 4 1 choice2 545 105 1 1 545 85 \n... ... ... ... ... ... ... ... ... ... \n2899 233 10 choice1 1350 110 0 0 1350 95 \n2900 234 1 choice2 4400 85 1 1 3300 85 \n2907 234 8 choice2 3300 95 1 0 3300 85 \n2914 235 1 choice2 3000 75 2 1 3000 65 \n2916 235 3 choice2 2550 75 1 0 2100 55 \n\n change2 comfort2 \n13 0 1 \n18 0 1 \n27 0 1 \n28 0 1 \n33 1 1 \n... ... ... \n2899 0 1 \n2900 0 1 \n2907 0 1 \n2914 1 1 \n2916 1 1 \n\n[467 rows x 11 columns])"
]
],
"output_type": "error"
}
],
"source": [
Expand Down Expand Up @@ -307,7 +307,7 @@
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
"version": 3.0
},
"file_extension": ".py",
"mimetype": "text/x-python",
Expand All @@ -319,4 +319,4 @@
},
"nbformat": 4,
"nbformat_minor": 0
}
}
Loading