Source code for datawrangler.zoo.array
import pandas as pd
import numpy as np
import six
import os
from ..io import load
from ..core.configurator import update_dict
def is_number(x):
    """
    Internal function-- check whether an object is numeric

    Parameters
    ----------
    :param x: the object to test

    Returns
    -------
    :return: a truthy value if x is a real or complex scalar, or a list whose elements (recursively) all
      satisfy this test; a falsy value otherwise
    """
    if type(x) is list:
        # a list qualifies only when every element does; an empty list qualifies vacuously
        return all(is_number(item) for item in x)

    if not np.isscalar(x):
        return False

    # the real/complex test is meant to weed out non-numeric scalars (e.g., single characters)
    return np.isreal(x) or np.iscomplex(x)
def is_array(x):
    """
    Return True if and only if x is an Array, or a file that can be loaded into an Array.

    Parameters
    ----------
    :param x: an object, file path or URL

    Returns
    -------
    :return: whether (or not) x is an array (or if it points to an array)
    """
    # any object whose type is defined by numpy counts as an Array, except numpy's string types
    if ('str' not in str(type(x))) and (type(x).__module__ == 'numpy'):
        return True

    try:
        # treat x as something loadable (e.g., a path or URL) and test whatever it loads to
        if is_array(load(x)):
            return True
    except Exception:
        # Loading failed, so x is not a loadable reference; fall back to inspecting x itself.
        # Only Exception is caught (not a bare except) so that KeyboardInterrupt and SystemExit
        # raised during a slow load still propagate instead of being read as "not an array".
        if type(x) is list:
            return all(is_array(i) for i in x)
        elif is_number(x):
            return True
    return False
def wrangle_array(data, return_model=False, **kwargs):
    """
    Turn an Array into a Pandas DataFrame

    Arrays with fewer than two dimensions are promoted to 2D. Arrays with three or more dimensions are
    flattened to 2D by repeatedly concatenating the slices along the last axis side by side along the
    second-to-last axis (so an array of shape (a, b, c) becomes one of shape (a, b * c)).

    Parameters
    ----------
    :param data: an Array (or path to an Array)
    :param return_model: if True, return a function for casting an Array into a DataFrame (along with the resulting
      DataFrame).  Default: False
    :param kwargs: a list of keyword arguments:
      - 'model': a callable function or constructor, or a dictionary containing the following keys:
        - 'model': a callable function or constructor
        - 'args': a list of arguments to pass to the function (in addition to data)
        - 'kwargs': a list of keyword arguments to pass to the function
        default: pandas.DataFrame
      - all other keyword arguments are passed to the model (or constructor).  These can be used to change how the
        DataFrame is created (e.g., passing columns=['one', 'two', 'three'] will change the column names of the
        resulting DataFrame, assuming the "model" is pandas.DataFrame).

    Returns
    -------
    :return: The resulting DataFrame.  If return_model is True, a tuple of the DataFrame and a dictionary with
      keys 'model', 'args', and 'kwargs' describing the call that produced it.
    """
    def stacker(x):
        # fold the trailing axis into the one before it until at most two dimensions remain
        while x.ndim >= 3:
            last_dim = x.ndim - 1
            x = np.concatenate(np.split(x, x.shape[last_dim], axis=last_dim), axis=last_dim - 1)
            # After the concatenation the trailing axis has length 1.  Remove that axis only: an
            # unrestricted squeeze would also drop other singleton axes (e.g., a single row),
            # changing the orientation of the result or reducing it below two dimensions.
            x = np.squeeze(x, axis=last_dim)
        return x

    # numpy arrays need no coercion: they are neither scalars/lists of numbers nor file paths
    if not isinstance(data, np.ndarray):
        if is_number(data):
            data = np.array(data)
        elif (type(data) in six.string_types) and os.path.exists(data) and is_array(data):
            data = load(data)

    # densify sparse containers (identified by type name) so that numpy can reshape them
    if ('sparse' in str(type(data))) and hasattr(data, 'toarray'):
        data = data.toarray()

    data = stacker(np.atleast_2d(data))

    model = kwargs.pop('model', pd.DataFrame)
    if type(model) is dict:
        # NOTE(review): this check disappears under `python -O`, and a failure surfaces as an
        # AssertionError whose message is the ValueError instance.  Left as is because raising
        # ValueError directly would change the exception type seen by existing callers.
        # noinspection PyArgumentList
        assert all(k in model for k in ('model', 'args', 'kwargs')), ValueError(f'Invalid model: {model}')
        model_args = model['args']
        # combine the model's stored keyword arguments with those passed to this call
        model_kwargs = update_dict(model['kwargs'], kwargs)
        model = model['model']
    else:
        model_args = []
        model_kwargs = kwargs

    wrangled = model(data, *model_args, **model_kwargs)

    if return_model:
        return wrangled, {'model': model, 'args': model_args, 'kwargs': model_kwargs}
    return wrangled