Source code for datawrangler.zoo.array

import pandas as pd
import numpy as np
import six
import os
from ..io import load
from ..core.configurator import update_dict
from ..util.lazy_imports import get_polars
from .polars_dataframe import create_polars_dataframe


def is_number(x):
    """
    Internal function-- return whether an object is a numerical scalar

    Parameters
    ----------
    :param x: the object to test

    Returns
    -------
    :return: True of x is a real or complex scalar and False otherwise
    """
    if np.isscalar(x):
        return np.isreal(x) or np.iscomplex(x)  # exclude single characters (non-numeric)
    if type(x) is list:
        return all([is_number(i) for i in x])
    return False


[docs] def is_array(x): """ Return True if and only if is an Array, or a file that can be loaded into an Array. Parameters ---------- :param x: an object, file path or URL Returns ------- :return: whether (or not) x is an array (or if it points to an array) """ if (not ('str' in str(type(x)))) and (type(x).__module__ == 'numpy'): return True else: # noinspection PyBroadException try: if is_array(load(x)): return True except: if type(x) == list: return all([is_array(i) for i in x]) elif is_number(x): return True return False
[docs] def wrangle_array(data, return_model=False, backend=None, **kwargs): """ Turn an Array into a DataFrame (pandas or Polars) Parameters ---------- :param data: an Array (or path to an Array) :param return_model: if True, return a function for casting an Array into a DataFrame (along with the resulting DataFrame). Default: False :param backend: str, optional The DataFrame backend to use ('pandas' or 'polars'). If None, uses the default backend (pandas) :param kwargs: a list of keyword arguments: - 'model': a callable function or constructor, or a dictionary containing the following keys: - 'model': a callable function or constructor - 'args': a list of arguments to pass to the function (in addition to data) - 'kwargs': a list of keyword arguments to pass to the function default: pandas.DataFrame or polars.DataFrame (based on backend) - all other keyword arguments are passed to the model (or constructor). These can be used to change how the DataFrame is created (e.g., passing columns=['one', 'two', 'three'] will change the column names of the resulting DataFrame). Returns ------- :return: The resulting DataFrame (pandas or Polars based on backend) Examples -------- >>> import numpy as np >>> import datawrangler as dw >>> # Create pandas DataFrame from array (default) >>> df_pandas = dw.wrangle(np.array([1, 2, 3])) >>> # Create Polars DataFrame from array >>> df_polars = dw.wrangle(np.array([1, 2, 3]), backend='polars') >>> # Create DataFrame with custom column names >>> df_custom = dw.wrangle([[1, 2], [3, 4]], array_kwargs={'columns': ['A', 'B']}) """ def stacker(x): while x.ndim >= 3: last_dim = x.ndim - 1 x = np.concatenate(np.split(x, x.shape[last_dim], axis=last_dim), axis=last_dim-1) x = np.squeeze(x) return x if is_number(data): data = np.array(data) elif (type(data) in six.string_types) and os.path.exists(data) and is_array(data): data = load(data) if ('sparse' in str(type(data))) and hasattr(data, 'toarray'): data = data.toarray() data = stacker(np.atleast_2d(data)) # Determine default model based on backend if 'model' not in kwargs: if backend == 'polars': default_model = create_polars_dataframe else: default_model = pd.DataFrame else: default_model = pd.DataFrame model = kwargs.pop('model', default_model) if type(model) is dict: # noinspection PyArgumentList assert all([k in model.keys() for k in ['model', 'args', 'kwargs']]), ValueError(f'Invalid model: {model}') model_args = model['args'] model_kwargs = update_dict(model['kwargs'], kwargs) model = model['model'] else: model_args = [] model_kwargs = kwargs wrangled = model(data, *model_args, **model_kwargs) if return_model: return wrangled, {'model': model, 'args': model_args, 'kwargs': model_kwargs} return wrangled