# Source code for skutil.h2o.frame

from __future__ import absolute_import, division, print_function
from h2o.frame import H2OFrame
import pandas as pd
from .base import check_frame
from ..utils import flatten_all

# Names exported by ``from <this module> import *``. Note that the
# underscore-prefixed ``_check_is_1d_frame`` helper is listed here and is
# therefore exported alongside the public functions.
__all__ = [
    '_check_is_1d_frame',
    'as_series',
    'is_numeric',
    'is_integer',
    'is_float',
    'value_counts'
]


def _check_is_1d_frame(X):
    """Validate that ``X`` is an H2OFrame with exactly one column.

    ``X`` is first passed through ``check_frame`` (with ``copy=False``)
    and the result is then required to have a second shape dimension
    equal to 1.

    Parameters
    ----------

    X : H2OFrame, shape=(n_samples, 1)
        The H2OFrame to check

    Raises
    ------

    AssertionError if the frame returned by ``check_frame`` does not
    have exactly one column. Anything raised by ``check_frame`` itself
    propagates unchanged.

    Returns
    -------

    X : H2OFrame, shape=(n_samples, 1)
        The frame returned by ``check_frame``, if it is 1d
    """
    X = check_frame(X, copy=False)

    # Raise explicitly instead of using an ``assert`` statement: asserts
    # are removed when Python runs with ``-O``, which would silently
    # disable this validation. The exception type and message are the
    # same as before, so existing callers are unaffected.
    if X.shape[1] != 1:
        raise AssertionError('expected 1d H2OFrame')

    return X


def as_series(x):
    """Convert a single-column H2OFrame into a pandas Series.

    Parameters
    ----------

    x : ``H2OFrame``, shape=(n_samples, 1)
        The H2OFrame

    Returns
    -------

    x : Pandas ``Series``, shape=(n_samples,)
        The frame's only column, extracted from its pandas
        representation.
    """
    frame = _check_is_1d_frame(x)

    # Materialize the frame as a pandas object, then select its single
    # column by name.
    as_pandas = frame.as_data_frame(use_pandas=True)
    return as_pandas[frame.columns[0]]
def is_numeric(x):
    """Report whether a single-column H2OFrame is numeric.

    Parameters
    ----------

    x : H2OFrame, shape=(n_samples, 1)
        The H2OFrame

    Returns
    -------

    bool : True if numeric, else False
    """
    # Validation only; the return value is intentionally not rebound.
    _check_is_1d_frame(x)

    # ``isnumeric()`` is flattened with ``flatten_all`` and the first
    # element is the answer for the (only) column.
    per_column_flags = flatten_all(x.isnumeric())
    return per_column_flags[0]
def is_integer(x):
    """Determine whether a 1d H2OFrame is made up of integers.

    A non-numeric column is never considered integer. A numeric column
    is considered integer when rounding every value to zero digits
    leaves it unchanged, i.e. the total absolute rounding residual is 0.

    Parameters
    ----------

    x : H2OFrame, shape=(n_samples, 1)
        The H2OFrame

    Returns
    -------

    bool : True if integers, else False
    """
    _check_is_1d_frame(x)

    if not is_numeric(x):
        return False

    # Sum the *absolute* residuals. Summing signed residuals lets
    # opposite errors cancel: for [0.25, 0.75] the residuals are
    # -0.25 and +0.25, whose plain sum is 0 even though neither value
    # is an integer.
    residual = (x.round(digits=0) - x).abs()
    return residual.sum() == 0
def is_float(x):
    """Report whether a single-column H2OFrame holds floats.

    A column counts as float when it is numeric (per ``is_numeric``)
    but not integer (per ``is_integer``).

    Parameters
    ----------

    x : H2OFrame, shape=(n_samples, 1)
        The H2OFrame

    Returns
    -------

    bool : True if float, else False
    """
    _check_is_1d_frame(x)

    numeric = is_numeric(x)
    if not numeric:
        # Hand back the falsy result itself, exactly as the
        # short-circuiting ``and`` expression would.
        return numeric

    return not is_integer(x)
def value_counts(x):
    """Compute a Pandas-esque ``value_counts`` on a 1d H2OFrame.

    The frame is validated, converted to a pandas Series with
    ``as_series`` and counted with ``Series.value_counts``.

    Parameters
    ----------

    x : H2OFrame, shape=(n_samples, 1)
        The H2OFrame

    Returns
    -------

    cts : pd.Series
        The pandas series of counts, one entry per distinct value
    """
    return as_series(_check_is_1d_frame(x)).value_counts()