# Source code for skutil.metrics.kernel

from __future__ import print_function
import numpy as np
import warnings
from skutil import exp
from sklearn.metrics.pairwise import (check_pairwise_arrays,
                                      linear_kernel as lk)
from ._kernel_fast import (_hilbert_dot_fast, _hilbert_matrix_fast, _spline_kernel_fast)

__all__ = [
    'exponential_kernel',
    'gaussian_kernel',
    'inverse_multiquadric_kernel',
    'laplace_kernel',
    'linear_kernel',
    'multiquadric_kernel',
    'polynomial_kernel',
    'power_kernel',
    'rbf_kernel',
    'spline_kernel',
    'tanh_kernel'
]


def _div(num, div):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        # do the division -- numpy may emit a RuntimeWarning here (e.g., on
        # divide-by-zero), which the context manager above suppresses
        return num / div
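
# For example, with numpy operands ``_div(np.array([1.]), 0.)`` quietly
# returns ``array([inf])`` rather than also emitting a RuntimeWarning.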


def _prep_X_Y_for_cython(X, Y):
    X, Y = check_pairwise_arrays(X, Y)
    # note that Y is transposed here, so Y.shape[1] below is Y's n_samples
    X, Y = X.astype(np.double, order='C'), Y.astype(np.double, order='C').T
    res = np.zeros((X.shape[0], Y.shape[1]), dtype=X.dtype)
    return X, Y, res


# Cython proxies
def _hilbert_dot(x, y, scalar=1.0):
    # return ``2 * safe_sparse_dot(x, y) - safe_sparse_dot(x, x.T) - safe_sparse_dot(y, y.T)``
    x, y = x.astype(np.double, order='C'), y.astype(np.double, order='C')
    return _hilbert_dot_fast(x, y, scalar)
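
# A pure-numpy reference for the proxy above -- a minimal sketch, assuming the
# Cython routine applies ``scalar`` to the commented formula as a whole. Shown
# only for documentation and sanity-checking; ``_hilbert_dot_numpy`` is a
# hypothetical helper, not part of the module's API.
def _hilbert_dot_numpy(x, y, scalar=1.0):
    x = np.asarray(x, dtype=np.double)
    y = np.asarray(y, dtype=np.double)
    # scalar * (2<x, y> - <x, x> - <y, y>), i.e. -scalar * ||x - y||**2
    return scalar * (2.0 * np.dot(x, y) - np.dot(x, x) - np.dot(y, y))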


def _hilbert_matrix(X, Y=None, scalar=1.0):
    X, Y, res = _prep_X_Y_for_cython(X, Y)
    _hilbert_matrix_fast(X, Y, res, np.double(scalar))
    return res
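
# A broadcast numpy sketch of the pairwise version, under the same assumption
# about the Cython semantics (``_hilbert_matrix_numpy`` is likewise a
# hypothetical helper, for documentation only):
def _hilbert_matrix_numpy(X, Y=None, scalar=1.0):
    X, Y = check_pairwise_arrays(X, Y)
    XX = (X * X).sum(axis=1)  # <x, x> for each row of X
    YY = (Y * Y).sum(axis=1)  # <y, y> for each row of Y
    G = np.dot(X, Y.T)        # <x, y> for each (row, row) pair
    return scalar * (2.0 * G - XX[:, np.newaxis] - YY[np.newaxis, :])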


def exponential_kernel(X, Y=None, sigma=1.0):
    """The ``exponential_kernel`` is closely related to the
    ``gaussian_kernel``, with only the square of the norm left out. It is
    also an ``rbf_kernel``. Note that the adjustable parameter, ``sigma``,
    plays a major role in the performance of the kernel and should be
    carefully tuned. If overestimated, the exponential will behave almost
    linearly and the higher-dimensional projection will start to lose its
    non-linear power. On the other hand, if underestimated, the function
    will lack regularization and the decision boundary will be highly
    sensitive to noise in the training data.

    The kernel is given by:

    :math:`k(x, y) = exp( -||x-y|| / (2\\sigma^2) )`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    sigma : float, optional (default=1.0)
        The exponential tuning parameter.

    Returns
    -------
    c : ndarray, shape=(n_samples, n_samples)
        The resulting kernel matrix.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    # parenthesize the denominator -- the formula divides by 2 * sigma**2
    c = exp(_hilbert_matrix(X, Y, scalar=-1.0) / (2 * np.power(sigma, 2)))
    return c

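# Worked example (a sketch -- the numbers follow the docstring formula, not a
# run of the compiled extension): for x = [0.], y = [2.] and sigma = 1.0,
# ||x - y|| = 2, so k(x, y) = exp(-2 / (2 * 1.0**2)) = exp(-1) ~= 0.3679.
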
def gaussian_kernel(X, Y=None, sigma=1.0):
    """The ``gaussian_kernel`` is closely related to the
    ``exponential_kernel``. It is also an ``rbf_kernel``. Note that the
    adjustable parameter, ``sigma``, plays a major role in the performance
    of the kernel and should be carefully tuned. If overestimated, the
    exponential will behave almost linearly and the higher-dimensional
    projection will start to lose its non-linear power. On the other hand,
    if underestimated, the function will lack regularization and the
    decision boundary will be highly sensitive to noise in the training
    data.

    The kernel is given by:

    :math:`k(x, y) = exp( -||x-y||^2 / (2\\sigma^2) )`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    sigma : float, optional (default=1.0)
        The exponential tuning parameter.

    Returns
    -------
    c : ndarray, shape=(n_samples, n_samples)
        The resulting kernel matrix.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    # parenthesize the denominator -- the formula divides by 2 * sigma**2
    c = exp(-np.power(_hilbert_matrix(X, Y), 2.0) / (2 * np.power(sigma, 2)))
    return c

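# Worked example from the docstring formula: for x = [0.], y = [2.] and
# sigma = 1.0, ||x - y||^2 = 4, so
# k(x, y) = exp(-4 / (2 * 1.0**2)) = exp(-2) ~= 0.1353.
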
def inverse_multiquadric_kernel(X, Y=None, constant=1.0):
    """The ``inverse_multiquadric_kernel``, as with the ``gaussian_kernel``,
    results in a kernel matrix with full rank (Micchelli, 1986) and thus
    forms an infinite-dimension feature space.

    The kernel is given by:

    :math:`k(x, y) = 1 / sqrt( ||x-y||^2 + c^2 )`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    constant : float, optional (default=1.0)
        The linear tuning parameter.

    Returns
    -------
    c : ndarray, shape=(n_samples, n_samples)
        The resulting kernel matrix.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    # reuse the multiquadric; _div suppresses the divide-by-zero warning
    # that a zero multiquadric entry would otherwise emit
    c = _div(1.0, multiquadric_kernel(X, Y, constant))
    return c

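# Elementwise, this is simply the reciprocal of ``multiquadric_kernel``:
# with ||x - y||^2 = 4 and constant = 1.0,
# k(x, y) = 1 / sqrt(4 + 1) ~= 0.4472 (values from the docstring formula).
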
def laplace_kernel(X, Y=None, sigma=1.0):
    """The ``laplace_kernel`` is completely equivalent to the
    ``exponential_kernel``, except that it is less sensitive to changes in
    the ``sigma`` parameter. Being equivalent, it is also an ``rbf_kernel``.

    The kernel is given by:

    :math:`k(x, y) = exp( -||x-y|| / \\sigma )`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    sigma : float, optional (default=1.0)
        The exponential tuning parameter.

    Returns
    -------
    c : ndarray, shape=(n_samples, n_samples)
        The resulting kernel matrix.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    c = exp(_hilbert_matrix(X, Y, scalar=-1.0) / sigma)
    return c

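# Worked example from the docstring formula: for ||x - y|| = 2 and
# sigma = 2.0, k(x, y) = exp(-2 / 2.0) = exp(-1) ~= 0.3679. Note the single
# ``sigma`` in the denominator, versus ``2 * sigma**2`` in
# ``exponential_kernel``.
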
def linear_kernel(X, Y=None, constant=0.0):
    """The ``linear_kernel`` is the simplest kernel function. It is given by
    the inner product <x,y> plus an optional ``constant`` parameter. Kernel
    algorithms using a linear kernel are often equivalent to their
    non-kernel counterparts, i.e., KPCA with a ``linear_kernel`` is the same
    as standard PCA.

    The kernel is given by:

    :math:`k(x, y) = x^Ty + c`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    constant : float, optional (default=0.0)
        The linear tuning parameter.

    Returns
    -------
    c : ndarray, shape=(n_samples, n_samples)
        The resulting kernel matrix.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    c = lk(X, Y) + constant
    return c

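# Worked example from the docstring formula: for x = [1., 2.], y = [3., 4.]
# and constant = 0.5, k(x, y) = (1*3 + 2*4) + 0.5 = 11.5.
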
def multiquadric_kernel(X, Y=None, constant=0.0):
    """The ``multiquadric_kernel`` can be used in the same situations as the
    Rational Quadratic kernel. As is the case with the Sigmoid kernel, it is
    an example of a non-positive definite kernel.

    The kernel is given by:

    :math:`k(x, y) = sqrt( ||x-y||^2 + c^2 )`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    constant : float, optional (default=0.0)
        The linear tuning parameter.

    Returns
    -------
    c : ndarray, shape=(n_samples, n_samples)
        The resulting kernel matrix.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    hs = _hilbert_matrix(X=X, Y=Y, scalar=1.0)
    hs = np.power(hs, 2.0)
    c = np.sqrt(hs + np.power(constant, 2.0))
    return c

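# Worked example from the docstring formula: for ||x - y||^2 = 4 and
# constant = 1.0, k(x, y) = sqrt(4 + 1) ~= 2.2361.
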
def polynomial_kernel(X, Y=None, alpha=1.0, degree=1.0, constant=1.0):
    """The ``polynomial_kernel`` is a non-stationary kernel. Polynomial
    kernels are well suited for problems where all the training data is
    normalized. Adjustable parameters are the slope (``alpha``), the
    constant term (``constant``), and the polynomial degree (``degree``).

    The kernel is given by:

    :math:`k(x, y) = ( \\alpha x^Ty + c)^d`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    alpha : float, optional (default=1.0)
        The slope tuning parameter.

    degree : float, optional (default=1.0)
        The polynomial degree tuning parameter.

    constant : float, optional (default=1.0)
        The linear tuning parameter.

    Returns
    -------
    c : ndarray, shape=(n_samples, n_samples)
        The resulting kernel matrix.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    lc = linear_kernel(X=X, Y=Y, constant=0.0)
    c = np.power(lc * alpha + constant, degree)
    return c

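# Worked example from the docstring formula: for x = [1., 2.], y = [3., 4.],
# alpha = 1.0, constant = 1.0 and degree = 2.0, the inner product is 11, so
# k(x, y) = (1.0 * 11 + 1.0)**2 = 144.
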
def power_kernel(X, Y=None, degree=1.0):
    """The ``power_kernel`` is also known as the (unrectified) triangular
    kernel. It is an example of a scale-invariant kernel (Sahbi and Fleuret,
    2004) and is also only conditionally positive definite.

    The kernel is given by:

    :math:`k(x, y) = -||x-y||^d`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    degree : float, optional (default=1.0)
        The polynomial degree tuning parameter.

    Returns
    -------
    c : ndarray, shape=(n_samples, n_samples)
        The resulting kernel matrix.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    c = -np.power(_hilbert_matrix(X, Y), degree)
    return c

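# Worked example from the docstring formula: for ||x - y|| = 2 and
# degree = 3.0, k(x, y) = -(2**3) = -8.
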
def rbf_kernel(X, Y=None, sigma=1.0):
    """The ``rbf_kernel`` is closely related to the ``exponential_kernel``
    and ``gaussian_kernel``. Note that the adjustable parameter, ``sigma``,
    plays a major role in the performance of the kernel and should be
    carefully tuned. If overestimated, the exponential will behave almost
    linearly and the higher-dimensional projection will start to lose its
    non-linear power. On the other hand, if underestimated, the function
    will lack regularization and the decision boundary will be highly
    sensitive to noise in the training data.

    The kernel is given by:

    :math:`k(x, y) = exp( -\\gamma ||x-y||^2 )`

    where:

    :math:`\\gamma = 1 / \\sigma^2`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    sigma : float, optional (default=1.0)
        The exponential tuning parameter.

    Returns
    -------
    c : ndarray, shape=(n_samples, n_samples)
        The resulting kernel matrix.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    c = exp(_hilbert_matrix(X, Y, scalar=sigma))
    return c

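# Worked example from the docstring formula: for ||x - y||^2 = 4 and
# sigma = 2.0, gamma = 1 / 2.0**2 = 0.25, so
# k(x, y) = exp(-0.25 * 4) = exp(-1) ~= 0.3679.
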
def spline_kernel(X, Y=None):
    """The ``spline_kernel`` is given as a piecewise cubic polynomial, as
    derived in the works by Gunn (1998).

    The kernel is given by:

    :math:`k(x, y) = 1 + xy + xy * min(x,y) - (1/2 * (x+y)) * min(x,y)^2 + 1/3 * min(x,y)^3`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Returns
    -------
    res : ndarray, shape=(n_samples, n_samples)
        The resulting kernel matrix.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    X, Y, res = _prep_X_Y_for_cython(X, Y)
    _spline_kernel_fast(X, Y, res)
    return res

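# Worked example from the docstring formula for scalar x = 1, y = 2
# (min(x, y) = 1): k(x, y) = 1 + 2 + 2 - (3/2)*1 + (1/3)*1 = 23/6 ~= 3.8333.
# For vector inputs the spline kernel is conventionally the product of this
# term over coordinates -- an assumption here, as the Cython routine's
# internals are not shown.
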
def tanh_kernel(X, Y=None, constant=0.0, alpha=1.0):
    """The ``tanh_kernel`` (Hyperbolic Tangent Kernel) is also known as the
    Sigmoid Kernel and as the Multilayer Perceptron (MLP) kernel. The
    Sigmoid Kernel comes from the Neural Networks field, where the bipolar
    sigmoid function is often used as an activation function for artificial
    neurons.

    The kernel is given by:

    :math:`k(x, y) = tanh( \\alpha x^T y + c )`

    It is interesting to note that an SVM model using a sigmoid kernel
    function is equivalent to a two-layer perceptron neural network. This
    kernel was quite popular for support vector machines due to its origin
    in neural network theory. Also, despite being only conditionally
    positive definite, it has been found to perform well in practice.

    There are two adjustable parameters in the sigmoid kernel: the slope
    ``alpha`` and the intercept ``constant``. A common value for ``alpha``
    is 1/N, where N is the data dimension. A more detailed study on sigmoid
    kernels can be found in the works by Hsuan-Tien and Chih-Jen.

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    constant : float, optional (default=0.0)
        The linear tuning parameter.

    alpha : float, optional (default=1.0)
        The slope tuning parameter.

    Returns
    -------
    c : ndarray, shape=(n_samples, n_samples)
        The resulting kernel matrix.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    lc = linear_kernel(X=X, Y=Y, constant=0.0)  # the constant is not added here...
    c = np.tanh(alpha * lc + constant)          # ...but only after scaling by alpha
    return c

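# Worked example from the docstring formula: for x = [1., 2.], y = [3., 4.],
# alpha = 1.0 / 2 (the 1/N heuristic above, with N = 2 features) and
# constant = 0.0, k(x, y) = tanh(0.5 * 11) = tanh(5.5) ~= 0.99997.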