from __future__ import print_function
import numpy as np
import warnings
from skutil import exp
from sklearn.metrics.pairwise import (check_pairwise_arrays,
linear_kernel as lk)
from ._kernel_fast import (_hilbert_dot_fast, _hilbert_matrix_fast, _spline_kernel_fast)
# Public API of this module: every kernel exported for ``import *`` use.
# Keep alphabetized; the Cython proxies (``_hilbert_*``) stay private.
__all__ = [
    'exponential_kernel',
    'gaussian_kernel',
    'inverse_multiquadric_kernel',
    'laplace_kernel',
    'linear_kernel',
    'multiquadric_kernel',
    'polynomial_kernel',
    'power_kernel',
    'rbf_kernel',
    'spline_kernel',
    'tanh_kernel'
]
def _div(num, div):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
# do division operation -- might throw runtimewarning
return num / div
def _prep_X_Y_for_cython(X, Y):
    """Validate and prepare ``X``/``Y`` for the fast Cython kernels.

    Both arrays are checked as a pairwise pair, cast to C-ordered doubles
    (the layout the Cython routines require), and an output matrix of the
    appropriate shape is pre-allocated.

    Returns
    -------
    (X, Y, res) : tuple
        ``X`` of shape (n_samples_X, n_features), ``Y`` TRANSPOSED to
        shape (n_features, n_samples_Y), and a zeroed result buffer of
        shape (n_samples_X, n_samples_Y).
    """
    X, Y = check_pairwise_arrays(X, Y)
    X = X.astype(np.double, order='C')
    # note: the Cython routines expect Y transposed
    Y = Y.astype(np.double, order='C').T
    out = np.zeros((X.shape[0], Y.shape[1]), dtype=X.dtype)
    return X, Y, out
# Cython proxies
def _hilbert_dot(x, y, scalar=1.0):
    """Proxy to the Cython ``_hilbert_dot_fast`` routine.

    Computes ``2 * safe_sparse_dot(x, y) - safe_sparse_dot(x, x.T)
    - safe_sparse_dot(y, y.T)`` (per the Cython implementation), after
    casting both vectors to C-ordered doubles.
    """
    x = x.astype(np.double, order='C')
    y = y.astype(np.double, order='C')
    return _hilbert_dot_fast(x, y, scalar)
def _hilbert_matrix(X, Y=None, scalar=1.0):
    """Proxy to the Cython ``_hilbert_matrix_fast`` routine.

    Prepares ``X``/``Y`` (validation, dtype cast, ``Y`` transpose) and
    fills a pre-allocated (n_samples_X, n_samples_Y) result matrix via
    the fast Cython pairwise computation, scaled by ``scalar``.
    """
    X, Y, out = _prep_X_Y_for_cython(X, Y)
    _hilbert_matrix_fast(X, Y, out, np.double(scalar))
    return out
def exponential_kernel(X, Y=None, sigma=1.0):
    """The ``exponential_kernel`` is closely related to the ``gaussian_kernel``,
    with only the square of the norm left out. It is also an ``rbf_kernel``. Note that
    the adjustable parameter, ``sigma``, plays a major role in the performance of the
    kernel and should be carefully tuned. If overestimated, the exponential will behave
    almost linearly and the higher-dimensional projection will start to lose its non-linear
    power. In the other hand, if underestimated, the function will lack regularization and
    the decision boundary will be highly sensitive to noise in training data.

    The kernel is given by:

        :math:`k(x, y) = exp( -||x-y|| / 2\\sigma^2 )`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array of pandas DataFrame on which to compute
        the kernel. If ``Y`` is None, the kernel will be computed
        with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array of pandas DataFrame on which to compute
        the kernel. If ``Y`` is None, the kernel will be computed
        with ``X``.

    sigma : float, optional (default=1.0)
        The exponential tuning parameter.

    Returns
    -------
    c : float
        The result of the kernel computation.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    # BUGFIX: the divisor must be (2 * sigma^2). The previous, unparenthesized
    # form ``/ 2 * np.power(sigma, 2)`` parsed as ``(hm / 2) * sigma^2``,
    # *multiplying* by sigma^2 and thereby inverting the tuning parameter's
    # documented effect.
    c = exp(_hilbert_matrix(X, Y, scalar=-1.0) / (2.0 * np.power(sigma, 2)))
    return c
def gaussian_kernel(X, Y=None, sigma=1.0):
    """The ``gaussian_kernel`` is closely related to the ``exponential_kernel``.
    It is also an ``rbf_kernel``. Note that the adjustable parameter, ``sigma``,
    plays a major role in the performance of the kernel and should be carefully
    tuned. If overestimated, the exponential will behave almost linearly and
    the higher-dimensional projection will start to lose its non-linear
    power. In the other hand, if underestimated, the function will lack regularization and
    the decision boundary will be highly sensitive to noise in training data.

    The kernel is given by:

        :math:`k(x, y) = exp( -||x-y||^2 / 2\\sigma^2 )`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array of pandas DataFrame on which to compute
        the kernel. If ``Y`` is None, the kernel will be computed
        with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array of pandas DataFrame on which to compute
        the kernel. If ``Y`` is None, the kernel will be computed
        with ``X``.

    sigma : float, optional (default=1.0)
        The exponential tuning parameter.

    Returns
    -------
    c : float
        The result of the kernel computation.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    # BUGFIX: the divisor must be (2 * sigma^2). The previous, unparenthesized
    # form ``/ 2 * np.power(sigma, 2)`` parsed as ``(x / 2) * sigma^2``,
    # *multiplying* by sigma^2 and thereby inverting the tuning parameter's
    # documented effect.
    c = exp(-np.power(_hilbert_matrix(X, Y), 2.0) / (2.0 * np.power(sigma, 2)))
    return c
def inverse_multiquadric_kernel(X, Y=None, constant=1.0):
    """The ``inverse_multiquadric_kernel``, as with the ``gaussian_kernel``,
    results in a kernel matrix with full rank (Micchelli, 1986) and thus forms
    an infinite dimension feature space.

    The kernel is given by:

        :math:`k(x, y) = 1 / sqrt( ||x-y||^2 + c^2 )`

    (Doc fix: the norm term is added, not subtracted -- the implementation
    squares the pairwise distances before summing with ``c^2``.)

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array of pandas DataFrame on which to compute
        the kernel. If ``Y`` is None, the kernel will be computed
        with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array of pandas DataFrame on which to compute
        the kernel. If ``Y`` is None, the kernel will be computed
        with ``X``.

    constant : float, optional (default=1.0)
        The linear tuning parameter.

    Returns
    -------
    c : float
        The result of the kernel computation.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    # _div silences the divide-by-zero RuntimeWarning that occurs when
    # constant == 0 and a pairwise distance is zero (diagonal entries).
    c = _div(1.0, multiquadric_kernel(X, Y, constant))
    return c
def laplace_kernel(X, Y=None, sigma=1.0):
    """The ``laplace_kernel`` is completely equivalent to the
    ``exponential_kernel``, except for being less sensitive to changes in
    the ``sigma`` parameter. Being equivalent, it is also an ``rbf_kernel``.

    The kernel is given by:

        :math:`k(x, y) = exp( -||x-y|| / \\sigma )`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    sigma : float, optional (default=1.0)
        The exponential tuning parameter.

    Returns
    -------
    c : float
        The result of the kernel computation.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    # negatively-scaled pairwise Hilbert matrix, divided by sigma
    hs = _hilbert_matrix(X, Y, scalar=-1.0)
    c = exp(hs / sigma)
    return c
def linear_kernel(X, Y=None, constant=0.0):
    """The ``linear_kernel`` is the simplest kernel function, given by the
    inner product <x,y> plus an optional ``constant`` parameter. Kernel
    algorithms using a linear kernel are often equivalent to their
    non-kernel counterparts, i.e. KPCA with a ``linear_kernel`` is the
    same as standard PCA.

    The kernel is given by:

        :math:`k(x, y) = x^Ty + c`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    constant : float, optional (default=0.0)
        The linear tuning parameter.

    Returns
    -------
    c : float
        The result of the kernel computation.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    # delegate the inner product to sklearn's linear_kernel, then shift
    inner = lk(X, Y)
    return inner + constant
def multiquadric_kernel(X, Y=None, constant=0.0):
    """The ``multiquadric_kernel`` can be used in the same situations
    as the Rational Quadratic kernel. As is the case with the Sigmoid kernel,
    it is also an example of an non-positive definite kernel.

    The kernel is given by:

        :math:`k(x, y) = sqrt( ||x-y||^2 + c^2 )`

    (Doc fix: the norm term is added, not subtracted -- the implementation
    squares the pairwise distances before summing with ``c^2``.)

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array of pandas DataFrame on which to compute
        the kernel. If ``Y`` is None, the kernel will be computed
        with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array of pandas DataFrame on which to compute
        the kernel. If ``Y`` is None, the kernel will be computed
        with ``X``.

    constant : float, optional (default=0.0)
        The linear tuning parameter.

    Returns
    -------
    c : float
        The result of the kernel computation.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    hs = _hilbert_matrix(X=X, Y=Y, scalar=1.0)
    hs = np.power(hs, 2.0)  # squared pairwise term -- always non-negative
    c = np.sqrt(hs + np.power(constant, 2.0))
    return c
def polynomial_kernel(X, Y=None, alpha=1.0, degree=1.0, constant=1.0):
    """The ``polynomial_kernel`` is a non-stationary kernel. Polynomial
    kernels are well suited for problems where all the training data is
    normalized. Adjustable parameters are the slope (``alpha``), the
    constant term (``constant``), and the polynomial degree (``degree``).

    The kernel is given by:

        :math:`k(x, y) = ( \\alpha x^Ty + c)^d`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    alpha : float, optional (default=1.0)
        The slope tuning parameter.

    degree : float, optional (default=1.0)
        The polynomial degree tuning parameter.

    constant : float, optional (default=1.0)
        The linear tuning parameter.

    Returns
    -------
    c : float
        The result of the kernel computation.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    # inner product without the constant; it is folded in after scaling
    base = alpha * linear_kernel(X=X, Y=Y, constant=0.0) + constant
    return np.power(base, degree)
def power_kernel(X, Y=None, degree=1.0):
    """The ``power_kernel`` is also known as the (unrectified) triangular
    kernel. It is an example of scale-invariant kernel (Sahbi and Fleuret,
    2004) and is also only conditionally positive definite.

    The kernel is given by:

        :math:`k(x, y) = -||x-y||^d`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    degree : float, optional (default=1.0)
        The polynomial degree tuning parameter.

    Returns
    -------
    c : float
        The result of the kernel computation.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    hs = _hilbert_matrix(X, Y)
    return -np.power(hs, degree)
def rbf_kernel(X, Y=None, sigma=1.0):
    """The ``rbf_kernel`` is closely related to the ``exponential_kernel`` and
    ``gaussian_kernel``. Note that the adjustable parameter, ``sigma``,
    plays a major role in the performance of the kernel and should be carefully
    tuned. If overestimated, the exponential will behave almost linearly and
    the higher-dimensional projection will start to lose its non-linear
    power. In the other hand, if underestimated, the function will lack regularization and
    the decision boundary will be highly sensitive to noise in training data.

    The kernel is given by:

        :math:`k(x, y) = exp(- \\gamma * ||x-y||^2)`

    where:

        :math:`\\gamma = 1/( \\sigma ^2)`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array of pandas DataFrame on which to compute
        the kernel. If ``Y`` is None, the kernel will be computed
        with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array of pandas DataFrame on which to compute
        the kernel. If ``Y`` is None, the kernel will be computed
        with ``X``.

    sigma : float, optional (default=1.0)
        The exponential tuning parameter.

    Returns
    -------
    c : float
        The result of the kernel computation.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    # NOTE(review): ``sigma`` is forwarded verbatim as the Cython ``scalar``
    # argument, while the docstring states gamma = 1/sigma^2 and the sibling
    # kernels (exponential/laplace) pass scalar=-1.0. Confirm against
    # ``_hilbert_matrix_fast`` that the scalar is applied with the sign and
    # power the formula above implies -- this may be a latent bug.
    c = exp(_hilbert_matrix(X, Y, scalar=sigma))
    return c
def spline_kernel(X, Y=None):
    """The ``spline_kernel`` is given as a piece-wise cubic polynomial,
    as derived in the works by Gunn (1998).

    The kernel is given by:

        :math:`k(x, y) = 1 + xy + xy * min(x,y) - (1/2 * (x+y)) * min(x,y)^2 + 1/3 * min(x,y)^3`

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Returns
    -------
    res : float
        The result of the kernel computation.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    # validate/cast inputs and allocate the result buffer, then let the
    # Cython routine fill it in place
    X_prep, Y_prep, res = _prep_X_Y_for_cython(X, Y)
    _spline_kernel_fast(X_prep, Y_prep, res)
    return res
def tanh_kernel(X, Y=None, constant=0.0, alpha=1.0):
    """The ``tanh_kernel`` (Hyperbolic Tangent Kernel) is also known as the
    Sigmoid Kernel and as the Multilayer Perceptron (MLP) kernel. The Sigmoid
    Kernel comes from the Neural Networks field, where the bipolar sigmoid
    function is often used as an activation function for artificial neurons.

    The kernel is given by:

        :math:`k(x, y) = tanh (\\alpha x^T y + c)`

    It is interesting to note that a SVM model using a sigmoid kernel function
    is equivalent to a two-layer, perceptron neural network. This kernel was
    quite popular for support vector machines due to its origin from neural
    network theory. Also, despite being only conditionally positive definite,
    it has been found to perform well in practice.

    There are two adjustable parameters in the sigmoid kernel, the slope
    ``alpha`` and the intercept ``constant``. A common value for alpha is 1/N,
    where N is the data dimension. A more detailed study on sigmoid kernels
    can be found in the works by Hsuan-Tien and Chih-Jen.

    Parameters
    ----------
    X : array_like (float), shape=(n_samples, n_features)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    Y : array_like (float), shape=(n_samples, n_features), optional (default=None)
        The array or pandas DataFrame on which to compute the kernel.
        If ``Y`` is None, the kernel will be computed with ``X``.

    constant : float, optional (default=0.0)
        The linear tuning parameter.

    alpha : float, optional (default=1.0)
        The slope tuning parameter.

    Returns
    -------
    c : float
        The result of the kernel computation.

    References
    ----------
    Souza, Cesar R., Kernel Functions for Machine Learning Applications
    http://crsouza.blogspot.com/2010/03/kernel-functions-for-machine-learning.html
    """
    # compute the raw inner product first; ``constant`` is added inside
    # the tanh, after scaling by alpha -- not inside the linear kernel
    inner = linear_kernel(X=X, Y=Y, constant=0.0)
    return np.tanh(alpha * inner + constant)