# Source code for pytranskit.optrans.decomposition.cca
import numpy as np
from sklearn.cross_decomposition import CCA as CanonCorr
from ..utils import check_array
class CCA():
    """
    Canonical Correlation Analysis.

    This is a wrapper for scikit-learn's CCA class, which allows it to be used
    in a similar manner to PLDA and PCA.

    Parameters
    ----------
    n_components : int (default=1)
        Number of components to keep.
    scale : bool (default=True)
        Whether to scale the data.
    max_iter : int (default=500)
        The maximum number of iterations of the NIPALS inner loop.
    tol : float (default=1e-6)
        The tolerance used in the iterative algorithm.
    copy : bool (default=True)
        Whether the deflation should be done on a copy. Leave the default
        value as True unless you don't care about side effects.

    Attributes
    ----------
    components_ : array, shape (n_components, n_features)
        X block weights vectors.
    components_y_ : array, shape (n_components, n_targets)
        Y block weights vectors.
    explained_variance_ : array, shape (n_components,)
        The amount of variance explained by each of the selected weights for
        the X data (variance of the training X scores per component).
    explained_variance_y_ : array, shape (n_components,)
        The amount of variance explained by each of the selected weights for
        the Y data (variance of the training Y scores per component).
    mean_ : array, shape (n_features,)
        Per-feature empirical mean of X, estimated from the training set.
    mean_y_ : array, shape (n_targets,)
        Per-feature empirical mean of Y, estimated from the training set.
    n_components_ : int
        The number of components.
    is_fitted : bool
        False until fit() has completed, True afterwards.

    References
    ----------
    [scikit-learn's documentation on CCA]
    (http://scikit-learn.org/stable/modules/generated/sklearn.cross_decomposition.CCA.html)

    Jacob A. Wegelin. A survey of Partial Least Squares (PLS) methods, with
    emphasis on the two-block case. Technical Report 371, Department of
    Statistics, University of Washington, Seattle, 2000.
    """

    def __init__(self, n_components=1, scale=True, max_iter=500, tol=1e-6,
                 copy=True):
        self.is_fitted = False
        self.n_components_ = n_components
        # All numerical work is delegated to the wrapped scikit-learn estimator
        self.cca = CanonCorr(n_components=n_components, scale=scale,
                             max_iter=max_iter, tol=tol, copy=copy)

    def _check_is_fitted(self):
        """
        Raise an AssertionError if fit() has not been called yet.
        """
        if not self.is_fitted:
            raise AssertionError("The fit function has not been "
                                 "called yet. Call 'fit' before using "
                                 "this method")

    @staticmethod
    def _component_correlations(x_scores, y_scores, n_components):
        """
        Pearson correlation between matching columns of two score matrices.

        Parameters
        ----------
        x_scores : array, shape (n_samples, >= n_components)
            Transformed X data.
        y_scores : array, shape (n_samples, >= n_components)
            Transformed Y data.
        n_components : int
            Number of leading columns to correlate.

        Returns
        -------
        corr : array, shape (n_components,)
            corr[i] is the correlation of x_scores[:, i] with y_scores[:, i].
        """
        corr = np.zeros(n_components)
        for i in range(n_components):
            # corrcoef returns a 2x2 matrix; the off-diagonal entry is r(x, y)
            corr[i] = np.corrcoef(x_scores[:, i], y_scores[:, i])[0, 1]
        return corr

    def fit(self, X, Y):
        """
        Fit model to data.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples and
            n_features is the number of predictors.
        Y : array, shape (n_samples, n_targets)
            Target vectors, where n_samples is the number of samples and
            n_targets is the number of response variables.

        Returns
        -------
        self : CCA
            The fitted instance, so that calls can be chained.

        Raises
        ------
        ValueError
            If X and Y have a different number of samples, or if n_components
            exceeds the number of columns of X or of Y.
        """
        X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)
        Y = check_array(Y, ndim=2, dtype='numeric', force_all_finite=True)

        if X.shape[0] != Y.shape[0]:
            raise ValueError("Number of samples in X and Y must be the same: "
                             "{} vs {}".format(X.shape[0], Y.shape[0]))
        if self.n_components_ > X.shape[1]:
            raise ValueError("n_components exceeds number of features in X: "
                             "{} > {}".format(self.n_components_, X.shape[1]))
        if self.n_components_ > Y.shape[1]:
            raise ValueError("n_components exceeds number of targets in Y: "
                             "{} > {}".format(self.n_components_, Y.shape[1]))

        self.cca.fit(X, Y)

        # Weights are exposed transposed: one row per component
        self.components_ = self.cca.x_weights_.T
        self.components_y_ = self.cca.y_weights_.T

        # NOTE(review): x_mean_/y_mean_ and x_scores_/y_scores_ are read
        # straight off the scikit-learn estimator -- confirm that they are
        # available in the scikit-learn version this package is pinned to.
        self.mean_ = self.cca.x_mean_
        self.mean_y_ = self.cca.y_mean_

        # Get the explained variance of the transformed data
        self.explained_variance_ = self.cca.x_scores_.var(axis=0)
        self.explained_variance_y_ = self.cca.y_scores_.var(axis=0)

        self.is_fitted = True
        return self

    def transform(self, X, Y=None):
        """
        Apply the dimension reduction learned on the train data.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Input X data.
        Y : array, shape (n_samples, n_targets) or None (default=None)
            Input Y data. If Y=None, then only the transformed X data are
            returned.

        Returns
        -------
        X_new : array, shape (n_samples, n_components)
            Transformed X data.
        Y_new : array, shape (n_samples, n_components)
            Transformed Y data. If Y=None, only X_new is returned.

        Raises
        ------
        AssertionError
            If fit() has not been called yet.
        """
        self._check_is_fitted()
        X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)

        if Y is None:
            return self.cca.transform(X, Y=None, copy=True)

        Y = check_array(Y, ndim=2, dtype='numeric', force_all_finite=True)
        X_new, Y_new = self.cca.transform(X, Y=Y, copy=True)

        # If n_components=1, reshape Y_new so it is 2D
        if self.n_components_ == 1:
            n_samples = Y_new.shape[0]
            Y_new = Y_new.reshape((n_samples, 1))
        return X_new, Y_new

    def fit_transform(self, X, Y):
        """
        Learn and apply the dimension reduction on the train data.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples and
            n_features is the number of predictors.
        Y : array, shape (n_samples, n_targets)
            Target vectors, where n_samples is the number of samples and
            n_targets is the number of response variables.

        Returns
        -------
        X_new : array, shape (n_samples, n_components)
            Transformed X data.
        Y_new : array, shape (n_samples, n_components)
            Transformed Y data.
        """
        self.fit(X, Y)
        return self.transform(X, Y=Y)

    def score(self, X, Y):
        """
        Return Pearson product-moment correlation coefficients for each
        component.

        The values of R are between -1 and 1, inclusive.

        Note: This is different from sklearn.cross_decomposition.CCA.score(),
        which returns the coefficient of determination of the prediction.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Input X data.
        Y : array, shape (n_samples, n_targets)
            Input Y data.

        Returns
        -------
        score : float or array, shape (n_components,)
            Pearson product-moment correlation coefficients. If n_components=1,
            a single value is returned, else an array of correlation
            coefficients is returned.

        Raises
        ------
        AssertionError
            If fit() has not been called yet.
        """
        x_trans, y_trans = self.transform(X, Y)
        score = self._component_correlations(x_trans, y_trans,
                                             self.n_components_)
        if self.n_components_ == 1:
            # Single component: hand back the scalar rather than a 1-element
            # array
            return score[0]
        return score

    def inverse_transform(self, X, Y=None):
        """
        Transform data back to its original space.

        Note: This is not exact! The reconstruction multiplies the scores by
        the weight vectors and adds back the training mean.

        Parameters
        ----------
        X : array, shape (n_samples, n_components)
            Transformed X data.
        Y : array, shape (n_samples, n_components) or None (default=None)
            Transformed Y data. If Y=None, only the X data are transformed back
            to the original space.

        Returns
        -------
        X_original : array, shape (n_samples, n_features)
            X data transformed back into original space.
        Y_original : array, shape (n_samples, n_targets)
            Y data transformed back into original space. If Y=None, only
            X_original is returned.

        Raises
        ------
        AssertionError
            If fit() has not been called yet.
        ValueError
            If X (or Y) does not have n_components columns.
        """
        self._check_is_fitted()

        # Check X is in transformed space
        X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)
        if X.shape[1] != self.n_components_:
            raise ValueError("X has {} features per sample. "
                             "Expecting {}".format(X.shape[1],
                                                   self.n_components_))

        # Invert X into original space
        # NOTE(review): only the mean is added back here; if the estimator
        # rescales the data when scale=True, that is not undone -- confirm
        # this is the intended approximation.
        X_original = np.dot(X, self.components_) + self.mean_
        if Y is None:
            return X_original

        # Check Y is in transformed space
        Y = check_array(Y, ndim=2, dtype='numeric', force_all_finite=True)
        if Y.shape[1] != self.n_components_:
            raise ValueError("Y has {} features per sample. "
                             "Expecting {}".format(Y.shape[1],
                                                   self.n_components_))

        # Invert Y into original space
        Y_original = np.dot(Y, self.components_y_) + self.mean_y_
        return X_original, Y_original