Source code for pytranskit.optrans.decomposition.cca

import numpy as np
from sklearn.cross_decomposition import CCA as CanonCorr

from ..utils import check_array


class CCA:
    """
    Canonical Correlation Analysis.

    This is a wrapper for scikit-learn's CCA class, which allows it to be used
    in a similar manner to PLDA and PCA.

    Parameters
    ----------
    n_components : int (default=1)
        Number of components to keep.
    scale : bool (default=True)
        Whether to scale the data.
    max_iter : int (default=500)
        The maximum number of iterations of the NIPALS inner loop.
    tol : float (default=1e-6)
        The tolerance used in the iterative algorithm.
    copy : bool (default=True)
        Whether the deflation should be done on a copy. Leave the default
        value as True unless you don't care about side effects.

    Attributes
    ----------
    components_ : array, shape (n_components, n_features)
        X block weights vectors.
    components_y_ : array, shape (n_components, n_targets)
        Y block weights vectors.
    explained_variance_ : array, shape (n_components,)
        The amount of variance explained by each of the selected weights for
        the X data.
    explained_variance_y_ : array, shape (n_components,)
        The amount of variance explained by each of the selected weights for
        the Y data.
    mean_ : array, shape (n_features,)
        Per-feature empirical mean of X, estimated from the training set.
    mean_y_ : array, shape (n_targets,)
        Per-feature empirical mean of Y, estimated from the training set.
    n_components_ : int
        The number of components.
    is_fitted : bool
        False until ``fit`` has completed successfully.

    References
    ----------
    [scikit-learn's documentation on CCA]
    (http://scikit-learn.org/stable/modules/generated/sklearn.cross_decomposition.CCA.html)

    Jacob A. Wegelin. A survey of Partial Least Squares (PLS) methods, with
    emphasis on the two-block case. Technical Report 371, Department of
    Statistics, University of Washington, Seattle, 2000.
    """

    def __init__(self, n_components=1, scale=True, max_iter=500, tol=1e-6,
                 copy=True):
        self.is_fitted = False
        self.n_components_ = n_components
        # All numerical work is delegated to the wrapped scikit-learn object.
        self.cca = CanonCorr(n_components=n_components, scale=scale,
                             max_iter=max_iter, tol=tol, copy=copy)

    def _check_is_fitted(self):
        """Raise AssertionError if ``fit`` has not been called yet."""
        if not self.is_fitted:
            raise AssertionError("The fit function has not been "
                                 "called yet. Call 'fit' before using "
                                 "this method")

    def _check_n_components(self, data, name):
        """Raise ValueError unless ``data`` has ``n_components_`` columns.

        ``name`` is the label ("X" or "Y") used in the error message.
        """
        if data.shape[1] != self.n_components_:
            raise ValueError("{} has {} features per sample. "
                             "Expecting {}".format(name, data.shape[1],
                                                   self.n_components_))

    def fit(self, X, Y):
        """
        Fit model to data.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples and
            n_features is the number of predictors.
        Y : array, shape (n_samples, n_targets)
            Target vectors, where n_samples is the number of samples and
            n_targets is the number of response variables.

        Raises
        ------
        ValueError
            If X and Y have a different number of samples, or if
            n_components exceeds the number of columns of X or of Y.
        """
        X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)
        Y = check_array(Y, ndim=2, dtype='numeric', force_all_finite=True)

        if X.shape[0] != Y.shape[0]:
            raise ValueError("Number of samples in X and Y must be the same: "
                             "{} vs {}".format(X.shape[0], Y.shape[0]))
        if self.n_components_ > X.shape[1]:
            raise ValueError("n_components exceeds number of features in X: "
                             "{} > {}".format(self.n_components_, X.shape[1]))
        if self.n_components_ > Y.shape[1]:
            raise ValueError("n_components exceeds number of targets in Y: "
                             "{} > {}".format(self.n_components_, Y.shape[1]))

        self.cca.fit(X, Y)

        # NOTE(review): x_mean_/y_mean_ and x_scores_/y_scores_ may not be
        # exposed by newer scikit-learn releases -- confirm against the
        # version this package is pinned to.
        # Weights are stored transposed, i.e. one component per row.
        self.components_ = self.cca.x_weights_.T
        self.components_y_ = self.cca.y_weights_.T
        self.mean_ = self.cca.x_mean_
        self.mean_y_ = self.cca.y_mean_

        # Get the explained variance of the transformed data
        self.explained_variance_ = self.cca.x_scores_.var(axis=0)
        self.explained_variance_y_ = self.cca.y_scores_.var(axis=0)

        self.is_fitted = True
        return

    def transform(self, X, Y=None):
        """
        Apply the dimension reduction learned on the train data.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Input X data.
        Y : array, shape (n_samples, n_targets) or None (default=None)
            Input Y data. If Y=None, then only the transformed X data are
            returned.

        Returns
        -------
        X_new : array, shape (n_samples, n_components)
            Transformed X data.
        Y_new : array, shape (n_samples, n_components)
            Transformed Y data. If Y=None, only X_new is returned.

        Raises
        ------
        AssertionError
            If ``fit`` has not been called yet.
        """
        self._check_is_fitted()
        X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)

        if Y is None:
            return self.cca.transform(X, Y=None, copy=True)

        Y = check_array(Y, ndim=2, dtype='numeric', force_all_finite=True)
        X_new, Y_new = self.cca.transform(X, Y=Y, copy=True)

        # If n_components=1, reshape Y_new so it is 2D
        if self.n_components_ == 1:
            Y_new = Y_new.reshape((Y_new.shape[0], 1))

        return X_new, Y_new

    def fit_transform(self, X, Y):
        """
        Learn and apply the dimension reduction on the train data.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples and
            n_features is the number of predictors.
        Y : array, shape (n_samples, n_targets)
            Target vectors, where n_samples is the number of samples and
            n_targets is the number of response variables.

        Returns
        -------
        X_new : array, shape (n_samples, n_components)
            Transformed X data.
        Y_new : array, shape (n_samples, n_components)
            Transformed Y data.
        """
        self.fit(X, Y)
        return self.transform(X, Y=Y)

    def score(self, X, Y):
        """
        Return Pearson product-moment correlation coefficients for each
        component.

        The values of R are between -1 and 1, inclusive.

        Note: This is different from sklearn.cross_decomposition.CCA.score(),
        which returns the coefficient of determination of the prediction.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Input X data.
        Y : array, shape (n_samples, n_targets)
            Input Y data.

        Returns
        -------
        score : float or array, shape (n_components,)
            Pearson product-moment correlation coefficients. If
            n_components=1, a single value is returned, else an array of
            correlation coefficients is returned.
        """
        x_trans, y_trans = self.transform(X, Y)

        # Correlate matching columns of the two transformed blocks; the
        # off-diagonal entry of each 2x2 matrix is the coefficient.
        score = np.zeros(self.n_components_)
        score[:] = [np.corrcoef(x_trans[:, i], y_trans[:, i])[0, 1]
                    for i in range(self.n_components_)]

        if self.n_components_ == 1:
            return score[0]
        return score

    def inverse_transform(self, X, Y=None):
        """
        Transform data back to its original space.

        Note: This is not exact!

        Parameters
        ----------
        X : array, shape (n_samples, n_components)
            Transformed X data.
        Y : array, shape (n_samples, n_components) or None (default=None)
            Transformed Y data. If Y=None, only the X data are transformed
            back to the original space.

        Returns
        -------
        X_original : array, shape (n_samples, n_features)
            X data transformed back into original space.
        Y_original : array, shape (n_samples, n_targets)
            Y data transformed back into original space. If Y=None, only
            X_original is returned.

        Raises
        ------
        AssertionError
            If ``fit`` has not been called yet.
        ValueError
            If X (or Y) does not have n_components columns.
        """
        self._check_is_fitted()

        # Check X is in transformed space
        X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)
        self._check_n_components(X, "X")

        # Invert X into original space
        X_original = np.dot(X, self.components_) + self.mean_

        if Y is None:
            return X_original

        # Check Y is in transformed space
        Y = check_array(Y, ndim=2, dtype='numeric', force_all_finite=True)
        self._check_n_components(Y, "Y")

        # Invert Y into original space
        Y_original = np.dot(Y, self.components_y_) + self.mean_y_
        return X_original, Y_original