|
import numpy as np
from scipy import linalg


def LDA(data, gnd):
    '''
    LDA: Linear Discriminant Analysis for ML Course

    Input:
        data - Data matrix (numpy array). Each row vector of data is a
               data point. The data should be centered beforehand.
        gnd  - Column vector of the label information for each
               data point.

    Output:
        eigvector - Each column is an embedding function: for a new
                    data point (row vector) x, y = np.matmul(x, eigvector)
                    is the embedding result of x.
        eigvalue  - The sorted eigenvalues of the LDA eigen-problem.
    '''
    mylda = myLDA()
    mylda.fit(data, gnd)
    eigvector = mylda.scalings_
    eigvalue = mylda.eigvalues_
    return eigvector, eigvalue
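
# Minimal usage sketch (the names `X` and `labels` are illustrative and not
# defined in this module): center the data first, then embed with the
# returned matrix.
#
#     Xc = X - X.mean(axis=0)
#     W, evals = LDA(Xc, labels)
#     X_embedded = np.matmul(Xc, W)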


class myLDA():
    """Linear Discriminant Analysis

    A classifier with a linear decision boundary, generated by fitting class
    conditional densities to the data and using Bayes' rule.

    The model fits a Gaussian density to each class, assuming that all classes
    share the same covariance matrix.

    The fitted model can also be used to reduce the dimensionality of the input
    by projecting it to the most discriminative directions.

    This class is partly modified from
    sklearn.discriminant_analysis.LinearDiscriminantAnalysis.
    """
    def __init__(self):
        pass

    def fit(self, X, y):
        # LDA yields at most n_classes - 1 discriminative directions.
        self.Dim = np.unique(y).shape[0] - 1
        self.svd_solver(X, y)

    def svd_solver(self, X, y, tol=1e-5):
        """SVD solver.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data.

        y : array-like, shape (n_samples,)
            Target values.

        tol : float
            Tolerance below which singular values are treated as zero.
        """
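        # Background sketch (standard LDA formulation, added for reference;
        # not part of the original comments): the discriminative directions w
        # solve the generalized eigen-problem
        #
        #     S_B w = lambda * S_W w,
        #
        # where S_W is the pooled within-class scatter and S_B the
        # between-class scatter of the class means. Instead of forming these
        # matrices, the steps below whiten the data so that S_W becomes the
        # identity (steps 1-2), then take an SVD of the whitened,
        # prior-weighted class means (step 3); the squared singular values of
        # that SVD equal the generalized eigenvalues lambda.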
        n_samples, n_features = X.shape
        classes = np.unique(y)
        n_classes = classes.shape[0]
        _, y_t = np.unique(y, return_inverse=True)  # non-negative ints
        priors_ = np.bincount(y_t) / float(len(y))
        means_ = self.class_means(X, y)
        # Pooled within-class covariance; kept for reference, the SVD solver
        # itself does not need it.
        self.covariance_ = self.class_cov(X, y, priors_)

        Xc = []
        for idx, group in enumerate(classes):
            Xg = X[y == group, :]
            Xc.append(Xg - means_[idx])

        self.xbar_ = np.dot(priors_, means_)
        Xc = np.concatenate(Xc, axis=0)

        # 1) Within-class (univariate) scaling by the within-class std-dev
        std = Xc.std(axis=0)
        # avoid division by zero in the normalization
        std[std == 0] = 1.
        fac = 1. / (n_samples - n_classes)

        # 2) Within-class variance scaling
        X = np.sqrt(fac) * (Xc / std)
        # SVD of the centered, within-class-scaled data
        U, S, V = linalg.svd(X, full_matrices=False)

        rank = np.sum(S > tol)
        # Scaling of the within-class covariance is V' 1/S
        self.scalings = (V[:rank] / std).T / S[:rank]

        # 3) Between-class variance scaling
        # Scale the weighted class centers
        X = np.dot(((np.sqrt((n_samples * priors_) * fac)) *
                    (means_ - self.xbar_).T).T, self.scalings)
        # The class centers live in a space of at most n_classes - 1
        # dimensions; use an SVD to find the projection onto the space
        # spanned by the centers.
        _, S, V = linalg.svd(X, full_matrices=False)

        self.explained_variance_ratio_ = (S ** 2 / np.sum(S ** 2))[:self.Dim]
        rank = np.sum(S > tol * S[0])
        # The squared singular values of the whitened class centers are the
        # eigenvalues of the LDA eigen-problem (returned by LDA() above).
        self.eigvalues_ = S[:rank] ** 2
        self.scalings_ = np.dot(self.scalings, V.T[:, :rank])
        # Class centers expressed in the embedding space (not used further).
        coef = np.dot(means_ - self.xbar_, self.scalings_)
    def class_means(self, X, y):
        """Compute class means.

        Input
        ----------
        X : array-like, shape (n_samples, n_features)
            Input data.

        y : array-like, shape (n_samples,)
            Target values.

        Returns
        -------
        means : array-like, shape (n_classes, n_features)
            Class means.
        """
        means = []
        classes = np.unique(y)
        for group in classes:
            Xg = X[y == group, :]
            means.append(Xg.mean(0))
        return np.asarray(means)

    def class_cov(self, X, y, priors=None, shrinkage=None):
        """Compute the weighted within-class covariance matrix.

        Input
        ----------
        X : array-like, shape (n_samples, n_features)
            Input data.

        y : array-like, shape (n_samples,)
            Target values.

        priors : array-like, shape (n_classes,)
            Class priors.

        shrinkage : string or float, optional
            Shrinkage parameter. Accepted for API compatibility with
            sklearn but not implemented here; it is ignored.

        Returns
        -------
        cov : array-like, shape (n_features, n_features)
            Weighted within-class covariance matrix.
        """
        classes = np.unique(y)
        covs = []
        for group in classes:
            Xg = X[y == group, :]
            # np.cov treats rows as variables by default; our features are
            # columns, so pass rowvar=False.
            covs.append(np.atleast_2d(np.cov(Xg, rowvar=False)))
        return np.average(covs, axis=0, weights=priors)
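

if __name__ == "__main__":
    # Small self-check on synthetic data (illustrative only; the blob centers
    # below are arbitrary and not part of the original module).
    rng = np.random.RandomState(0)
    n_per_class, n_features = 50, 4
    # Three Gaussian blobs with different means and a shared unit covariance.
    centers = np.array([[0., 0., 0., 0.],
                        [3., 3., 0., 0.],
                        [0., 3., 3., 0.]])
    X = np.vstack([rng.randn(n_per_class, n_features) + c for c in centers])
    labels = np.repeat(np.arange(len(centers)), n_per_class)

    # Center the data, as LDA() expects, then embed it.
    Xc = X - X.mean(axis=0)
    W, evals = LDA(Xc, labels)
    print("embedding matrix shape:", W.shape)   # (n_features, n_classes - 1)
    print("LDA eigenvalues:", evals)
    X_embedded = np.matmul(Xc, W)
    print("embedded data shape:", X_embedded.shape)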