Commit 8e86f96

committed Jul 6, 2020
add some more algs
1 parent e52f82f

19 files changed: +1274 -2 lines
 

LDA.py

+161
@@ -0,0 +1,161 @@
import numpy as np
from scipy import linalg


def LDA(data, gnd):
    '''
    LDA: Linear Discriminant Analysis for ML Course

    Input:
        data - Data matrix (numpy array). Each row vector of data is a
               data point. It should be centered beforehand.
        gnd  - Column vector of the label information for each data point.

    Output:
        eigvector - Each column is an embedding function: for a new
                    data point (row vector) x, y = np.matmul(x, eigvector)
                    will be the embedding result of x.
        eigvalue  - The sorted eigenvalues of the LDA eigen-problem.
    '''
    mylda = myLDA()
    mylda.fit(data, gnd)
    eigvector = mylda.scalings_
    # note: the solver exposes its within-class scaling matrix here,
    # standing in for the eigenvalues of the LDA eigen-problem
    eigvalue = mylda.scalings
    return eigvector, eigvalue


class myLDA():
    """Linear Discriminant Analysis

    A classifier with a linear decision boundary, generated by fitting class
    conditional densities to the data and using Bayes' rule.

    The model fits a Gaussian density to each class, assuming that all classes
    share the same covariance matrix.

    The fitted model can also be used to reduce the dimensionality of the
    input by projecting it onto the most discriminative directions.

    This class is partly adapted from
    sklearn.discriminant_analysis.LinearDiscriminantAnalysis.
    """

    def __init__(self):
        pass

    def fit(self, X, y):
        # LDA yields at most n_classes - 1 discriminative directions
        self.Dim = np.unique(y).shape[0] - 1
        self.svd_solver(X, y)

    def svd_solver(self, X, y, tol=1e-5):
        """SVD solver.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data.

        y : array-like, shape (n_samples,) or (n_samples, n_targets)
            Target values.

        tol : float
            Tolerance for floating-point error when thresholding
            singular values.
        """
        n_samples, n_features = X.shape
        classes = np.unique(y)
        n_classes = classes.shape[0]
        _, y_t = np.unique(y, return_inverse=True)  # non-negative ints
        priors_ = np.bincount(y_t) / float(len(y))
        means_ = self.class_means(X, y)
        covariance_ = self.class_cov(X, y, priors_)

        Xc = []
        for idx, group in enumerate(classes):
            Xg = X[y == group, :]
            Xc.append(Xg - means_[idx])

        self.xbar_ = np.dot(priors_, means_)
        Xc = np.concatenate(Xc, axis=0)

        # 1) within-class (univariate) scaling by the within-class std-dev
        std = Xc.std(axis=0)
        # avoid division by zero in normalization
        std[std == 0] = 1.
        fac = 1. / (n_samples - n_classes)

        # 2) within-class variance scaling
        X = np.sqrt(fac) * (Xc / std)
        # SVD of the centered, within-class-scaled data
        U, S, V = linalg.svd(X, full_matrices=False)

        rank = np.sum(S > tol)
        # scaling of the within-class covariance is: V' 1/S
        self.scalings = (V[:rank] / std).T / S[:rank]

        # 3) between-class variance scaling
        # scale the weighted centers
        X = np.dot(((np.sqrt((n_samples * priors_) * fac)) *
                    (means_ - self.xbar_).T).T, self.scalings)
        # the centers live in a space of at most n_classes - 1 dimensions;
        # use SVD to find the projection onto the space spanned by the
        # (n_classes) centers
        _, S, V = linalg.svd(X, full_matrices=0)

        self.explained_variance_ratio_ = (S**2 / np.sum(S**2))[:self.Dim]
        rank = np.sum(S > tol * S[0])
        self.scalings_ = np.dot(self.scalings, V.T[:, :rank])
        self.coef_ = np.dot(means_ - self.xbar_, self.scalings_)

    def class_means(self, X, y):
        """Compute class means.

        Input
        ----------
        X : array-like, shape (n_samples, n_features)
            Input data.

        y : array-like, shape (n_samples,) or (n_samples, n_targets)
            Target values.

        Returns
        -------
        means : array-like, shape (n_classes, n_features)
            Class means.
        """
        means = []
        classes = np.unique(y)
        for group in classes:
            Xg = X[y == group, :]
            means.append(Xg.mean(0))
        return np.asarray(means)

    def class_cov(self, X, y, priors=None, shrinkage=None):
        """Compute the weighted within-class covariance matrix.

        Input
        ----------
        X : array-like, shape (n_samples, n_features)
            Input data.

        y : array-like, shape (n_samples,) or (n_samples, n_targets)
            Target values.

        priors : array-like, shape (n_classes,)
            Class priors.

        shrinkage : string or float, optional
            Shrinkage parameter, possible values:
              - None: no shrinkage (default).
              - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
              - float between 0 and 1: fixed shrinkage parameter.

        Returns
        -------
        cov : array-like, shape (n_features, n_features)
            Class covariance matrix.
        """
        classes = np.unique(y)
        covs = []
        for group in classes:
            Xg = X[y == group, :]
            # rowvar=False: rows are samples, columns are features,
            # so the result is (n_features, n_features) as documented
            covs.append(np.atleast_2d(np.cov(Xg, rowvar=False)))
        return np.average(covs, axis=0, weights=priors)
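
As a sanity check, a minimal usage sketch, assuming the file above is importable as the module LDA; the two-blob toy data is illustrative only:

    import numpy as np
    from LDA import LDA

    rng = np.random.RandomState(0)
    # two Gaussian blobs in 5-D, 50 points each
    X = np.vstack([rng.randn(50, 5) + 2, rng.randn(50, 5) - 2])
    y = np.hstack([np.zeros(50), np.ones(50)])
    X = X - X.mean(axis=0)       # the docstring asks for centered data

    eigvector, eigvalue = LDA(X, y)
    Z = np.matmul(X, eigvector)  # embedding; typically (100, 1) for two classes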

README.md

+7-2
@@ -13,10 +13,15 @@
 - [x] Multiple Layer Perceptron (Fully connected)
 - [x] Decision Tree
 - [x] Random Forest
-- [ ] KNN
+- [x] AdaBoost
+- [x] Gradient Boost
+- [x] KNN
 
 ### Unsupervised Learning
 
 - [x] GMM
 - [x] EM
-- [x] K-Means
+- [x] K-Means
+- [x] Spectral Clustering
+- [x] LDA
+- [x] PCA

dt_rf_adaboost_gbdt/GBDT.py

+71
@@ -0,0 +1,71 @@
import copy
import numpy as np


class GBDT:
    '''GBDT Classifier.

    Note that this class only supports binary classification.
    '''

    def __init__(self,
                 base_learner,
                 n_estimator,
                 learning_rate=0.1,
                 seed=2020):
        '''Initialize the classifier.

        Args:
            base_learner: the base_learner should provide the .fit() and
                .predict() interfaces.
            n_estimator (int): The number of base learners in the ensemble.
            learning_rate (float): shrinkage applied to each boosting step.
            seed (int): random seed.
        '''
        np.random.seed(seed)
        self.base_learner = base_learner
        self.n_estimator = n_estimator
        self._estimators = [copy.deepcopy(self.base_learner)
                            for _ in range(self.n_estimator)]
        self.lr = learning_rate

    def fit(self, X, y):
        """Build the GBDT according to the training data.

        Args:
            X: training features, of shape (N, D). Each X[i] is a training sample.
            y: vector of training labels, of shape (N,).
        """
        # YOUR CODE HERE
        # begin answer
        def _gradient(y, p):
            # gradient of the log loss w.r.t. the prediction p,
            # with p clipped away from 0 and 1 for numerical stability
            tmp = np.clip(p, 1e-12, 1 - 1e-12)
            return -(y / tmp) + (1 - y) / (1 - tmp)

        self._estimators[0].fit(X, y)
        pred = self._estimators[0].predict(X).astype(float)
        for i in range(1, self.n_estimator):
            # fit the next learner to the current gradient, then take a
            # shrunken gradient-descent step in function space
            g = _gradient(y, pred)
            self._estimators[i].fit(X, g)
            pred -= self.lr * self._estimators[i].predict(X)
        # end answer
        return self

    def predict(self, X):
        """Predict classification results for X.

        Args:
            X: testing sample features, of shape (N, D).

        Returns:
            (np.array): predicted testing sample labels, of shape (N,).
        """
        # YOUR CODE HERE
        # begin answer
        # replay the boosting stages, then threshold at 0.5
        pred = self._estimators[0].predict(X).astype(float)
        for i in range(1, self.n_estimator):
            pred -= self.lr * self._estimators[i].predict(X)
        y_pred = (pred > 0.5).astype(int)
        # end answer
        return y_pred
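
A minimal usage sketch; pairing with sklearn's DecisionTreeRegressor as the base learner is an assumption (any regressor with .fit()/.predict() should do), and the toy task is illustrative only:

    import numpy as np
    from sklearn.tree import DecisionTreeRegressor

    rng = np.random.RandomState(0)
    X = rng.randn(200, 4)
    y = (X[:, 0] + X[:, 1] > 0).astype(int)   # linearly separable toy task

    gbdt = GBDT(base_learner=DecisionTreeRegressor(max_depth=3),
                n_estimator=20, learning_rate=0.1)
    gbdt.fit(X, y)
    print((gbdt.predict(X) == y).mean())       # training accuracy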

dt_rf_adaboost_gbdt/adaboost.py

+70
@@ -0,0 +1,70 @@
import copy
import numpy as np


class Adaboost:
    '''Adaboost Classifier.

    Note that this class only supports binary classification.
    '''

    def __init__(self,
                 base_learner,
                 n_estimator,
                 seed=2020):
        '''Initialize the classifier.

        Args:
            base_learner: the base_learner should provide the .fit() and
                .predict() interfaces.
            n_estimator (int): The number of base learners in the ensemble.
            seed (int): random seed.
        '''
        np.random.seed(seed)
        self.base_learner = base_learner
        self.n_estimator = n_estimator
        self._estimators = [copy.deepcopy(self.base_learner)
                            for _ in range(self.n_estimator)]
        self._alphas = [1 for _ in range(n_estimator)]

    def fit(self, X, y):
        """Build the Adaboost ensemble according to the training data.

        Args:
            X: training features, of shape (N, D). Each X[i] is a training sample.
            y: vector of training labels, of shape (N,).
        """
        # YOUR CODE HERE
        # begin answer
        sample_weights = np.ones(len(y)) / len(y)
        for i in range(self.n_estimator):
            self._estimators[i].fit(X, y, sample_weights)
            pred = self._estimators[i].predict(X)
            I = pred != y                         # per-sample error indicator
            err = np.sum(I * sample_weights)
            err = np.clip(err, 1e-12, 1 - 1e-12)  # keep the log finite
            self._alphas[i] = 0.5 * np.log((1 - err) / err)
            # up-weight misclassified samples, down-weight correct ones;
            # the per-sample indicator I must drive this update (the scalar
            # 2 * err - 1 would rescale every weight equally, which the
            # normalization below then undoes)
            sample_weights *= np.exp(self._alphas[i] * (2 * I - 1))
            sample_weights /= np.sum(sample_weights)
        # end answer
        return self

    def predict(self, X):
        """Predict classification results for X.

        Args:
            X: testing sample features, of shape (N, D).

        Returns:
            (np.array): predicted testing sample labels, of shape (N,).
        """
        N = X.shape[0]
        # YOUR CODE HERE
        # begin answer
        tmp = np.zeros((N, self.n_estimator))
        for i in range(self.n_estimator):
            tmp[:, i] = self._estimators[i].predict(X)
        tmp = tmp * np.array(self._alphas).reshape((1, -1))
        # weighted vote over {0, 1} predictions: predict 1 when the
        # alpha-weighted votes for class 1 exceed half the total alpha mass
        y_pred = (np.sum(tmp, axis=1) > 0.5 * np.sum(self._alphas)).astype(int)
        # end answer
        return y_pred
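
A minimal usage sketch; it assumes a base learner whose fit() accepts per-sample weights as a third argument, e.g. sklearn's DecisionTreeClassifier used as a decision stump, on an illustrative toy task:

    import numpy as np
    from sklearn.tree import DecisionTreeClassifier

    rng = np.random.RandomState(0)
    X = rng.randn(200, 4)
    y = (X[:, 0] + X[:, 1] > 0).astype(int)    # labels in {0, 1}

    ada = Adaboost(base_learner=DecisionTreeClassifier(max_depth=1),
                   n_estimator=50)
    ada.fit(X, y)
    print((ada.predict(X) == y).mean())        # training accuracy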
