
LogicalRegressionAndSVM

Project background: a machine-learning implementation of logistic regression and support vector machines for a lab course.
Based on: Python 3.9 with PyTorch in a conda environment


Quick Start


Main class

Everything is wrapped in a single class, and the underlying idea is very simple: follow the mathematical derivation and the core loss part falls out directly. Different weight-initialization schemes, loss functions, and so on are selected when the class is constructed, and the chosen modes take effect inside train().
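A minimal usage sketch (it simply mirrors the __main__ block at the bottom of the code and assumes the a9a LIBSVM files used there, ./a9a.txt and ./a9a.t.txt, are in the working directory):

# Quick-start sketch; assumes the LogicalRegressionAndSVM class defined below is in scope
# and that ./a9a.txt and ./a9a.t.txt exist.
model = LogicalRegressionAndSVM(path_train='./a9a.txt', path_test='./a9a.t.txt',
                                learning_rate=0.001, episodes=200,
                                loss_mode=1,        # 1: logistic regression, 0: linear SVM (hinge loss)
                                omega_init_mode=1,  # 1: random init, 0: zero init
                                mini_batch=70)
model.run()                # read the data, train, and plot the loss curves
print(model.score())       # accuracy on the test split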

Note: the Adam optimizer has not been written yet and is left for a later update (a rough sketch of a possible Adam step follows the full code below).
# Finished Date: 2024/03/29 10:00 - 2024/03/30 00:14
# Author: Olives
# Position: in SCUT
# Environment: pytorch, python3.9
# Do the thing better

import random
import numpy as np
import sklearn.datasets as sd
import matplotlib.pyplot as plt
import plotly.graph_objects as go


# Activation function to add non-linear capacity; the sigmoid function is used
def sigmoid(x):
    temp = 1 / (1 + np.exp(-x))
    return temp


# Define a class that encapsulates the whole pipeline, which makes it easier to modify
class LogicalRegressionAndSVM:
    def __init__(self, path_train, path_test, learning_rate=0.002, episodes=100, loss_mode=0,
                 omega_init_mode=0, mini_batch=40, regularization=0.5, optimize_mode='SGD'):
        self.learning_rate = learning_rate
        self.episodes = episodes
        self.loss_mode = loss_mode
        self.path_train = path_train
        self.path_test = path_test
        self.X_train = np.empty((1, 1))
        self.X_test = np.empty((1, 1))
        self.y_train = np.empty((1, 1))
        self.y_test = np.empty((1, 1))
        self.omega = np.empty((1, 1))
        self.train_loss = []
        self.test_loss = []
        self.omega_init_mode = omega_init_mode
        self.opt_omega = np.empty((1, 1))
        self.mini_batch = mini_batch
        self.regularization = regularization
        self.b = np.empty((1, 1))
        self.optimize_mode = optimize_mode

    # read the data from file; no further preprocessing is needed
    def read(self):
        X_train, y_train = sd.load_svmlight_file(self.path_train, n_features=123)
        X_valid, y_valid = sd.load_svmlight_file(self.path_test, n_features=123)

        # transform to ndarray
        X_train = X_train.toarray()
        X_valid = X_valid.toarray()
        # transform to column vector
        y_train = y_train.reshape(len(y_train), 1)
        y_valid = y_valid.reshape(len(y_valid), 1)

        # Adding a column of ones would fold the bias into the weights; since the bias is
        # trained separately here, these two lines are kept commented out.
        # X_train = np.concatenate((np.ones((X_train.shape[0], 1)), X_train), axis=1)
        # X_valid = np.concatenate((np.ones((X_valid.shape[0], 1)), X_valid), axis=1)
        X_train = np.nan_to_num(X_train)
        X_valid = np.nan_to_num(X_valid)
        self.X_train = X_train
        self.X_test = X_valid
        self.y_train = y_train
        self.y_test = y_valid

    # get the classification accuracy on the test set
    def score(self):
        hx = sigmoid(self.X_test.dot(self.opt_omega))
        hx[hx >= 0.5] = 1
        hx[hx < 0.5] = -1
        hx = (hx == self.y_test)
        return np.mean(hx)

    # calculate the gradient of logistic regression
    def logistic_gradient(self, X, y):
        sigmoid_x = sigmoid(X.dot(self.omega))
        loss = X.T.dot(sigmoid_x - y)
        return loss

    # calculate the loss of logistic regression
    def logistic_loss(self, X, y):
        sigmoid_x = sigmoid(X.dot(self.omega))
        cost = np.multiply((1 + y), np.log(1 + sigmoid_x)) + np.multiply((1 - y), np.log(1 - sigmoid_x))
        return -cost.mean() / 2

    # calculate the hinge loss
    def hinge_loss(self, X, y):
        loss = np.maximum(0, 1 - np.multiply(y, (X.dot(self.omega)) + self.b.T)).mean()
        reg = np.multiply(self.omega, self.omega).sum() / 2
        temp = self.regularization * loss + reg
        return temp

    # calculate the gradient of the hinge loss (includes omega and b)
    def hinge_gradient(self, X, y):
        error = np.maximum(0, 1 - np.multiply(y, X.dot(self.omega)))
        index = np.where(error == 0)
        x = X.copy()
        # zero out samples whose hinge error is zero (they contribute no gradient)
        x[index, :] = 0
        gw = -y.T.dot(x)
        gb = -y
        grad_omega = self.omega + self.regularization * gw.mean()
        grad_b = self.regularization * gb.mean()
        return grad_omega, grad_b

    # train and test the model
    def train(self):
        # choose the weight initialization depending on omega_init_mode: random or zeros
        if self.omega_init_mode:
            self.omega = np.random.random((self.X_train.shape[1], 1))
        else:
            self.omega = np.zeros((self.X_train.shape[1], 1))

        # the initialization of b could also be changed, but it does not matter much
        self.b = np.random.random((self.X_train.shape[1], 1))
        train_data = np.concatenate((self.y_train, self.X_train), axis=1)

        # loss_mode selects logistic regression or SVM; optimize_mode selects the optimizer
        if self.optimize_mode == "SGD":
            if self.loss_mode:
                for episode in range(self.episodes):
                    train_sample = np.matrix(random.sample(train_data.tolist(), self.mini_batch))
                    grad = self.logistic_gradient(train_sample[:, 1:125], train_sample[:, 0])
                    self.omega = self.omega - (self.learning_rate * grad)
                    self.train_loss.append(self.logistic_loss(self.X_train, self.y_train))
                    self.test_loss.append(self.logistic_loss(self.X_test, self.y_test))
                self.opt_omega = self.omega
            else:
                for episode in range(self.episodes):
                    train_sample = np.matrix(random.sample(train_data.tolist(), self.mini_batch))
                    grad_omega, grad_b = self.hinge_gradient(train_sample[:, 1:125], train_sample[:, 0])
                    self.omega = self.omega - (self.learning_rate * grad_omega)
                    self.b = self.b - (self.learning_rate * grad_b)
                    self.train_loss.append(self.hinge_loss(self.X_train, self.y_train))
                    self.test_loss.append(self.hinge_loss(self.X_test, self.y_test))
                self.opt_omega = self.omega
        else:
            # the Adam branch is not implemented yet
            pass

    # plotly draw the loss of each episode
    def plotly_graph(self):
        episodes_list = list(range(self.episodes))
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=episodes_list, y=self.train_loss, name="train_loss",
                                 line=dict(color='firebrick', width=4)))
        fig.add_trace(go.Scatter(x=episodes_list, y=self.test_loss, name="test_loss",
                                 line=dict(color='royalblue', width=4)))

        fig.update_layout(title='The train and test data loss in different episodes',
                          xaxis_title='Episodes',
                          yaxis_title='Loss')

        fig.show()

    # matplotlib draw the loss of each episode
    def plot_graph(self):
        iteration = np.arange(0, self.episodes, step=1)
        fig, ax = plt.subplots(figsize=(12, 8))
        ax.set_title('Train')
        ax.set_xlabel('iteration')
        ax.set_ylabel('loss')
        plt.plot(iteration, self.train_loss, 'b', label='Train')
        plt.plot(iteration, self.test_loss, 'r', label='Test')
        plt.legend()
        plt.show()

    # the run function
    def run(self):
        self.read()
        self.train()
        # self.plotly_graph()
        self.plot_graph()


if __name__ == '__main__':
    # logistic regression to get the result
    logical = LogicalRegressionAndSVM(path_train='./a9a.txt', path_test='./a9a.t.txt', learning_rate=0.001,
                                      episodes=200, loss_mode=1, omega_init_mode=1, mini_batch=70)
    logical.run()
    print("Logical Regression Over!\n")
    print(logical.score())

    # svm to get the result
    svm = LogicalRegressionAndSVM(path_train='./a9a.txt', path_test='./a9a.t.txt', learning_rate=0.003, episodes=200,
                                  loss_mode=0, omega_init_mode=1, mini_batch=70)
    svm.run()
    print("SVM Over!")
    print(svm.score())

    # To use Adam to optimise:
    # svm = LogicalRegressionAndSVM(path_train='./a9a.txt', path_test='./a9a.t.txt', learning_rate=0.003, episodes=100,
    #                               loss_mode=0, omega_init_mode=1, mini_batch=70, optimize_mode="Adam")
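
The Adam optimizer mentioned above is still a stub (the else: pass branch in train()). Below is a minimal, hedged sketch of what an Adam-style update for omega could look like; the function name adam_step, its state variables m, v, t and the hyperparameter defaults are my own illustrative choices, not part of the original code.

# Illustrative Adam update (not part of the original class); adam_step, m, v, t are hypothetical names.
import numpy as np

def adam_step(omega, grad, m, v, t, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
    # biased first and second moment estimates
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * np.square(grad)
    # bias-corrected estimates
    m_hat = m / (1 - beta1 ** t)
    v_hat = v / (1 - beta2 ** t)
    # parameter update
    omega = omega - lr * m_hat / (np.sqrt(v_hat) + eps)
    return omega, m, v

# Inside the "Adam" branch of train(), one could keep m = v = np.zeros_like(self.omega)
# and call, once per episode with the mini-batch gradient:
#     self.omega, m, v = adam_step(self.omega, grad, m, v, episode + 1, lr=self.learning_rate)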

Look back

Nothing here is especially difficult; just be careful to carry out the mathematical derivation in matrix form, and pay particular attention to the main loss functions.

Loss function


# calculate the gradient of logistic regression
def logistic_gradient(self, X, y):
    sigmoid_x = sigmoid(X.dot(self.omega))
    loss = X.T.dot(sigmoid_x - y)
    return loss

# calculate the loss of logistic regression
def logistic_loss(self, X, y):
    sigmoid_x = sigmoid(X.dot(self.omega))
    cost = np.multiply((1 + y), np.log(1 + sigmoid_x)) + np.multiply((1 - y), np.log(1 - sigmoid_x))
    return -cost.mean() / 2

# calculate the hinge loss
def hinge_loss(self, X, y):
    loss = np.maximum(0, 1 - np.multiply(y, (X.dot(self.omega)) + self.b.T)).mean()
    reg = np.multiply(self.omega, self.omega).sum() / 2
    temp = self.regularization * loss + reg
    return temp

# calculate the gradient of the hinge loss (includes omega and b)
def hinge_gradient(self, X, y):
    error = np.maximum(0, 1 - np.multiply(y, X.dot(self.omega)))
    index = np.where(error == 0)
    x = X.copy()
    # zero out samples whose hinge error is zero (they contribute no gradient)
    x[index, :] = 0
    gw = -y.T.dot(x)
    gb = -y
    grad_omega = self.omega + self.regularization * gw.mean()
    grad_b = self.regularization * gb.mean()
    return grad_omega, grad_b
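
For reference, here is the matrix-form write-up that the two losses above implement (labels y_i ∈ {−1, +1}, σ(z) = 1 / (1 + e^{−z}), n samples, C the regularization attribute, b the bias); this is just a transcription of the code, not an independent derivation:

L_{\mathrm{log}}(\omega) = -\frac{1}{2n}\sum_{i=1}^{n}\Big[(1+y_i)\log\big(1+\sigma(x_i^{\top}\omega)\big) + (1-y_i)\log\big(1-\sigma(x_i^{\top}\omega)\big)\Big],
\qquad \text{update direction used in logistic\_gradient: } X^{\top}\big(\sigma(X\omega)-y\big)

L_{\mathrm{hinge}}(\omega, b) = \frac{1}{2}\lVert\omega\rVert^{2} + \frac{C}{n}\sum_{i=1}^{n}\max\big(0,\; 1-y_i(x_i^{\top}\omega+b)\big)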

By the way

For plotting, either matplotlib (plot) or plotly can be used; plotly gives a nicer interactive experience in the browser. run() calls the matplotlib version (plot_graph()) by default and leaves the plotly_graph() call commented out.

Draw things

# plotly draw the loss of each episode
def plotly_graph(self):
    episodes_list = list(range(self.episodes))
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=episodes_list, y=self.train_loss, name="train_loss",
                             line=dict(color='firebrick', width=4)))
    fig.add_trace(go.Scatter(x=episodes_list, y=self.test_loss, name="test_loss",
                             line=dict(color='royalblue', width=4)))

    fig.update_layout(title='The train and test data loss in different episodes',
                      xaxis_title='Episodes',
                      yaxis_title='Loss')

    fig.show()

# matplotlib draw the loss of each episode
def plot_graph(self):
    iteration = np.arange(0, self.episodes, step=1)
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.set_title('Train')
    ax.set_xlabel('iteration')
    ax.set_ylabel('loss')
    plt.plot(iteration, self.train_loss, 'b', label='Train')
    plt.plot(iteration, self.test_loss, 'r', label='Test')
    plt.legend()
    plt.show()