Machine Learning: NeuronNetwork (a Python class implementation)


The training process of a neural network consists of three main steps:

1. forward pass
2. loss computation
3. backpropagation

The example below uses a network with a single hidden layer, structured as follows:

Input X: 100 features
Weights: w1: [100, 20], w2: [20, 2]
Activation function: sigmoid
Output y_hat: 2 classes

Forward pass:

$$layer_1 = x \cdot w_1$$

$$activation_1 = \mathrm{sigmoid}(layer_1)$$

$$layer_2 = activation_1 \cdot w_2$$

$$y_{hat} = \mathrm{sigmoid}(layer_2)$$

Loss computation: $loss = (y_{hat} - y)^2$
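To make the shapes concrete, here is a minimal NumPy sketch of the forward pass and the loss for the architecture above. The batch size of 64 and the random data are illustrative assumptions, not part of the original specification:

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

batch_size = 64                       # illustrative batch size
x = np.random.rand(batch_size, 100)   # input: 100 features per sample
y = np.eye(2)[np.random.randint(0, 2, size=batch_size)]  # one-hot labels, 2 classes
w1 = np.random.rand(100, 20)
w2 = np.random.rand(20, 2)

layer1 = x.dot(w1)                    # [64, 100] x [100, 20] -> [64, 20]
activation1 = sigmoid(layer1)
layer2 = activation1.dot(w2)          # [64, 20] x [20, 2] -> [64, 2]
y_hat = sigmoid(layer2)

loss = np.square(y_hat - y).sum()     # squared error summed over the batch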

Backpropagation:

Gradient computation:

$$\frac{\partial Loss}{\partial w_2} = \frac{\partial Loss}{\partial y_{hat}}\cdot \frac{\partial y_{hat}}{\partial layer_2}\cdot \frac{\partial layer_2}{\partial w_2} = \frac{\partial Loss}{\partial y_{hat}}\cdot \mathrm{sigmoid}'(layer_2)\cdot activation_1$$

$$\frac{\partial Loss}{\partial w_1} = \frac{\partial Loss}{\partial y_{hat}}\cdot \frac{\partial y_{hat}}{\partial layer_2}\cdot \frac{\partial layer_2}{\partial activation_1}\cdot \frac{\partial activation_1}{\partial layer_1}\cdot \frac{\partial layer_1}{\partial w_1} = \frac{\partial Loss}{\partial y_{hat}}\cdot \mathrm{sigmoid}'(layer_2)\cdot w_2\cdot \mathrm{sigmoid}'(layer_1)\cdot x$$

Parameter update:

$$w_1 = w_1 - learning\_rate \cdot dw_1$$

$$w_2 = w_2 - learning\_rate \cdot dw_2$$
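Continuing the sketch above, the chain rule translates into NumPy almost line by line; the matrix form uses transposes where the scalar derivation multiplies by activation_1, w_2, and x. This reuses the variables defined in the earlier sketch, and the learning rate is an illustrative value:

def sigmoid_grad(z):
    fx = sigmoid(z)
    return fx * (1 - fx)

learning_rate = 0.01                          # illustrative value

grad_y_hat = 2.0 * (y_hat - y)                # dLoss/dy_hat for the squared loss

# dLoss/dw2 = activation1^T . (dLoss/dy_hat * sigmoid'(layer2))
grad_layer2 = grad_y_hat * sigmoid_grad(layer2)
dw2 = activation1.T.dot(grad_layer2)          # [20, 64] x [64, 2] -> [20, 2]

# dLoss/dw1 = x^T . ((grad_layer2 . w2^T) * sigmoid'(layer1))
grad_layer1 = grad_layer2.dot(w2.T) * sigmoid_grad(layer1)
dw1 = x.T.dot(grad_layer1)                    # [100, 64] x [64, 20] -> [100, 20]

# gradient-descent update
w1 -= learning_rate * dw1
w2 -= learning_rate * dw2

Full code example: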

# -*- coding: utf-8 -*-
import numpy as np


class NeuronNetwork:
    def __init__(self, input_dim, output_dim, hide_layers, learn_rate=0.01,
                 batch_size=64, iter_num=1000, act_func='sigmoid'):
        """
        :param input_dim: int
        :param output_dim: int
        :param hide_layers: array like [20, 10], meaning two hidden layers of dimension 20 and 10
        :param act_func: assign the activation function, sigmoid or relu
        :param learn_rate: float32
        :param batch_size: int
        :param iter_num: int
        """
        self.dim_in = input_dim
        self.dim_out = output_dim
        self.w = []
        self.dw = []
        self.layer_out = []
        self.act_out = []
        self.layer_size = len(hide_layers)
        self.activation_func = act_func
        self.lr = learn_rate
        self.iter_num = iter_num
        self.batch_size = batch_size
        self.w_log = []
        self.loss_log = []
        self.weights_init(hide_layers)

    @staticmethod
    def sigmoid(z):
        return 1 / (1 + np.exp(-1 * z))

    def gra_sigmoid(self, z):
        fx = self.sigmoid(z)
        return fx * (1 - fx)

    @staticmethod
    def relu(z):
        return np.maximum(z, 0)

    @staticmethod
    def gra_relu(z):
        # return a new array rather than modifying z in place, so the
        # cached layer outputs are not corrupted by the backward pass
        return (z > 0).astype(z.dtype)

    def act_function(self, z):
        if self.activation_func == 'sigmoid':
            return self.sigmoid(z)
        elif self.activation_func == 'relu':
            return self.relu(z)
        else:
            raise ValueError('act_func should be "sigmoid" or "relu"')

    def grad_function(self, z):
        if self.activation_func == 'sigmoid':
            return self.gra_sigmoid(z)
        else:
            return self.gra_relu(z)

    def weights_init(self, hide_layers):
        if self.layer_size == 1:
            self.w.append(np.random.rand(self.dim_in, hide_layers[0]))
            self.dw.append(np.zeros((self.dim_in, hide_layers[0])))
            self.layer_out.append(np.zeros((self.batch_size, hide_layers[0])))
            self.act_out.append(np.zeros((self.batch_size, hide_layers[0])))
            self.w.append(np.random.rand(hide_layers[0], self.dim_out))
            self.dw.append(np.zeros((hide_layers[0], self.dim_out)))
            self.layer_out.append(np.zeros((self.batch_size, self.dim_out)))
            self.act_out.append(np.zeros((self.batch_size, self.dim_out)))
        elif self.layer_size == 2:
            self.w.append(np.random.rand(self.dim_in, hide_layers[0]))
            self.dw.append(np.zeros((self.dim_in, hide_layers[0])))
            self.layer_out.append(np.zeros((self.batch_size, hide_layers[0])))
            self.act_out.append(np.zeros((self.batch_size, hide_layers[0])))
            self.w.append(np.random.rand(hide_layers[0], hide_layers[1]))
            self.dw.append(np.zeros((hide_layers[0], hide_layers[1])))
            self.layer_out.append(np.zeros((self.batch_size, hide_layers[1])))
            self.act_out.append(np.zeros((self.batch_size, hide_layers[1])))
            self.w.append(np.random.rand(hide_layers[1], self.dim_out))
            self.dw.append(np.zeros((hide_layers[1], self.dim_out)))
            self.layer_out.append(np.zeros((self.batch_size, self.dim_out)))
            self.act_out.append(np.zeros((self.batch_size, self.dim_out)))
        else:
            raise ValueError('hide layer size should be: 1 or 2, but got %d' % self.layer_size)

    def calculate(self, x):
        """
        :param x: shape of [batch_size, dim_in]
        :return: y_hat, shape of [batch_size, dim_out]
        """
        if self.layer_size == 1:
            self.layer_out[0] = x.dot(self.w[0])
            self.act_out[0] = self.act_function(self.layer_out[0])
            self.layer_out[1] = self.act_out[0].dot(self.w[1])
            self.act_out[1] = self.act_function(self.layer_out[1])
            return self.act_out[-1]
        else:
            self.layer_out[0] = x.dot(self.w[0])
            self.act_out[0] = self.act_function(self.layer_out[0])
            self.layer_out[1] = self.act_out[0].dot(self.w[1])
            self.act_out[1] = self.act_function(self.layer_out[1])
            self.layer_out[2] = self.act_out[1].dot(self.w[2])
            self.act_out[2] = self.act_function(self.layer_out[2])
            return self.act_out[-1]

    @staticmethod
    def cal_loss(y_hat, y_label):
        """
        :param y_hat: predictions of output, shape of [batch_size, dim_out]
        :param y_label: real label, shape of [batch_size, dim_out]
        :return: loss, float32
        """
        loss = np.square(y_hat - y_label).sum()
        return loss

    def back_propagation(self, x, y_hat, y_label):
        """
        :param x: shape of [batch_size, dim_in]
        :param y_hat: predictions of output, shape of [batch_size, dim_out]
        :param y_label: real label, shape of [batch_size, dim_out]
        :return: None
        """
        grad_y_hat = 2.0 * (y_hat - y_label)
        if self.layer_size == 1:
            # calculate dw2
            grad_temp = grad_y_hat * self.grad_function(self.layer_out[1])
            self.dw[1] = self.act_out[0].T.dot(grad_temp)
            # calculate dw1
            grad_temp = grad_temp.dot(self.w[1].T)
            grad_temp = grad_temp * self.grad_function(self.layer_out[0])
            self.dw[0] = x.T.dot(grad_temp)
            # update w1 and w2
            self.w[0] -= self.lr * self.dw[0]
            self.w[1] -= self.lr * self.dw[1]
        else:
            # calculate dw3
            grad_temp = grad_y_hat * self.grad_function(self.layer_out[2])
            self.dw[2] = self.act_out[1].T.dot(grad_temp)
            # calculate dw2
            grad_temp = grad_temp.dot(self.w[2].T)
            grad_temp = grad_temp * self.grad_function(self.layer_out[1])
            self.dw[1] = self.act_out[0].T.dot(grad_temp)
            # calculate dw1
            grad_temp = grad_temp.dot(self.w[1].T)
            grad_temp = grad_temp * self.grad_function(self.layer_out[0])
            self.dw[0] = x.T.dot(grad_temp)
            # update w1, w2 and w3
            self.w[0] -= self.lr * self.dw[0]
            self.w[1] -= self.lr * self.dw[1]
            self.w[2] -= self.lr * self.dw[2]

    def train(self, x_sets, y_sets):
        sample_count = len(x_sets)
        for i in range(self.iter_num):
            # sample a mini-batch (with replacement)
            batch_index = np.random.choice(sample_count, self.batch_size)
            batch_x = x_sets[batch_index]
            batch_y = y_sets[batch_index]
            batch_y_hat = self.calculate(batch_x)
            self.back_propagation(batch_x, batch_y_hat, batch_y)
            if i % 50 == 0:
                batch_loss = self.cal_loss(batch_y_hat, batch_y)
                print('current loss: ', batch_loss)


if __name__ == '__main__':
    # generate a random data set
    # np.random.seed(1)
    # x_set = np.random.rand(1000, 100)
    # y_set = np.random.randint(0, 9, size=1000)
    # y_set = np.eye(10)[y_set]

    # use the iris data set
    x_samples = []
    y_samples = []
    with open('iris.txt') as f:
        data = f.readlines()
    for sample in data:
        try:
            x1, x2, x3, x4, label = sample.split(',')
            if 'setosa' in label:
                label = 0
            elif 'versicolor' in label:
                label = 1
            else:
                label = 2
            x_samples.append([x1, x2, x3, x4])
            y_samples.append(label)
        except ValueError:
            # skip malformed lines (e.g. headers or blank lines)
            pass
    x_samples = np.array(x_samples, dtype=np.float64)
    y_samples = np.eye(3)[y_samples]   # one-hot encode the 3 iris classes
    nn = NeuronNetwork(4, 3, [12, 6])
    nn.train(x_samples, y_samples)
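To sanity-check the back_propagation implementation, one can compare an analytic gradient entry against a central finite difference. This is a minimal sketch assuming the class above; the seed, shapes, and eps are arbitrary choices:

np.random.seed(0)
net = NeuronNetwork(4, 3, [5], batch_size=8)
x = np.random.rand(8, 4)
y = np.eye(3)[np.random.randint(0, 3, size=8)]

w_before = [w.copy() for w in net.w]   # snapshot: back_propagation updates net.w in place
y_hat = net.calculate(x)
net.back_propagation(x, y_hat, y)
analytic = net.dw[0][0, 0]             # analytic gradient of one entry of w1

eps = 1e-6
net.w = [w.copy() for w in w_before]
net.w[0][0, 0] += eps
loss_plus = net.cal_loss(net.calculate(x), y)
net.w = [w.copy() for w in w_before]
net.w[0][0, 0] -= eps
loss_minus = net.cal_loss(net.calculate(x), y)
numeric = (loss_plus - loss_minus) / (2 * eps)

print(analytic, numeric)               # the two values should closely agree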