Machine Learning: NeuronNetwork (a Python class implementation)


The training process of a neural network consists of three main steps:

1. forward pass
2. loss computation
3. backpropagation

The example below uses a network with a single hidden layer, structured as follows:

Input X: 100 features
Weights: w1: [100, 20], w2: [20, 2]
Activation function: sigmoid
Output y_hat: 2 classes

Forward pass:

$$layer_1 = x \cdot w_1$$

$$activation_1 = \mathrm{sigmoid}(layer_1)$$

$$layer_2 = activation_1 \cdot w_2$$

$$y_{hat} = \mathrm{sigmoid}(layer_2)$$

Loss computation: $loss = (y_{hat} - y)^2$
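To make the shapes concrete, here is a minimal NumPy sketch of the forward pass and the loss for the architecture above. The batch size of 64 and the random data are illustrative assumptions, not part of the original specification:

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

batch_size = 64                       # illustrative batch size
x = np.random.rand(batch_size, 100)   # input: 100 features per sample
y = np.eye(2)[np.random.randint(0, 2, size=batch_size)]  # one-hot labels, 2 classes
w1 = np.random.rand(100, 20)
w2 = np.random.rand(20, 2)

layer1 = x.dot(w1)                    # [64, 100] x [100, 20] -> [64, 20]
activation1 = sigmoid(layer1)
layer2 = activation1.dot(w2)          # [64, 20] x [20, 2] -> [64, 2]
y_hat = sigmoid(layer2)

loss = np.square(y_hat - y).sum()     # squared error summed over the batch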

Backpropagation:

Gradient computation:

$$\frac{\partial Loss}{\partial w_2} = \frac{\partial Loss}{\partial y_{hat}}\cdot \frac{\partial y_{hat}}{\partial layer_2}\cdot \frac{\partial layer_2}{\partial w_2} = \frac{\partial Loss}{\partial y_{hat}}\cdot \mathrm{sigmoid}'(layer_2)\cdot activation_1$$

$$\frac{\partial Loss}{\partial w_1} = \frac{\partial Loss}{\partial y_{hat}}\cdot \frac{\partial y_{hat}}{\partial layer_2}\cdot \frac{\partial layer_2}{\partial activation_1}\cdot \frac{\partial activation_1}{\partial layer_1}\cdot \frac{\partial layer_1}{\partial w_1} = \frac{\partial Loss}{\partial y_{hat}}\cdot \mathrm{sigmoid}'(layer_2)\cdot w_2\cdot \mathrm{sigmoid}'(layer_1)\cdot x$$

Parameter update:

$$w_1 = w_1 - learning\_rate \cdot dw_1$$

$$w_2 = w_2 - learning\_rate \cdot dw_2$$
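Continuing the sketch above, the chain rule translates into NumPy almost line by line; the matrix form uses transposes where the scalar derivation multiplies by activation_1, w_2, and x. This reuses the variables defined in the earlier sketch, and the learning rate is an illustrative value:

def sigmoid_grad(z):
    fx = sigmoid(z)
    return fx * (1 - fx)

learning_rate = 0.01                          # illustrative value

grad_y_hat = 2.0 * (y_hat - y)                # dLoss/dy_hat for the squared loss

# dLoss/dw2 = activation1^T . (dLoss/dy_hat * sigmoid'(layer2))
grad_layer2 = grad_y_hat * sigmoid_grad(layer2)
dw2 = activation1.T.dot(grad_layer2)          # [20, 64] x [64, 2] -> [20, 2]

# dLoss/dw1 = x^T . ((grad_layer2 . w2^T) * sigmoid'(layer1))
grad_layer1 = grad_layer2.dot(w2.T) * sigmoid_grad(layer1)
dw1 = x.T.dot(grad_layer1)                    # [100, 64] x [64, 20] -> [100, 20]

# gradient-descent update
w1 -= learning_rate * dw1
w2 -= learning_rate * dw2

Full code example: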

# -*- coding: utf-8 -*-
import numpy as np


class NeuronNetwork:
    def __init__(self, input_dim, output_dim, hide_layers, learn_rate=0.01,
                 batch_size=64, iter_num=1000, act_func='sigmoid'):
        """
        :param input_dim: int
        :param output_dim: int
        :param hide_layers: array like [20, 10], meaning two hidden layers of dimension 20 and 10
        :param act_func: assign the activation function, sigmoid or relu
        :param learn_rate: float32
        :param batch_size: int
        :param iter_num: int
        """
        self.dim_in = input_dim
        self.dim_out = output_dim
        self.w = []
        self.dw = []
        self.layer_out = []
        self.act_out = []
        self.layer_size = len(hide_layers)
        self.activation_func = act_func
        self.lr = learn_rate
        self.iter_num = iter_num
        self.batch_size = batch_size
        self.w_log = []
        self.loss_log = []
        self.weights_init(hide_layers)

    @staticmethod
    def sigmoid(z):
        return 1 / (1 + np.exp(-1 * z))

    def gra_sigmoid(self, z):
        fx = self.sigmoid(z)
        return fx * (1 - fx)

    @staticmethod
    def relu(z):
        return np.maximum(z, 0)

    @staticmethod
    def gra_relu(z):
        # return a new array rather than modifying z in place, so the
        # cached layer outputs are not corrupted by the backward pass
        return (z > 0).astype(z.dtype)

    def act_function(self, z):
        if self.activation_func == 'sigmoid':
            return self.sigmoid(z)
        elif self.activation_func == 'relu':
            return self.relu(z)
        else:
            raise ValueError('act_func should be "sigmoid" or "relu"')

    def grad_function(self, z):
        if self.activation_func == 'sigmoid':
            return self.gra_sigmoid(z)
        else:
            return self.gra_relu(z)

    def weights_init(self, hide_layers):
        if self.layer_size == 1:
            self.w.append(np.random.rand(self.dim_in, hide_layers[0]))
            self.dw.append(np.zeros((self.dim_in, hide_layers[0])))
            self.layer_out.append(np.zeros((self.batch_size, hide_layers[0])))
            self.act_out.append(np.zeros((self.batch_size, hide_layers[0])))
            self.w.append(np.random.rand(hide_layers[0], self.dim_out))
            self.dw.append(np.zeros((hide_layers[0], self.dim_out)))
            self.layer_out.append(np.zeros((self.batch_size, self.dim_out)))
            self.act_out.append(np.zeros((self.batch_size, self.dim_out)))
        elif self.layer_size == 2:
            self.w.append(np.random.rand(self.dim_in, hide_layers[0]))
            self.dw.append(np.zeros((self.dim_in, hide_layers[0])))
            self.layer_out.append(np.zeros((self.batch_size, hide_layers[0])))
            self.act_out.append(np.zeros((self.batch_size, hide_layers[0])))
            self.w.append(np.random.rand(hide_layers[0], hide_layers[1]))
            self.dw.append(np.zeros((hide_layers[0], hide_layers[1])))
            self.layer_out.append(np.zeros((self.batch_size, hide_layers[1])))
            self.act_out.append(np.zeros((self.batch_size, hide_layers[1])))
            self.w.append(np.random.rand(hide_layers[1], self.dim_out))
            self.dw.append(np.zeros((hide_layers[1], self.dim_out)))
            self.layer_out.append(np.zeros((self.batch_size, self.dim_out)))
            self.act_out.append(np.zeros((self.batch_size, self.dim_out)))
        else:
            raise ValueError('hide layer size should be: 1 or 2, but got %d' % self.layer_size)

    def calculate(self, x):
        """
        :param x: shape of [batch_size, dim_in]
        :return: y_hat, shape of [batch_size, dim_out]
        """
        if self.layer_size == 1:
            self.layer_out[0] = x.dot(self.w[0])
            self.act_out[0] = self.act_function(self.layer_out[0])
            self.layer_out[1] = self.act_out[0].dot(self.w[1])
            self.act_out[1] = self.act_function(self.layer_out[1])
            return self.act_out[-1]
        else:
            self.layer_out[0] = x.dot(self.w[0])
            self.act_out[0] = self.act_function(self.layer_out[0])
            self.layer_out[1] = self.act_out[0].dot(self.w[1])
            self.act_out[1] = self.act_function(self.layer_out[1])
            self.layer_out[2] = self.act_out[1].dot(self.w[2])
            self.act_out[2] = self.act_function(self.layer_out[2])
            return self.act_out[-1]

    @staticmethod
    def cal_loss(y_hat, y_label):
        """
        :param y_hat: predictions of output, shape of [batch_size, dim_out]
        :param y_label: real label, shape of [batch_size, dim_out]
        :return: loss, float32
        """
        loss = np.square(y_hat - y_label).sum()
        return loss

    def back_propagation(self, x, y_hat, y_label):
        """
        :param x: shape of [batch_size, dim_in]
        :param y_hat: predictions of output, shape of [batch_size, dim_out]
        :param y_label: real label, shape of [batch_size, dim_out]
        :return: None
        """
        grad_y_hat = 2.0 * (y_hat - y_label)
        if self.layer_size == 1:
            # calculate dw2
            grad_temp = grad_y_hat * self.grad_function(self.layer_out[1])
            self.dw[1] = self.act_out[0].T.dot(grad_temp)
            # calculate dw1
            grad_temp = grad_temp.dot(self.w[1].T)
            grad_temp = grad_temp * self.grad_function(self.layer_out[0])
            self.dw[0] = x.T.dot(grad_temp)
            # update w1 and w2
            self.w[0] -= self.lr * self.dw[0]
            self.w[1] -= self.lr * self.dw[1]
        else:
            # calculate dw3
            grad_temp = grad_y_hat * self.grad_function(self.layer_out[2])
            self.dw[2] = self.act_out[1].T.dot(grad_temp)
            # calculate dw2
            grad_temp = grad_temp.dot(self.w[2].T)
            grad_temp = grad_temp * self.grad_function(self.layer_out[1])
            self.dw[1] = self.act_out[0].T.dot(grad_temp)
            # calculate dw1
            grad_temp = grad_temp.dot(self.w[1].T)
            grad_temp = grad_temp * self.grad_function(self.layer_out[0])
            self.dw[0] = x.T.dot(grad_temp)
            # update w1, w2 and w3
            self.w[0] -= self.lr * self.dw[0]
            self.w[1] -= self.lr * self.dw[1]
            self.w[2] -= self.lr * self.dw[2]

    def train(self, x_sets, y_sets):
        sample_count = len(x_sets)
        for i in range(self.iter_num):
            # sample a mini-batch (with replacement)
            batch_index = np.random.choice(sample_count, self.batch_size)
            batch_x = x_sets[batch_index]
            batch_y = y_sets[batch_index]
            batch_y_hat = self.calculate(batch_x)
            self.back_propagation(batch_x, batch_y_hat, batch_y)
            if i % 50 == 0:
                batch_loss = self.cal_loss(batch_y_hat, batch_y)
                print('current loss: ', batch_loss)


if __name__ == '__main__':
    # generate a random data set
    # np.random.seed(1)
    # x_set = np.random.rand(1000, 100)
    # y_set = np.random.randint(0, 9, size=1000)
    # y_set = np.eye(10)[y_set]

    # use the iris data set
    x_samples = []
    y_samples = []
    with open('iris.txt') as f:
        data = f.readlines()
    for sample in data:
        try:
            x1, x2, x3, x4, label = sample.split(',')
            if 'setosa' in label:
                label = 0
            elif 'versicolor' in label:
                label = 1
            else:
                label = 2
            x_samples.append([x1, x2, x3, x4])
            y_samples.append(label)
        except ValueError:
            # skip malformed lines (e.g. headers or blank lines)
            pass
    x_samples = np.array(x_samples, dtype=np.float64)
    y_samples = np.eye(3)[y_samples]   # one-hot encode the 3 iris classes
    nn = NeuronNetwork(4, 3, [12, 6])
    nn.train(x_samples, y_samples)
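To sanity-check the back_propagation implementation, one can compare an analytic gradient entry against a central finite difference. This is a minimal sketch assuming the class above; the seed, shapes, and eps are arbitrary choices:

np.random.seed(0)
net = NeuronNetwork(4, 3, [5], batch_size=8)
x = np.random.rand(8, 4)
y = np.eye(3)[np.random.randint(0, 3, size=8)]

w_before = [w.copy() for w in net.w]   # snapshot: back_propagation updates net.w in place
y_hat = net.calculate(x)
net.back_propagation(x, y_hat, y)
analytic = net.dw[0][0, 0]             # analytic gradient of one entry of w1

eps = 1e-6
net.w = [w.copy() for w in w_before]
net.w[0][0, 0] += eps
loss_plus = net.cal_loss(net.calculate(x), y)
net.w = [w.copy() for w in w_before]
net.w[0][0, 0] -= eps
loss_minus = net.cal_loss(net.calculate(x), y)
numeric = (loss_plus - loss_minus) / (2 * eps)

print(analytic, numeric)               # the two values should closely agree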