Preface

The BP (backpropagation) neural network is the foundation of deep neural networks. Because a deep network is too complex to trace the parameter-update process through directly, we usually use a 3-layer network to understand it. A BP network consists of an input layer, a hidden layer, and an output layer. One forward pass produces the output and the loss value; one backward pass then computes the gradients of the weights and biases connecting the layers, and gradient descent (or some other optimizer) updates the parameters, completing one step of loss minimization. The forward and backward passes are repeated until the loss stops decreasing (or decreases by so little that it can be ignored). Deriving the gradients requires care: for matrix differentiation you can memorize the standard formulas (or work them out yourself); the remaining derivatives pose no real difficulty, since the activation functions are all simple.
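To make the derivation concrete, here is a sketch for the 3-layer sigmoid network, assuming squared-error loss $L = \tfrac{1}{2}\|a_2 - y\|^2$ (the code below does not name its loss, but its $(L_2 - y)$ factor corresponds to this choice; $\odot$ denotes element-wise multiplication):

$$z_1 = W_1 x + b_1,\quad a_1 = \sigma(z_1),\qquad z_2 = W_2 a_1 + b_2,\quad a_2 = \sigma(z_2)$$

$$\delta_2 = \frac{\partial L}{\partial z_2} = (a_2 - y)\odot a_2\odot(1-a_2),\qquad \delta_1 = \frac{\partial L}{\partial z_1} = (W_2^{\top}\delta_2)\odot a_1\odot(1-a_1)$$

$$\frac{\partial L}{\partial W_2} = \delta_2\, a_1^{\top},\quad \frac{\partial L}{\partial b_2} = \delta_2,\quad \frac{\partial L}{\partial W_1} = \delta_1\, x^{\top},\quad \frac{\partial L}{\partial b_1} = \delta_1$$

These are exactly delta_L2, delta_L1, and the four gradient-descent update terms in the implementation below.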
Implementation
import numpy as np

# Derivative of the sigmoid, written in terms of the sigmoid's output y:
# sigma'(z) = sigma(z) * (1 - sigma(z)) = y * (1 - y)
def sigmoid_dt(y):
    return y * (1 - y)

# Sigmoid activation function
def sigmoid(x):
    return 1. / (1 + np.exp(-x))

def loadData():
    """
    Load the dataset. Each line of set.txt holds two features and a label:
        -0.017612 14.053064 0
        -1.395634 4.662541  1
        -0.752157 6.538620  0
        -1.322371 7.152853  0
        0.423363  11.054677 0
        0.406704  7.067335  1
    Returns:
        feature and label arrays
    """
    feature = []
    label = []
    with open('set.txt') as fr:
        for line in fr.readlines():
            lineArr = line.strip().split()
            feature.append([lineArr[0], lineArr[1]])
            label.append([lineArr[-1]])
    return np.array(feature, dtype='float64'), np.array(label, dtype='float64')

class BP(object):
    # Randomly initialize the parameters
    def __init__(self, layers):
        self.w1 = np.random.random((layers[1], layers[0]))
        self.b1 = np.random.random((layers[1], 1))
        self.w2 = np.random.random((layers[2], layers[1]))
        self.b2 = np.random.random((layers[2], 1))

    def train(self, X, Y, learn=0.1, epochs=10000):
        for n in range(epochs + 1):
            # Pick one random sample (stochastic gradient descent)
            i = np.random.randint(X.shape[0])
            x = np.reshape(X[i], (X[i].shape[0], 1))

            # Forward pass
            # Output of the hidden-layer neurons
            L1 = sigmoid(np.dot(self.w1, x) + self.b1)
            # Output of the output-layer neurons
            L2 = sigmoid(np.dot(self.w2, L1) + self.b2)

            # Backward pass
            # Gradient of the loss w.r.t. the output layer's pre-activation input
            delta_L2 = (L2 - Y[i]) * sigmoid_dt(L2)
            # Gradient w.r.t. the hidden layer's pre-activation input
            # (for f(x) = W*x, backpropagating through f multiplies by W.T)
            delta_L1 = np.dot(self.w2.T, delta_L2) * sigmoid_dt(L1)

            # Gradient-descent parameter updates
            self.w2 = self.w2 - learn * np.dot(delta_L2, L1.T)
            self.b2 = self.b2 - learn * delta_L2
            self.w1 = self.w1 - learn * np.dot(delta_L1, x.T)
            self.b1 = self.b1 - learn * delta_L1

        # After training, compute the accuracy and the mean absolute error
        correct_count = 0
        cost = 0
        for k, x in enumerate(X):
            prediction = self.predict(x)[0, 0]
            if prediction > 0.5 and Y[k, 0] == 1.0:
                correct_count += 1
            if prediction <= 0.5 and Y[k, 0] == 0.0:
                correct_count += 1
            cost += abs(Y[k, 0] - prediction)
        correct_rate = correct_count / X.shape[0]
        cost = cost / X.shape[0]
        return correct_rate, cost

    # Prediction: a single forward pass
    def predict(self, x):
        x = np.reshape(x, (x.shape[0], 1))
        L1 = sigmoid(np.dot(self.w1, x) + self.b1)
        L2 = sigmoid(np.dot(self.w2, L1) + self.b2)
        return L2

if __name__ == "__main__":
    X, Y = loadData()
    bp = BP([2, 9, 1])   # 2 inputs, 9 hidden neurons, 1 output
    correct_rate, cost = bp.train(X, Y)
    print(correct_rate, cost)
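The script expects a set.txt file next to it. If you do not have one, a minimal sketch like the following produces a toy dataset in the same three-column format (the two-cluster layout and the file name are my assumptions; only the "feature1 feature2 label" format matters to loadData):

import numpy as np

# Hypothetical helper: write a toy two-feature, binary-label set.txt
# in the format loadData() expects.
rng = np.random.default_rng(0)
n = 50
class0 = rng.normal(loc=(-1.0, 9.0), scale=1.0, size=(n, 2))   # label-0 cluster
class1 = rng.normal(loc=(0.5, 6.0), scale=1.0, size=(n, 2))    # label-1 cluster
with open('set.txt', 'w') as f:
    for x1, x2 in class0:
        f.write(f"{x1:.6f} {x2:.6f} 0\n")
    for x1, x2 in class1:
        f.write(f"{x1:.6f} {x2:.6f} 1\n")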
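Since the preface stresses that the gradient derivation needs care, it is worth verifying the hand-derived deltas numerically. The sketch below (my addition, not part of the original post; run it in the same module as the code above, with set.txt present) compares the analytic gradient of w2 against a central-difference estimate on one sample, assuming the squared-error loss from the derivation:

import numpy as np

def loss(bp, x, y):
    # Squared-error loss on a single sample; predict() returns shape (1, 1)
    a2 = bp.predict(x)
    return 0.5 * ((a2 - y) ** 2)[0, 0]

X, Y = loadData()
bp = BP([2, 9, 1])
x, y = X[0], Y[0, 0]

# Analytic gradient of the loss w.r.t. w2 (same formulas as in train())
xc = np.reshape(x, (x.shape[0], 1))
L1 = sigmoid(np.dot(bp.w1, xc) + bp.b1)
L2 = sigmoid(np.dot(bp.w2, L1) + bp.b2)
delta_L2 = (L2 - y) * sigmoid_dt(L2)
grad_w2 = np.dot(delta_L2, L1.T)

# Numerical gradient by central differences, one entry of w2 at a time
eps = 1e-5
num_grad = np.zeros_like(bp.w2)
for r in range(bp.w2.shape[0]):
    for c in range(bp.w2.shape[1]):
        bp.w2[r, c] += eps
        plus = loss(bp, x, y)
        bp.w2[r, c] -= 2 * eps
        minus = loss(bp, x, y)
        bp.w2[r, c] += eps          # restore the original value
        num_grad[r, c] = (plus - minus) / (2 * eps)

# The max difference should be tiny (around 1e-9) if the derivation is right
print(np.max(np.abs(grad_w2 - num_grad)))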