神经网络——吴恩达第二次大作业

mac2022-06-30 25

题目：训练一个带有一个隐藏层的神经网络，用来对红蓝点集进行分类。在本题目中，训练集合和测试集合相同，来源于一组固定seed的随机数

所需资源和包: 链接: https://pan.baidu.com/s/1pslul3qqeU2QvKptTtEOhQ 提取码: 348w

其中需要导入的包有：numpy、matplotlib、sklearn，以及资源包中提供的 testCases 和 planar_utils。

1：数据集合准备

# Build the planar "flower" dataset: X has shape (2, m), Y has shape (1, m).
def load_planar_dataset(m=400, seed=1):
    """Generate a two-class, flower-shaped 2-D dataset.

    The global NumPy random generator is re-seeded with ``seed`` on every
    call, so repeated calls with the same arguments return identical data.

    Args:
        m: Total number of examples; must be a positive even integer because
            the examples are split evenly between the two classes.
        seed: Value passed to ``np.random.seed``.

    Returns:
        A tuple ``(X, Y)`` where ``X`` is a float array of shape ``(2, m)``
        (one column per example) and ``Y`` is a ``uint8`` array of shape
        ``(1, m)`` holding the labels (0 for red, 1 for blue).

    Raises:
        ValueError: If ``m`` is not a positive even number.
    """
    if m <= 0 or m % 2 != 0:
        raise ValueError("m must be a positive even number, got %r" % (m,))

    np.random.seed(seed)
    N = m // 2  # number of points per class
    D = 2  # dimensionality
    X = np.zeros((m, D))  # data matrix, one example per row until transposed
    Y = np.zeros((m, 1), dtype='uint8')  # labels vector
    a = 4  # maximum ray of the flower

    for j in range(2):
        rows = slice(N * j, N * (j + 1))
        # The draw order (theta noise first, then radius noise) determines the
        # generated values for a given seed, so it must not be reordered.
        t = np.linspace(j * 3.12, (j + 1) * 3.12, N) + np.random.randn(N) * 0.2  # theta
        r = a * np.sin(4 * t) + np.random.randn(N) * 0.2  # radius
        X[rows] = np.c_[r * np.sin(t), r * np.cos(t)]
        Y[rows] = j

    return X.T, Y.T

2：训练过程

神经网络的训练过程分为四步：前向传播、计算代价函数、反向传播、参数修正。

# Return the size of each layer.
def layer_sizes(X, Y):
    """Return the input, hidden and output layer sizes.

    Args:
        X: Input data; the first dimension is the number of input features.
        Y: Labels; the first dimension is the number of outputs.

    Returns:
        Tuple ``(n_x, n_h, n_y)``. ``n_h`` is hard-coded to 4.
    """
    n_x = X.shape[0]  # input layer
    n_h = 4  # hidden layer, hard-coded to 4
    n_y = Y.shape[0]  # output layer
    return (n_x, n_h, n_y)


# Initialise the weight matrices and bias vectors.
def initialize_parameters(n_x, n_h, n_y):
    """Create small random weights and zero biases for a two-layer network.

    The global NumPy random generator is re-seeded with 2 on every call so the
    initial weights are reproducible.

    Args:
        n_x: Number of input-layer nodes.
        n_h: Number of hidden-layer nodes.
        n_y: Number of output-layer nodes.

    Returns:
        Dict with keys:
            "W1": weight matrix of shape (n_h, n_x)
            "b1": bias vector of shape (n_h, 1)
            "W2": weight matrix of shape (n_y, n_h)
            "b2": bias vector of shape (n_y, 1)
    """
    np.random.seed(2)  # fixed seed so the output matches the reference run

    # W1 must be drawn before W2: the draw order fixes the values.
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros(shape=(n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros(shape=(n_y, 1))

    # Sanity checks on the shapes.
    assert W1.shape == (n_h, n_x)
    assert b1.shape == (n_h, 1)
    assert W2.shape == (n_y, n_h)
    assert b2.shape == (n_y, 1)

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    return parameters


# Forward propagation.
def forward_propagation(X, parameters):
    """Run the forward pass: tanh hidden layer followed by a sigmoid output.

    Args:
        X: Input data of shape (n_x, m).
        parameters: Dict as returned by ``initialize_parameters``.

    Returns:
        Tuple ``(A2, cache)`` where ``A2`` is the sigmoid output of the second
        layer and ``cache`` is a dict holding "Z1", "A1", "Z2" and "A2".
    """
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)

    # Sanity check: a single output row, one column per example.
    assert A2.shape == (1, X.shape[1])

    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}
    return (A2, cache)


# Cost function.
def compute_cost(A2, Y, parameters):
    """Compute the mean cross-entropy cost.

    Args:
        A2: Sigmoid output of the second layer.
        Y: True labels of shape (1, m).
        parameters: Accepted for interface compatibility; not used.

    Returns:
        The cross-entropy cost as a Python ``float``.
    """
    m = Y.shape[1]

    # Keep the activations strictly inside (0, 1). A saturated sigmoid yields
    # exactly 0.0 or 1.0, and 0 * log(0) would turn the whole cost into NaN.
    eps = 1e-15
    A2 = np.clip(A2, eps, 1 - eps)

    logprobs = np.multiply(np.log(A2), Y) + np.multiply((1 - Y), np.log(1 - A2))
    cost = - np.sum(logprobs) / m
    return float(np.squeeze(cost))


# Backward propagation.
def backward_propagation(parameters, cache, X, Y):
    """Compute the gradients of the cost with respect to W1, b1, W2 and b2.

    Args:
        parameters: Dict of parameters; only "W2" is read.
        cache: Dict from ``forward_propagation``; only "A1" and "A2" are read.
        X: Input data; the second dimension is the number of examples.
        Y: True labels of shape (1, m).

    Returns:
        Dict with keys "dW1", "db1", "dW2" and "db2".
    """
    m = X.shape[1]

    W2 = parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]

    dZ2 = A2 - Y
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)
    # 1 - A1**2 is the derivative of tanh evaluated at Z1.
    dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.power(A1, 2))
    dW1 = (1 / m) * np.dot(dZ1, X.T)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}
    return grads


# Parameter update.
def update_parameters(parameters, grads, learning_rate=1.2):
    """Apply one gradient-descent step.

    Args:
        parameters: Dict holding "W1", "b1", "W2" and "b2".
        grads: Dict holding "dW1", "db1", "dW2" and "db2".
        learning_rate: Step size.

    Returns:
        A new dict with the updated parameters; the input dict is not modified.
    """
    W1, W2 = parameters["W1"], parameters["W2"]
    b1, b2 = parameters["b1"], parameters["b2"]

    dW1, dW2 = grads["dW1"], grads["dW2"]
    db1, db2 = grads["db1"], grads["db2"]

    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    return parameters


def predict(parameters, X):
    """Predict a class for every example in X using the learned parameters.

    Args:
        parameters: Dict of parameters.
        X: Input data of shape (n_x, m).

    Returns:
        Array of predictions (red: 0 / blue: 1), obtained by rounding the
        network output.
    """
    A2, cache = forward_propagation(X, parameters)
    predictions = np.round(A2)
    return predictions

三：辅助函数(sigmoid(),画图函数)

import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model


# Plotting helper.
def plot_decision_boundary(model, X, y):
    """Draw the decision regions of ``model`` and overlay the examples.

    Args:
        model: Callable that receives an array with one grid point per row
            (two columns) and returns one prediction per point.
        X: Data array whose row 0 holds x1 and row 1 holds x2.
        y: Labels used to colour the scatter points; squeezed before use.
    """
    step = 0.01  # spacing between neighbouring grid points
    first, second = X[0, :], X[1, :]

    # Bounding box of the data with one unit of padding on each side.
    x_lo, x_hi = first.min() - 1, first.max() + 1
    y_lo, y_hi = second.min() - 1, second.max() + 1

    grid_x, grid_y = np.meshgrid(np.arange(x_lo, x_hi, step),
                                 np.arange(y_lo, y_hi, step))

    # Evaluate the model on every grid point, then restore the grid layout.
    grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]
    regions = model(grid_points).reshape(grid_x.shape)

    # Filled contours for the regions, then the examples on top.
    plt.contourf(grid_x, grid_y, regions, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    plt.scatter(first, second, c=np.squeeze(y), cmap=plt.cm.Spectral)


def sigmoid(x):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-x))``."""
    return 1/(1+np.exp(-x))

四：正式运行

本节的核心函数是 nn_model(X,Y,n_h,num_iterations,print_cost=False)，它是训练的主过程，会调用第 2 节和第三节中定义的函数进行训练；另一个函数 predict(parameters,X) 用于预测，见第五节。

import numpy as np
import matplotlib.pyplot as plt
from testCases import *
import sklearn
import sklearn.datasets
import sklearn.linear_model
from planar_utils import plot_decision_boundary, sigmoid, load_planar_dataset
from methods import *

np.random.seed(1)  # fixed seed so the following steps give consistent results

X, Y = load_planar_dataset()

# Scatter plot of the raw data, coloured by label.
plt.scatter(X[0, :], X[1, :], c=np.squeeze(Y), s=40, cmap=plt.cm.Spectral)
plt.show()


def nn_model(X, Y, n_h, num_iterations, print_cost=False):
    """Train the one-hidden-layer network with gradient descent.

    Args:
        X: Dataset of shape (2, number of examples).
        Y: Labels of shape (1, number of examples).
        n_h: Number of hidden-layer nodes.
        num_iterations: Number of gradient-descent iterations.
        print_cost: If True, print the cost every 1000 iterations.

    Returns:
        Dict of learned parameters, usable for prediction.
    """
    np.random.seed(3)  # fixed random seed

    # The hidden size returned by layer_sizes is ignored in favour of n_h.
    n_x, _, n_y = layer_sizes(X, Y)
    parameters = initialize_parameters(n_x, n_h, n_y)

    for i in range(num_iterations):
        A2, cache = forward_propagation(X, parameters)
        cost = compute_cost(A2, Y, parameters)
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads, learning_rate=0.5)

        if print_cost and i % 1000 == 0:
            print("第 ",i," 次循环，成本为："+str(cost))

    return parameters


# Exercise nn_model.
print("=========================测试nn_model=========================")
parameters = nn_model(X, Y, 4, num_iterations=10000, print_cost=True)

五：模型预测

def predict(parameters, X):
    """Predict a class for every example in X using the learned parameters.

    Args:
        parameters: Dict of parameters.
        X: Input data of shape (n_x, m).

    Returns:
        Array of predictions (red: 0 / blue: 1), obtained by rounding the
        network output.
    """
    A2, cache = forward_propagation(X, parameters)
    predictions = np.round(A2)
    return predictions


print("=========================准确率情况=========================")
predictions = predict(parameters, X)
# Correct predictions = true positives + true negatives. The dot products give
# a (1, 1) array; .item() extracts the scalar, since calling float() on an
# array with ndim > 0 is deprecated in recent NumPy releases.
accuracy = ((np.dot(Y, predictions.T) + np.dot(1 - Y, 1 - predictions.T)) / float(Y.size) * 100).item()
print('准确率: %d' % accuracy + '%')

# plot_decision_boundary passes one grid point per row, hence the transpose.
plot_decision_boundary(lambda x: predict(parameters, x.T), X, Y)
plt.title("Decision Boundary for hidden layer size " + str(4))
plt.show()

print("=========================不同隐藏层数量时的准确率情况=========================")
hidden_layer_sizes = [1, 2, 3, 4, 5, 20, 50]  # hidden-layer sizes to compare
for i, n_h in enumerate(hidden_layer_sizes):
    plt.subplot(5, 2, i + 1)
    plt.title('Hidden Layer of size %d' % n_h)
    parameters = nn_model(X, Y, n_h, num_iterations=5000)
    plot_decision_boundary(lambda x: predict(parameters, x.T), X, Y)
    predictions = predict(parameters, X)
    accuracy = ((np.dot(Y, predictions.T) + np.dot(1 - Y, 1 - predictions.T)) / float(Y.size) * 100).item()
    print ("隐藏层的节点数量： {} ，准确率: {} %".format(n_h, accuracy))
plt.show()

参考资源：博主「何宽」原文链接：https://blog.csdn.net/u013733326/article/details/79639509

最新回复(0)