来自《TensorFlow 机器学习实战指南》,代码为手打,与原书相比有 2 处不同。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Data preparation for a logistic-regression example written against the
# TensorFlow 1.x graph API (via tf.compat.v1).
#
# This part downloads and parses the birth-weight data, makes a random 80/20
# train/test split, min-max scales the features, and declares the
# hyper-parameters and the feature placeholder used by the model code that
# follows.
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import requests
from sklearn import datasets
from sklearn.preprocessing import normalize
from tensorflow.python.framework import ops

# Placeholders and Session.run() only work in graph mode, so eager execution
# (the TensorFlow 2 default) is switched off before anything is built.
tf.compat.v1.disable_eager_execution()
ops.reset_default_graph()
sess = tf.compat.v1.Session()

# (A) The book's URL
# (https://www.umass.edu/statdata/statdata/data/lowbwt.dat) could not be
# opened from the author's network, so the copy below is used instead.
birthdata_url = 'https://github.com/nfmcclure/tensorflow_cookbook/raw/master/01_Introduction/07_Working_with_Data_Sources/birthweight_data/birthweight.dat'
# A timeout keeps the script from hanging forever on a dead connection, and
# raise_for_status() stops an HTTP error page from being parsed as data.
birth_file = requests.get(birthdata_url, timeout=30)
birth_file.raise_for_status()

# (B) This also differs from the book: fields are split on tabs here, whereas
# the book's version split each line on single spaces.
# splitlines() accepts both '\r\n' and '\n' line endings.
birth_lines = birth_file.text.splitlines()
if len(birth_lines) < 2:
    raise ValueError('no data rows found in ' + birthdata_url)
birth_header = birth_lines[0].split('\t')
birth_data = [
    [float(x) for x in y.split('\t') if x.strip()]
    for y in birth_lines[1:]
    if y.strip()
]
if not birth_data:
    raise ValueError('no data rows could be parsed from ' + birthdata_url)

# Column 0 is used as the label, the next eight columns as features.
# NOTE(review): confirm against birth_header that none of the eight feature
# columns is something the label is derived from.
y_vals = np.array([x[0] for x in birth_data])
x_vals = np.array([x[1:9] for x in birth_data])

# Random 80/20 split; the test rows are exactly the rows not drawn for training.
train_indices = np.random.choice(
    len(x_vals), round(len(x_vals) * 0.8), replace=False)
test_indices = np.array(
    sorted(set(range(len(x_vals))) - set(train_indices)), dtype=int)
x_vals_train = x_vals[train_indices]
x_vals_test = x_vals[test_indices]
y_vals_train = y_vals[train_indices]
y_vals_test = y_vals[test_indices]


def normalize_cols(m, col_min=None, col_max=None):
    """Min-max scale every column of ``m``.

    Args:
        m: 2-D array with one row per sample and one column per feature.
        col_min: per-column minimum to scale with; computed from ``m`` when
            omitted.
        col_max: per-column maximum to scale with; computed from ``m`` when
            omitted.

    Returns:
        ``(m - col_min) / (col_max - col_min)``. When the statistics come
        from ``m`` itself every column lies in [0, 1]. A column whose range
        is zero is divided by 1 instead, so it maps to ``m - col_min``
        (all zeros when the statistics come from ``m``).
    """
    if col_min is None:
        col_min = m.min(axis=0)
    if col_max is None:
        col_max = m.max(axis=0)
    col_range = np.asarray(col_max - col_min, dtype=float)
    # Avoid 0/0 (NaN) and x/0 (inf) for constant columns.
    col_range = np.where(col_range == 0, 1.0, col_range)
    return (m - col_min) / col_range


# Scale BOTH sets with the training set's min/max. Scaling the test set with
# its own statistics would put the two sets on different scales, so the same
# raw value would reach the model as different inputs and the test accuracy
# would swing with whatever extremes happen to land in the test split.
train_min = x_vals_train.min(axis=0)
train_max = x_vals_train.max(axis=0)
x_vals_train = np.nan_to_num(normalize_cols(x_vals_train, train_min, train_max))
x_vals_test = np.nan_to_num(normalize_cols(x_vals_test, train_min, train_max))
print(x_vals_test.shape)

# Hyper-parameters. The misspelled name 'leaning_rate' is kept because the
# optimizer set-up later in the script refers to it.
leaning_rate = 0.05
batch_size = 25

# One row per sample, eight features per row.
x_data = tf.compat.v1.placeholder(shape=[None, 8], dtype=tf.float32)
# Logistic-regression model, training loop and plots.
# Relies on names defined earlier in the script: sess, x_data, leaning_rate,
# batch_size, x_vals_train, x_vals_test, y_vals_train and y_vals_test.

# Labels arrive as a column vector, one 0/1 value per sample.
y_target = tf.compat.v1.placeholder(shape=[None, 1], dtype=tf.float32)

# Model: logits = x_data . A + b
A = tf.Variable(tf.compat.v1.random_normal(shape=[8, 1]))
b = tf.Variable(tf.compat.v1.random_normal(shape=[1, 1]))
model_output = tf.add(tf.matmul(x_data, A), b)

# The loss takes the raw logits; the sigmoid is applied inside the op.
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(
        labels=y_target, logits=model_output))

my_ops = tf.compat.v1.train.GradientDescentOptimizer(leaning_rate)
train_step = my_ops.minimize(loss)

# Accuracy: round the sigmoid output to 0/1 and compare with the labels.
prediction = tf.round(tf.sigmoid(model_output))
predictions_correct = tf.cast(tf.equal(prediction, y_target), tf.float32)
accuracy = tf.reduce_mean(predictions_correct)

# Initialise only after the whole graph (optimizer included) has been built.
init = tf.compat.v1.global_variables_initializer()
sess.run(init)

# The full-set feeds never change, so build them once outside the loop.
train_feed = {x_data: x_vals_train, y_target: np.transpose([y_vals_train])}
test_feed = {x_data: x_vals_test, y_target: np.transpose([y_vals_test])}

lose_vec = []
train_acc = []
test_acc = []
for _ in range(1500):
    # Mini-batch drawn with replacement from the training rows.
    rand_index = np.random.choice(len(x_vals_train), size=batch_size)
    batch_feed = {
        x_data: x_vals_train[rand_index],
        y_target: np.transpose([y_vals_train[rand_index]]),
    }
    sess.run(train_step, feed_dict=batch_feed)
    # Loss on the same batch, measured after the update step.
    lose_vec.append(sess.run(loss, feed_dict=batch_feed))
    train_acc.append(sess.run(accuracy, feed_dict=train_feed))
    test_acc.append(sess.run(accuracy, feed_dict=test_feed))

plt.plot(lose_vec, 'k-')
plt.title('Cross Entropy Loss per Generation')
plt.xlabel('Generation')
plt.ylabel('Cross Entropy Loss')
plt.show()

plt.plot(train_acc, 'k-', label='Train Set Accuracy')
plt.plot(test_acc, 'r--', label='Test Set Accuracy')
plt.title('Train and Test Set Accuracy')
plt.xlabel('Generation')
plt.ylabel('Accuracy')
# Without this call the label= arguments above are never displayed.
plt.legend(loc='lower right')
plt.show()

# Question from the author: the test accuracy is sometimes very low and
# sometimes about the same as the training accuracy.
# NOTE(review): the train/test split is drawn at random without a fixed seed,
# so the held-out rows differ on every run. Also check that the test features
# are scaled with the training set's statistics rather than their own;
# mismatched scaling shifts the decision boundary on the test set.