```python
import numpy as np

class NearestNeighbor(object):
    def __init__(self):
        pass

    def train(self, X, y):
        """
        X: shape N x D, where N is the number of samples and D the number of pixels
        y: 1-D array of size N
        """
        # the nearest neighbor classifier simply memorizes all training data
        self.Xtrain = X
        self.ytrain = y

    def predict(self, Xtest):
        """Predict a label for each of the test images in Xtest"""
        num_test = Xtest.shape[0]
        # make sure the output type matches the label type
        Ypred = np.zeros(num_test, dtype=self.ytrain.dtype)

        # loop over all test rows
        for i in range(num_test):
            # find the training image nearest to the i-th test image,
            # using the L1 distance (sum of absolute pixel differences)
            distances = np.sum(np.abs(self.Xtrain - Xtest[i, :]), axis=1)
            min_index = np.argmin(distances)   # index of the closest training image
            Ypred[i] = self.ytrain[min_index]  # predict the label of that training image

        return Ypred
```
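A minimal usage sketch for the classifier above; the toy data shapes, random pixels, and ten-class labels here are illustrative assumptions (CIFAR-10-like flattened images), not part of the original:

```python
import numpy as np

# hypothetical toy data: 500 training and 10 test images,
# each flattened to 32*32*3 = 3072 pixel values
rng = np.random.default_rng(0)
Xtrain = rng.integers(0, 256, size=(500, 3072)).astype(np.float64)
ytrain = rng.integers(0, 10, size=500)
Xtest = rng.integers(0, 256, size=(10, 3072)).astype(np.float64)

nn = NearestNeighbor()
nn.train(Xtrain, ytrain)
print(nn.predict(Xtest))  # 10 predicted labels, one per test image
```

Note that training is essentially free here: all the cost is deferred to prediction, which scans the entire training set once per test image.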
$$
-\bigl(0\log 0.10 + 0\log 0.05 + 0\log 0.15 + 0\log 0.10 + 0\log 0.05 + 0\log 0.20 + 1\log 0.10 + 0\log 0.05 + 0\log 0.10 + 0\log 0.10\bigr) = -\log 0.10 \approx 2.30
$$
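As a quick numeric check, a short sketch computing this cross-entropy with NumPy; the probability vector and one-hot target are read off the terms above:

```python
import numpy as np

# predicted probabilities and the one-hot target from the terms above
p = np.array([0.10, 0.05, 0.15, 0.10, 0.05, 0.20, 0.10, 0.05, 0.10, 0.10])
y = np.array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0])

H = -np.sum(y * np.log(p))
print(H)  # -log(0.10) ≈ 2.3026; only the term where y_i = 1 contributes
```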
Take the simple model y = sigmoid(w1*x1 + w2*x2 + b) and work through one concrete computation. At the point (x1, x2) = (-1, -2) the target value is 1, and the initial parameters are (w1, w2, b) = (2, -3, -3). Since the sigmoid sits in the middle of the computation, it is easiest to walk through the process in code.
```python
import numpy as np

# some random inputs and weights, with target value 1
w = [2, -3, -3]  # w[2] plays the role of the bias b
x = [-1, -2]
y = 1

# forward pass
z = w[0] * x[0] + w[1] * x[1] + w[2]
a = 1.0 / (1 + np.exp(-z))                               # sigmoid activation
cost = -np.sum(y * np.log(a) + (1 - y) * np.log(1 - a))  # cross-entropy loss

# backward pass through the neuron
# gradient on the pre-activation z: combining the cross-entropy loss
# with the sigmoid derivative simplifies to a - y
dz = a - y
# backpropagate to the inputs x
dx = [w[0] * dz, w[1] * dz]
# backpropagate to the weights w (the last entry is the bias gradient)
dw = [x[0] * dz, x[1] * dz, 1.0 * dz]
```
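To verify the analytic gradients, a finite-difference check on dw can be run; the helper function and step size below are illustrative assumptions, not from the original:

```python
import numpy as np

def loss(w, x=(-1, -2), y=1):
    # forward pass only: cross-entropy loss for the given weights
    z = w[0] * x[0] + w[1] * x[1] + w[2]
    a = 1.0 / (1 + np.exp(-z))
    return -(y * np.log(a) + (1 - y) * np.log(1 - a))

w = np.array([2.0, -3.0, -3.0])
eps = 1e-6
for i in range(3):
    w_plus, w_minus = w.copy(), w.copy()
    w_plus[i] += eps
    w_minus[i] -= eps
    # central-difference approximation of d(loss)/d(w[i])
    print((loss(w_plus) - loss(w_minus)) / (2 * eps))
# the printed values should match dw ≈ [0.2689, 0.5379, -0.2689] from above
```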
```python
def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5):
    """Train a logistic regression model and report train/test accuracy."""
    # initialize parameters
    w, b = initialize_with_zeros(X_train.shape[0])

    # gradient descent
    # params: the updated network parameters
    # grads:  the gradients from the last iteration
    # costs:  the list of costs recorded during training
    params, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate)

    # retrieve the trained parameters and predict
    w = params['w']
    b = params['b']
    Y_prediction_train = predict(w, b, X_train)
    Y_prediction_test = predict(w, b, X_test)

    # print accuracy
    print("train accuracy: {} ".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} ".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))

    d = {"costs": costs,
         "Y_prediction_test": Y_prediction_test,
         "Y_prediction_train": Y_prediction_train,
         "w": w,
         "b": b,
         "learning_rate": learning_rate,
         "num_iterations": num_iterations}
    return d
```
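A sketch of how model might be called. The random stand-in data and the learning rate of 0.005 are assumptions (a real run would load the actual image dataset), but the flattening and normalization match the (209, 64, 64, 3) image shapes printed in the output further below:

```python
import numpy as np

# stand-in data with the shapes printed in the output below
rng = np.random.default_rng(0)
train_x = rng.integers(0, 256, size=(209, 64, 64, 3))
train_y = rng.integers(0, 2, size=(1, 209))
test_x = rng.integers(0, 256, size=(50, 64, 64, 3))
test_y = rng.integers(0, 2, size=(1, 50))

# flatten each image to a column vector and scale pixels to [0, 1]
train_x_flat = train_x.reshape(train_x.shape[0], -1).T / 255.0  # (12288, 209)
test_x_flat = test_x.reshape(test_x.shape[0], -1).T / 255.0     # (12288, 50)

d = model(train_x_flat, train_y, test_x_flat, test_y,
          num_iterations=2000, learning_rate=0.005)
```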
```python
def optimize(w, b, X, Y, num_iterations, learning_rate):
    """
    Arguments:
    w: weights, b: bias, X: features, Y: targets,
    num_iterations: total number of iterations, learning_rate: learning rate

    Returns:
    params: dictionary of updated parameters
    grads:  gradients from the last iteration
    costs:  list of recorded costs
    """
    costs = []

    for i in range(num_iterations):
        # compute the gradients and the cost
        grads, cost = propagate(w, b, X, Y)

        # unpack the gradients of the two parameters
        dw = grads['dw']
        db = grads['db']

        # gradient descent update
        w = w - learning_rate * dw
        b = b - learning_rate * db

        # record and report the cost every 100 iterations
        if i % 100 == 0:
            costs.append(cost)
            print("cost after iteration %i: %f" % (i, cost))
            print(b)  # current bias, printed alongside the cost

    params = {"w": w, "b": b}
    grads = {"dw": dw, "db": db}
    return params, grads, costs


def propagate(w, b, X, Y):
    """
    Arguments: w, b, X, Y: network parameters and data
    Returns: the gradients dw and db, and the cost
    """
    m = X.shape[1]

    # forward propagation
    # w has shape (n, 1), X has shape (n, m)
    A = basic_sigmoid(np.dot(w.T, X) + b)

    # compute the cross-entropy cost
    cost = -1 / m * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))

    # backward propagation
    dz = A - Y
    dw = 1 / m * np.dot(X, dz.T)
    db = 1 / m * np.sum(dz)

    grads = {"dw": dw, "db": db}
    return grads, cost
```
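The helpers basic_sigmoid and initialize_with_zeros are referenced above but not shown in this section; below is a minimal sketch consistent with how they are used (the exact originals may differ):

```python
import numpy as np

def basic_sigmoid(z):
    """Element-wise sigmoid activation."""
    return 1.0 / (1 + np.exp(-z))

def initialize_with_zeros(dim):
    """Create a (dim, 1) zero weight vector and a scalar zero bias."""
    w = np.zeros((dim, 1))
    b = 0.0
    return w, b
```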
The prediction function:
```python
def predict(w, b, X):
    """
    Predict labels using the trained parameters.
    Returns: the predictions as a (1, m) array of 0/1 labels
    """
    m = X.shape[1]
    Y_prediction = np.zeros((1, m))
    w = w.reshape(X.shape[0], 1)

    # compute the activations
    A = basic_sigmoid(np.dot(w.T, X) + b)

    # threshold the probabilities at 0.5
    for i in range(A.shape[1]):
        if A[0, i] <= 0.5:
            Y_prediction[0, i] = 0
        else:
            Y_prediction[0, i] = 1

    assert Y_prediction.shape == (1, m)
    return Y_prediction
```
The run produces the following output:
```
number of training samples: 209
number of test samples: 50
train_x shape: (209, 64, 64, 3)
train_y shape: (1, 209)
test_x shape: (50, 64, 64, 3)
test_y shape: (1, 50)
cost after iteration 0: 0.693147
-0.000777511961722488
cost after iteration 100: 0.584508
-0.004382762341768198
cost after iteration 200: 0.466949
-0.006796745374030192
cost after iteration 300: 0.376007
-0.008966216045043067
cost after iteration 400: 0.331463
-0.010796335272035083
cost after iteration 500: 0.303273
-0.012282447313396519
cost after iteration 600: 0.279880
-0.013402386273819053
cost after iteration 700: 0.260042
-0.014245091216970799
cost after iteration 800: 0.242941
-0.014875420165524832
cost after iteration 900: 0.228004
-0.015341288386626626
cost after iteration 1000: 0.214820
-0.015678788375442378
cost after iteration 1100: 0.203078
-0.015915536343924556
cost after iteration 1200: 0.192544
-0.01607292624287493
cost after iteration 1300: 0.183033
-0.016167692508505707
cost after iteration 1400: 0.174399
-0.016213022073676534
cost after iteration 1500: 0.166521
-0.016219364232163875
cost after iteration 1600: 0.159305
-0.01619503271238927
cost after iteration 1700: 0.152667
-0.016146661324349904
cost after iteration 1800: 0.146542
-0.01607955397736277
cost after iteration 1900: 0.140872
-0.015997956805040348
train accuracy: 99.04306220095694
test accuracy: 70.0
```