gradient
 Gradient


    Gradient method
    simplenet
    Run transcript and notes
    Gradient check

 gradient
#!/usr/bin/env python3
# gradient_2d.py  gradient
import numpy as np
import matplotlib.pylab as plt
from mpl_toolkits.mplot3d import Axes3D


def _numerical_gradient_no_batch(f, x):
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)    # create an array with the same shape as x

    for idx in range(x.size):
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + h
        fxh1 = f(x)  # f(x+h)

        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2*h)

        x[idx] = tmp_val  # restore the original value

    return grad


def numerical_gradient(f, X):
    if X.ndim == 1:
        return _numerical_gradient_no_batch(f, X)
    else:
        grad = np.zeros_like(X)

        for idx, x in enumerate(X):
            grad[idx] = _numerical_gradient_no_batch(f, x)

        return grad


def function_2(x):
    if x.ndim == 1:
        return np.sum(x**2)
    else:
        return np.sum(x**2, axis=1)


if __name__ == '__main__':
    x0 = np.arange(-2, 2.5, 0.25)
    x1 = np.arange(-2, 2.5, 0.25)
    X, Y = np.meshgrid(x0, x1)

    X = X.flatten()
    Y = Y.flatten()

    grad = numerical_gradient(function_2, np.array([X, Y]).T).T

    plt.figure()
    plt.quiver(X, Y, -grad[0], -grad[1], angles="xy", color="#666666")
    plt.xlim([-2, 2])
    plt.ylim([-2, 2])
    plt.xlabel('x0')
    plt.ylabel('x1')
    plt.grid()
    plt.draw()
    # plt.show()
    plt.savefig("gradient_d2.png")
    print("end")
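A quick sanity check of numerical_gradient (a minimal sketch, not part of the original scripts; it assumes gradient_2d.py above is importable from the working directory): for f(x) = x0**2 + x1**2 the analytic gradient is (2*x0, 2*x1), so the numerical result at (3.0, 4.0) should be close to [6. 8.].

#!/usr/bin/env python3
# check_numerical_gradient.py  (hypothetical file name)
import numpy as np
from gradient_2d import numerical_gradient, function_2

# analytic gradient of x0**2 + x1**2 is (2*x0, 2*x1)
print(numerical_gradient(function_2, np.array([3.0, 4.0])))  # approx. [6. 8.]
print(numerical_gradient(function_2, np.array([0.0, 2.0])))  # approx. [0. 4.]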
 Gradient method (update trajectory)
#!/usr/bin/env python3
# gradient_method.py
import numpy as np
import matplotlib.pylab as plt
import gradient_2d
from gradient_2d import numerical_gradient


def gradient_descent(f, init_x, lr=0.01, step_num=100):
    x = init_x
    x_history = []

    for i in range(step_num):
        x_history.append(x.copy())

        grad = numerical_gradient(f, x)
        x -= lr * grad

    return x, np.array(x_history)


def function_2(x):
    return x[0]**2 + x[1]**2


init_x = np.array([-3.0, 4.0])

lr = 0.1
step_num = 20
x, x_history = gradient_descent(function_2, init_x, lr=lr, step_num=step_num)

plt.plot([-5, 5], [0, 0], '--b')
plt.plot([0, 0], [-5, 5], '--b')
plt.plot(x_history[:, 0], x_history[:, 1], 'o')

plt.xlim(-3.5, 3.5)
plt.ylim(-4.5, 4.5)
plt.xlabel("X0")
plt.ylabel("X1")
# plt.show()
plt.savefig("gradient_method.png")
print("end")
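The update that gradient_descent repeats step_num times is the plain gradient descent rule (lr corresponds to the learning rate \eta):

    x_i \leftarrow x_i - \eta \frac{\partial f}{\partial x_i}

With lr = 0.1 and step_num = 20 the trajectory saved to gradient_method.png moves from the initial point (-3.0, 4.0) toward the minimum at (0, 0); a learning rate that is too large can overshoot and diverge, while one that is too small makes almost no progress.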
 simplenet (computing the gradient; weights W (2x3))
#!/usr/bin/env python3
# gradient_simplenet
import sys, os
sys.path.append(os.pardir)  # make modules in the parent directory importable
import numpy as np
from functions import softmax, cross_entropy_error
from gradient import numerical_gradient


class simpleNet:
    def __init__(self):
        self.W = np.random.randn(2, 3)    # initialize the weights with a Gaussian distribution

    def predict(self, x):
        return np.dot(x, self.W)

    def loss(self, x, t):
        z = self.predict(x)
        y = softmax(z)
        loss = cross_entropy_error(y, t)
        return loss


print("initial setup")
x = np.array([0.6, 0.9])  # input data
t = np.array([0, 0, 1])   # correct label

net = simpleNet()
print("initialized weights (W)")
print(net.W)
print("net.W.shape", net.W.shape)
print("input data (x)", x)
print("x.shape", x.shape)
print("correct label (t)", t)
print("t.shape", t.shape)

print("start")
f = lambda w: net.loss(x, t)
print("net.predict(x)", net.predict(x))
print("index of the maximum value", np.argmax(net.predict(x)))
print("net.predict(x).shape", net.predict(x).shape)
print("net.loss(x, t)", net.loss(x, t))

dW = numerical_gradient(f, net.W)
print("numerical_gradient(f, net.W)")
print(dW)
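simpleNet.loss chains predict, softmax and cross_entropy_error. Written out for this 2-input, 3-class net (these are the standard definitions of the two imported functions; cross_entropy_error typically adds a tiny constant inside the log for numerical stability):

    a = xW, \qquad y_k = \frac{\exp(a_k)}{\sum_{i=1}^{3}\exp(a_i)}, \qquad L = -\sum_{k=1}^{3} t_k \log y_k

Because t is one-hot, only the term for the correct label contributes to L, which is what the "only k = 3 (the correct label) contributes" note in the transcript below refers to.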
 simplenet run transcript and notes
$ python3 gradient_simplenet.py
# weight parameters
# W = [[ w11  w12  w13]
#      [ w21  w22  w23]]
# gradient of the loss function (L) with respect to the weight parameters (W)
# ∂L/∂W = [[ ∂L/∂w11  ∂L/∂w12  ∂L/∂w13]
#          [ ∂L/∂w21  ∂L/∂w22  ∂L/∂w23]]
# loss function: cross-entropy error (only k = 3, the correct label, contributes)
# output of the single layer
# y = softmax(np.dot(x, W))    # (y1..y3), (x1..x2), (w11..w23)
initial setup
initialized weights (W)
[[ 1.49922367 -2.62467675 -0.45382021]
 [ 0.63682249 -0.95077712 -1.84946753]]
net.W.shape (2, 3)
input data (x) [0.6 0.9]
x.shape (2,)
correct label (t) [0 0 1]
t.shape (3,)
start
net.predict(x) [ 1.47267444 -2.43050546 -1.9368129 ]
net.predict(x).shape (3,)
index of the maximum value (np.argmax(net.predict(x))) 0
net.loss(x, t)    3.4613512873592134
numerical_gradient(f, net.W)    # gradient of the loss with respect to W
[[ 0.56967104  0.01149469 -0.58116571]
 [ 0.85450626  0.01724216 -0.87174838]]
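The dW printed above is just the gradient; the weights have not been updated yet. A minimal follow-up sketch (appended to the end of gradient_simplenet.py; the learning rate 0.1 and the 5 steps are arbitrary choices for illustration, not part of the original script): repeatedly applying W <- W - lr * dW should make net.loss(x, t) shrink.

# sketch: append after the code above in gradient_simplenet.py
lr = 0.1
for i in range(5):
    dW = numerical_gradient(f, net.W)   # recompute the gradient at the current W
    net.W -= lr * dW                    # one gradient descent step on the weights
    print(i, net.loss(x, t))            # the loss should decrease step by step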
 Gradient check
#!/usr/bin/env python3
# gradient_check
import sys, os
sys.path.append(os.pardir)
import numpy as np
from mnist import load_mnist
from two_layer_net import TwoLayerNet

# load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

x_batch = x_train[:3]
t_batch = t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# average absolute difference between the backprop gradient and the numerical gradient
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))
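The diff printed for each parameter is the mean absolute difference between the backprop gradient and the numerical gradient, i.e. for a parameter with n elements:

    \mathrm{diff} = \frac{1}{n}\sum_{i=1}^{n}\bigl| g_i^{\mathrm{backprop}} - g_i^{\mathrm{numerical}} \bigr|

Values close to zero (on the order of 1e-9 to 1e-6 in the results below) indicate that the backpropagation gradients of TwoLayerNet agree with the numerical ones.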
 Gradient check results
weight initialization weight_init_std 0.01
self.params['W1'].shape (784, 50)
self.params['b1'].shape (50,)
self.params['W2'].shape (50, 10)
self.params['b2'].shape (10,)
(self.params['W1'])[0, 0] 0.014651789996112342
(self.params['W1'])[783, 49] 0.0031184770568468217
(self.params['b1'])[0] 0.0
(self.params['W2'])[0, 0] -0.005016748169428633
(self.params['W2'])[49, 9] -0.007822061200039213
(self.params['b2'])[0] 0.0
W1:1.4421339284356914e-09
b1:8.24371103976939e-09
W2:1.513292379811959e-07
b2:1.1997404825062064e-06