Neural NW Sample
gradient
#!/usr/bin/env python3
# gradient_2d.py -- numerical gradient
import numpy as np
import matplotlib.pylab as plt
from mpl_toolkits.mplot3d import Axes3D

def _numerical_gradient_no_batch(f, x):
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)  # create an array with the same shape as x
    for idx in range(x.size):
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + h
        fxh1 = f(x)  # f(x+h)
        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val  # restore the original value
    return grad

def numerical_gradient(f, X):
    if X.ndim == 1:
        return _numerical_gradient_no_batch(f, X)
    else:
        grad = np.zeros_like(X)
        for idx, x in enumerate(X):
            grad[idx] = _numerical_gradient_no_batch(f, x)
        return grad

def function_2(x):
    if x.ndim == 1:
        return np.sum(x**2)
    else:
        return np.sum(x**2, axis=1)

if __name__ == '__main__':
    x0 = np.arange(-2, 2.5, 0.25)
    x1 = np.arange(-2, 2.5, 0.25)
    X, Y = np.meshgrid(x0, x1)
    X = X.flatten()
    Y = Y.flatten()
    grad = numerical_gradient(function_2, np.array([X, Y]).T).T

    # plot the negative gradient as a vector field
    plt.figure()
    plt.quiver(X, Y, -grad[0], -grad[1], angles="xy", color="#666666")
    plt.xlim([-2, 2])
    plt.ylim([-2, 2])
    plt.xlabel('x0')
    plt.ylabel('x1')
    plt.grid()
    plt.draw()
    # plt.show()
    plt.savefig("gradient_d2.png")
    print("end")
Gradient method (trajectory of updates)
#!/usr/bin/env python3
# gradient_method.py
import numpy as np
import matplotlib.pylab as plt
import gradient_2d
from gradient_2d import numerical_gradient

def gradient_descent(f, init_x, lr=0.01, step_num=100):
    x = init_x
    x_history = []
    for i in range(step_num):
        x_history.append(x.copy())
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x, np.array(x_history)

def function_2(x):
    return x[0]**2 + x[1]**2

init_x = np.array([-3.0, 4.0])
lr = 0.1
step_num = 20
x, x_history = gradient_descent(function_2, init_x, lr=lr, step_num=step_num)

# plot the axes and the trajectory of the updates
plt.plot([-5, 5], [0, 0], '--b')
plt.plot([0, 0], [-5, 5], '--b')
plt.plot(x_history[:, 0], x_history[:, 1], 'o')
plt.xlim(-3.5, 3.5)
plt.ylim(-4.5, 4.5)
plt.xlabel("X0")
plt.ylabel("X1")
# plt.show()
plt.savefig("gradient_method.png")
print("end")
simplenet (computing the gradient; weight matrix W is 2×3)
#!/usr/bin/env python3
# gradient_simplenet
import sys, os
sys.path.append(os.pardir)  # add the parent directory to the module search path
import numpy as np
from functions import softmax, cross_entropy_error
from gradient import numerical_gradient

class simpleNet:
    def __init__(self):
        self.W = np.random.randn(2, 3)  # initialize the weights with a Gaussian distribution

    def predict(self, x):
        return np.dot(x, self.W)

    def loss(self, x, t):
        z = self.predict(x)
        y = softmax(z)
        loss = cross_entropy_error(y, t)
        return loss

print("Initial setup")
x = np.array([0.6, 0.9])  # input data
t = np.array([0, 0, 1])   # correct label (one-hot)
net = simpleNet()
print("Initialized weights (W)")
print(net.W)
print("net.W.shape", net.W.shape)
print("Input data (x)", x)
print("x.shape", x.shape)
print("Correct label (t)", t)
print("t.shape", t.shape)
print("Start")
# the dummy argument w is ignored; numerical_gradient perturbs net.W in place
f = lambda w: net.loss(x, t)
print("net.predict(x)", net.predict(x))
print("Index of the maximum value", np.argmax(net.predict(x)))
print("net.predict(x).shape", net.predict(x).shape)
print("net.loss(x, t)", net.loss(x, t))
dW = numerical_gradient(f, net.W)
print("numerical_gradient(f, net.W)")
print(dW)
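In matrix form, simpleNet computes (written out here for reference; the notation mirrors the code rather than the original page):

\[
z = xW, \qquad y = \mathrm{softmax}(z), \qquad L = -\sum_{k} t_k \log y_k
\]

where x is the 1×2 input, W the 2×3 weight matrix, and t the one-hot correct label, so only the correct class contributes to the cross-entropy error L.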
simplenet execution trace and supplementary notes
$ python3 gradient_simplenet.py
# Weight parameters:
# W = [[ w11 w12 w13]
#      [ w21 w22 w23]]
# Gradient of the loss L with respect to the weights W:
# ∂L/∂W = [[ ∂L/∂w11 ∂L/∂w12 ∂L/∂w13]
#          [ ∂L/∂w21 ∂L/∂w22 ∂L/∂w23]]
# The loss function is the cross-entropy error (with a one-hot t, only the correct class k = 3 contributes).
# Output of the single layer:
# y = softmax(np.dot(x, W))   # y: (y1..y3), x: (x1, x2), W: (w11..w23)
Initial setup
Initialized weights (W)
[[ 1.49922367 -2.62467675 -0.45382021]
 [ 0.63682249 -0.95077712 -1.84946753]]
net.W.shape (2, 3)
Input data (x) [0.6 0.9]
x.shape (2,)
Correct label (t) [0 0 1]
t.shape (3,)
Start
net.predict(x) [ 1.47267444 -2.43050546 -1.9368129 ]
net.predict(x).shape (3,)
Index of the maximum value (np.argmax(net.predict(x))) 0
net.loss(x, t) 3.4613512873592134
numerical_gradient(f, net.W)   # the gradient dW = ∂L/∂W
[[ 0.56967104  0.01149469 -0.58116571]
 [ 0.85450626  0.01724216 -0.87174838]]
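As a cross-check (not part of the script above), the analytic gradient of the softmax-plus-cross-entropy loss for a single sample is ∂L/∂W = xᵀ(y − t). The following sketch reuses the values printed in the run above and reproduces the numerical dW; it is illustrative only.

# analytic_check.py -- illustrative sketch, not part of gradient_simplenet.py
import numpy as np

def softmax(z):
    z = z - np.max(z)  # subtract the max for numerical stability
    e = np.exp(z)
    return e / np.sum(e)

x = np.array([0.6, 0.9])                              # input from the run above
t = np.array([0, 0, 1])                               # one-hot correct label
z = np.array([1.47267444, -2.43050546, -1.9368129])   # net.predict(x) from the run above
y = softmax(z)
dW_analytic = np.outer(x, y - t)  # shape (2, 3); matches the numerical dW up to rounding
print(dW_analytic)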
Gradient check
#!/usr/bin/env python3
# gradient_check
import sys, os
sys.path.append(os.pardir)
import numpy as np
from mnist import load_mnist
from two_layer_net import TwoLayerNet

# load the MNIST data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

x_batch = x_train[:3]
t_batch = t_train[:3]

# compare the numerical gradient with the backpropagation gradient
grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))
Gradient check results
Weight initialization: weight_init_std = 0.01
self.params['W1'].shape (784, 50)
self.params['b1'].shape (50,)
self.params['W2'].shape (50, 10)
self.params['b2'].shape (10,)
(self.params['W1'])[0, 0] 0.014651789996112342
(self.params['W1'])[783, 49] 0.0031184770568468217
(self.params['b1'])[0] 0.0
(self.params['W2'])[0, 0] -0.005016748169428633
(self.params['W2'])[49, 9] -0.007822061200039213
(self.params['b2'])[0] 0.0
W1:1.4421339284356914e-09
b1:8.24371103976939e-09
W2:1.513292379811959e-07
b2:1.1997404825062064e-06
Each value is the average absolute difference between the backpropagation gradient and the numerical gradient; differences of roughly 1e-9 to 1e-6 show that the two methods agree, so the backpropagation implementation can be considered correct.
All Rights Reserved. Copyright (C) ITCL