mnistSample
The samples below were run on CentOS 7 ($ python3 xxxx.py).

Set up the MNIST data (create the pkl file)
#!/usr/bin/env python3
# mnist.py
try:
    import urllib.request
except ImportError:
    raise ImportError('You should use Python 3.x')
import gzip
import numpy as np
import os
import os.path
import pickle

url_base = 'http://yann.lecun.com/exdb/mnist/'
key_file = {
    'train_img':'train-images-idx3-ubyte.gz',
    'train_label':'train-labels-idx1-ubyte.gz',
    'test_img':'t10k-images-idx3-ubyte.gz',
    'test_label':'t10k-labels-idx1-ubyte.gz'
}

dataset_dir = os.path.dirname(os.path.abspath(__file__))  # current directory
save_file = dataset_dir + "/data" + "/mnist.pkl"  # pickle file; the data directory must already exist
print("***save_file***", save_file)

img_size = 784  # 28 * 28 pixels; used by _load_img()
# train_num = 60000
# test_num = 10000
# img_dim = (1, 28, 28)

def init_mnist():
    print("***download***")
    download_mnist()
    print("Downloading Done")
    dataset = _convert_numpy()
    print("_convert_numpy() Done!")
    with open(save_file, 'wb') as f:
        pickle.dump(dataset, f, -1)
    print("Creating pickle file ... Done!")

def download_mnist():
    for v in key_file.values():
        _download(v)

def _download(file_name):
    file_path = dataset_dir + "/data" + "/" + file_name
    if os.path.exists(file_path):
        return
    urllib.request.urlretrieve(url_base + file_name, file_path)

def _convert_numpy():
    dataset = {}
    dataset['train_img'] = _load_img(key_file['train_img'])
    dataset['train_label'] = _load_label(key_file['train_label'])
    # np.set_printoptions(linewidth=118)
    # print(dataset['train_img'][999].reshape((28, 28)))
    # print(dataset['train_label'][999])
    dataset['test_img'] = _load_img(key_file['test_img'])
    dataset['test_label'] = _load_label(key_file['test_label'])
    print("Dataset Done")
    return dataset

def _load_img(file_name):
    file_path = dataset_dir + "/data" + "/" + file_name
    print("Converting " + file_name + " to NumPy Array ...")
    with gzip.open(file_path, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=16)  # skip the 16-byte IDX header
    data = data.reshape(-1, img_size)
    return data

def _load_label(file_name):
    file_path = dataset_dir + "/data" + "/" + file_name
    print("Converting " + file_name + " to NumPy Array ...")
    with gzip.open(file_path, 'rb') as f:
        labels = np.frombuffer(f.read(), np.uint8, offset=8)  # skip the 8-byte IDX header
    return labels

def _change_one_hot_label(X):
    T = np.zeros((X.size, 10))
    for idx, row in enumerate(T):
        row[X[idx]] = 1
    return T

def load_mnist(normalize=True, flatten=True, one_hot_label=False):
    if not os.path.exists(save_file):
        init_mnist()  # download and convert if the pickle does not exist yet
    with open(save_file, 'rb') as f:
        dataset = pickle.load(f)
    if normalize:
        for key in ('train_img', 'test_img'):
            dataset[key] = dataset[key].astype(np.float32)
            dataset[key] /= 255.0
    if one_hot_label:
        dataset['train_label'] = _change_one_hot_label(dataset['train_label'])
        dataset['test_label'] = _change_one_hot_label(dataset['test_label'])
    if not flatten:
        for key in ('train_img', 'test_img'):
            dataset[key] = dataset[key].reshape(-1, 1, 28, 28)
    return (dataset['train_img'], dataset['train_label']), (dataset['test_img'], dataset['test_label'])

if __name__ == '__main__':
    init_mnist()
Execution result (mnist.py)
current directory is xxxx
downloading to the specified save location
Downloading train-images-idx3-ubyte.gz ...
Done
・・・・
Converting train-images-idx3-ubyte.gz to NumPy Array ...
Done
・・・・
train_img.shape (60000, 784)
train_label.shape (60000,)
test_img.shape (10000, 784)
test_label.shape (10000,)
dataset_end
Creating pickle file ...
pickle_end
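The shapes printed above can be reproduced with a short check through load_mnist(). This is a minimal sketch, assuming mnist.py sits in the same directory (it will trigger the download on a first run):

#!/usr/bin/env python3
# check_mnist.py (illustrative; not part of the original page)
from mnist import load_mnist

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=True)
print("train_img.shape", x_train.shape)    # (60000, 784)
print("train_label.shape", t_train.shape)  # (60000,)
print("test_img.shape", x_test.shape)      # (10000, 784)
print("test_label.shape", t_test.shape)    # (10000,)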
Displaying MNIST image data
#!/usr/bin/env python3
# mnistDataHyouji.py
# display MNIST image data
import os.path
import numpy as np
import pickle
import matplotlib.pyplot as plt
from mnist import load_mnist

(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)  # load the data (creates the pkl on first run)
dataset_dir = os.path.dirname(os.path.abspath(__file__))
print("current directory is", dataset_dir)
save_file = dataset_dir + "/data" + "/mnist.pkl"
print("***save_file***", save_file)

dataset = {}
with open(save_file, 'rb') as f:
    dataset = pickle.load(f)
print("checking the data")
print(dataset['train_img'].shape)
print(dataset['train_label'].shape)
print(dataset['test_img'].shape)
print(dataset['test_label'].shape)

np.set_printoptions(linewidth=118)
print(dataset['train_img'][999].reshape((28, 28)))  # the 1000th image (index 999)
example = dataset['train_img'][999].reshape((28, 28))
plt.imshow(example)
#plt.show()  # show the image, or save it instead
plt.savefig("example999.png")
print("disp_end")
Mini-batch training with the numerical gradient (numerical differentiation, mnistNumerical_gradient.py)
#!/usr/bin/env python3
# mnistNumerical_gradient.py
# mini-batch training
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
from mnist import load_mnist
from two_layer_net import TwoLayerNet

# load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
print("checking the data")
print("x_train.shape", x_train.shape)
print("t_train.shape", t_train.shape)
print("x_test.shape", x_test.shape)
print("t_test.shape", t_test.shape)
print("one_hot_label=True")

print("two-layer net: input layer 784, hidden layer 50, output layer 10")
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

print("hyperparameters")
iters_num = 10000  # number of update iterations; adjust as needed
print("number of gradient-descent updates", iters_num)
train_size = x_train.shape[0]
print("number of training samples", train_size)
batch_size = 100
print("mini-batch size", batch_size)
learning_rate = 0.1
print("learning rate", learning_rate)

train_loss_list = []
train_acc_list = []
test_acc_list = []
iter_per_epoch = max(train_size / batch_size, 1)
print("iterations per epoch", iter_per_epoch)

for i in range(iters_num):  # training loop
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    grad = network.numerical_gradient(x_batch, t_batch)
    #grad = network.gradient(x_batch, t_batch)
    #print(grad)
    for key in ('W1', 'b1', 'W2', 'b2'):  # update the parameters
        network.params[key] -= learning_rate * grad[key]
    loss = network.loss(x_batch, t_batch)  # loss value
    train_loss_list.append(loss)
    if i % iter_per_epoch == 0:  # compute the accuracy once per epoch
        print("iter_per_epoch", i)
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

# draw the graph
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
# plt.show()
plt.savefig("train_neuralnet.png")
print("mini-batch training end")
TwoLayerNet
#!/usr/bin/env python3
# two_layer_net.py
import numpy as np
from functions import sigmoid, softmax, numerical_gradient, cross_entropy_error, sigmoid_grad

class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # initialize the weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        # 0.01 * a 784x50 matrix drawn from the standard normal distribution
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
        print("weight initialization, weight_init_std", weight_init_std)
        print("self.params['W1'].shape", self.params['W1'].shape)
        print("self.params['b1'].shape", self.params['b1'].shape)
        print("self.params['W2'].shape", self.params['W2'].shape)
        print("self.params['b2'].shape", self.params['b2'].shape)
        # print("(self.params['W1'])", (self.params['W1']))
        print("(self.params['W1'])[0, 0]", (self.params['W1'])[0, 0])
        print("(self.params['W1'])[783, 49]", (self.params['W1'])[783, 49])
        print("(self.params['b1'])[0]", (self.params['b1'])[0])
        # print("(self.params['W2'])", (self.params['W2']))
        print("(self.params['W2'])[0, 0]", (self.params['W2'])[0, 0])
        print("(self.params['W2'])[49, 9]", (self.params['W2'])[49, 9])
        print("(self.params['b2'])[0]", (self.params['b2'])[0])

    def predict(self, x):  # inference
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)  # sigmoid function
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)  # softmax function
        return y

    def loss(self, x, t):  # loss function, x: input data, t: teacher labels
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):  # recognition accuracy
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):  # gradients of the loss w.r.t. the weights, x: input data, t: teacher labels
        loss_W = lambda W: self.loss(x, t)  # numerical_gradient(f, x) calls f(x), so the lambda must take one argument
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads

    def gradient(self, x, t):  # backpropagation version
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}
        batch_num = x.shape[0]
        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        # backward
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)
        dz1 = np.dot(dy, W2.T)
        da1 = sigmoid_grad(a1) * dz1
        grads['W1'] = np.dot(x.T, da1)
        grads['b1'] = np.sum(da1, axis=0)
        return grads
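two_layer_net.py imports its activation, loss, and gradient helpers from functions.py, which is not listed on this page. The following is a minimal sketch consistent with how those helpers are called above, not necessarily the original file:

#!/usr/bin/env python3
# functions.py (minimal sketch)
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_grad(x):
    return (1.0 - sigmoid(x)) * sigmoid(x)

def softmax(x):
    x = x - np.max(x, axis=-1, keepdims=True)  # subtract the max for numerical stability
    return np.exp(x) / np.sum(np.exp(x), axis=-1, keepdims=True)

def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    if t.size == y.size:  # one-hot labels -> index labels
        t = t.argmax(axis=1)
    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size

def numerical_gradient(f, x):
    # central difference (f(x+h) - f(x-h)) / 2h, applied element-wise over x
    h = 1e-4
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp = x[idx]
        x[idx] = tmp + h
        fxh1 = f(x)
        x[idx] = tmp - h
        fxh2 = f(x)
        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp  # restore the original value
        it.iternext()
    return grad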
Mini-batch training with backpropagation
#!/usr/bin/env python3
# mini-batch training (backpropagation)
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
from mnist import load_mnist
from two_layer_net import TwoLayerNet

# load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
print("checking the data")
print("x_train.shape", x_train.shape)
print("t_train.shape", t_train.shape)
print("x_test.shape", x_test.shape)
print("t_test.shape", t_test.shape)
print("one_hot_label=True")

print("two-layer net: input layer 784, hidden layer 50, output layer 10")
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

print("hyperparameters")
iters_num = 10000  # number of update iterations; adjust as needed
print("number of gradient-descent updates", iters_num)
train_size = x_train.shape[0]
print("number of training samples", train_size)
batch_size = 100
print("mini-batch size", batch_size)
learning_rate = 0.1
print("learning rate", learning_rate)

train_loss_list = []
train_acc_list = []
test_acc_list = []
#iter_per_epoch = 10
iter_per_epoch = max(train_size / batch_size, 1)
print("iterations per epoch", iter_per_epoch)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    # compute the gradient
    #grad = network.numerical_gradient(x_batch, t_batch)
    grad = network.gradient(x_batch, t_batch)
    #print(grad)
    # update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    if i % iter_per_epoch == 0:
        print("iter_per_epoch", i)
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

# draw the graph
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
# plt.show()
plt.savefig("train_neuralnet.png")
print("mini-batch training end")
Training with SimpleConvNet
#!/usr/bin/env python3
# training with SimpleConvNet
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
from mnist import load_mnist
from simpleConvNet import SimpleConvNet
from trainer import Trainer
#from collections import OrderedDict

(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)  # load the data

max_epochs = 20
network = SimpleConvNet(input_dim=(1, 28, 28),
                        conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                        hidden_size=100, output_size=10, weight_init_std=0.01)
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=max_epochs, mini_batch_size=100,
                  optimizer='Adam', optimizer_param={'lr': 0.001},
                  evaluate_sample_num_per_epoch=1000)
trainer.train()

network.save_params("simpleConvNet_params.pkl")  # save the parameters
print("Saved Network Parameters!")

# draw the graph
markers = {'train': 'o', 'test': 's'}
x = np.arange(max_epochs)
plt.plot(x, trainer.train_acc_list, marker='o', label='train', markevery=2)
plt.plot(x, trainer.test_acc_list, marker='s', label='test', markevery=2)
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
# plt.show()
plt.savefig("train_neuralnet.png")
print("SimpleConvNet training end")
SimpleConvNet
#!/usr/bin/env python3
# SimpleConvNet
import pickle
import sys, os
sys.path.append(os.pardir)
import numpy as np
from layers import *
from gradient import numerical_gradient
from collections import OrderedDict

class SimpleConvNet:
    # a simple ConvNet: conv - relu - pool - affine - relu - affine - softmax
    # Parameters
    #   input_dim       : input dimensions (for MNIST, (1, 28, 28))
    #   conv_param      : filter_num / filter_size / pad / stride of the conv layer
    #   hidden_size     : number of neurons in the hidden layer (e.g. 100)
    #   output_size     : output size (for MNIST, 10)
    #   weight_init_std : standard deviation of the initial weights (e.g. 0.01)
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2))
        # initialize the weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)
        # build the layers
        self.layers = OrderedDict()  # insertion order is preserved
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                           conv_param['stride'], conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):  # inference
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    # compute the loss, x: input data, t: teacher labels
    def loss(self, x, t):
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    # compute the recognition accuracy
    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        acc = 0.0
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i*batch_size:(i+1)*batch_size]
            tt = t[i*batch_size:(i+1)*batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)
        return acc / x.shape[0]

    # compute the gradients by numerical differentiation
    def numerical_gradient(self, x, t):
        # returns a dictionary holding the gradients of each layer:
        #   grads['W1'], grads['W2'], ... are the weight gradients
        #   grads['b1'], grads['b2'], ... are the bias gradients
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        for idx in (1, 2, 3):
            grads['W' + str(idx)] = numerical_gradient(loss_W, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(loss_W, self.params['b' + str(idx)])
        return grads

    # compute the gradients by backpropagation
    def gradient(self, x, t):
        # forward
        self.loss(x, t)
        # backward
        dout = 1
        dout = self.last_layer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
        # collect the gradients
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
        grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
        return grads

    # save the parameters
    def save_params(self, file_name="params.pkl"):
        params = {}
        for key, val in self.params.items():
            params[key] = val
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    # load the parameters
    def load_params(self, file_name="params.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val
        for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
            self.layers[key].W = self.params['W' + str(i+1)]
            self.layers[key].b = self.params['b' + str(i+1)]
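The size bookkeeping in __init__ can be checked by hand: with a 28x28 input, a 5x5 filter, pad 0 and stride 1, conv_output_size = (28 - 5 + 0)/1 + 1 = 24, and after 2x2 pooling with stride 2 the flattened size is 30 * 12 * 12 = 4320, which is the row count of W2. A quick check of the same arithmetic:

# shape arithmetic of SimpleConvNet.__init__ under the default parameters
input_size, filter_size, pad, stride, filter_num = 28, 5, 0, 1, 30
conv_output_size = (input_size - filter_size + 2*pad) / stride + 1              # 24.0
pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2))
print(conv_output_size, pool_output_size)  # 24.0 4320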
Trainer
#!/usr/bin/env python3
# trainer.py
import numpy as np
from optimizer import *

class Trainer:  # runs the training of a neural network
    def __init__(self, network, x_train, t_train, x_test, t_test,
                 epochs=20, mini_batch_size=100,
                 optimizer='SGD', optimizer_param={'lr':0.01},
                 evaluate_sample_num_per_epoch=None, verbose=True):
        self.network = network
        self.verbose = verbose
        self.x_train = x_train
        self.t_train = t_train
        self.x_test = x_test
        self.t_test = t_test
        self.epochs = epochs
        self.batch_size = mini_batch_size
        self.evaluate_sample_num_per_epoch = evaluate_sample_num_per_epoch
        # optimizer
        optimizer_class_dict = {'sgd':SGD, 'momentum':Momentum, 'nesterov':Nesterov,
                                'adagrad':AdaGrad, 'rmsprop':RMSprop, 'adam':Adam}
        self.optimizer = optimizer_class_dict[optimizer.lower()](**optimizer_param)
        self.train_size = x_train.shape[0]
        self.iter_per_epoch = max(self.train_size / mini_batch_size, 1)
        self.max_iter = int(epochs * self.iter_per_epoch)
        self.current_iter = 0
        self.current_epoch = 0
        self.train_loss_list = []
        self.train_acc_list = []
        self.test_acc_list = []

    def train_step(self):
        batch_mask = np.random.choice(self.train_size, self.batch_size)
        x_batch = self.x_train[batch_mask]
        t_batch = self.t_train[batch_mask]
        grads = self.network.gradient(x_batch, t_batch)
        self.optimizer.update(self.network.params, grads)
        loss = self.network.loss(x_batch, t_batch)
        self.train_loss_list.append(loss)
        if self.verbose: print("train loss:" + str(loss))
        if self.current_iter % self.iter_per_epoch == 0:
            self.current_epoch += 1
            x_train_sample, t_train_sample = self.x_train, self.t_train
            x_test_sample, t_test_sample = self.x_test, self.t_test
            if self.evaluate_sample_num_per_epoch is not None:
                t = self.evaluate_sample_num_per_epoch
                x_train_sample, t_train_sample = self.x_train[:t], self.t_train[:t]
                x_test_sample, t_test_sample = self.x_test[:t], self.t_test[:t]
            train_acc = self.network.accuracy(x_train_sample, t_train_sample)
            test_acc = self.network.accuracy(x_test_sample, t_test_sample)
            self.train_acc_list.append(train_acc)
            self.test_acc_list.append(test_acc)
            if self.verbose: print("=== epoch:" + str(self.current_epoch) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc) + " ===")
        self.current_iter += 1

    def train(self):  # run the whole training
        for i in range(self.max_iter):
            self.train_step()
        test_acc = self.network.accuracy(self.x_test, self.t_test)
        if self.verbose:
            print("Final Test Accuracy")
            print("test acc:" + str(test_acc))
MNIST deep learning
#!/usr/bin/env python3
# train_deepnet.py
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
from mnist import load_mnist
from deepConvNet import DeepConvNet
from trainer import Trainer

(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)  # load the data

network = DeepConvNet()
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=20, mini_batch_size=100,
                  optimizer='Adam', optimizer_param={'lr':0.001},
                  evaluate_sample_num_per_epoch=1000)
trainer.train()

network.save_params("deep_convnet_params.pkl")  # save the parameters
print("Saved Network Parameters!")
DeepConvNet ($ python3 xxxx.py)
#!/usr/bin/env python3
# DeepConvNet
import pickle
import numpy as np
from collections import OrderedDict
from layers import *

class DeepConvNet:  # a network that reaches over 99% recognition accuracy
    # conv - relu - conv - relu - pool - conv - relu - conv - relu - pool -
    # conv - relu - conv - relu - pool - affine - relu - dropout -
    # affine - dropout - softmax
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param_1={'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1},
                 conv_param_2={'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1},
                 conv_param_3={'filter_num':32, 'filter_size':3, 'pad':1, 'stride':1},
                 conv_param_4={'filter_num':32, 'filter_size':3, 'pad':2, 'stride':1},
                 conv_param_5={'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1},
                 conv_param_6={'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1},
                 hidden_size=50, output_size=10):
        # initialize the weights
        # how many connections each neuron has to the neurons of the previous layer (TODO: compute automatically)
        pre_node_nums = np.array([1*3*3, 16*3*3, 16*3*3, 32*3*3, 32*3*3, 64*3*3, 64*4*4, hidden_size])
        weight_init_scales = np.sqrt(2.0 / pre_node_nums)  # recommended initial values when using ReLU (He initialization)
        self.params = {}
        pre_channel_num = input_dim[0]
        for idx, conv_param in enumerate([conv_param_1, conv_param_2, conv_param_3, conv_param_4, conv_param_5, conv_param_6]):
            self.params['W' + str(idx+1)] = weight_init_scales[idx] * np.random.randn(conv_param['filter_num'], pre_channel_num, conv_param['filter_size'], conv_param['filter_size'])
            self.params['b' + str(idx+1)] = np.zeros(conv_param['filter_num'])
            pre_channel_num = conv_param['filter_num']
        self.params['W7'] = weight_init_scales[6] * np.random.randn(64*4*4, hidden_size)
        self.params['b7'] = np.zeros(hidden_size)
        self.params['W8'] = weight_init_scales[7] * np.random.randn(hidden_size, output_size)
        self.params['b8'] = np.zeros(output_size)
        # build the layers
        self.layers = []
        self.layers.append(Convolution(self.params['W1'], self.params['b1'],
                                       conv_param_1['stride'], conv_param_1['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['W2'], self.params['b2'],
                                       conv_param_2['stride'], conv_param_2['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Convolution(self.params['W3'], self.params['b3'],
                                       conv_param_3['stride'], conv_param_3['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['W4'], self.params['b4'],
                                       conv_param_4['stride'], conv_param_4['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Convolution(self.params['W5'], self.params['b5'],
                                       conv_param_5['stride'], conv_param_5['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['W6'], self.params['b6'],
                                       conv_param_6['stride'], conv_param_6['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Affine(self.params['W7'], self.params['b7']))
        self.layers.append(Relu())
        self.layers.append(Dropout(0.5))
        self.layers.append(Affine(self.params['W8'], self.params['b8']))
        self.layers.append(Dropout(0.5))
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x, train_flg=False):  # inference
        for layer in self.layers:
            if isinstance(layer, Dropout):  # Dropout layers behave differently at training time
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)
        return x

    def loss(self, x, t):  # x: input data, t: teacher labels
        y = self.predict(x, train_flg=True)
        return self.last_layer.forward(y, t)  # last_layer is SoftmaxWithLoss()

    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        acc = 0.0
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i*batch_size:(i+1)*batch_size]
            tt = t[i*batch_size:(i+1)*batch_size]
            y = self.predict(tx, train_flg=False)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)
        return acc / x.shape[0]

    def gradient(self, x, t):  # gradients by backpropagation
        # forward
        self.loss(x, t)
        # backward
        dout = 1
        dout = self.last_layer.backward(dout)
        tmp_layers = self.layers.copy()
        tmp_layers.reverse()
        for layer in tmp_layers:
            dout = layer.backward(dout)
        # collect the gradients (indices of the layers that hold weights)
        grads = {}
        for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)):
            grads['W' + str(i+1)] = self.layers[layer_idx].dW
            grads['b' + str(i+1)] = self.layers[layer_idx].db
        return grads

    def save_params(self, file_name="params.pkl"):
        params = {}
        for key, val in self.params.items():
            params[key] = val
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val
        for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)):
            self.layers[layer_idx].W = self.params['W' + str(i+1)]
            self.layers[layer_idx].b = self.params['b' + str(i+1)]
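The hard-coded 64*4*4 in W7 (and in pre_node_nums) follows from tracing the spatial size through the stack: 28 -> 28 -> 28 -> pool 14 -> 14 -> (pad 2) 16 -> pool 8 -> 8 -> 8 -> pool 4, leaving 64 channels of 4x4 = 1024 values. A sketch of that trace, sizes only, under the constructor's default parameters:

size = 28
for pad in (1, 1):                 # conv1, conv2 (3x3 filter, stride 1)
    size = (size - 3 + 2*pad) + 1
size //= 2                         # pool1 -> 14
for pad in (1, 2):                 # conv3, conv4 (note conv_param_4 uses pad=2)
    size = (size - 3 + 2*pad) + 1
size //= 2                         # pool2 -> 8
for pad in (1, 1):                 # conv5, conv6
    size = (size - 3 + 2*pad) + 1
size //= 2                         # pool3 -> 4
print(size, 64 * size * size)      # 4 1024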
MNIST inference ($ python3 xxxx.py)
#!/usr/bin/env python3
# neuralnet_mnist_batch.py
import sys, os
sys.path.append(os.pardir)
import numpy as np
import pickle
from mnist import load_mnist
from functions import sigmoid, softmax

def get_data():  # fetch the test data
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=False)
    return x_test, t_test

def init_network():  # load the pre-trained weight parameters
    with open("sample_weight.pkl", 'rb') as f:
        network = pickle.load(f)
    return network

def predict(network, x):  # network output for the given data
    w1, w2, w3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    a1 = np.dot(x, w1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, w2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, w3) + b3
    y = softmax(a3)
    return y

x, t = get_data()
network = init_network()
w1, w2, w3 = network['W1'], network['W2'], network['W3']
b1, b2, b3 = network['b1'], network['b2'], network['b3']
#print(network['W3'])
print(w1.shape)
print(w2.shape)
print(w3.shape)
#print(network['b1'])
print(b1.shape)
#print(x)
print(x.shape)

batch_size = 100  # batch size
accuracy_cnt = 0
for i in range(0, len(x), batch_size):  # measure the accuracy on the test data
    x_batch = x[i:i+batch_size]
    y_batch = predict(network, x_batch)
    p = np.argmax(y_batch, axis=1)
    accuracy_cnt += np.sum(p == t[i:i+batch_size])  # compare the outputs with the correct labels
print("Accuracy:" + str(float(accuracy_cnt) / len(x)))  # recognition accuracy
Creating a mini-batch
#!/usr/bin/env python3
# creating an MNIST mini-batch
import os.path
import numpy as np
import pickle
import matplotlib.pyplot as plt

dataset_dir = os.path.dirname(os.path.abspath(__file__))
print("current directory is", dataset_dir)
dataset = {}
save_file = dataset_dir + '/data' + '/mnist.pkl'  # the pickle created by mnist.py above
with open(save_file, 'rb') as f:
    dataset = pickle.load(f)

def _change_one_hot_label(X):
    T = np.zeros((X.size, 10))
    for idx, row in enumerate(T):
        row[X[idx]] = 1
    return T

dataset['train_label'] = _change_one_hot_label(dataset['train_label'])
train_size = dataset['train_img'].shape[0]
batch_size = 10
batch_mask = np.random.choice(train_size, batch_size)  # 10 indices drawn at random
print("checking the data")
print(dataset['train_img'].shape)
print(dataset['train_label'].shape)
print("number of training samples:", train_size)
print("batch size:", batch_size)
print("selected indices:", batch_mask)
print("disp_end")
All Rights Reserved. Copyright (C) ITCL