 Implementing a 3-layer NN
 TwoLayerNet
 layers
    Relu
    Affine
    SoftmaxWithLoss
    Dropout
    BatchNormalization
    Convolution
    Pooling
 functions
 util


 Notes
 The samples below were run on CentOS 7:
$ python3 xxxx.py

 Implementing a 3-layer neural network
The input signals, weights, and biases use arbitrary fixed values.

#!/usr/bin/env python3
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def identity_function(x):
    return x

def init_network():    # initialize the weight parameters and biases
    network = {}    # stored in a dictionary
    network['W1'] = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]])
    network['b1'] = np.array([0.1, 0.2, 0.3])
    network['W2'] = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]])
    network['b2'] = np.array([0.1, 0.2])
    network['W3'] = np.array([[0.1, 0.3], [0.2, 0.4]])
    network['b3'] = np.array([0.1, 0.2])
    return network

def forward(network, x):    # propagate the input signal to the output
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    a1 = np.dot(x, W1) + b1    # the bias is added after the matrix product
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = identity_function(a3)
    return y

network = init_network()    # initialize weights and biases
x = np.array([1.0, 0.5])    # input signal
y = forward(network, x)
print(y)
print("end")
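As a quick sanity check of the signal flow, the shapes can be traced layer by layer: a 2-element input becomes 3 hidden activations, then 2, then 2 outputs. A minimal sketch, assuming init_network(), sigmoid(), and forward() from the script above are in scope (e.g. appended to the same file):

# Trace the shapes through the network defined above.
# Assumes init_network(), sigmoid() and forward() from the script above are in scope.
network = init_network()
x = np.array([1.0, 0.5])                          # shape (2,)
a1 = np.dot(x, network['W1']) + network['b1']     # (2,)·(2,3) + (3,) -> (3,)
z1 = sigmoid(a1)
a2 = np.dot(z1, network['W2']) + network['b2']    # (3,)·(3,2) + (2,) -> (2,)
print(a1.shape, a2.shape)                         # (3,) (2,)
print(forward(network, x))                        # roughly [0.3168 0.6963] with these fixed weights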
 TwoLayerNet (backpropagation)
$ python3 xxxx.py

#!/usr/bin/env python3
# TwoLayerNet (backpropagation)
import numpy as np
from layers import *
from gradient import numerical_gradient
from collections import OrderedDict


class TwoLayerNet:

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Initialize the weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)  # e.g. a 784x50 matrix of (0.01 * standard normal) values
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
        # NOTE: the debug prints below assume input_size=784, hidden_size=50, output_size=10
        print("weight init, weight_init_std =", weight_init_std)
        print("self.params['W1'].shape", self.params['W1'].shape)
        print("self.params['b1'].shape", self.params['b1'].shape)
        print("self.params['W2'].shape", self.params['W2'].shape)
        print("self.params['b2'].shape", self.params['b2'].shape)
        # print("(self.params['W1'])", (self.params['W1']))
        print("(self.params['W1'])[0, 0]", (self.params['W1'])[0, 0])
        print("(self.params['W1'])[783, 49]", (self.params['W1'])[783, 49])
        print("(self.params['b1'])[0]", (self.params['b1'])[0])
        # print("(self.params['W2'])", (self.params['W2']))
        print("(self.params['W2'])[0, 0]", (self.params['W2'])[0, 0])
        print("(self.params['W2'])[49, 9]", (self.params['W2'])[49, 9])
        print("(self.params['b2'])[0]", (self.params['b2'])[0])

        # Build the layers
        self.layers = OrderedDict()  # insertion order is preserved
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()
        print("layers created: Affine1, Relu1, Affine2, lastLayer")
        print("self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])")
        print("self.layers['Relu1'] = Relu()")
        print("self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])")
        print("self.lastLayer = SoftmaxWithLoss()")

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    # x: input data, t: teacher labels
    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x: input data, t: teacher labels
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads

    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # collect the gradients
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
        return grads
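To see how TwoLayerNet is driven, here is a minimal SGD loop on random dummy data; the 784/50/10 sizes match the debug prints above. This is an illustrative sketch only, assuming the TwoLayerNet class above is in scope (real training would use MNIST mini-batches):

# Minimal SGD loop driving TwoLayerNet on random dummy data.
# Assumes the TwoLayerNet class above is in scope; the dummy arrays
# only exercise the shapes, they are not a real dataset.
import numpy as np

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

x_batch = np.random.rand(100, 784)                        # 100 dummy "images"
t_batch = np.eye(10)[np.random.randint(0, 10, size=100)]  # 100 dummy one-hot labels

learning_rate = 0.1
for i in range(100):
    grads = network.gradient(x_batch, t_batch)            # gradients via backprop
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grads[key] # in-place SGD update

print(network.loss(x_batch, t_batch))

The in-place -= matters: each Affine layer holds a reference to the same array stored in params, so updating in place keeps the layers and params in sync, whereas reassigning params[key] to a new array would silently detach them.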
 layers
$ python3 xxxx.py

#!/usr/bin/env python3
# layers
import numpy as np
from functions import *
from util import im2col, col2im


class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        dout[self.mask] = 0
        dx = dout
        return dx


class Sigmoid:
    def __init__(self):
        self.out = None

    def forward(self, x):
        out = sigmoid(x)
        self.out = out
        return out

    def backward(self, dout):
        dx = dout * (1.0 - self.out) * self.out
        return dx


class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.original_x_shape = None
        # gradients of the weight and bias parameters
        self.dW = None
        self.db = None

    def forward(self, x):
        # tensor support
        self.original_x_shape = x.shape
        x = x.reshape(x.shape[0], -1)
        self.x = x
        out = np.dot(self.x, self.W) + self.b
        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        dx = dx.reshape(*self.original_x_shape)  # restore the input shape (tensor support)
        return dx


class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None
        self.y = None  # softmax output
        self.t = None  # teacher labels

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:  # teacher labels are one-hot vectors
            dx = (self.y - self.t) / batch_size
        else:
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size
        return dx


class Dropout:
    """
    http://arxiv.org/abs/1207.0580
    """
    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, train_flg=True):
        if train_flg:
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        else:
            return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        return dout * self.mask


class BatchNormalization:
    """
    http://arxiv.org/abs/1502.03167
    """
    def __init__(self, gamma, beta, momentum=0.9, running_mean=None, running_var=None):
        self.gamma = gamma
        self.beta = beta
        self.momentum = momentum
        self.input_shape = None  # 4D for conv layers, 2D for fully connected layers

        # mean and variance used at test time
        self.running_mean = running_mean
        self.running_var = running_var

        # intermediate data used in backward
        self.batch_size = None
        self.xc = None
        self.std = None
        self.dgamma = None
        self.dbeta = None

    def forward(self, x, train_flg=True):
        self.input_shape = x.shape
        if x.ndim != 2:
            N, C, H, W = x.shape
            x = x.reshape(N, -1)
        out = self.__forward(x, train_flg)
        return out.reshape(*self.input_shape)

    def __forward(self, x, train_flg):
        if self.running_mean is None:
            N, D = x.shape
            self.running_mean = np.zeros(D)
            self.running_var = np.zeros(D)

        if train_flg:
            mu = x.mean(axis=0)
            xc = x - mu
            var = np.mean(xc**2, axis=0)
            std = np.sqrt(var + 10e-7)
            xn = xc / std

            self.batch_size = x.shape[0]
            self.xc = xc
            self.xn = xn
            self.std = std
            self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * mu
            self.running_var = self.momentum * self.running_var + (1 - self.momentum) * var
        else:
            xc = x - self.running_mean
            xn = xc / np.sqrt(self.running_var + 10e-7)

        out = self.gamma * xn + self.beta
        return out

    def backward(self, dout):
        if dout.ndim != 2:
            N, C, H, W = dout.shape
            dout = dout.reshape(N, -1)
        dx = self.__backward(dout)
        dx = dx.reshape(*self.input_shape)
        return dx

    def __backward(self, dout):
        dbeta = dout.sum(axis=0)
        dgamma = np.sum(self.xn * dout, axis=0)
        dxn = self.gamma * dout
        dxc = dxn / self.std
        dstd = -np.sum((dxn * self.xc) / (self.std * self.std), axis=0)
        dvar = 0.5 * dstd / self.std
        dxc += (2.0 / self.batch_size) * self.xc * dvar
        dmu = np.sum(dxc, axis=0)
        dx = dxc - dmu / self.batch_size

        self.dgamma = dgamma
        self.dbeta = dbeta
        return dx


class Convolution:
    def __init__(self, W, b, stride=1, pad=0):
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

        # intermediate data (used in backward)
        self.x = None
        self.col = None
        self.col_W = None

        # gradients of the weight and bias parameters
        self.dW = None
        self.db = None

    def forward(self, x):
        FN, C, FH, FW = self.W.shape
        N, C, H, W = x.shape
        out_h = 1 + int((H + 2*self.pad - FH) / self.stride)
        out_w = 1 + int((W + 2*self.pad - FW) / self.stride)

        col = im2col(x, FH, FW, self.stride, self.pad)
        col_W = self.W.reshape(FN, -1).T
        out = np.dot(col, col_W) + self.b
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        self.x = x
        self.col = col
        self.col_W = col_W
        return out

    def backward(self, dout):
        FN, C, FH, FW = self.W.shape
        dout = dout.transpose(0, 2, 3, 1).reshape(-1, FN)

        self.db = np.sum(dout, axis=0)
        self.dW = np.dot(self.col.T, dout)
        self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)

        dcol = np.dot(dout, self.col_W.T)
        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)
        return dx


class Pooling:
    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

        self.x = None
        self.arg_max = None

    def forward(self, x):
        N, C, H, W = x.shape
        out_h = int(1 + (H - self.pool_h) / self.stride)
        out_w = int(1 + (W - self.pool_w) / self.stride)

        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h*self.pool_w)

        arg_max = np.argmax(col, axis=1)
        out = np.max(col, axis=1)
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        self.x = x
        self.arg_max = arg_max
        return out

    def backward(self, dout):
        dout = dout.transpose(0, 2, 3, 1)

        pool_size = self.pool_h * self.pool_w
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,))

        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)
        return dx
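The Convolution and Pooling layers can be sanity-checked on a small random input; the output spatial size follows out = 1 + (H + 2*pad - FH) / stride. A minimal sketch, assuming the layers module above (together with functions.py and util.py) is saved alongside as layers.py:

# Shape check for Convolution and Pooling on a random 4D input.
# Assumes layers.py, functions.py and util.py from this page are on the path.
import numpy as np
from layers import Convolution, Pooling

x = np.random.rand(1, 3, 28, 28)   # (N, C, H, W)
W = np.random.rand(10, 3, 5, 5)    # 10 filters of shape 3x5x5
b = np.zeros(10)

conv = Convolution(W, b, stride=1, pad=0)
out = conv.forward(x)
print(out.shape)   # (1, 10, 24, 24): 1 + (28 + 0 - 5) / 1 = 24

pool = Pooling(pool_h=2, pool_w=2, stride=2)
out = pool.forward(out)
print(out.shape)   # (1, 10, 12, 12): 2x2 max pooling halves H and W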
 functions
$ python3 xxxx.py

#!/usr/bin/env python3
# functions
import numpy as np


def identity_function(x):
    return x


def step_function(x):
    return np.array(x > 0, dtype=int)


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid_grad(x):
    return (1.0 - sigmoid(x)) * sigmoid(x)


def relu(x):
    return np.maximum(0, x)


def relu_grad(x):
    grad = np.zeros_like(x)
    grad[x >= 0] = 1
    return grad


def softmax(x):
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis=0)
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T

    x = x - np.max(x)  # guard against overflow
    return np.exp(x) / np.sum(np.exp(x))


def sum_squared_error(y, t):
    return 0.5 * np.sum((y - t)**2)


def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # if the teacher labels are one-hot vectors, convert them to class indices
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size


def softmax_loss(X, t):
    y = softmax(X)
    return cross_entropy_error(y, t)
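A quick check that softmax yields a probability distribution per row and that cross_entropy_error consumes its output; a minimal sketch, assuming the module above is saved as functions.py:

# Quick check of softmax and cross_entropy_error on a 2-sample batch.
# Assumes the module above is saved alongside as functions.py.
import numpy as np
from functions import softmax, cross_entropy_error

scores = np.array([[0.3, 2.9, 4.0],
                   [10.0, 0.1, 0.2]])
y = softmax(scores)
print(y.sum(axis=1))              # each row sums to 1.0

t = np.array([[0, 0, 1],          # one-hot labels: class 2, class 0
              [1, 0, 0]])
print(cross_entropy_error(y, t))  # small loss, since both argmax predictions are correct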
 util
$ python3 xxxx.py

#!/usr/bin/env python3
# util
import numpy as np


def smooth_curve(x):
    """Smooths a loss curve for plotting.

    Reference: http://glowingpython.blogspot.jp/2012/02/convolution-with-numpy.html
    """
    window_len = 11
    s = np.r_[x[window_len-1:0:-1], x, x[-1:-window_len:-1]]
    w = np.kaiser(window_len, 2)
    y = np.convolve(w/w.sum(), s, mode='valid')
    return y[5:len(y)-5]


def shuffle_dataset(x, t):
    """Shuffles a dataset.

    Parameters
    ----------
    x : training data
    t : teacher labels

    Returns
    -------
    x, t : the shuffled training data and teacher labels
    """
    permutation = np.random.permutation(x.shape[0])
    x = x[permutation, :] if x.ndim == 2 else x[permutation, :, :, :]
    t = t[permutation]
    return x, t


def conv_output_size(input_size, filter_size, stride=1, pad=0):
    return (input_size + 2*pad - filter_size) / stride + 1


def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """
    Parameters
    ----------
    input_data : 4D input array of shape (batch size, channels, height, width)
    filter_h : filter height
    filter_w : filter width
    stride : stride
    pad : padding

    Returns
    -------
    col : 2D array
    """
    N, C, H, W = input_data.shape
    out_h = (H + 2*pad - filter_h)//stride + 1
    out_w = (W + 2*pad - filter_w)//stride + 1

    img = np.pad(input_data, [(0, 0), (0, 0), (pad, pad), (pad, pad)], 'constant')
    col = np.zeros((N, C, filter_h, filter_w, out_h, out_w))

    for y in range(filter_h):
        y_max = y + stride*out_h
        for x in range(filter_w):
            x_max = x + stride*out_w
            col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride]

    col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N*out_h*out_w, -1)
    return col


def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
    """Inverse transform of im2col.

    Parameters
    ----------
    col : 2D array produced by im2col
    input_shape : shape of the original input (e.g. (10, 1, 28, 28))
    filter_h : filter height
    filter_w : filter width
    stride : stride
    pad : padding

    Returns
    -------
    img : 4D array restored to the input shape
    """
    N, C, H, W = input_shape
    out_h = (H + 2*pad - filter_h)//stride + 1
    out_w = (W + 2*pad - filter_w)//stride + 1
    col = col.reshape(N, out_h, out_w, C, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2)

    img = np.zeros((N, C, H + 2*pad + stride - 1, W + 2*pad + stride - 1))
    for y in range(filter_h):
        y_max = y + stride*out_h
        for x in range(filter_w):
            x_max = x + stride*out_w
            img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :]

    return img[:, :, pad:H + pad, pad:W + pad]
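im2col flattens every filter-sized window into one row, so a 4x4 single-channel image with a 2x2 filter and stride 1 yields 3*3 = 9 rows of 4 elements each. A minimal round-trip sketch, assuming the module above is saved as util.py:

# Shape check for the im2col / col2im round trip.
# Assumes the module above is saved alongside as util.py.
import numpy as np
from util import im2col, col2im

x = np.random.rand(1, 1, 4, 4)   # (N, C, H, W)
col = im2col(x, filter_h=2, filter_w=2, stride=1, pad=0)
print(col.shape)   # (9, 4): 3*3 windows, each flattened to 2*2 elements

img = col2im(col, x.shape, filter_h=2, filter_w=2, stride=1, pad=0)
print(img.shape)   # (1, 1, 4, 4); note overlapping windows are summed, not averaged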