import numpy as np
from slugnet.activation import Noop, ReLU
from slugnet.initializations import _zero, GlorotUniform
class Layer(object):
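    """
    Base class for all layers in a network. Subclasses implement
    :code:`connect`, :code:`call`, and :code:`backprop`, and override
    :code:`get_params` and :code:`get_grads` when they hold trainable
    parameters.
    """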
first_layer = False
def set_first_layer(self):
self.first_layer = True
def get_params(self):
return []
def get_grads(self):
return []
class Dense(Layer):
r"""
A common densely connected neural network layer.
:param outshape: The output shape at this layer.
:type outshape: int
:param inshape: The input shape at this layer.
:type inshape: int
:param activation: The activation function to be used at the layer.
:type activation: slugnet.activation.Activation
:param init: The initialization function to be used
:type init: slugnet.initializations.Initializer
"""
def __init__(self, outshape, inshape=None, activation=Noop(),
init=GlorotUniform()):
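        # outshape is stored as (batch, features); the batch dimension is
        # left as None because it varies with the input.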
self.outshape = None, outshape
self.activation = activation
self.inshape = inshape
self.init = init
def connect(self, prev_layer=None):
if prev_layer:
if len(prev_layer.outshape) != 2:
                raise ValueError("Previous layer's outshape is incompatible")
self.inshape = prev_layer.outshape[-1]
elif not self.inshape:
raise ValueError('inshape must be given to first layer of network')
self.shape = self.inshape, self.outshape[-1]
self.w = self.init(self.shape)
self.b = _zero((self.outshape[-1], ))
self.dw = None
self.db = None
def call(self, X, *args, **kwargs):
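        # Affine transform followed by the activation; the input is cached
        # for use in backprop.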
self.last_X = X
return self.activation.call(np.dot(X, self.w) + self.b)
    def backprop(self, nxt_grad):
"""
Computes the derivative for the next layer
and computes update for this layers weights
"""
act_grad = nxt_grad * self.activation.derivative()
self.dw = np.dot(self.last_X.T, act_grad)
self.db = np.mean(act_grad, axis=0)
return np.dot(act_grad, self.w.T)
def get_params(self):
return self.w, self.b
def get_grads(self):
return self.dw, self.db
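
# A minimal usage sketch for Dense (illustrative only, not part of the layer
# API): a layer with four inputs and three ReLU units is connected, run
# forward on random data, and run backward with a dummy gradient of ones.
def _dense_usage_sketch():
    layer = Dense(3, inshape=4, activation=ReLU())
    layer.connect()

    X = np.random.randn(8, 4)
    out = layer.call(X)                        # shape (8, 3)

    # Gradient with respect to the input, shape (8, 4); layer.get_grads()
    # now returns (dw, db) for use by an optimizer.
    grad = layer.backprop(np.ones_like(out))

    return out, grad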
class Dropout(Layer):
r"""
A layer that removes units from a network with probability :code:`p`.
:param p: The probability of a non-ouput node being removed from the network.
:type p: float
"""
def __init__(self, p=0.0, *args, **kwargs):
super().__init__(*args, **kwargs)
self.p = p
def connect(self, prev_layer=None):
self.outshape = prev_layer.outshape
def call(self, X, train=True, *args, **kwargs):
        if not 0. < self.p < 1.:
            # Dropout is a no-op when p lies outside the open interval (0, 1).
            return X

        if not train:
            # The training-time mask below is already scaled by 1 / (1 - p)
            # (inverted dropout), so no rescaling is needed at inference time.
            return X

        binomial = np.random.binomial(1, 1 - self.p, X.shape)
        self.last_mask = binomial / (1 - self.p)

        return X * self.last_mask
def backprop(self, pre_grad, *args, **kwargs):
if 0. < self.p < 1.:
return pre_grad * self.last_mask
return pre_grad
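
# A minimal usage sketch for Dropout (illustrative only): with p=0.5 roughly
# half of the activations are zeroed during training and the survivors are
# scaled by 1 / (1 - p), so the expected training-time output matches the
# untouched inference-time output.
def _dropout_usage_sketch():
    drop = Dropout(p=0.5)
    X = np.ones((4, 6))

    train_out = drop.call(X, train=True)   # entries are 0.0 or 2.0
    test_out = drop.call(X, train=False)   # X passed through unchanged

    return train_out, test_out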
class Convolution(Layer):
r"""
A layer that implements the convolution operation.
:param nb_kernel: The number of kernels to use.
:type nb_kernel: int
:param kernel_size: The size of the kernel as a tuple, heigh by width
:type kernel_size: (int, int)
:param stride: The stide width to use
:type stride: int
:param init: The initializer to use
:type init: slugnet.initializations.Initializer
:param activation: The activation function to be used at the layer.
:type activation: slugnet.activation.Activation
"""
def __init__(self, nb_kernel, kernel_size, stride=1,
inshape=None, init=GlorotUniform(), activation=None):
if activation is None:
activation = ReLU()
self.inshape = inshape
self.nb_kernel = nb_kernel
self.kernel_size = kernel_size
self.stride = stride
self.w, self.b = None, None
self.dw, self.db = None, None
self.outshape = None
self.last_output = None
self.last_input = None
self.init = init
self.activation = activation
def connect(self, prev_layer=None):
if prev_layer:
self.inshape = prev_layer.outshape
elif not self.inshape:
raise ValueError('inshape must be given to first layer of network')
prev_nb_kernel = self.inshape[1]
kernel_h, kernel_w = self.kernel_size
self.w = self.init((self.nb_kernel, prev_nb_kernel, kernel_h, kernel_w))
self.b = _zero((self.nb_kernel,))
prev_kh, prev_kw = self.inshape[2], self.inshape[3]
height = (prev_kh - kernel_h) // self.stride + 1
width = (prev_kw - kernel_w) // self.stride + 1
self.outshape = (self.inshape[0], self.nb_kernel, height, width)
def call(self, X, *args, **kwargs):
self.last_input = X
batch_size, depth, height, width = X.shape
kernel_h, kernel_w = self.kernel_size
out_h, out_w = self.outshape[2:]
outputs = _zero((batch_size, self.nb_kernel, out_h, out_w))
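        # Naive convolution: for every sample and kernel, slide the kernel
        # across the input at the configured stride and take the sum of the
        # element-wise product of the kernel and the input patch, plus the
        # kernel's bias.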
for x in np.arange(batch_size):
for y in np.arange(self.nb_kernel):
for h in np.arange(out_h):
for w in np.arange(out_w):
h1, w1 = h * self.stride, w * self.stride
h2, w2 = h1 + kernel_h, w1 + kernel_w
patch = X[x, :, h1: h2, w1: w2]
conv_product = patch * self.w[y]
outputs[x, y, h, w] = np.sum(conv_product) + self.b[y]
self.last_output = self.activation.call(outputs)
return self.last_output
def backprop(self, grad, *args, **kwargs):
batch_size, depth, input_h, input_w = self.last_input.shape
out_h, out_w = self.outshape[2:]
kernel_h, kernel_w = self.kernel_size
# gradients
self.dw = _zero(self.w.shape)
self.db = _zero(self.b.shape)
delta = grad * self.activation.derivative()
# dw
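        # Each kernel weight's gradient is the sum, over the batch and all
        # output positions, of (input value under the kernel) * (delta at
        # that output position), divided by the batch size.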
for r in np.arange(self.nb_kernel):
for t in np.arange(depth):
for h in np.arange(kernel_h):
for w in np.arange(kernel_w):
input_window = self.last_input[:, t,
h:input_h - kernel_h + h + 1:self.stride,
w:input_w - kernel_w + w + 1:self.stride]
delta_window = delta[:, r]
self.dw[r, t, h, w] = np.sum(input_window * delta_window) / batch_size
# db
for r in np.arange(self.nb_kernel):
self.db[r] = np.sum(delta[:, r]) / batch_size
# dX
if not self.first_layer:
layer_grads = _zero(self.last_input.shape)
for b in np.arange(batch_size):
for r in np.arange(self.nb_kernel):
for t in np.arange(depth):
for h in np.arange(out_h):
for w in np.arange(out_w):
h1, w1 = h * self.stride, w * self.stride
h2, w2 = h1 + kernel_h, w1 + kernel_w
layer_grads[b, t, h1:h2, w1:w2] += self.w[r, t] * delta[b, r, h, w]
return layer_grads
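
# A minimal usage sketch for Convolution (illustrative only), assuming a batch
# of two single-channel 8x8 inputs and four 3x3 kernels with stride 1. With no
# padding the spatial output size is (8 - 3) // 1 + 1 = 6, so the output has
# shape (2, 4, 6, 6).
def _convolution_usage_sketch():
    conv = Convolution(nb_kernel=4, kernel_size=(3, 3), stride=1,
                       inshape=(2, 1, 8, 8))
    conv.connect()

    X = np.random.randn(2, 1, 8, 8)
    out = conv.call(X)   # shape (2, 4, 6, 6)

    return out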
class MeanPooling(Layer):
r"""
Pool outputs using the arithmetic mean.
"""
def __init__(self, pool_size, inshape=None):
self.pool_size = pool_size
self.outshape = None
self.inshape = inshape
def connect(self, prev_layer=None):
if prev_layer:
self.inshape = prev_layer.outshape
elif not self.inshape:
raise ValueError('inshape must be given to first layer of network')
old_h, old_w = self.inshape[-2:]
pool_h, pool_w = self.pool_size
new_h, new_w = old_h // pool_h, old_w // pool_w
self.outshape = self.inshape[:-2] + (new_h, new_w)
def call(self, X, *args, **kwargs):
self.inshape = X.shape
pool_h, pool_w = self.pool_size
new_h, new_w = self.outshape[-2:]
# forward
outputs = _zero(self.inshape[:-2] + self.outshape[-2:])
if np.ndim(X) == 4:
nb_batch, nb_axis, _, _ = X.shape
for a in np.arange(nb_batch):
for b in np.arange(nb_axis):
for h in np.arange(new_h):
for w in np.arange(new_w):
                            h1, w1 = h * pool_h, w * pool_w
                            outputs[a, b, h, w] = np.mean(X[a, b, h1:h1 + pool_h, w1:w1 + pool_w])
elif np.ndim(X) == 3:
nb_batch, _, _ = X.shape
for a in np.arange(nb_batch):
for h in np.arange(new_h):
for w in np.arange(new_w):
                    h1, w1 = h * pool_h, w * pool_w
                    outputs[a, h, w] = np.mean(X[a, h1:h1 + pool_h, w1:w1 + pool_w])
else:
            raise ValueError('MeanPooling expects a 3D or 4D input, got %iD' % np.ndim(X))
return outputs
def backprop(self, pre_grad, *args, **kwargs):
new_h, new_w = self.outshape[-2:]
pool_h, pool_w = self.pool_size
length = np.prod(self.pool_size)
layer_grads = _zero(self.inshape)
if np.ndim(pre_grad) == 4:
nb_batch, nb_axis, _, _ = pre_grad.shape
for a in np.arange(nb_batch):
for b in np.arange(nb_axis):
for h in np.arange(new_h):
for w in np.arange(new_w):
h1, w1 = h * pool_h, w * pool_w
h2, w2 = h1 + pool_h, w1 + pool_w
layer_grads[a, b, h1:h2, w1:w2] = pre_grad[a, b, h, w] / length
elif np.ndim(pre_grad) == 3:
nb_batch, _, _ = pre_grad.shape
for a in np.arange(nb_batch):
for h in np.arange(new_h):
for w in np.arange(new_w):
h_shift, w_shift = h * pool_h, w * pool_w
layer_grads[a, h_shift: h_shift + pool_h, w_shift: w_shift + pool_w] = \
pre_grad[a, h, w] / length
else:
            raise ValueError('MeanPooling expects a 3D or 4D gradient, got %iD' % np.ndim(pre_grad))
return layer_grads
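
# A minimal usage sketch for MeanPooling (illustrative only): with a (2, 2)
# pool, each output value is the mean of a non-overlapping 2x2 window, so a
# (1, 1, 4, 4) input pools down to (1, 1, 2, 2).
def _mean_pooling_usage_sketch():
    pool = MeanPooling((2, 2), inshape=(1, 1, 4, 4))
    pool.connect()

    X = np.arange(16, dtype=float).reshape(1, 1, 4, 4)
    out = pool.call(X)   # out[0, 0, 0, 0] == np.mean(X[0, 0, 0:2, 0:2]) == 2.5

    return out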
class Flatten(Layer):
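    r"""
    Flatten the trailing dimensions of the input into a single dimension,
    keeping the first :code:`outdim - 1` dimensions intact. Typically placed
    between a convolutional stack and a :code:`Dense` layer. A short usage
    sketch follows the class definition below.

    :param outdim: The number of dimensions in the flattened output.
    :type outdim: int
    :param inshape: The input shape at this layer.
    :type inshape: tuple
    """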
def __init__(self, outdim=2, inshape=None):
self.outdim = outdim
if outdim < 1:
            raise ValueError('outdim must be > 0, was %i' % outdim)
self.last_input_shape = None
self.outshape = None
self.inshape = inshape
def connect(self, prev_layer=None):
if prev_layer:
self.inshape = prev_layer.outshape
elif not self.inshape:
raise ValueError('inshape must be given to first layer of network')
to_flatten = np.prod(self.inshape[self.outdim - 1:])
flattened_shape = self.inshape[:self.outdim - 1] + (to_flatten,)
self.outshape = flattened_shape
def call(self, X, *args, **kwargs):
self.last_input_shape = X.shape
flattened_shape = X.shape[:self.outdim - 1] + (-1,)
return np.reshape(X, flattened_shape)
def backprop(self, pre_grad, *args, **kwargs):
return np.reshape(pre_grad, self.last_input_shape)
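
# A minimal usage sketch for Flatten (illustrative only): a (2, 3, 4, 4)
# convolutional output is flattened to the (batch, features) shape a Dense
# layer expects, i.e. (2, 48), and backprop restores the original shape.
def _flatten_usage_sketch():
    flat = Flatten(outdim=2, inshape=(2, 3, 4, 4))
    flat.connect()

    X = np.random.randn(2, 3, 4, 4)
    out = flat.call(X)          # shape (2, 48)
    back = flat.backprop(out)   # shape (2, 3, 4, 4)

    return out, back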