ACDC_KNOSYS-2021/ATL/NeuralNetwork.m

classdef NeuralNetwork < handle & ElasticNodes & NeuralNetworkConstants
%NEURALNETWORK It encapsulates a common MLP (Multilayer Perceptron, a.k.a.
%feedforward network)
% This object has the main attributes a neural network needs to
% operate, along with its main functions/behaviors. Some extra
% behaviors were built in order to achieve research goals.
%
% This class features elastic network width via ElasticNodes
% inheritance. Network width adaptation supports automatic generation
% of new hidden nodes and pruning of inconsequential nodes. This
% mechanism is controlled by the NS (Network Significance) method,
% which estimates the network's generalization power in terms of bias
% and variance.
%% Standard Neural Network public properties
properties (Access = public)
layers %Layers of a standard neural network
layerValue % Layer (Input and Hidden Layer) values
outputLayerValue % Output layers values
weight % Weights
bias % Added bias
momentum % Weight momentum
biasMomentum
outputWeight % Weights to output layer
outputBias % Bias to output layer
outputMomentum % Weight momentum from output layer
outputBiasMomentum
gradient % Gradients
outputGradient % Gradients from output layers
biasGradient;
outputBiasGradient;
activationFunction % Each real layer activation function
outputActivationFunctionLossFunction % Each output activation function
learningRate = 0.01; % Learning rate
momentumRate = 0.95; % Momentum rate
errorValue % Network error
lossValue % Network Loss
lambda = 0.001;
end
%% Standard Neural Network protected properties
properties (Access = protected)
nHiddenLayers % Number of hidden layers (i.e., not counting the input and output layers)
inputSize % Size of input layer
outputSize % Size of output layer
end
%% AGMM properties
properties (Access = public)
agmm
end
properties (Access = protected)
isAgmmAble = false;
end
%% Metrics and performance public properties
properties (Access = public)
%test metrics
sigma % Network's prediction
misclassifications % Number of misclassifications after test
classificationRate % Classification rate after test
residualError % Residual error after test
outputedClasses % Classes outputted during the test
trueClasses % True target classes
end
%% Helpers protected properties
properties (Access = protected)
util = Util; % Caller for several util computations
end
%% Standard Neural Network public methods
methods (Access = public)
function self = NeuralNetwork(layers)
%NeuralNetwork
% layers (array)
% This array describes a feedforward network structure by
% the number of nodes in each layer.
% An FFNN with an input layer of 8 nodes, a hidden layer
% of 10 nodes and an output layer of 3 nodes would be
% described by [8 10 3].
% An FFNN with an input layer of 784 nodes, a hidden
% layer 1 of 800 nodes, a hidden layer 2 of 400 nodes and
% an output layer of 10 nodes would be described as [784 800 400 10]
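%
% Example (illustrative):
%   net = NeuralNetwork([784 800 400 10]);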
self@ElasticNodes(numel(layers) - 1);
self.inputSize = layers(1);
self.outputSize = layers(end);
self.layers = layers;
self.nHiddenLayers = length(layers) - 2;
for i = 1 : self.nHiddenLayers
self.weight{i} = normrnd(0, sqrt(2 / (self.layers(i) + 1)), [self.layers(i + 1), self.layers(i)]);
self.bias{i} = normrnd(0, sqrt(2 / (self.layers(i) + 1)), [1, self.layers(i + 1)]);
self.momentum{i} = zeros(size(self.weight{i}));
self.biasMomentum{i} = zeros(size(self.bias{i}));
self.activationFunction(i) = self.ACTIVATION_FUNCTION_SIGMOID();
end
self.outputWeight = normrnd(0, sqrt(2 / (self.layers(end) + 1)), [self.layers(end), self.layers(end - 1)]);
self.outputBias = normrnd(0, sqrt(2 / (self.layers(end) + 1)), [1, self.layers(end)]);
self.outputMomentum = zeros(size(self.outputWeight));
self.outputBiasMomentum = zeros(size(self.outputBias));
self.outputActivationFunctionLossFunction = self.ACTIVATION_LOSS_FUNCTION_SOFTMAX_CROSS_ENTROPY();
end
function feedforward(self, X, y)
%feedforward
% Perform the forward pass throughout the network and
% calculate the network error
% X (matrix)
% Input matrix
% y (matrix)
% Target matrix
self.forwardpass(X)
self.calculateError(y)
end
function forwardpass(self, X)
% forwardpass
% Perform the forward pass throughout the network without
% calculating the network error, which is why it does not need
% the target class.
% Because of this, this method can be used just to populate the
% hidden layers from the source data
% X (matrix)
% Input matrix
self.layerValue{1} = X;
for i = 1 : self.nHiddenLayers
previousLayerValueWithBias = [ones(size(self.layerValue{i}, 1), 1) self.layerValue{i}];
switch self.activationFunction(i)
case self.ACTIVATION_FUNCTION_SIGMOID()
self.layerValue{i + 1} = sigmf(previousLayerValueWithBias * [self.bias{i}' self.weight{i}]', [1, 0]);
case self.ACTIVATION_FUNCTION_TANH()
error('Not implemented');
case self.ACTIVATION_FUNCTION_RELU()
error('Not implemented yet');
case self.ACTIVATION_FUNCTION_LINEAR()
error('Not implemented yet');
case self.ACTIVATION_FUNCTION_SOFTMAX()
error('Not implemented yet');
end
end
previousLayerValueWithBias = [ones(size(self.layerValue{end}, 1), 1) self.layerValue{end}];
switch self.outputActivationFunctionLossFunction
case self.ACTIVATION_LOSS_FUNCTION_SIGMOID_MSE()
self.outputLayerValue = sigmf(previousLayerValueWithBias * [self.outputBias' self.outputWeight]', [1, 0]);
case self.ACTIVATION_LOSS_FUNCTION_TANH()
error('Not implemented yet');
case self.ACTIVATION_LOSS_FUNCTION_RELU()
error('Not implemented yet');
case self.ACTIVATION_LOSS_FUNCTION_SOFTMAX_CROSS_ENTROPY()
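% Numerically stable softmax: subtracting the row-wise maximum
% before exponentiating avoids overflow and leaves the result
% unchanged, since softmax is shift-invariant.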
self.outputLayerValue = previousLayerValueWithBias * [self.outputBias' self.outputWeight]';
self.outputLayerValue = exp(self.outputLayerValue - max(self.outputLayerValue, [], 2));
self.outputLayerValue = self.outputLayerValue./sum(self.outputLayerValue, 2);
case self.ACTIVATION_LOSS_FUNCTION_LINEAR_CROSS_ENTROPY()
error('Not implemented yet');
end
end
function backpropagate(self)
%backpropagate
% Perform back-propagation throughout the network.
% We assume that you have already populated the hidden layers and
% the network error by calling the feedforward method.
dW = cell(1, self.nHiddenLayers + 1);
db = cell(1, self.nHiddenLayers + 1);
for i = self.nHiddenLayers : - 1 : 1
if i == self.nHiddenLayers
% THIS IS THE GRADIENT OF THE LOSS FUNCTION
switch self.outputActivationFunctionLossFunction
case self.ACTIVATION_LOSS_FUNCTION_SIGMOID_MSE()
dW{i + 1} = - self.errorValue .* self.outputLayerValue .* (1 - self.outputLayerValue);
db{i + 1} = - sum(self.errorValue, 1)/size(self.errorValue, 1);
case self.ACTIVATION_LOSS_FUNCTION_TANH()
error('Not implemented');
case self.ACTIVATION_LOSS_FUNCTION_RELU()
error('Not implemented');
case self.ACTIVATION_LOSS_FUNCTION_SOFTMAX_CROSS_ENTROPY()
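% For softmax + cross-entropy, the gradient of the loss w.r.t. the
% pre-activation is (yhat - y); errorValue holds (y - yhat), hence
% the sign flip below.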
dW{i + 1} = - self.errorValue;
db{i + 1} = - sum(self.errorValue, 1)/size(self.errorValue, 1);
case self.ACTIVATION_LOSS_FUNCTION_LINEAR_CROSS_ENTROPY()
dW{i + 1} = - self.errorValue;
db{i + 1} = - sum(self.errorValue, 1)/size(self.errorValue, 1);
end
end
switch self.activationFunction(i)
case self.ACTIVATION_FUNCTION_SIGMOID()
dActivationFunction = self.layerValue{i + 1} .* (1 - self.layerValue{i + 1});
case self.ACTIVATION_FUNCTION_TANH()
error('Not implemented');
case self.ACTIVATION_FUNCTION_RELU()
error('Not implemented');
case self.ACTIVATION_FUNCTION_LINEAR()
dActivationFunction = 1;
case self.ACTIVATION_FUNCTION_SOFTMAX()
error('Not implemented');
end
if i == self.nHiddenLayers
z = dW{i + 1} * self.outputWeight;
dW{i} = z .* dActivationFunction;
db{i} = sum(dW{i}, 1)/size(dW{i}, 1);
else
z = dW{i + 1} * self.weight{i + 1};
dW{i} = z .* dActivationFunction;
db{i} = sum(dW{i}, 1)/size(dW{i}, 1);
end
end
self.outputGradient = dW{end}' * self.layerValue{end};
self.outputBiasGradient = db{end};
for i = 1 : self.nHiddenLayers
self.gradient{i} = dW{i}' * self.layerValue{i};
self.biasGradient{i} = db{i};
end
end
function test(self, X, y)
%test
% Test the neural network on a batch, computing its predictions
% and the resulting classification metrics (misclassifications,
% classification rate, residual error and outputted classes).
% X (matrix)
% Input matrix
% y (matrix)
% Target matrix
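%
% Example (illustrative; Xtest/yTest are placeholder variables):
%   net.test(Xtest, yTest);
%   fprintf('Classification rate: %.4f\n', net.classificationRate);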
self.feedforward(X, y);
m = size(y, 1);
[~, self.trueClasses] = max(y, [], 2);
self.sigma = self.outputLayerValue;
[rawOutput, outputtedClasses] = max(self.sigma, [], 2);
self.misclassifications = find(outputtedClasses ~= self.trueClasses);
self.classificationRate = 1 - numel(self.misclassifications) / m;
self.residualError = 1 - rawOutput;
self.outputedClasses = outputtedClasses;
end
function train(self, X, y, weightNo)
%train
% Train the neural network performing 3 complete stages:
% - Feed-forward
% - Back-propagation
% - Weight updates
% X (matrix)
% Input matrix
% y (matrix)
% Target matrix
% weightNo (integer) [optional]
% You have the ability to define which weight and bias you
% want to update using backpropagation. This method will
% update only that weight and bias, even if there are
% weights and biases in layers before and after it.
% The number of the weight and bias you want to update.
% Remember that 1 indicates the weight and bias that come
% out of the input layer.
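%
% Example (illustrative):
%   net.train(X, y);     % update every weight and bias set
%   net.train(X, y, 1);  % update only the first weight and bias set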
self.feedforward(X,y);
self.backpropagate();
switch nargin
case 4
self.trainWeight(weightNo);
case 3
for i = self.nHiddenLayers + 1 : -1 : 1
self.trainWeight(i);
end
end
end
function trainWeight(self,weightNo)
%trainWeight
% This method updates only a single set of weights and biases.
% Normally you will not call this method directly, but use
% the train method as a middleman.
% weightNo (integer)
% The number of the weight and bias you want to update.
% Remember that 1 indicates the weight and bias that come
% out of the input layer.
self.updateWeight(weightNo);
end
end
%% Standard Neural Network private methods
methods (Access = private)
function updateWeight(self, weightNo)
%updateWeight
% Perform a weight and bias update on a single weight/bias set
% weightNo (integer)
% Number/Position of the weight/bias you want to update
w = weightNo; %readability
if w > self.nHiddenLayers
dW = self.learningRate .* self.outputGradient;
db = self.learningRate .* self.outputBiasGradient;
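% Classical momentum: v <- momentumRate * v + learningRate * gradient,
% and the update applied below is w <- w - v.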
if self.momentumRate > 0
self.outputMomentum = self.momentumRate * self.outputMomentum + dW;
self.outputBiasMomentum = self.momentumRate * self.outputBiasMomentum + db;
dW = self.outputMomentum;
db = self.outputBiasMomentum;
end
if false % L2 weight-decay update, currently disabled
self.outputWeight = (1 - self.learningRate * self.lambda) * self.outputWeight - dW;
self.outputBias = (1 - self.learningRate * self.lambda) * self.outputBias - db;
else
self.outputWeight = self.outputWeight - dW;
self.outputBias = self.outputBias - db;
end
else
dW = self.learningRate .* self.gradient{w};
db = self.learningRate .* self.biasGradient{w};
if self.momentumRate > 0
self.momentum{w} = self.momentumRate * self.momentum{w} + dW;
self.biasMomentum{w} = self.momentumRate * self.biasMomentum{w} + db;
dW = self.momentum{w};
db = self.biasMomentum{w};
end
if false % L2 weight-decay update, currently disabled
self.weight{w} = (1 - self.learningRate * self.lambda) * self.weight{w} - dW;
self.bias{w} = (1 - self.learningRate * self.lambda) * self.bias{w} - db;
else
self.weight{w} = self.weight{w} - dW;
self.bias{w} = self.bias{w} - db;
end
end
end
function calculateError(self, y)
%calculateError
% Calculates the network error and loss.
% This method will typically be called by the feedforward
% method and will seldom be used standalone.
m = size(y,1);
%TODO: Add the possibility to input which error function we
%want to use
switch self.outputActivationFunctionLossFunction
case self.ACTIVATION_LOSS_FUNCTION_SIGMOID_MSE()
self.errorValue = y - self.outputLayerValue;
self.lossValue = 1 / 2 * sum(sum(self.errorValue .^ 2)) / m;
case self.ACTIVATION_LOSS_FUNCTION_TANH()
error('Not implemented');
case self.ACTIVATION_LOSS_FUNCTION_RELU()
error('Not implemented');
case self.ACTIVATION_LOSS_FUNCTION_SOFTMAX_CROSS_ENTROPY()
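% Multiclass cross-entropy: L = -(1/m) * sum_n sum_k y(n,k) * log(yhat(n,k)).
% errorValue = y - yhat is cached here and reused by backpropagate.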
self.errorValue = y - self.outputLayerValue;
self.lossValue = - sum(sum(y .* log(self.outputLayerValue))) / m;
case self.ACTIVATION_LOSS_FUNCTION_LINEAR_CROSS_ENTROPY()
error('Not implemented yet');
end
end
end
%% Standard Neural Network statistical metrics public methods
methods (Access = public)
function bias2 = computeNetworkBiasSquare(self, y)
%computeNetworkBiasSquare
% Compute the network's squared bias in relation to a target
%
% y (vector)
% A single target
%
% Returns
% The squared bias of the network related to this target
dataMean = self.dataMean;
dataStd = self.dataStd;
dataVar = self.dataVar;
self.nSamplesFeed = self.nSamplesFeed + 1;
[~, ~, Ez, ~] = self.computeExpectedValues(self.nHiddenLayers + 1);
bias2 = self.computeBIAS2(Ez, y);
self.nSamplesFeed = self.nSamplesFeed - 1;
self.dataMean = dataMean;
self.dataStd = dataStd;
self.dataVar = dataVar;
end
function var = computeNetworkVariance(self)
%computeNetworkVariance
% Compute the network variance
%
% Returns
% The variance of the network
dataMean = self.dataMean;
dataStd = self.dataStd;
dataVar = self.dataVar;
self.nSamplesFeed = self.nSamplesFeed + 1;
[~, ~, Ez, Ez2] = self.computeExpectedValues(self.nHiddenLayers + 1);
var = self.computeVAR(Ez, Ez2);
self.nSamplesFeed = self.nSamplesFeed - 1;
self.dataMean = dataMean;
self.dataStd = dataStd;
self.dataVar = dataVar;
end
end
%% Standard Neural Networks extra public methods
methods (Access = public)
function loss = updateWeightsByKullbackLeibler(self, Xs, ys, Xt, yt, GAMMA)
%updateWeightsByKullbackLeibler
% This method is used on Transfer Learning procedures. The
% idea is to approximate the source and target distributions.
% If you don't have access to the target domain classes, call
% this method from a generative model (AutoEncoder or
% DenoisingAutoEncoder)
% Xs (matrix)
% Source input
% ys (matrix)
% Source target
% Xt (matrix)
% Target input
% yt (matrix)
% Target-domain target
% GAMMA (float)
% Regularizer coefficient
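%
% The loss accumulated below is the symmetrized KL divergence between
% the column-sums of the source and target hidden activations at each
% layer, i.e. KL(s||t) + KL(t||s) (assuming util.KLDiv implements the
% standard discrete KL divergence).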
if nargin == 5
GAMMA = 0.0001;
end
nHL = self.nHiddenLayers + 1; %readability
self.forwardpass(Xs);
sourceLayerValue = self.layerValue;
sourceOutputLayerValue = self.outputLayerValue;
self.forwardpass(Xt);
targetLayerValue = self.layerValue;
targetOutputLayerValue = self.outputLayerValue;
klLoss = 0;
for i = nHL : -1 : 2
klLoss = klLoss + self.util.KLDiv(sum(sourceLayerValue{i}), sum(targetLayerValue{i}))...
+ self.util.KLDiv(sum(targetLayerValue{i}), sum(sourceLayerValue{i}));
end
if ~isfinite(klLoss)
loss = 1000;
else
loss = klLoss;
end
dSource{nHL} = (sourceOutputLayerValue - ys) .* sourceOutputLayerValue .* (1 - sourceOutputLayerValue);
dTarget{nHL} = (targetOutputLayerValue - yt) .* targetOutputLayerValue .* (1 - targetOutputLayerValue);
for i = 1 : self.nHiddenLayers
dSource{i} = sourceLayerValue{i + 1} .* (1 - sourceLayerValue{i + 1});
dTarget{i} = targetLayerValue{i + 1} .* (1 - targetLayerValue{i + 1});
end
for i = nHL : -1 : 1
if (i == nHL)
inboundSourceLayerValue = sourceLayerValue{end};
inboundTargetLayerValue = targetLayerValue{end};
else
if (i == self.nHiddenLayers)
outboundWeight = self.outputWeight;
else
outboundWeight = self.weight{i + 1};
end
inboundSourceLayerValue = sourceLayerValue{i};
inboundTargetLayerValue = targetLayerValue{i};
end
if (i == nHL)
dW{i} = (2 * dSource{i}' * inboundSourceLayerValue)...
+ (2 * dTarget{i}' * inboundTargetLayerValue);
b = sum((2 * dSource{i}) + (2 * dTarget{i}), 1);
db{i} = sum(b, 1) / size(b, 1);
else
dW{i} = ((2 * dSource{i + 1} * outboundWeight) .* dSource{i})' * inboundSourceLayerValue...
+ ((2 * dTarget{i + 1} * outboundWeight) .* dTarget{i})' * inboundTargetLayerValue;
b = ((2 * dSource{i + 1} * outboundWeight) .* dSource{i})...
+ ((2 * dTarget{i + 1} * outboundWeight) .* dTarget{i});
db{i} = sum(b, 1) / size(b, 1);
end
end
for i = nHL : -1 : 1
if (i == nHL)
self.outputWeight = self.outputWeight - self.learningRate * dW{i};
self.outputBias = self.outputBias - self.learningRate * db{i};
else
self.weight{i} = self.weight{i} - self.learningRate * dW{i};
self.bias{i} = self.bias{i} - self.learningRate * db{i};
end
end
end
end
%% Elastic/Evolving Neural Network public methods
methods (Access = public)
function widthAdaptationStepwise(self, layerNo, y)
%widthAdaptationStepwise
% Performs network width adaptation in a specific layer,
% stepwise (meaning it executes one sample at a time).
% Also, this method assumes that you have already passed the
% input data through the model via the forwardpass procedure.
% layerNo (integer)
% Number of the layer on which you want to perform width
% adaptation. This is normally a hidden layer.
% y (double or vector)
% Double, if you are performing regression
% Vector, if you are performing classification
% The target data to be used as validation
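%
% Illustrative call (xk/yk are placeholder variables for a single
% sample; the last hidden layer index in self.layers is assumed to
% be nHiddenLayers + 1):
%   net.forwardpass(xk);
%   net.widthAdaptationStepwise(net.getNumberHiddenLayers() + 1, yk);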
nhl = layerNo; % readability
self.nSamplesFeed = self.nSamplesFeed + 1;
self.nSamplesLayer(nhl) = self.nSamplesLayer(nhl) + 1;
[Ex, ~, Ey, Ey2] = computeExpectedValues(self, nhl);
bias2 = self.computeBIAS2(Ey, y);
var = self.computeVAR(Ey, Ey2);
[self.meanBIAS(nhl), self.varBIAS(nhl), self.stdBIAS(nhl)] ...
= self.util.recursiveMeanStd(bias2, self.meanBIAS(nhl), self.varBIAS(nhl), self.nSamplesFeed);
[self.meanVAR(nhl), self.varVAR(nhl), self.stdVAR(nhl)] ...
= self.util.recursiveMeanStd(var, self.meanVAR(nhl), self.varVAR(nhl), self.nSamplesFeed);
if self.nSamplesLayer(nhl) <= 1 || self.growable(nhl) == true
self.minMeanBIAS(nhl) = self.meanBIAS(nhl);
self.minStdBIAS(nhl) = self.stdBIAS(nhl);
else
self.minMeanBIAS(nhl) = min(self.minMeanBIAS(nhl), self.meanBIAS(nhl));
self.minStdBIAS(nhl) = min(self.minStdBIAS(nhl), self.stdBIAS(nhl));
end
if self.nSamplesLayer(nhl) <= self.inputSize + 1 || self.prunable{nhl}(1) ~= 0
self.minMeanVAR(nhl) = self.meanVAR(nhl);
self.minStdVAR(nhl) = self.stdVAR(nhl);
else
self.minMeanVAR(nhl) = min(self.minMeanVAR(nhl), self.meanVAR(nhl));
self.minStdVAR(nhl) = min(self.minStdVAR(nhl), self.stdVAR(nhl));
end
self.BIAS2{nhl} = [self.BIAS2{nhl} self.meanBIAS(nhl)];
self.VAR{nhl} = [self.VAR{nhl} self.meanVAR(nhl)];
self.growable(nhl) = self.isGrowable(nhl, bias2);
if nargin == 3
self.prunable{nhl} = self.isPrunable(nhl, var, Ex, self.PRUNE_SINGLE_LEAST_CONTRIBUTION_NODES());
elseif nargin == 4
self.prunable{nhl} = self.isPrunable(nhl, var, Ex, self.PRUNE_MULTIPLE_NODES_WITH_CONTRIBUTION_BELOW_EXPECTED());
end
end
function grow(self, layerNo)
%grow
% Add 1 new node to a hidden layer. Because of this, it will
% add 1 extra weight row and bias on the inbound side and 1
% extra weight column on the outbound side.
% layerNo (integer)
% Number of the layer you want to add a node to.
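%
% Example (illustrative): add one node to the first hidden layer,
% which is index 2 in the [input hidden ... output] layer indexing:
%   net.grow(2);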
self.layers(layerNo) = self.layers(layerNo) + 1;
if layerNo > 1
self.growWeightRow(layerNo - 1)
self.growBias(layerNo - 1);
end
if layerNo < numel(self.layers)
self.growWeightColumn(layerNo)
end
end
function prune(self, layerNo, nodeNo)
%prune
% Remove 1 node from a hidden layer. Because of this, it
% will remove 1 weight row and bias from the inbound side and
% 1 weight column from the outbound side.
% layerNo (integer)
% Number of the layer you want to remove a node from.
% nodeNo (integer)
% Position of the node to be removed
self.layers(layerNo) = self.layers(layerNo) - 1;
if layerNo > 1
self.pruneWeightRow(layerNo - 1, nodeNo);
self.pruneBias(layerNo - 1, nodeNo);
end
if layerNo < numel(self.layers)
self.pruneWeightColumn(layerNo, nodeNo);
end
end
end
%% Elastic/Evolving Neural Network protected methods
methods (Access = protected)
function isGrowable = isGrowable(self, layerNo, BIAS2)
%isGrowable
% Evaluate whether a specific layer needs a node added to keep
% its network significance parameters stable
% layerNo (integer)
% Layer on which the evaluation will be performed. Usually
% it is a hidden layer.
% BIAS2 (double)
% The squared bias of that layer at that time
%
% returns a boolean indicating whether that layer is ready to
% receive a new node or not.
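%
% Growing rule sketch: grow when
%   meanBIAS + stdBIAS >= minMeanBIAS + kappa * minStdBIAS
% with kappa = ALPHA_1 * exp(-BIAS2) + ALPHA_2, so the higher the
% current squared bias, the smaller kappa and the easier the
% condition is to satisfy.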
nhl = layerNo; %readability
isGrowable = false;
ALPHA_1 = 1.25;
ALPHA_2 = 0.75;
current = (self.meanBIAS(nhl) + self.stdBIAS(nhl));
biased_min = (self.minMeanBIAS(nhl)...
+ (ALPHA_1 * exp(-BIAS2) + ALPHA_2)...
* self.minStdBIAS(nhl));
if self.nSamplesLayer(nhl) > 1 && current >= biased_min
isGrowable = true;
end
end
function prunableNodes = isPrunable(self, layerNo, VAR, expectedY, option)
%isPrunable
% Evaluate whether a specific layer needs a node pruned to keep
% its network significance parameters stable
% layerNo (integer)
% Layer on which the evaluation will be performed. Usually
% it is a hidden layer.
% VAR (double)
% The variance of that layer at that time
% expectedY (vector)
% See self.getExpectedValues
% This value is used to determine the node with minimum
% contribution to the network.
% option (string)
% 'least_contribution': In case the pruning rule gets
% approved, it will return the position of the least
% contributing node.
% 'below_contribution': In case the pruning rule gets
% approved, it will return an array with the positions of
% all nodes whose contribution is below a certain
% threshold.
%
% returns an integer indicating the position of the node that
% should be removed from that layer. If no node should be
% removed, returns zero instead.
nhl = layerNo; %readability
prunableNodes = 0;
ALPHA_1 = 2.5;
ALPHA_2 = 1.5;
current = (self.meanVAR(nhl) + self.stdVAR(nhl));
biased_min = (self.minMeanVAR(nhl)...
+ (ALPHA_1 * exp(-VAR) + ALPHA_2)...
* self.minStdVAR(nhl));
if self.growable(nhl) == false ...
&& self.layers(nhl) > 1 ...
&& self.nSamplesLayer(nhl) > self.inputSize + 1 ...
&& current >= biased_min
switch option
case self.PRUNE_SINGLE_LEAST_CONTRIBUTION_NODES()
[~, prunableNodes] = min(expectedY);
case self.PRUNE_MULTIPLE_NODES_WITH_CONTRIBUTION_BELOW_EXPECTED()
nodesToPrune = expectedY <= abs(mean(expectedY) - var(expectedY));
if sum(nodesToPrune)
prunableNodes = find(expectedY <= abs(mean(expectedY) - var(expectedY)));
else
[~, prunableNodes] = min(expectedY);
end
end
end
end
function growWeightRow(self, weightArrayNo)
%growWeightRow
% Add 1 extra weight at the inbound row.
% weightArrayNo (integer)
% Weight position
w = weightArrayNo; % readability
if w > numel(self.weight)
[n_in, n_out] = size(self.outputWeight);
n_in = n_in + 1;
self.outputWeight = [self.outputWeight; normrnd(0, sqrt(2 / (n_in)), [1, n_out])];
self.outputMomentum = [self.outputMomentum; zeros(1, n_out)];
else
[n_in, n_out] = size(self.weight{w});
n_in = n_in + 1;
self.weight{w} = [self.weight{w}; normrnd(0, sqrt(2 / (n_in)), [1, n_out])];
self.momentum{w} = [self.momentum{w}; zeros(1, n_out)];
end
end
function growWeightColumn(self, weightArrayNo)
%growWeightColumn
% Add 1 extra weight at the outbound column.
% weightArrayNo (integer)
% Weight position
w = weightArrayNo; % readability
if w > numel(self.weight)
[n_out, n_in] = size(self.outputWeight);
n_in = n_in + 1;
self.outputWeight = [self.outputWeight normrnd(0, sqrt(2 / (n_in)), [n_out, 1])];
self.outputMomentum = [self.outputMomentum zeros(n_out, 1)];
else
[n_out, n_in] = size(self.weight{w});
n_in = n_in + 1;
self.weight{w} = [self.weight{w} normrnd(0, sqrt(2 / (n_in)), [n_out, 1])];
self.momentum{w} = [self.momentum{w} zeros(n_out, 1)];
end
end
function pruneWeightRow(self, weightNo, nodeNo)
%pruneWeightRow
% Remove 1 weight from the inbound row.
% weightNo (integer)
% Weight position
% nodeNo (integer)
% Position of the node to be removed
w = weightNo; % readability
n = nodeNo; %readability
if w > numel(self.weight)
self.outputWeight(n, :) = [];
self.outputMomentum(n, :) = [];
else
self.weight{w}(n, :) = [];
self.momentum{w}(n, :) = [];
end
end
function pruneWeightColumn(self, weightNo, nodeNo)
%pruneWeightColumn
% Remove 1 weight from the outbound column
% weightArrayNo (integer)
% Weight position
% nodeNo (integer)
% Position of the node to be removed
w = weightNo; % readability
n = nodeNo; %readability
if w > numel(self.weight)
self.outputWeight(:, n) = [];
self.outputMomentum(:, n) = [];
else
self.weight{w}(:, n) = [];
self.momentum{w}(:, n) = [];
end
end
function growBias(self, biasArrayNo)
%growBias
% Add 1 extra bias at the inbound row.
% biasArrayNo (integer)
% Bias position
b = biasArrayNo; %readability
if b > numel(self.weight)
self.outputBias = [self.outputBias normrnd(0, sqrt(2 / (self.layers(end) + 1)))];
self.outputBiasMomentum = [self.outputBiasMomentum 0];
else
self.bias{b} = [self.bias{b} normrnd(0, sqrt(2 / (self.layers(b) + 1)))];
self.biasMomentum{b} = [self.biasMomentum{b} 0];
end
end
function pruneBias(self, biasArrayNo, nodeNo)
%pruneBias
% Remove 1 bias from the inbound row.
% biasArrayNo (integer)
% Bias position
% nodeNo (integer)
% Position of the node to be removed
b = biasArrayNo; % readability
n = nodeNo; %readability
if b > numel(self.weight)
self.outputBias(n) = [];
self.outputBiasMomentum(n) = [];
else
self.bias{b}(n) = [];
self.biasMomentum{b}(n) = [];
end
end
function [Ex, Ex2, Ez, Ez2] = computeExpectedValues(self, nHiddenLayer)
%computeExpectedValues
% Compute statistical expectation values for a specific
% hidden layer
%
% Returns Ex = Expected value of that layer
% Ex2 = Expected squared value of that layer
% Ez = Expected outbound value of that layer
% Ez2 = Expected outbound squared value of that layer
nhl = nHiddenLayer; %readability
x = self.layerValue{1};
[self.dataMean, self.dataVar, self.dataStd]...
= self.util.recursiveMeanStd(x, self.dataMean, self.dataVar, self.nSamplesFeed);
if self.isAgmmAble
Ex = 0;
Ex2 = 0;
for m = 1 : self.agmm.M()
gmm = self.agmm.gmmArray(m);
[tempEx, tempEx2] = computeInboundExpectedValues(self, nhl, gmm);
Ex = Ex + tempEx;
Ex2 = Ex2 + tempEx2;
end
else
[Ex, Ex2] = computeInboundExpectedValues(self, nhl);
end
[Ez, Ez2] = computeOutboundExpectedValues(self, Ex, Ex2);
end
function [Ex, Ex2] = computeInboundExpectedValues(self, layerNo, gmm)
%computeInboundExpectedValues
% Compute statistical expectation values for a specific
% hidden layer
% layerNo (integer)
% Layer to be evaluated
%
% Returns Ex = Expected value of that layer
% Ex2 = Expected squared value of that layer
nhl = layerNo - 1; %readability
if nhl == 1
inference = 1;
center = self.dataMean;
std = self.dataStd;
if nargin == 3
inference = gmm.weight;
center = gmm.center;
std = sqrt(gmm.var);
end
py = self.util.probit(center, std);
Ex = inference * sigmf(self.weight{1} * py' + self.bias{1}', [1, 0]);
else
[Ex, ~] = self.computeInboundExpectedValues(nhl);
weight_ = self.outputWeight;
bias_ = self.outputBias;
if nhl < self.nHiddenLayers + 1
weight_ = self.weight{nhl};
bias_ = self.bias{nhl};
end
Ex = sigmf(weight_ * Ex + bias_', [1, 0]);
end
Ex2 = Ex .^ 2;
end
function [Ez, Ez2] = computeOutboundExpectedValues(self, Ex, Ex2)
%computeOutboundExpectedValues
% Compute statistical expectation values for a specific
% hidden layer
% Ex (double, vector or matrix)
% Expected value
% Ex2 (double, vector or matrix)
% Expected squared value
%
% Returns Ez = Expected outbound value of that layer
% Ez2 = Expected outbound squared value of that layer
Ez = self.outputWeight * Ex + self.outputBias';
Ez = exp(Ez - max(Ez));
Ez = Ez ./ sum(Ez);
Ez2 = self.outputWeight * Ex2 + self.outputBias';
Ez2 = exp(Ez2 - max(Ez2));
Ez2 = Ez2 ./ sum(Ez2);
end
function NS = computeNetworkSignificance(self, Ez, Ez2, y)
%computeNetworkSignificance
% Compute the current Network Significance of the model with
% respect to a target
% Ez (double, vector or matrix)
% Expected outbound value of that layer
% Ez2 (double, vector or matrix)
% Expected outbound squared value of that layer
% y (double, vector or matrix)
% A target class
%
% return NS = The network significance
NS = self.computeBIAS2(Ez, y) + self.computeVAR(Ez, Ez2);
end
function BIAS2 = computeBIAS2(~, Ez, y)
%computeBIAS2
% Compute the current BIAS2 of the model wrt a target
% Ez (double, vector or matrix)
% Expected outbound value of that layer
% y (double, vector or matrix)
% A target class
%
% return BIAS2 = The network squared BIAS
BIAS2 = norm((Ez - y') .^2 , 'fro');
end
function VAR = computeVAR(~, Ez, Ez2)
%computeVAR
% Compute the current VAR of the model
% Ez (double, vector or matrix)
% Expected outbound value of that layer
% Ez2 (double, vector or matrix)
% Expected outbound squared value of that layer
%
% return VAR = The network VAR (variance)
VAR = norm(Ez2 - Ez .^ 2, 'fro');
end
end
%% AGMM public methods
methods (Access = public)
function agmm = runAgmm(self, x, y)
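%runAgmm
% Runs the attached AGMM on a single sample, driving it with the
% network's current squared bias with respect to the target, and
% returns the updated AGMM.
% x (vector)
% Input sample
% y (vector)
% Target of that sample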
bias2 = self.computeNetworkBiasSquare(y);
self.agmm.run(x, bias2);
agmm = self.agmm;
end
end
%% Getters and Setters
methods (Access = public)
function setAgmm(self, agmm)
%setAgmm
% You can use this method to set your own AGMM to this
% network
% agmm (AGMM)
% The AGMM you want to set to this network.
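%
% Example (illustrative):
%   net.setAgmm(AGMM());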
self.isAgmmAble = true;
self.agmm = agmm;
end
function agmm = getAgmm(self)
%getAgmm
% Gets the AGMM that the network is using. If the network has
% an empty AGMM or is not using an AGMM, it will enable AGMM
% and return a new AGMM to you.
if isempty(self.agmm) || self.isAgmmAble == false
self.enableAgmm();
end
agmm = self.agmm;
end
function enableAgmm(self)
%enableAgmm
% Tell the network that it will use AGMM from now on
% It also creates a random AGMM. If you want to use your own
% AGMM, make sure to use setAgmm method afterwards
self.isAgmmAble = true;
self.agmm = AGMM();
end
function disableAgmm(self)
%disableAgmm
% Tell the network that it will NOT use AGMM from now on.
% It deletes the AGMM that was attached to this model. If you
% want to keep track of that AGMM, make sure to load it into
% some variable using the getAgmm method.
self.isAgmmAble = false;
self.agmm = [];
end
function nHiddenLayers = getNumberHiddenLayers(self)
%getNumberHiddenLayers
% Return the number of hidden layers in the network
%
% Returns
% nHiddenLayers (integer): Number of hidden layers
nHiddenLayers = self.nHiddenLayers;
end
end
end