classdef NeuralNetwork < handle & ElasticNodes & NeuralNetworkConstants
    %NEURALNETWORK It encapsulates a common MLP (Multilayer Perceptron, aka Feedforward network)
    %   This object has the main attributes a Neural Network needs to
    %   operate, along with its main functions/behaviors. Some extra
    %   behaviors were built in order to achieve research goals.
    %
    %   This class features elastic network width via ElasticNodes
    %   inheritance. Network width adaptation supports automatic generation
    %   of new hidden nodes and pruning of inconsequential nodes. This
    %   mechanism is controlled by the NS (Network Significance) method,
    %   which estimates the network generalization power in terms of bias
    %   and variance.

    %% Standard Neural Network public properties
    properties (Access = public)
        layers % Layers of a standard neural network
        layerValue % Layer (input and hidden layer) values
        outputLayerValue % Output layer values

        weight % Weights
        bias % Added bias
        momentum % Weight momentum
        biasMomentum % Bias momentum
        outputWeight % Weights to output layer
        outputBias % Bias to output layer
        outputMomentum % Weight momentum from output layer
        outputBiasMomentum % Bias momentum from output layer

        gradient % Gradients
        outputGradient % Gradients from output layer
        biasGradient;
        outputBiasGradient;

        activationFunction % Each real layer activation function
        outputActivationFunctionLossFunction % Output activation/loss function pair

        learningRate = 0.01; % Learning rate
        momentumRate = 0.95; % Momentum rate

        errorValue % Network error
        lossValue % Network loss
        lambda = 0.001; % L2 weight-decay coefficient
    end

    %% Standard Neural Network protected properties
    properties (Access = protected)
        nHiddenLayers % Number of hidden layers (i.e., not counting input and output layers)
        inputSize % Size of input layer
        outputSize % Size of output layer
    end

    %% AGMM properties
    properties (Access = public)
        agmm
    end
    properties (Access = protected)
        isAgmmAble = false;
    end

    %% Metrics and performance public properties
    properties (Access = public)
        % test metrics
        sigma % Network's prediction
        misclassifications % Number of misclassifications after test
        classificationRate % Classification rate after test
        residualError % Residual error after test
        outputedClasses % Classes outputted during test
        trueClasses % True target classes
    end

    %% Helpers protected properties
    properties (Access = protected)
        util = Util; % Caller for several util computations
    end

    %% Standard Neural Network public methods
    methods (Access = public)
        function self = NeuralNetwork(layers)
            %NeuralNetwork
            %   layers (array)
            %       This array describes a FeedForward Network structure by
            %       the number of nodes in each of its layers.
            %       An FFNN with an input layer of 8 nodes, a hidden layer
            %       of 10 nodes and an output layer of 3 nodes would be
            %       described by [8 10 3].
            %       An FFNN with an input layer of 784 nodes, a hidden
            %       layer 1 of 800 nodes, a hidden layer 2 of 400 nodes and
            %       an output layer of 10 nodes would be described as
            %       [784 800 400 10].
            self@ElasticNodes(numel(layers) - 1);

            self.inputSize = layers(1);
            self.outputSize = layers(end);
            self.layers = layers;
            self.nHiddenLayers = length(layers) - 2;

            for i = 1 : self.nHiddenLayers
                % He-like initialization scaled by the inbound layer size (plus bias)
                self.weight{i} = normrnd(0, sqrt(2 / (self.layers(i) + 1)), [self.layers(i + 1), self.layers(i)]);
                self.bias{i} = normrnd(0, sqrt(2 / (self.layers(i) + 1)), [1, self.layers(i + 1)]);
                self.momentum{i} = zeros(size(self.weight{i}));
                self.biasMomentum{i} = zeros(size(self.bias{i}));
                self.activationFunction(i) = self.ACTIVATION_FUNCTION_SIGMOID();
            end

            self.outputWeight = normrnd(0, sqrt(2 / (self.layers(end) + 1)), [self.layers(end), self.layers(end - 1)]);
            self.outputBias = normrnd(0, sqrt(2 / (self.layers(end) + 1)), [1, self.layers(end)]);
            self.outputMomentum = zeros(size(self.outputWeight));
            self.outputBiasMomentum = zeros(size(self.outputBias));
            self.outputActivationFunctionLossFunction = self.ACTIVATION_LOSS_FUNCTION_SOFTMAX_CROSS_ENTROPY();
        end

        function feedforward(self, X, y)
            %feedforward
            %   Perform the forward pass throughout the network and
            %   calculate the network error.
            %   X (matrix)
            %       Input matrix
            %   y (matrix)
            %       Target matrix
            self.forwardpass(X);
            self.calculateError(y);
        end

        function forwardpass(self, X)
            %forwardpass
            %   Perform the forward pass throughout the network without
            %   calculating the network error, which is why it does not
            %   need the target class.
            %   Because of this, we can use this method just to populate
            %   the hidden layers from the source data.
            %   X (matrix)
            %       Input matrix
            self.layerValue{1} = X;

            for i = 1 : self.nHiddenLayers
                previousLayerValueWithBias = [ones(size(self.layerValue{i}, 1), 1) self.layerValue{i}];
                switch self.activationFunction(i)
                    case self.ACTIVATION_FUNCTION_SIGMOID()
                        self.layerValue{i + 1} = sigmf(previousLayerValueWithBias * [self.bias{i}' self.weight{i}]', [1, 0]);
                    case self.ACTIVATION_FUNCTION_TANH()
                        error('Not implemented yet');
                    case self.ACTIVATION_FUNCTION_RELU()
                        error('Not implemented yet');
                    case self.ACTIVATION_FUNCTION_LINEAR()
                        error('Not implemented yet');
                    case self.ACTIVATION_FUNCTION_SOFTMAX()
                        error('Not implemented yet');
                end
            end

            previousLayerValueWithBias = [ones(size(self.layerValue{end}, 1), 1) self.layerValue{end}];
            switch self.outputActivationFunctionLossFunction
                case self.ACTIVATION_LOSS_FUNCTION_SIGMOID_MSE()
                    self.outputLayerValue = sigmf(previousLayerValueWithBias * [self.outputBias' self.outputWeight]', [1, 0]);
                case self.ACTIVATION_LOSS_FUNCTION_TANH()
                    error('Not implemented yet');
                case self.ACTIVATION_LOSS_FUNCTION_RELU()
                    error('Not implemented yet');
                case self.ACTIVATION_LOSS_FUNCTION_SOFTMAX_CROSS_ENTROPY()
                    % Numerically stable softmax
                    self.outputLayerValue = previousLayerValueWithBias * [self.outputBias' self.outputWeight]';
                    self.outputLayerValue = exp(self.outputLayerValue - max(self.outputLayerValue, [], 2));
                    self.outputLayerValue = self.outputLayerValue ./ sum(self.outputLayerValue, 2);
                case self.ACTIVATION_LOSS_FUNCTION_LINEAR_CROSS_ENTROPY()
                    error('Not implemented yet');
            end
        end

        function backpropagate(self)
            %backpropagate
            %   Perform back-propagation throughout the network.
            %   We assume that you have already populated the hidden layers
            %   and the network error by calling the feedforward method.
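            % Bookkeeping note: dW{i} below holds the per-sample deltas of the
            % layer fed by weight{i} (one row per sample), not the final
            % gradients. The weight gradients are only assembled at the end of
            % this method as dW{i}' * layerValue{i}, while db{i} keeps the
            % deltas averaged over the rows of the mini-batch.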
            dW = {zeros(1, self.nHiddenLayers + 1)};
            db = {zeros(1, self.nHiddenLayers + 1)};

            for i = self.nHiddenLayers : -1 : 1
                if i == self.nHiddenLayers
                    % This is the gradient of the loss function
                    switch self.outputActivationFunctionLossFunction
                        case self.ACTIVATION_LOSS_FUNCTION_SIGMOID_MSE()
                            dW{i + 1} = - self.errorValue .* self.outputLayerValue .* (1 - self.outputLayerValue);
                            db{i + 1} = - sum(self.errorValue, 1) / size(self.errorValue, 1);
                        case self.ACTIVATION_LOSS_FUNCTION_TANH()
                            error('Not implemented yet');
                        case self.ACTIVATION_LOSS_FUNCTION_RELU()
                            error('Not implemented yet');
                        case self.ACTIVATION_LOSS_FUNCTION_SOFTMAX_CROSS_ENTROPY()
                            dW{i + 1} = - self.errorValue;
                            db{i + 1} = - sum(self.errorValue, 1) / size(self.errorValue, 1);
                        case self.ACTIVATION_LOSS_FUNCTION_LINEAR_CROSS_ENTROPY()
                            dW{i + 1} = - self.errorValue;
                            db{i + 1} = - sum(self.errorValue, 1) / size(self.errorValue, 1);
                    end
                end

                switch self.activationFunction(i)
                    case self.ACTIVATION_FUNCTION_SIGMOID()
                        dActivationFunction = self.layerValue{i + 1} .* (1 - self.layerValue{i + 1});
                    case self.ACTIVATION_FUNCTION_TANH()
                        error('Not implemented yet');
                    case self.ACTIVATION_FUNCTION_RELU()
                        error('Not implemented yet');
                    case self.ACTIVATION_FUNCTION_LINEAR()
                        dActivationFunction = 1;
                    case self.ACTIVATION_FUNCTION_SOFTMAX()
                        error('Not implemented yet');
                end

                if i == self.nHiddenLayers
                    z = dW{i + 1} * self.outputWeight;
                else
                    z = dW{i + 1} * self.weight{i + 1};
                end
                dW{i} = z .* dActivationFunction;
                db{i} = sum(dW{i}, 1) / size(dW{i}, 1);
            end

            self.outputGradient = dW{end}' * self.layerValue{end};
            self.outputBiasGradient = db{end};
            for i = 1 : self.nHiddenLayers
                self.gradient{i} = dW{i}' * self.layerValue{i};
                self.biasGradient{i} = db{i};
            end
        end

        function test(self, X, y)
            %test
            %   Test the neural network on a batch of data, recording its
            %   raw predictions, misclassifications, classification rate
            %   and residual error.
            %   X (matrix)
            %       Input matrix
            %   y (matrix)
            %       Target matrix
            self.feedforward(X, y);
            m = size(y, 1);

            [~, self.trueClasses] = max(y, [], 2);

            self.sigma = self.outputLayerValue;
            [rawOutput, outputtedClasses] = max(self.sigma, [], 2);

            self.misclassifications = find(outputtedClasses ~= self.trueClasses);
            self.classificationRate = 1 - numel(self.misclassifications) / m;
            self.residualError = 1 - rawOutput;
            self.outputedClasses = outputtedClasses;
        end

        function train(self, X, y, weightNo)
            %train
            %   Train the neural network performing 3 complete stages:
            %       - Feed-forward
            %       - Back-propagation
            %       - Weight update
            %   X (matrix)
            %       Input matrix
            %   y (matrix)
            %       Target matrix
            %   weightNo (integer) [optional]
            %       You have the ability to define which weight and bias you
            %       want to update using backpropagation. This method will
            %       update only that weight and bias, even if there are
            %       weights and biases on layers before and after it.
            %       Remember that 1 indicates the weight and bias that get
            %       out of the input layer.
            self.feedforward(X, y);
            self.backpropagate();

            switch nargin
                case 4
                    self.trainWeight(weightNo);
                case 3
                    for i = self.nHiddenLayers + 1 : -1 : 1
                        self.trainWeight(i);
                    end
            end
        end

        function trainWeight(self, weightNo)
            %trainWeight
            %   This method will only update a single set of weights and
            %   biases. Normally you will not call this method directly,
            %   but through the method train as a middleman.
            %   weightNo (integer)
            %       The number of the weight and bias you want to update.
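            %       For a [8 10 3] network, for example, weightNo = 1 refers
            %       to weight{1}/bias{1}, while weightNo = 2 (that is,
            %       nHiddenLayers + 1) refers to outputWeight/outputBias.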
            %       Remember that 1 indicates the weight and bias that get
            %       out of the input layer.
            self.updateWeight(weightNo);
        end
    end

    %% Standard Neural Network private methods
    methods (Access = private)
        function updateWeight(self, weightNo)
            %updateWeight
            %   Perform weight and bias update on a single weight and bias.
            %   weightNo (integer)
            %       Number/position of the weight/bias you want to update
            w = weightNo; % readability

            if w > self.nHiddenLayers
                dW = self.learningRate .* self.outputGradient;
                db = self.learningRate .* self.outputBiasGradient;
                if self.momentumRate > 0
                    self.outputMomentum = self.momentumRate * self.outputMomentum + dW;
                    self.outputBiasMomentum = self.momentumRate * self.outputBiasMomentum + db;
                    dW = self.outputMomentum;
                    db = self.outputBiasMomentum;
                end
                if false % L2 weight-decay update (disabled)
                    self.outputWeight = (1 - self.learningRate * self.lambda) * self.outputWeight - dW;
                    self.outputBias = (1 - self.learningRate * self.lambda) * self.outputBias - db;
                else
                    self.outputWeight = self.outputWeight - dW;
                    self.outputBias = self.outputBias - db;
                end
            else
                dW = self.learningRate .* self.gradient{w};
                db = self.learningRate .* self.biasGradient{w};
                if self.momentumRate > 0
                    self.momentum{w} = self.momentumRate * self.momentum{w} + dW;
                    self.biasMomentum{w} = self.momentumRate * self.biasMomentum{w} + db;
                    dW = self.momentum{w};
                    db = self.biasMomentum{w};
                end
                if false % L2 weight-decay update (disabled)
                    self.weight{w} = (1 - self.learningRate * self.lambda) * self.weight{w} - dW;
                    self.bias{w} = (1 - self.learningRate * self.lambda) * self.bias{w} - db;
                else
                    self.weight{w} = self.weight{w} - dW;
                    self.bias{w} = self.bias{w} - db;
                end
            end
        end

        function calculateError(self, y)
            %calculateError
            %   Calculates the error.
            %   This method will probably be called by the feedforward
            %   method and will seldom be used standalone.
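            % Convention used throughout this class: errorValue keeps the raw
            % residual (y - outputLayerValue), while lossValue is the scalar
            % loss averaged over the m rows of the mini-batch.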
            m = size(y, 1);

            %TODO: Add the possibility to choose which error function we
            %want to use
            switch self.outputActivationFunctionLossFunction
                case self.ACTIVATION_LOSS_FUNCTION_SIGMOID_MSE()
                    self.errorValue = y - self.outputLayerValue;
                    self.lossValue = 1 / 2 * sum(sum(self.errorValue .^ 2)) / m;
                case self.ACTIVATION_LOSS_FUNCTION_TANH()
                    error('Not implemented yet');
                case self.ACTIVATION_LOSS_FUNCTION_RELU()
                    error('Not implemented yet');
                case self.ACTIVATION_LOSS_FUNCTION_SOFTMAX_CROSS_ENTROPY()
                    self.errorValue = y - self.outputLayerValue;
                    self.lossValue = - sum(sum(y .* log(self.outputLayerValue))) / m;
                case self.ACTIVATION_LOSS_FUNCTION_LINEAR_CROSS_ENTROPY()
                    error('Not implemented yet');
            end
        end
    end

    %% Standard Neural Network statistical metrics public methods
    methods (Access = public)
        function bias2 = computeNetworkBiasSquare(self, y)
            %computeNetworkBiasSquare
            %   Compute the network squared bias in relation to a target.
            %
            %   y (vector)
            %       A single target
            %
            %   Returns
            %       The squared bias of the network related to this target
            dataMean = self.dataMean;
            dataStd = self.dataStd;
            dataVar = self.dataVar;

            self.nSamplesFeed = self.nSamplesFeed + 1;
            [~, ~, Ez, ~] = self.computeExpectedValues(self.nHiddenLayers + 1);
            bias2 = self.computeBIAS2(Ez, y);

            self.nSamplesFeed = self.nSamplesFeed - 1;
            self.dataMean = dataMean;
            self.dataStd = dataStd;
            self.dataVar = dataVar;
        end

        function var = computeNetworkVariance(self)
            %computeNetworkVariance
            %   Compute the network variance.
            %
            %   Returns
            %       The variance of the network
            dataMean = self.dataMean;
            dataStd = self.dataStd;
            dataVar = self.dataVar;

            self.nSamplesFeed = self.nSamplesFeed + 1;
            [~, ~, Ez, Ez2] = self.computeExpectedValues(self.nHiddenLayers + 1);
            var = self.computeVAR(Ez, Ez2);

            self.nSamplesFeed = self.nSamplesFeed - 1;
            self.dataMean = dataMean;
            self.dataStd = dataStd;
            self.dataVar = dataVar;
        end
    end

    %% Standard Neural Network extra public methods
    methods (Access = public)
        function loss = updateWeightsByKullbackLeibler(self, Xs, ys, Xt, yt, GAMMA)
            %updateWeightsByKullbackLeibler
            %   This method is used in Transfer Learning procedures. The
            %   idea is to approximate the source and target distributions.
            %   If you don't have access to the target domain classes, call
            %   this method from a generative model (AutoEncoder or
            %   DenoisingAutoEncoder).
            %   Xs (matrix)
            %       Source input
            %   ys (matrix)
            %       Source target
            %   Xt (matrix)
            %       Target input
            %   yt (matrix)
            %       Target-domain target
            %   GAMMA (float)
            %       Regularizer coefficient
            if nargin == 5
                GAMMA = 0.0001;
            end
            nHL = self.nHiddenLayers + 1; % readability

            self.forwardpass(Xs);
            sourceLayerValue = self.layerValue;
            sourceOutputLayerValue = self.outputLayerValue;

            self.forwardpass(Xt);
            targetLayerValue = self.layerValue;
            targetOutputLayerValue = self.outputLayerValue;

            klLoss = 0;
            for i = nHL : -1 : 2
                klLoss = klLoss + self.util.KLDiv(sum(sourceLayerValue{i}), sum(targetLayerValue{i})) ...
                    + self.util.KLDiv(sum(targetLayerValue{i}), sum(sourceLayerValue{i}));
            end

            if ~isfinite(klLoss)
                loss = 1000;
            else
                loss = klLoss;
            end

            dSource{nHL} = (sourceOutputLayerValue - ys) .* sourceOutputLayerValue .* (1 - sourceOutputLayerValue);
            dTarget{nHL} = (targetOutputLayerValue - yt) .* targetOutputLayerValue .* (1 - targetOutputLayerValue);
            for i = 1 : self.nHiddenLayers
                dSource{i} = sourceLayerValue{i + 1} .* (1 - sourceLayerValue{i + 1});
                dTarget{i} = targetLayerValue{i + 1} .* (1 - targetLayerValue{i + 1});
            end

            for i = nHL : -1 : 1
                if (i == nHL)
                    inboundSourceLayerValue = sourceLayerValue{end};
                    inboundTargetLayerValue = targetLayerValue{end};
                else
                    if (i == self.nHiddenLayers)
                        outboundWeight = self.outputWeight;
                    else
                        outboundWeight = self.weight{i + 1};
                    end
                    inboundSourceLayerValue = sourceLayerValue{i};
                    inboundTargetLayerValue = targetLayerValue{i};
                end

                if (i == nHL)
                    dW{i} = (2 * dSource{i}' * inboundSourceLayerValue) ...
                        + (2 * dTarget{i}' * inboundTargetLayerValue);
                    b = sum((2 * dSource{i}) + (2 * dTarget{i}), 1);
                    db{i} = sum(b, 1) / size(b, 1);
                else
                    dW{i} = ((2 * dSource{i + 1} * outboundWeight) .* dSource{i})' * inboundSourceLayerValue ...
                        + ((2 * dTarget{i + 1} * outboundWeight) .* dTarget{i})' * inboundTargetLayerValue;
                    b = ((2 * dSource{i + 1} * outboundWeight) .* dSource{i}) ...
                        + ((2 * dTarget{i + 1} * outboundWeight) .* dTarget{i});
                    db{i} = sum(b, 1) / size(b, 1);
                end
            end

            for i = nHL : -1 : 1
                if (i == nHL)
                    self.outputWeight = self.outputWeight - self.learningRate * dW{i};
                    self.outputBias = self.outputBias - self.learningRate * db{i};
                else
                    self.weight{i} = self.weight{i} - self.learningRate * dW{i};
                    self.bias{i} = self.bias{i} - self.learningRate * db{i};
                end
            end
        end
    end

    %% Elastic/Evolving Neural Network public methods
    methods (Access = public)
        function widthAdaptationStepwise(self, layerNo, y)
            %widthAdaptationStepwise
            %   Performs network width adaptation in a specific layer,
            %   stepwise (meaning it processes one row at a time).
            %   This method also assumes that you have already passed the
            %   input data through the model via the forwardpass procedure.
            %   layerNo (integer)
            %       Number of the layer you want to perform width
            %       adaptation on. This is normally a hidden layer.
            %   y (double or vector)
            %       Double, if you are performing regression
            %       Vector, if you are performing classification
            %       The target data to be used as validation
            nhl = layerNo; % readability

            self.nSamplesFeed = self.nSamplesFeed + 1;
            self.nSamplesLayer(nhl) = self.nSamplesLayer(nhl) + 1;

            [Ex, ~, Ey, Ey2] = computeExpectedValues(self, nhl);
            bias2 = self.computeBIAS2(Ey, y);
            var = self.computeVAR(Ey, Ey2);

            [self.meanBIAS(nhl), self.varBIAS(nhl), self.stdBIAS(nhl)] ...
                = self.util.recursiveMeanStd(bias2, self.meanBIAS(nhl), self.varBIAS(nhl), self.nSamplesFeed);
            [self.meanVAR(nhl), self.varVAR(nhl), self.stdVAR(nhl)] ...
                = self.util.recursiveMeanStd(var, self.meanVAR(nhl), self.varVAR(nhl), self.nSamplesFeed);

            if self.nSamplesLayer(nhl) <= 1 || self.growable(nhl) == true
                self.minMeanBIAS(nhl) = self.meanBIAS(nhl);
                self.minStdBIAS(nhl) = self.stdBIAS(nhl);
            else
                self.minMeanBIAS(nhl) = min(self.minMeanBIAS(nhl), self.meanBIAS(nhl));
                self.minStdBIAS(nhl) = min(self.minStdBIAS(nhl), self.stdBIAS(nhl));
            end

            if self.nSamplesLayer(nhl) <= self.inputSize + 1 || self.prunable{nhl}(1) ~= 0
                self.minMeanVAR(nhl) = self.meanVAR(nhl);
                self.minStdVAR(nhl) = self.stdVAR(nhl);
            else
                self.minMeanVAR(nhl) = min(self.minMeanVAR(nhl), self.meanVAR(nhl));
                self.minStdVAR(nhl) = min(self.minStdVAR(nhl), self.stdVAR(nhl));
            end

            self.BIAS2{nhl} = [self.BIAS2{nhl} self.meanBIAS(nhl)];
            self.VAR{nhl} = [self.VAR{nhl} self.meanVAR(nhl)];

            self.growable(nhl) = self.isGrowable(nhl, bias2);
            if nargin == 3
                self.prunable{nhl} = self.isPrunable(nhl, var, Ex, self.PRUNE_SINGLE_LEAST_CONTRIBUTION_NODES());
            elseif nargin == 4
                self.prunable{nhl} = self.isPrunable(nhl, var, Ex, self.PRUNE_MULTIPLE_NODES_WITH_CONTRIBUTION_BELOW_EXPECTED());
            end
        end

        function grow(self, layerNo)
            %grow
            %   Add 1 new node to a hidden layer. This appends one extra row
            %   (and a bias entry) to the layer's inbound weight matrix and
            %   one extra column to its outbound weight matrix.
            %   layerNo (integer)
            %       Number of the layer you want to add a node to.
            self.layers(layerNo) = self.layers(layerNo) + 1;
            if layerNo > 1
                self.growWeightRow(layerNo - 1);
                self.growBias(layerNo - 1);
            end
            if layerNo < numel(self.layers)
                self.growWeightColumn(layerNo);
            end
        end

        function prune(self, layerNo, nodeNo)
            %prune
            %   Remove 1 node from a hidden layer. This deletes the
            %   corresponding row (and bias entry) from the layer's inbound
            %   weight matrix and the corresponding column from its outbound
            %   weight matrix.
            %   layerNo (integer)
            %       Number of the layer you want to remove a node from.
            %   nodeNo (integer)
            %       Position of the node to be removed
            self.layers(layerNo) = self.layers(layerNo) - 1;
            if layerNo > 1
                self.pruneWeightRow(layerNo - 1, nodeNo);
                self.pruneBias(layerNo - 1, nodeNo);
            end
            if layerNo < numel(self.layers)
                self.pruneWeightColumn(layerNo, nodeNo);
            end
        end
    end

    %% Elastic/Evolving Neural Network protected methods
    methods (Access = protected)
        function isGrowable = isGrowable(self, layerNo, BIAS2)
            %isGrowable
            %   Evaluate whether a specific layer needs a node added to keep
            %   its network significance parameters stable.
            %   layerNo (integer)
            %       Layer on which the evaluation will be performed. Usually
            %       it is a hidden layer.
            %   BIAS2 (double)
            %       The squished BIAS2 of that layer at that time
            %
            %   returns a boolean indicating whether that layer is ready to
            %   receive a new node or not.
            nhl = layerNo; % readability
            isGrowable = false;

            ALPHA_1 = 1.25;
            ALPHA_2 = 0.75;

            current = (self.meanBIAS(nhl) + self.stdBIAS(nhl));
            biased_min = (self.minMeanBIAS(nhl) ...
                + (ALPHA_1 * exp(-BIAS2) + ALPHA_2) ...
                * self.minStdBIAS(nhl));

            if self.nSamplesLayer(nhl) > 1 && current >= biased_min
                isGrowable = true;
            end
        end

        function prunableNodes = isPrunable(self, layerNo, VAR, expectedY, option)
            %isPrunable
            %   Evaluate whether a specific layer needs a node pruned to keep
            %   its network significance parameters stable.
            %   layerNo (integer)
            %       Layer on which the evaluation will be performed. Usually
            %       it is a hidden layer.
            %   VAR (double)
            %       The squished VAR of that layer at that time
            %   expectedY (vector)
            %       See self.getExpectedValues
            %       This value is used to determine the node with minimum
            %       contribution to the network.
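            %       Here a node's contribution is measured by its expected
            %       activation, so the least contributing node is the one
            %       with the smallest entry of expectedY.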
            %   option (string)
            %       'least_contribution': In case the pruning rule gets
            %       approved, it will return the position of the least
            %       contributing node.
            %       'below_contribution': In case the pruning rule gets
            %       approved, it will return an array with the positions of
            %       all nodes whose contribution is below a certain quantity.
            %
            %   returns an integer indicating the position of which node
            %   should be removed from that layer. If no node should be
            %   removed, returns zero instead.
            nhl = layerNo; % readability
            prunableNodes = 0;

            ALPHA_1 = 2.5;
            ALPHA_2 = 1.5;

            current = (self.meanVAR(nhl) + self.stdVAR(nhl));
            biased_min = (self.minMeanVAR(nhl) ...
                + (ALPHA_1 * exp(-VAR) + ALPHA_2) ...
                * self.minStdVAR(nhl));

            if self.growable(nhl) == false ...
                    && self.layers(nhl) > 1 ...
                    && self.nSamplesLayer(nhl) > self.inputSize + 1 ...
                    && current >= biased_min
                switch option
                    case self.PRUNE_SINGLE_LEAST_CONTRIBUTION_NODES()
                        [~, prunableNodes] = min(expectedY);
                    case self.PRUNE_MULTIPLE_NODES_WITH_CONTRIBUTION_BELOW_EXPECTED()
                        nodesToPrune = expectedY <= abs(mean(expectedY) - var(expectedY));
                        if sum(nodesToPrune)
                            prunableNodes = find(nodesToPrune);
                        else
                            [~, prunableNodes] = min(expectedY);
                        end
                end
            end
        end

        function growWeightRow(self, weightArrayNo)
            %growWeightRow
            %   Add 1 extra weight row at the inbound side.
            %   weightArrayNo (integer)
            %       Weight position
            w = weightArrayNo; % readability

            if w > numel(self.weight)
                [n_in, n_out] = size(self.outputWeight);
                n_in = n_in + 1;
                self.outputWeight = [self.outputWeight; normrnd(0, sqrt(2 / (n_in)), [1, n_out])];
                self.outputMomentum = [self.outputMomentum; zeros(1, n_out)];
            else
                [n_in, n_out] = size(self.weight{w});
                n_in = n_in + 1;
                self.weight{w} = [self.weight{w}; normrnd(0, sqrt(2 / (n_in)), [1, n_out])];
                self.momentum{w} = [self.momentum{w}; zeros(1, n_out)];
            end
        end

        function growWeightColumn(self, weightArrayNo)
            %growWeightColumn
            %   Add 1 extra weight column at the outbound side.
            %   weightArrayNo (integer)
            %       Weight position
            w = weightArrayNo; % readability

            if w > numel(self.weight)
                [n_out, n_in] = size(self.outputWeight);
                n_in = n_in + 1;
                self.outputWeight = [self.outputWeight normrnd(0, sqrt(2 / (n_in)), [n_out, 1])];
                self.outputMomentum = [self.outputMomentum zeros(n_out, 1)];
            else
                [n_out, n_in] = size(self.weight{w});
                n_in = n_in + 1;
                self.weight{w} = [self.weight{w} normrnd(0, sqrt(2 / (n_in)), [n_out, 1])];
                self.momentum{w} = [self.momentum{w} zeros(n_out, 1)];
            end
        end

        function pruneWeightRow(self, weightNo, nodeNo)
            %pruneWeightRow
            %   Remove 1 weight row from the inbound side.
            %   weightNo (integer)
            %       Weight position
            %   nodeNo (integer)
            %       Position of the node to be removed
            w = weightNo; % readability
            n = nodeNo; % readability

            if w > numel(self.weight)
                self.outputWeight(n, :) = [];
                self.outputMomentum(n, :) = [];
            else
                self.weight{w}(n, :) = [];
                self.momentum{w}(n, :) = [];
            end
        end

        function pruneWeightColumn(self, weightNo, nodeNo)
            %pruneWeightColumn
            %   Remove 1 weight column from the outbound side.
            %   weightNo (integer)
            %       Weight position
            %   nodeNo (integer)
            %       Position of the node to be removed
            w = weightNo; % readability
            n = nodeNo; % readability

            if w > numel(self.weight)
                self.outputWeight(:, n) = [];
                self.outputMomentum(:, n) = [];
            else
                self.weight{w}(:, n) = [];
                self.momentum{w}(:, n) = [];
            end
        end

        function growBias(self, biasArrayNo)
            %growBias
            %   Add 1 extra bias at the inbound row.
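            %   A matching zero entry is appended to the corresponding bias
            %   momentum as well.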
            %   biasArrayNo (integer)
            %       Bias position
            b = biasArrayNo; % readability

            if b > numel(self.weight)
                self.outputBias = [self.outputBias normrnd(0, sqrt(2 / (self.layers(end) + 1)))];
                self.outputBiasMomentum = [self.outputBiasMomentum 0];
            else
                self.bias{b} = [self.bias{b} normrnd(0, sqrt(2 / (self.layers(b) + 1)))];
                self.biasMomentum{b} = [self.biasMomentum{b} 0];
            end
        end

        function pruneBias(self, biasArrayNo, nodeNo)
            %pruneBias
            %   Remove 1 bias from the inbound row.
            %   biasArrayNo (integer)
            %       Bias position
            %   nodeNo (integer)
            %       Position of the node to be removed
            b = biasArrayNo; % readability
            n = nodeNo; % readability

            if b > numel(self.weight)
                self.outputBias(n) = [];
                self.outputBiasMomentum(n) = [];
            else
                self.bias{b}(n) = [];
                self.biasMomentum{b}(n) = [];
            end
        end

        function [Ex, Ex2, Ez, Ez2] = computeExpectedValues(self, nHiddenLayer)
            %computeExpectedValues
            %   Compute statistical expected values for a specific hidden
            %   layer.
            %
            %   Returns
            %       Ex  = Expected value of that layer
            %       Ex2 = Expected squared value of that layer
            %       Ez  = Expected outbound value of that layer
            %       Ez2 = Expected outbound squared value of that layer
            nhl = nHiddenLayer; % readability
            x = self.layerValue{1};

            [self.dataMean, self.dataVar, self.dataStd] ...
                = self.util.recursiveMeanStd(x, self.dataMean, self.dataVar, self.nSamplesFeed);

            if self.isAgmmAble
                Ex = 0;
                Ex2 = 0;
                for m = 1 : self.agmm.M()
                    gmm = self.agmm.gmmArray(m);
                    [tempEx, tempEx2] = computeInboundExpectedValues(self, nhl, gmm);
                    Ex = Ex + tempEx;
                    Ex2 = Ex2 + tempEx2;
                end
            else
                [Ex, Ex2] = computeInboundExpectedValues(self, nhl);
            end
            [Ez, Ez2] = computeOutboundExpectedValues(self, Ex, Ex2);
        end

        function [Ex, Ex2] = computeInboundExpectedValues(self, layerNo, gmm)
            %computeInboundExpectedValues
            %   Compute statistical expected values for a specific hidden
            %   layer.
            %   layerNo (integer)
            %       Layer to be evaluated
            %   gmm (object) [optional]
            %       A GMM component whose weight, center and variance are
            %       used in place of the running data statistics
            %
            %   Returns
            %       Ex  = Expected value of that layer
            %       Ex2 = Expected squared value of that layer
            nhl = layerNo - 1; % readability

            if nhl == 1
                inference = 1;
                center = self.dataMean;
                std = self.dataStd;
                if nargin == 3
                    inference = gmm.weight;
                    center = gmm.center;
                    std = sqrt(gmm.var);
                end
                py = self.util.probit(center, std);
                Ex = inference * sigmf(self.weight{1} * py' + self.bias{1}', [1, 0]);
            else
                [Ex, ~] = self.computeInboundExpectedValues(nhl);
                weight_ = self.outputWeight;
                bias_ = self.outputBias;
                if nhl < self.nHiddenLayers + 1
                    weight_ = self.weight{nhl};
                    bias_ = self.bias{nhl};
                end
                Ex = sigmf(weight_ * Ex + bias_', [1, 0]);
            end
            Ex2 = Ex .^ 2;
        end

        function [Ez, Ez2] = computeOutboundExpectedValues(self, Ex, Ex2)
            %computeOutboundExpectedValues
            %   Compute statistical expected outbound values from the
            %   expected values of a hidden layer.
            %   Ex (double, vector or matrix)
            %       Expected value
            %   Ex2 (double, vector or matrix)
            %       Expected squared value
            %
            %   Returns
            %       Ez  = Expected outbound value of that layer
            %       Ez2 = Expected outbound squared value of that layer
            Ez = self.outputWeight * Ex + self.outputBias';
            Ez = exp(Ez - max(Ez));
            Ez = Ez ./ sum(Ez);

            Ez2 = self.outputWeight * Ex2 + self.outputBias';
            Ez2 = exp(Ez2 - max(Ez2));
            Ez2 = Ez2 ./ sum(Ez2);
        end

        function NS = computeNetworkSignificance(self, Ez, Ez2, y)
            %computeNetworkSignificance
            %   Compute the current Network Significance of the model with
            %   respect to a target.
            %   Ez (double, vector or matrix)
            %       Expected outbound value of that layer
            %   Ez2 (double, vector or matrix)
            %       Expected outbound squared value of that layer
            %   y (double, vector or matrix)
            %       A target class
            %
            %   return NS = The network significance
            NS = self.computeBIAS2(Ez, y) + self.computeVAR(Ez, Ez2);
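            % Following the bias-variance framing used by the width adaptation:
            % a high squared bias is treated as a sign of underfitting (node
            % growing), while a high variance is treated as a sign of
            % overfitting (node pruning).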
        end

        function BIAS2 = computeBIAS2(~, Ez, y)
            %computeBIAS2
            %   Compute the current BIAS2 of the model with respect to a target.
            %   Ez (double, vector or matrix)
            %       Expected outbound value of that layer
            %   y (double, vector or matrix)
            %       A target class
            %
            %   return BIAS2 = The network squared BIAS
            BIAS2 = norm((Ez - y') .^ 2, 'fro');
        end

        function VAR = computeVAR(~, Ez, Ez2)
            %computeVAR
            %   Compute the current VAR of the model.
            %   Ez (double, vector or matrix)
            %       Expected outbound value of that layer
            %   Ez2 (double, vector or matrix)
            %       Expected outbound squared value of that layer
            %
            %   return VAR = The network VAR (variance)
            VAR = norm(Ez2 - Ez .^ 2, 'fro');
        end
    end

    %% AGMM public methods
    methods (Access = public)
        function agmm = runAgmm(self, x, y)
            %runAgmm
            %   Feed one sample to the attached AGMM, using the network's
            %   squared bias for that sample, and return the updated AGMM.
            bias2 = self.computeNetworkBiasSquare(y);
            self.agmm.run(x, bias2);
            agmm = self.agmm;
        end
    end

    %% Getters and Setters
    methods (Access = public)
        function setAgmm(self, agmm)
            %setAgmm
            %   You can use this method to set your own AGMM for this
            %   network.
            %   agmm (AGMM)
            %       The AGMM you want to set to this network.
            self.isAgmmAble = true;
            self.agmm = agmm;
        end

        function agmm = getAgmm(self)
            %getAgmm
            %   Gets the AGMM that the network is using. If the network has
            %   an empty AGMM or is not using an AGMM, it will enable AGMM
            %   and return to you a new AGMM.
            if isempty(self.agmm) || self.isAgmmAble == false
                self.enableAgmm();
            end
            agmm = self.agmm;
        end

        function enableAgmm(self)
            %enableAgmm
            %   Tell the network that it will use AGMM from now on.
            %   It also creates a random AGMM. If you want to use your own
            %   AGMM, make sure to use the setAgmm method afterwards.
            self.isAgmmAble = true;
            self.agmm = AGMM();
        end

        function disableAgmm(self)
            %disableAgmm
            %   Tell the network that it will NOT use AGMM from now on.
            %   It deletes the AGMM that was attached to this model. If you
            %   want to keep track of that AGMM, make sure to load it into
            %   some variable using the getAgmm method.
            self.isAgmmAble = false;
            self.agmm = [];
        end

        function nHiddenLayers = getNumberHiddenLayers(self)
            %getNumberHiddenLayers
            %   Return the number of hidden layers in the network.
            %
            %   Returns
            %       nHiddenLayers (integer): Number of hidden layers
            nHiddenLayers = self.nHiddenLayers;
        end
    end
end
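
%% Example usage (a minimal sketch)
% The snippet below is illustrative only: it assumes X is an N-by-8 input
% matrix and y an N-by-3 one-hot target matrix, matching a [8 10 3] network.
%
%   nn = NeuralNetwork([8 10 3]);   % 8 inputs, 10 hidden nodes, 3 outputs
%   for epoch = 1 : 100
%       nn.train(X, y);             % feedforward + backpropagation + update
%   end
%   nn.test(X, y);                  % populates classificationRate, sigma, ...
%   disp(nn.classificationRate);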