ACDC_KNOSYS-2021/ATL/DataManipulator.m

classdef DataManipulator < handle
    %DataManipulator Loads, normalizes and splits datasets into streams.
    %   Reads a CSV dataset whose last column is an integer class label,
    %   one-hot encodes the label, optionally min-max normalizes the
    %   features, and splits the rows into minibatches ("folds") of source
    %   and target data for multistream classification experiments.
    %
    %   Typical use:
    %       dm = DataManipulator('data/');
    %       dm.loadSourceCSV('mydataset');
    %       dm.normalize();
    %       dm.splitAsSourceTargetStreams(1000, 'none', 0.5);
    %       Xs = dm.getXs(1);
    properties (Access = public)
        data = []; % Whole dataset: feature columns followed by one-hot class columns
        nFeatures = 0; % Number of feature columns in data
        nClasses = 0; % Number of class (one-hot) columns in data
        
        nFoldElements = 0; % Number of elements per fold
        nMinibatches = 0; % Number of minibatches produced by the last split
        
        source = {}; % Source data, one cell per minibatch
        target = {}; % Target data, one cell per minibatch
    end
    properties (Access = private)
        X  = {}; % Input data (matrix after loading, cell array after a split)
        y  = {}; % Class data (matrix after loading, cell array after a split)
        Xs = {}; % Source input data, one cell per minibatch
        ys = {}; % Source class data, one cell per minibatch
        Xt = {}; % Target input data, one cell per minibatch
        yt = {}; % Target class data, one cell per minibatch
        
        permutedX = {}; % Row-permuted input data, one cell per minibatch
        permutedy = {}; % Row-permuted class data, one cell per minibatch
        
        indexPermutation = {}; % Permutation used per minibatch (tells source rows from target rows)
        
        dataFolderPath = ''; % Prefix prepended verbatim to every CSV file name
    end
    
    methods (Access = public)
        function self = DataManipulator(dataFolderPath)
            %DataManipulator
            %   dataFolderPath (text)
            %       Prefix that is concatenated (without adding a file
            %       separator) in front of the CSV file names.
            self.dataFolderPath = dataFolderPath;
        end
        
        function loadSourceCSV(self, dataset)
            %loadSourceCSV
            %   Load the file "<dataset>_source.csv" from the data folder.
            %   dataset (text)
            %       Dataset base name (char vector or string scalar).
            
            % char() makes this a plain character concatenation. Joining
            % a [string, char] pair would yield a 1x2 string array that
            % join() glues together with a space ("name _source.csv").
            self.loadCustomCSV([char(dataset), '_source.csv']);
        end
        
        function loadTargetCSV(self, dataset)
            %loadTargetCSV
            %   Load the file "<dataset>_target.csv" from the data folder.
            %   dataset (text)
            %       Dataset base name (char vector or string scalar).
            self.loadCustomCSV([char(dataset), '_target.csv']);
        end
        
        function loadCustomCSV(self, filename)
            %loadCustomCSV
            %   Load a numeric CSV file whose last column is the class
            %   label and every other column is a feature. The label
            %   column is replaced by a one-hot encoding with
            %   max(label) columns.
            %   If the file has an odd number of rows, one random row is
            %   discarded (see checkDatasetEven).
            %   filename (text)
            %       File name, appended to the data folder path.
            %
            %   NOTE(review): labels are assumed to be integers in
            %   1..nClasses; a row with any other label (e.g. 0) ends up
            %   with an all-zero one-hot vector. Confirm against the data.
            self.data = csvread(strcat(self.dataFolderPath, filename));
            if isempty(self.data)
                error('DataManipulator:emptyDataset', ...
                    'The file "%s" contains no numeric data.', char(filename));
            end
            self.checkDatasetEven();
            self.data = double(self.data);
            
            self.nFeatures = size(self.data, 2) - 1;
            features = self.data(:, 1 : self.nFeatures);
            labels = self.data(:, end);
            self.nClasses = max(labels);
            
            % Column k of the comparison is 1 exactly where labels == k.
            self.X = features;
            self.y = double(labels == (1 : self.nClasses));
            self.data = [self.X self.y];
        end
        
        function normalize(self)
            %normalize
            %   Min-max normalize every feature column to the interval
            %   [0, 1]: (x - min) / (max - min). A constant column has a
            %   zero range and is mapped to 0 instead of dividing by zero.
            %   The class columns are left untouched.
            fprintf('Normalizing data\n');
            featureColumns = 1 : self.nFeatures;
            features = self.data(:, featureColumns);
            
            lowest = min(features, [], 1);
            range = max(features, [], 1) - lowest;
            range(range == 0) = 1; % constant feature: avoid 0/0
            self.data(:, featureColumns) = (features - lowest) ./ range;
            
            self.X = self.data(:, featureColumns);
            self.y = self.data(:, self.nFeatures + 1 : end);
        end
        
        function splitAsSourceTargetStreams(self, nFoldElements, method, samplingRatio)
            %splitAsSourceTargetStreams
            %   Split self.data into minibatches of source and target data
            %   to simulate the input of a multistream classification
            %   problem, in which a labeled source stream and an unlabeled
            %   target stream are generated from the same domain.
            %   Labels are kept for the target stream as well; they should
            %   only be used for evaluation purposes.
            %   After the split, use getXs/getYs/getXt/getYt to read each
            %   minibatch.
            %   nFoldElements (integer)
            %       Number of rows per minibatch. Pass zero (or the number
            %       of rows of the dataset) to get a single minibatch.
            %   method (text)
            %       How rows are distributed between source and target:
            %           'none': no biasing; every minibatch is stored
            %           unchanged as both its source and its target part
            %           (samplingRatio is ignored).
            %           'dallas_1': biased split following the paper "An
            %           adaptive framework for multistream classification"
            %           from the CS department of the University of Texas
            %           at Dallas.
            %           'dallas_2': biased split following the paper
            %           "FUSION - An online method for multistream
            %           classification" from the University of Texas at
            %           Dallas.
            %   samplingRatio (double)
            %       Value in the interval [0.0, 1.0]: fraction of each
            %       minibatch that goes to the source stream. The target
            %       stream receives the remaining rows.
            
            % size(..., 1) is the row count; length() would return the
            % largest dimension, which is wrong for wide datasets.
            if nFoldElements == 0
                self.nFoldElements = size(self.data, 1);
            else
                self.nFoldElements = nFoldElements;
            end
            
            % Drop minibatches left over from a previous split.
            self.source = {};
            self.target = {};
            
            switch method
                case 'none'
                    self.splitAsSourceTargetStreams_none(self.nFoldElements, samplingRatio);
                case 'dallas_1'
                    self.splitAsSourceTargetStreams_dallas1(self.nFoldElements, samplingRatio);
                case 'dallas_2'
                    self.splitAsSourceTargetStreams_dallas2(self.nFoldElements, samplingRatio);
                otherwise
                    error('DataManipulator:unknownSplitMethod', ...
                        'Unknown split method "%s". Use ''none'', ''dallas_1'' or ''dallas_2''.', ...
                        char(method));
            end
            
            self.createXsYsXtYt();
        end
        
        function X = getX(self, idx)
            %getX
            %   Index the stored input data with (idx, :).
            %   After loading/normalizing, the input data is a matrix and
            %   the rows idx are returned. After
            %   splitAsSourceTargetStreams the input data is a cell array
            %   (one cell per minibatch) and the same expression indexes
            %   that cell array instead.
            X = self.X(idx,:);
        end
        
        function y = getY(self, idx)
            %getY
            %   Index the stored class data with (idx, :). See getX for
            %   how the result depends on whether a split was performed.
            y = self.y(idx,:);
        end
        
        function Xs = getXs(self, nMinibatch)
            %getXs
            %   Get the input matrix of one source minibatch. Only
            %   available after splitAsSourceTargetStreams.
            %   nMinibatch (integer)
            %       The minibatch index (1..nMinibatches)
            Xs = self.Xs{nMinibatch};
        end
        function ys = getYs(self, nMinibatch)
            %getYs
            %   Get the one-hot class matrix of one source minibatch. Only
            %   available after splitAsSourceTargetStreams.
            %   nMinibatch (integer)
            %       The minibatch index (1..nMinibatches)
            ys = self.ys{nMinibatch};
        end
        function Xt = getXt(self, nMinibatch)
            %getXt
            %   Get the input matrix of one target minibatch. Only
            %   available after splitAsSourceTargetStreams.
            %   nMinibatch (integer)
            %       The minibatch index (1..nMinibatches)
            Xt = self.Xt{nMinibatch};
        end
        function yt = getYt(self, nMinibatch)
            %getYt
            %   Get the one-hot class matrix of one target minibatch. Only
            %   available after splitAsSourceTargetStreams.
            %   nMinibatch (integer)
            %       The minibatch index (1..nMinibatches)
            yt = self.yt{nMinibatch};
        end
    end
    methods (Access = private)
        function splitAsSourceTargetStreams_none(self, elementsPerFold, samplingRatio) %#ok<INUSD>
            %splitAsSourceTargetStreams_none
            %   Cut self.data into consecutive minibatches of
            %   elementsPerFold rows (the last one may be shorter) without
            %   any biasing. Each minibatch is stored unchanged as both
            %   source{b} and target{b}.
            %
            %   elementsPerFold (integer)
            %       Number of rows per minibatch.
            %   samplingRatio (double)
            %       Unused by this method; kept so that all split methods
            %       share the same signature.
            nRows = size(self.data, 1);
            self.nFoldElements = elementsPerFold;
            
            % First row of every minibatch. Slicing by range includes the
            % final row of the dataset and avoids growing a matrix one
            % row at a time.
            firstRows = 1 : elementsPerFold : nRows;
            for b = 1 : numel(firstRows)
                lastRow = min(firstRows(b) + elementsPerFold - 1, nRows);
                chunk = self.data(firstRows(b) : lastRow, :);
                self.source{b} = chunk;
                self.target{b} = chunk;
            end
            
            self.nMinibatches = numel(firstRows);
        end
        function splitAsSourceTargetStreams_dallas1(self, elementsPerFold, samplingRatio)
            %splitAsSourceTargetStreams_dallas1
            %   Biased source/target split following the paper "An
            %   adaptive framework for multistream classification" from
            %   the CS department of the University of Texas at Dallas.
            %
            %   For every minibatch each row gets the score
            %   exp(-||x - mean(x)||^2), where the mean is taken over the
            %   rows of that minibatch. Rows are sorted by ascending score;
            %   the first ceil(m * samplingRatio) rows become the source
            %   part and the remaining rows the target part.
            %
            %   elementsPerFold (integer)
            %       Upper bound for the number of rows per minibatch.
            %   samplingRatio (double)
            %       Value in the interval [0.0, 1.0]: fraction of each
            %       minibatch that goes to the source stream.
            %
            %   NOTE(review): when the rows do not divide evenly into the
            %   planned minibatches and no extra minibatch is added, the
            %   trailing remainder rows are not assigned to any minibatch.
            rowsNumber = size(self.data, 1);
            
            numberOfFolds = round(rowsNumber / elementsPerFold);
            chunkSize = round(rowsNumber / numberOfFolds);
            numberOfFoldsRounded = round(rowsNumber / chunkSize);
            balancedSize = rowsNumber / numberOfFoldsRounded;
            % floor keeps the fold size usable as a row index.
            self.nFoldElements = min(elementsPerFold, floor(balancedSize));
            
            % The balanced size does not fit: add one fold for the rest.
            if balancedSize > elementsPerFold
                numberOfFolds = numberOfFolds + 1;
            end
            self.nMinibatches = numberOfFolds;
            
            for i = 1 : numberOfFolds
                fold = self.sliceFold(i, numberOfFoldsRounded);
                x = fold(:, 1 : end - self.nClasses);
                
                % One score per row (squared Euclidean distance to the
                % fold mean). mean(x, 1) stays a per-feature mean even
                % when the fold has a single row.
                deviation = x - mean(x, 1);
                probability = exp(-sum(deviation .^ 2, 2));
                
                self.assignFoldByScore(i, fold, probability, samplingRatio);
            end
        end
        function splitAsSourceTargetStreams_dallas2(self, elementsPerFold, samplingRatio)
            %splitAsSourceTargetStreams_dallas2
            %   Biased source/target split following the paper "FUSION -
            %   An online method for multistream classification" from the
            %   University of Texas at Dallas.
            %
            %   For every minibatch each row gets the score
            %   exp(-||d||_2 / (2 * std(||d||_1)^2)), where d is the row
            %   minus the per-feature mean of the minibatch and the
            %   standard deviation is taken over the rows. Rows are sorted
            %   by ascending score; the first ceil(m * samplingRatio) rows
            %   become the source part and the remaining rows the target
            %   part.
            %   The minibatch size is forced to be even.
            %
            %   elementsPerFold (integer)
            %       Upper bound for the number of rows per minibatch.
            %   samplingRatio (double)
            %       Value in the interval [0.0, 1.0]: fraction of each
            %       minibatch that goes to the source stream.
            %
            %   NOTE(review): when the rows do not divide evenly into the
            %   planned minibatches and no extra minibatch is added, the
            %   trailing remainder rows are not assigned to any minibatch.
            rowsNumber = size(self.data, 1);
            
            numberOfFolds = round(rowsNumber / elementsPerFold);
            chunkSize = round(rowsNumber / numberOfFolds);
            numberOfFoldsRounded = round(rowsNumber / chunkSize);
            
            % Largest even fold size that fits the balanced plan.
            balancedSize = floor(rowsNumber / numberOfFoldsRounded);
            if mod(balancedSize, 2) ~= 0
                balancedSize = balancedSize - 1;
            end
            self.nFoldElements = min(elementsPerFold, balancedSize);
            
            % The balanced size does not fit: add one fold for the rest.
            if rowsNumber / numberOfFoldsRounded > elementsPerFold
                numberOfFolds = numberOfFolds + 1;
            end
            self.nMinibatches = numberOfFolds;
            
            for i = 1 : numberOfFolds
                fold = self.sliceFold(i, numberOfFoldsRounded);
                x = fold(:, 1 : end - self.nClasses);
                
                % Row-wise norms. The explicit dimension keeps one value
                % per row even for single-feature data.
                deviation = x - mean(x, 1);
                norm_1 = vecnorm(deviation, 1, 2);
                norm_2 = vecnorm(deviation, 2, 2);
                probability = exp(-norm_2 / (2 * std(norm_1) ^ 2));
                
                self.assignFoldByScore(i, fold, probability, samplingRatio);
            end
        end
        
        function fold = sliceFold(self, i, numberOfFoldsRounded)
            %sliceFold
            %   Return the rows of self.data that belong to minibatch i,
            %   using self.nFoldElements as the minibatch size. A
            %   minibatch beyond numberOfFoldsRounded takes every
            %   remaining row.
            ck = self.nFoldElements;
            firstRow = ck * (i - 1) + 1;
            if i > numberOfFoldsRounded
                fold = self.data(firstRow : end, :);
            else
                fold = self.data(firstRow : ck * i, :);
            end
        end
        
        function assignFoldByScore(self, i, fold, probability, samplingRatio)
            %assignFoldByScore
            %   Sort the rows of fold by ascending probability (one value
            %   per row) and store the first ceil(m * samplingRatio) rows
            %   in source{i} and the remaining rows in target{i}.
            [~, order] = sort(probability);
            m = size(fold, 1);
            nSource = ceil(m * samplingRatio);
            
            self.source{i} = fold(order(1 : nSource), :);
            self.target{i} = fold(order(nSource + 1 : m), :);
        end
        
        function createXsYsXtYt(self)
            %createXsYsXtYt
            %   Build, for every minibatch, the input and class matrices
            %   of the source part (Xs, ys), of the target part (Xt, yt)
            %   and of both stacked together (X, y; source rows first).
            %   It also stores a randomly row-permuted copy of the stacked
            %   data together with the permutation that produced it.
            self.X  = {};
            self.y  = {};
            self.Xs = {};
            self.ys = {};
            self.Xt = {};
            self.yt = {};
            self.permutedX = {};
            self.permutedy = {};
            self.indexPermutation = {};
            for i = 1 : self.nMinibatches
                sourceFold = self.source{i};
                targetFold = self.target{i};
                
                self.Xs{i} = sourceFold(:, 1 : end - self.nClasses);
                self.ys{i} = sourceFold(:, self.nFeatures + 1 : end);
                self.Xt{i} = targetFold(:, 1 : end - self.nClasses);
                self.yt{i} = targetFold(:, self.nFeatures + 1 : end);
                
                stackedX = [self.Xs{i}; self.Xt{i}];
                stackedY = [self.ys{i}; self.yt{i}];
                self.X{i} = stackedX;
                self.y{i} = stackedY;
                
                p = randperm(size(stackedX, 1));
                self.permutedX{i} = stackedX(p, :);
                self.permutedy{i} = stackedY(p, :);
                self.indexPermutation{i} = p;
            end
        end
        
        function checkDatasetEven(self)
            %checkDatasetEven
            %   Make the number of rows of the dataset even, so that it
            %   can be split into equally sized source and target parts
            %   (when splitting with a 0.5 ratio).
            %   If the number of rows is odd, one randomly chosen row is
            %   thrown away.
            nRows = size(self.data, 1);
            if mod(nRows, 2) ~= 0
                p = ceil(rand() * nRows);
                self.data(p, :) = [];
            end
        end
    end
end
Add files via upload 2021-10-04 18:31:00 +08:00			`classdef DataManipulator < handle`
			`%DataManipulator It manipulates and prepare data`
			`% It manipulates and prepare data used to train and test our research`
			`% models.`
			`% It is already prepared to load and interact with mostly of the data`
			`% used in our lab.`
			`properties (Access = public)`
			`data = []; % Whole dataset`
			`nFeatures = 0; % Number of features from the dataset`
			`nClasses = 0; % Number of classes from the dataset`

			`nFoldElements = 0; % Number of elements per fold`
			`nMinibatches = 0; % Number of minibatches`

			`source = {}; % Souce data`
			`target = {}; % Target data`
			`end`
			`properties (Access = private)`
			`X = {}; % Input data`
			`y = {}; % Class data`
			`Xs = {}; % Source input data`
			`ys = {}; % Source class data`
			`Xt = {}; % Target input data`
			`yt = {}; % Target class data`

			`permutedX = {}; % Permutted Input data`
			`permutedy = {}; % Permutted Class data`

			`indexPermutation = {}; % Permuttation index (in order to know if it source or target)`

			`dataFolderPath = '';`
			`end`

			`methods (Access = public)`
			`function self = DataManipulator(dataFolderPath)`
			`self.dataFolderPath = dataFolderPath;`
			`end`

			`function loadSourceCSV(self, dataset)`
			`self.loadCustomCSV(join([dataset, '_source.csv']))`
			`end`

			`function loadTargetCSV(self, dataset)`
			`self.loadCustomCSV(join([dataset, '_target.csv']))`
			`end`

			`function loadCustomCSV(self, filename)`
			`self.data = [];`
			`self.data = csvread(strcat(self.dataFolderPath, filename));`
			`self.checkDatasetEven();`
			`self.data = double(self.data);`
			`self.nFeatures = size(self.data, 2) - 1;`
			`self.nClasses = 1;`
			`self.X = self.data(:,1:end-self.nClasses);`
			`self.y = self.data(:,self.nFeatures+1:end);`
			`self.nClasses = max(self.y);`

			`y_one_hot = zeros(size(self.y, 1), self.nClasses);`
			`for i = 1 : self.nClasses`
			`rows = self.y == i;`
			`y_one_hot(rows, i) = 1;`
			`end`
			`self.y = y_one_hot;`
			`self.data = [self.X self.y];`
			`end`

			`function normalize(self)`
			`%normalize`
			`% Normalize every feature between 0 and 1`
			`fprintf('Normalizing data\n');`
			`for i = 1 : self.nFeatures`
			`self.data(:, i) = (self.data(:, i) - min(self.data(:, i), [], 'all'))/max(self.data(:, i), [], 'all');`
			`end`


			`self.X = self.data(:, 1 : self.nFeatures);`
			`self.y = self.data(:, self.nFeatures + 1 : end);`
			`end`

			`function splitAsSourceTargetStreams(self, nFoldElements, method, samplingRatio)`
			`%splitAsSourceTargetStreams`
			`% Split the function to simulate a Multistream classification`
			`% input domains.`
			`% In a Multistream classification problem, we consider that`
			`% two different but related processes generate data`
			`% continuously from a domain D (in this case, self.data). The`
			`% first process operates in a supervised environment, i.e.,`
			`% all the data instances that are generated from the first`
			`% process are labeled. On the contraty, the second process`
			`% generates unlabeled data from the same domain. The stream`
			`% of data generated form the above processes are called the`
			`% source stream and the target stream.`
			`% This functions will return label for the target stream,`
			`% which the user should only use for ensemble evaluation`
			`% purposes`
			`% nFoldElements (integer)`
			`% Both source and target data will be splited in chunks`
			`% of data containing n elements per chunk/fold.`
			`% If you only want one chunk, pass zero or size(data,1)`
			`% as argument.`
			`% method (string)`
			`% What kind of method will be used to generated`
			`% distribute the data into source and target. Usually,`
			`% Multistream Classification problems distribute the data`
			`% using some bias probability.`
			`% Options:`
			`% 'none': Source and Target streams will be splited on`
			`% half`
			`% 'dallas_1: Source and Target streams will be splited`
			`% on half using the bias described by paper "An`
			`% adaptive framework for multistream classification"`
			`% from the CS deparment of the university of Texas at`
			`% Dallas`
			`% 'dallas_2:' Source and Target streams will be`
			`% splited on half using the bias described by paper`
			`% "FUSION - An online method for multistream`
			`% classification" from the university of Texas at`
			`% Dallas.`
			`% samplingRatio (double)`
			`% Value in the interval [0.0,1.0] which describes the`
			`% percentage of sampling that would go to Source Stream.`
			`% Target will have 1 - n percentagem of data.`
			`if nFoldElements == 0`
			`self.nFoldElements = length(self.data);`
			`else`
			`self.nFoldElements = nFoldElements;`
			`end`

			`switch method`
			`case 'none'`
			`self.splitAsSourceTargetStreams_none(self.nFoldElements, samplingRatio);`
			`case 'dallas_1'`
			`self.splitAsSourceTargetStreams_dallas1(self.nFoldElements, samplingRatio);`
			`case 'dallas_2'`
			`self.splitAsSourceTargetStreams_dallas2(self.nFoldElements, samplingRatio);`
			`end`

			`self.createXsYsXtYt()`
			`end`

			`function X = getX(self, idx)`
			`X = self.X(idx,:);`
			`end`

			`function y = getY(self, idx)`
			`y = self.y(idx,:);`
			`end`

			`function Xs = getXs(self, nMinibatch)`
			`%getXs`
			`% Get the input matrix from a specific source data stream.`
			`% The source stream will be only created when we are dealing`
			`% with a dataset that was splitted into source and target`
			`% data streams.`
			`% nMinibatch (integer)`
			`% The minibatch iteration`
			`Xs = self.Xs{nMinibatch};`
			`end`
			`function ys = getYs(self, nMinibatch)`
			`%getXs`
			`% Get the target matrix from a specific source data stream.`
			`% The source stream will be only created when we are dealing`
			`% with a dataset that was splitted into source and target`
			`% data streams.`
			`% nMinibatch (integer)`
			`% The minibatch iteration`
			`ys = self.ys{nMinibatch};`
			`end`
			`function Xt = getXt(self, nMinibatch)`
			`%getXt`
			`% Get the input matrix from a specific target data stream.`
			`% The target stream will be only created when we are dealing`
			`% with a dataset that was splitted into source and target`
			`% data streams.`
			`% nMinibatch (integer)`
			`% The minibatch iteration`
			`Xt = self.Xt{nMinibatch};`
			`end`
			`function yt = getYt(self, nMinibatch)`
			`%getXs`
			`% Get the target matrix from a specific target data stream.`
			`% The target stream will be only created when we are dealing`
			`% with a dataset that was splitted into source and target`
			`% data streams.`
			`% nMinibatch (integer)`
			`% The minibatch iteration`
			`yt = self.yt{nMinibatch};`
			`end`
			`end`
			`methods (Access = private)`
			`function splitAsSourceTargetStreams_none(self, elementsPerFold, samplingRatio)`
			`%splitAsSourceTargetStreams_none`
			`% Split the function to simulate a Multistream classification`
			`% input domains.`
			`%`
			`% Source and Target streams will be splited on half`
			`%`
			`% nFoldElements (integer)`
			`% Both source and target data will be splited in chunks`
			`% of data containing n elements per chunk/fold.`
			`% If you only want one chunk, pass zero or size(data,1)`
			`% as argument.`
			`% samplingRatio (double)`
			`% Value in the interval [0.0,1.0] which describes the`
			`% percentage of sampling that would go to Source Stream.`
			`% Target will have 1 - n percentagem of data.`
			`[rowsNumber, ~] = size(self.data);`

			`self.nFoldElements = elementsPerFold;`

			`j = 0;`
			`b = 1;`
			`i = 1;`
			`source = [];`
			`while i < size(self.data, 1)`
			`while j < self.nFoldElements && i < size(self.data, 1)`
			`source = [source; self.data(i,:)];`
			`j = j + 1;`
			`i = i + 1;`
			`end`
			`self.source{b} = source;`
			`self.target{b} = source;`
			`source = [];`
			`j = 0;`
			`b = b + 1;`
			`end`

			`self.nMinibatches = b - 1;`
			`end`
			`function splitAsSourceTargetStreams_dallas1(self, elementsPerFold, samplingRatio)`
			`%splitAsSourceTargetStreams_dallas1`
			`% Split the function to simulate a Multistream classification`
			`% input domains.`
			`%`
			`% Source and Target streams will be splited on half using the`
			`% bias described by paper "An adaptive framework for`
			`% multistream classification" from the CS deparment of the`
			`% university of Texas at Dallas`
			`%`
			`% nFoldElements (integer)`
			`% Both source and target data will be splited in chunks`
			`% of data containing n elements per chunk/fold.`
			`% If you only want one chunk, pass zero or size(data,1)`
			`% as argument.`
			`% samplingRatio (double)`
			`% Value in the interval [0.0,1.0] which describes the`
			`% percentage of sampling that would go to Source Stream.`
			`% Target will have 1 - n percentagem of data.`
			`[rowsNumber, ~] = size(self.data);`

			`numberOfFolds = round(length(self.data)/elementsPerFold);`
			`chunkSize = round(rowsNumber/numberOfFolds);`
			`numberOfFoldsRounded = round(rowsNumber/chunkSize);`
			`self.nFoldElements = min(elementsPerFold, length(self.data)/numberOfFoldsRounded);`

			`if length(self.data)/numberOfFoldsRounded > elementsPerFold`
			`numberOfFolds = numberOfFolds + 1;`
			`end`
			`self.nMinibatches = numberOfFolds;`
			`ck = self.nFoldElements;`

			`for i = 1:numberOfFolds`
			`x = [];`
			`data = [];`
			`if i > numberOfFoldsRounded`
			`x = self.data(ck * (i-1) + 1:end,1:end-self.nClasses);`
			`data = self.data(ck * (i-1) + 1:end,1:end);`
			`else`
			`x = self.data(ck * (i-1) + 1:ck * i,1:end-self.nClasses);`
			`data = self.data(ck * (i-1) + 1:ck * i,1:end);`
			`end`

			`x_mean = mean(x);`
			`probability = exp(-abs(x - x_mean).^2);`
			`[~,idx] = sort(probability);`

			`m = size(data,1);`
			`source = data(idx(1:ceil(m*samplingRatio)),1:end);`
			`target = data(idx(ceil(m*samplingRatio)+1:length(data)),1:end);`

			`self.source{i} = source;`
			`self.target{i} = target;`
			`end`
			`end`
			`function splitAsSourceTargetStreams_dallas2(self, elementsPerFold, samplingRatio)`
			`%splitAsSourceTargetStreams_dallas2`
			`% Split the function to simulate a Multistream classification`
			`% input domains.`
			`%`
			`% Source and Target streams will be splited on half using the`
			`% bias described by paper "FUSION - An online method for`
			`% multistream classification" from the university of Texas at`
			`% Dallas.`
			`%`
			`% nFoldElements (integer)`
			`% Both source and target data will be splited in chunks`
			`% of data containing n elements per chunk/fold.`
			`% If you only want one chunk, pass zero or size(data,1)`
			`% as argument.`
			`% samplingRatio (double)`
			`% Value in the interval [0.0,1.0] which describes the`
			`% percentage of sampling that would go to Source Stream.`
			`% Target will have 1 - n percentagem of data.`

			`[rowsNumber, ~] = size(self.data);`

			`numberOfFolds = round(length(self.data)/elementsPerFold);`
			`chunkSize = round(rowsNumber/numberOfFolds);`
			`numberOfFoldsRounded = round(rowsNumber/chunkSize);`
			`if mod(floor(size(self.data, 1)/numberOfFoldsRounded), 2) == 0`
			`self.nFoldElements = min(elementsPerFold, floor(size(self.data, 1)/numberOfFoldsRounded));`
			`else`
			`self.nFoldElements = min(elementsPerFold, floor(size(self.data, 1)/numberOfFoldsRounded) - 1);`
			`end`


			`if length(self.data)/numberOfFoldsRounded > elementsPerFold`
			`numberOfFolds = numberOfFolds + 1;`
			`end`
			`self.nMinibatches = numberOfFolds;`
			`ck = self.nFoldElements;`

			`for i = 1 : numberOfFolds`
			`x = [];`
			`data = [];`
			`if i > numberOfFoldsRounded`
			`x = self.data(ck * (i-1) + 1:end,1:end-self.nClasses);`
			`data = self.data(ck * (i-1) + 1:end,1:end);`
			`else`
			`x = self.data(ck * (i-1) + 1:ck * i,1:end-self.nClasses);`
			`data = self.data(ck * (i-1) + 1:ck * i,1:end);`
			`end`

			`x_mean = mean(x);`
			`norm_1 = vecnorm((x - x_mean)',1)';`
			`norm_2 = vecnorm((x - x_mean)',2)';`
			`numerator = norm_2;`
			`denominator = 2 * std(norm_1) ^ 2;`
			`probability = exp(-numerator/denominator);`
			`[~,idx] = sort(probability);`

			`m = size(data,1);`
			`source = data(idx(1 : ceil(m * samplingRatio)), 1 : end);`
			`target = data(idx(ceil(m * samplingRatio) + 1: size(data, 1)), 1 : end);`

			`self.source{i} = source;`
			`self.target{i} = target;`
			`end`
			`end`

			`function createXsYsXtYt(self)`
			`%createXsYsXtYt`
			`% Split the datastream data into sets of input, output, input`
			`% from source, output from source, input from target, output`
			`% from target`
			`% It also creates a permutted version of this data, in`
			`self.X = {};`
			`self.y = {};`
			`self.Xs = {};`
			`self.ys = {};`
			`self.Xt = {};`
			`self.yt = {};`
			`self.permutedX = {};`
			`self.permutedy = {};`
			`for i = 1 : self.nMinibatches`
			`self.Xs{i} = self.source{i}(:,1:end-self.nClasses);`
			`self.ys{i} = self.source{i}(:,self.nFeatures+1:end);`
			`self.Xt{i} = self.target{i}(:,1:end-self.nClasses);`
			`self.yt{i} = self.target{i}(:,self.nFeatures+1:end);`
			`self.X{i} = [self.Xs{i};self.Xt{i}];`
			`self.y{i} = [self.ys{i};self.yt{i}];`

			`x = self.X{i};`
			`Y = self.y{i};`

			`p = randperm(size(x, 1));`
			`self.permutedX{i} = x(p,:);`
			`self.permutedy{i} = Y(p,:);`
			`self.indexPermutation{i} = p;`
			`end`
			`end`

			`function checkDatasetEven(self)`
			`%checkDatasetEven`
			`% Check if the number of rows in the whole dataset is even,`
			`% so we can split in a equal number of elements for source`
			`% and stream (when splitting by 0.5 ratio)`
			`% If the number is odd, randomly trow a row away.`
			`if mod(length(self.data),2) ~= 0`
			`p = ceil(rand() * length(self.data));`
			`self.data = [self.data(1:p-1,:);self.data(p+1:end,:)];`
			`end`
			`end`
			`end`
			`end`