ACDC_KNOSYS-2021/MSC/manager.py

718 lines
26 KiB
Python
Raw Normal View History

2021-10-04 18:29:54 +08:00
from __future__ import print_function
from properties import Properties
from changedetection import ChangeDetection
from ensemble import Ensemble
from stream import Stream
from model import Model
import time, sys
from py4j.java_gateway import JavaGateway, GatewayParameters, CallbackServerParameters
import numpy as np
class Manager(object):
def __init__(self, sourceFile, targetFile):
    """Create the windows, buffers, detector, ensemble, streams and Py4J gateway.

    sourceFile and targetFile are passed unchanged to the two Stream
    constructors, which also receive the same (initially empty) class-name
    list object and Properties.INITIAL_DATA_SIZE.
    """
    # Sliding windows handed to the change detectors.
    self.SWindow, self.TWindow, self.TPredictWindow = [], [], []
    # Instance buffers used as queues (oldest element at index 0).
    self.SDataBuffer, self.TDataBuffer = [], []
    # Filled from the streams' initialData by the start* methods.
    self.SInitialDataBuffer, self.TInitialDataBuffer = [], []

    self.changeDetector = ChangeDetection(
        Properties.GAMMA,
        Properties.SENSITIVITY,
        Properties.MAX_WINDOW_SIZE,
    )
    self.ensemble = Ensemble(Properties.ENSEMBLE_SIZE)

    # One list object is shared by both streams.
    sharedClassNames = []
    initialSize = Properties.INITIAL_DATA_SIZE
    self.source = Stream(sourceFile, sharedClassNames, initialSize)
    self.target = Stream(targetFile, sharedClassNames, initialSize)
    # Publish the source stream's MAXVAR on the global Properties holder.
    Properties.MAXVAR = self.source.MAXVAR

    # The callback server listens on the port right after the gateway port.
    gatewayPort = Properties.PY4JPORT
    self.gateway = JavaGateway(
        start_callback_server=True,
        gateway_parameters=GatewayParameters(port=gatewayPort),
        callback_server_parameters=CallbackServerParameters(port=gatewayPort + 1),
    )
    self.app = self.gateway.entry_point
"""
Detect drift on a given data stream.
Returns the change point index on the stream array.
"""
def __detectDrift(self, slidingWindow, flagStream):
changePoint = -1
if flagStream == 0:
changePoint = self.changeDetector.detectSourceChange(slidingWindow)
elif flagStream == 1:
changePoint = self.changeDetector.detectTargetChange(slidingWindow)
else:
raise Exception('flagStream var has value ' + str(flagStream) + ' that is not supported.')
return changePoint
def __detectDriftJava(self, slidingWindow, flagStream):
changePoint = -1
sw = self.gateway.jvm.java.util.ArrayList()
for i in xrange(len(slidingWindow)):
sw.append(float(slidingWindow[i]))
if flagStream == 0:
changePoint = self.app.detectSourceChange(sw)
elif flagStream == 1:
changePoint = self.app.detectTargetChange(sw)
else:
raise Exception('flagStream var has value ' + str(flagStream) + ' that is not supported.')
# print('ChangePoint = ' + str(changePoint))
return changePoint
"""
Write value (accuracy or confidence) to a file with DatasetName as an identifier.
"""
def __saveResult(self, acc, datasetName):
    """Append str(acc) as one line to '<datasetName>_<Properties.OUTFILENAME>'."""
    resultPath = datasetName + '_' + Properties.OUTFILENAME
    line = str(acc) + '\n'
    # The with-statement closes the file; no explicit close() is required.
    with open(resultPath, 'a') as handle:
        handle.write(line)
"""
The main method handling MDC logic (using single ensemble).
"""
def start(self, datasetName):
    """Run the MDC loop with the single ensemble stored on this manager.

    Each iteration consumes one instance, drawn at random from the source
    or the target stream, evaluates it with the ensemble, and then runs
    drift detection on both sliding windows through the Java detector.
    When a change point other than -1 is reported, the matching buffers
    are trimmed, the ensemble weights are updated and a new model is
    generated.

    Running target accuracy is appended to the result file for
    datasetName and running mean confidence to the one for
    datasetName + '_confidence'. The latter also receives the marker
    values 5555555.0 (source drift) and 7777777.0 (target drift).

    Returns:
        Tuple (source accuracy, target accuracy). An entry is 0.0 when
        no instance of that stream was processed.
    """
    # Get initial data buffer
    self.SInitialDataBuffer = self.source.initialData
    self.TInitialDataBuffer = self.target.initialData

    Properties.logger.info('Initializing Ensemble ...')
    # source model
    self.ensemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, True)
    # target model
    self.ensemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, False)
    Properties.logger.info(self.ensemble.getEnsembleSummary())

    sourceTotal = len(self.source.data)
    targetTotal = len(self.target.data)
    sourceIndex = 0
    targetIndex = 0
    trueSourceNum = 0
    trueTargetNum = 0
    targetConfSum = 0

    Properties.logger.info('Starting MDC ...')
    while sourceIndex + targetIndex < sourceTotal + targetTotal:
        sourceLeft = sourceTotal - sourceIndex
        targetLeft = targetTotal - targetIndex
        # Probability of reading from the source = share of the remaining
        # instances that belong to the source. The previous expression
        # added targetIndex to the denominator instead of subtracting it.
        ratio = sourceLeft / float(sourceLeft + targetLeft)
        draw = np.random.rand()

        if sourceLeft > 0 and (draw <= ratio or targetLeft <= 0):
            sdata = self.source.data[sourceIndex]
            self.SDataBuffer.append(sdata)
            resSource = self.ensemble.evaluateEnsemble(sdata, True)
            self.SWindow.append(resSource[0])
            print('S', end="")
            sourceIndex += 1
            # resSource[0] is accumulated as the source hit count.
            trueSourceNum += resSource[0]
        else:
            # Reaching here implies targetLeft > 0: the loop condition
            # guarantees at least one stream still has data.
            tdata = self.target.data[targetIndex]
            self.TDataBuffer.append(tdata)
            resTarget = self.ensemble.evaluateEnsemble(tdata, False)
            conf = resTarget[1]  # confidence
            targetIndex += 1
            print('T', end="")

            # Keep the confidence inside [0.1, 0.995]; extreme values can
            # make the beta probability zero during change detection.
            self.TWindow.append(min(max(conf, 0.1), 0.995))
            self.TPredictWindow.append(resTarget[0])

            # Running target accuracy (last field of tdata is the label).
            if resTarget[0] == tdata[-1]:
                trueTargetNum += 1
            self.__saveResult(float(trueTargetNum) / targetIndex, datasetName)

            # Running mean of the raw (unclamped) confidence.
            targetConfSum += conf
            self.__saveResult(float(targetConfSum) / targetIndex, datasetName + '_confidence')

        # Drift detection
        # NOTE(review): assumed to run once per loop iteration (for either
        # stream), not only after a target instance - confirm nesting.
        srcCP = self.__detectDriftJava(self.SWindow, 0)
        trgCP = self.__detectDriftJava(self.TWindow, 1)

        if srcCP != -1:
            self.__saveResult(5555555.0, datasetName + '_confidence')
            Properties.logger.info('-------------------------- S O U R C E D R I F T ------------------------------------')
            Properties.logger.info('\nDrift found on source stream.')
            Properties.logger.info('dataIndex=' + str((targetIndex + sourceIndex)) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP))

            if srcCP > 0:
                # Remove data from the buffers up to the change point.
                del self.SDataBuffer[:srcCP]
                del self.SWindow[:srcCP]
            elif srcCP == 0:
                # srcCP == 0 (window size hit max or avg error below the
                # cutoff): keep only the newest CUSHION instances.
                surplus = max(len(self.SDataBuffer) - Properties.CUSHION, 0)
                del self.SDataBuffer[:surplus]
                del self.SWindow[:surplus]

            Properties.logger.info('Instances left in source sliding window : ' + str(len(self.SDataBuffer)))
            Properties.logger.info('Instances left in target sliding window : ' + str(len(self.TDataBuffer)))

            Properties.logger.info('Updating ensemble weights')
            self.ensemble.updateWeight(self.SDataBuffer, True)

            Properties.logger.info('Training a model for source stream')
            self.ensemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, True)
            Properties.logger.info(self.ensemble.getEnsembleSummary())

        if trgCP != -1:
            self.__saveResult(7777777.0, datasetName + '_confidence')
            Properties.logger.info('-------------------------- T A R G E T D R I F T ------------------------------------')
            Properties.logger.info('Drift found on target stream.')
            Properties.logger.info('dataIndex=' + str((targetIndex + sourceIndex)) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP))

            if trgCP > 0:
                # Remove data from the buffers up to the change point.
                del self.TDataBuffer[:trgCP]
                del self.TWindow[:trgCP]
                del self.TPredictWindow[:trgCP]
            elif trgCP == 0:
                # trgCP == 0: keep only the newest CUSHION instances.
                surplus = max(len(self.TDataBuffer) - Properties.CUSHION, 0)
                del self.TDataBuffer[:surplus]
                del self.TWindow[:surplus]
                del self.TPredictWindow[:surplus]

            Properties.logger.info('Instances left in source sliding window : ' + str(len(self.SDataBuffer)))
            Properties.logger.info('Instances left in target sliding window : ' + str(len(self.TDataBuffer)))

            Properties.logger.info('Updating ensemble weights')
            self.ensemble.updateWeight(self.TDataBuffer, False)

            # A new model is only trained when both buffers hold data.
            # NOTE(review): the summary log is assumed to belong to this
            # guarded section - confirm against the intended nesting.
            if self.SDataBuffer and self.TDataBuffer:
                Properties.logger.info('Training a model for target stream')
                self.ensemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, False)
                Properties.logger.info(self.ensemble.getEnsembleSummary())

        # Line break in the progress output every 100 instances.
        if (targetIndex + sourceIndex) % 100 == 0:
            print('')

    Properties.logger.info('Done !!')
    # Guard the averages so an empty stream does not raise ZeroDivisionError.
    sourceAccuracy = float(trueSourceNum) / sourceIndex if sourceIndex else 0.0
    targetAccuracy = float(trueTargetNum) / targetIndex if targetIndex else 0.0
    return sourceAccuracy, targetAccuracy
"""
Main module for MDC2 logic (using two separate ensembles)
"""
def start2(self, datasetName):
    """Run the MDC2 loop with two separate ensembles (source and target).

    Source and target instances are read in lockstep, one of each per
    iteration, for as many iterations as the source stream has instances;
    the target stream is indexed with the same position. Each instance is
    evaluated by its own ensemble, both sliding windows are then checked
    by the Java drift detector, and a reported change point (anything but
    -1) trims the matching buffers, updates that ensemble's weights and
    generates a new model in it.

    Running target accuracy is appended to the result file for
    datasetName and running mean confidence to the one for
    datasetName + '_confidence', which also receives the marker values
    5555555.0 (source drift) and 7777777.0 (target drift).
    """
    # Get initial data buffer
    self.SInitialDataBuffer = self.source.initialData
    self.TInitialDataBuffer = self.target.initialData

    # Initialize Ensembles
    srcEnsemble = Ensemble(Properties.ENSEMBLE_SIZE)
    trgEnsemble = Ensemble(Properties.ENSEMBLE_SIZE)

    Properties.logger.info('Initializing Ensemble ...')
    # source model
    srcEnsemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, True)
    Properties.logger.info('Source Ensemble')
    Properties.logger.info(srcEnsemble.getEnsembleSummary())
    # target model
    trgEnsemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, False)
    Properties.logger.info('Target Ensemble')
    Properties.logger.info(trgEnsemble.getEnsembleSummary())

    trueTargetNum = 0
    targetConfSum = 0

    Properties.logger.info('Starting MDC2 ...')
    for dataIndex in range(len(self.source.data)):
        seen = dataIndex + 1
        print('.', end="")

        # Source Stream
        sdata = self.source.data[dataIndex]
        self.SDataBuffer.append(sdata)
        resSource = srcEnsemble.evaluateEnsemble(sdata, True)
        self.SWindow.append(resSource[0])

        # Target Stream
        tdata = self.target.data[dataIndex]
        self.TDataBuffer.append(tdata)
        resTarget = trgEnsemble.evaluateEnsemble(tdata, False)
        conf = resTarget[1]  # confidence

        # Keep the confidence inside [0.1, 0.995]; extreme values can make
        # the beta probability zero during change detection.
        self.TWindow.append(min(max(conf, 0.1), 0.995))
        self.TPredictWindow.append(resTarget[0])

        # Running target accuracy (last field of tdata is the label).
        if resTarget[0] == tdata[-1]:
            trueTargetNum += 1
        self.__saveResult(float(trueTargetNum) / seen, datasetName)

        # Running mean of the raw (unclamped) confidence.
        targetConfSum += conf
        self.__saveResult(float(targetConfSum) / seen, datasetName + '_confidence')

        # Drift detection
        srcCP = self.__detectDriftJava(self.SWindow, 0)
        trgCP = self.__detectDriftJava(self.TWindow, 1)

        if srcCP != -1:
            self.__saveResult(5555555.0, datasetName + '_confidence')
            Properties.logger.info('-------------------------- S O U R C E D R I F T ------------------------------------')
            Properties.logger.info('\nDrift found on source stream.')
            Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP))

            if srcCP > 0:
                # Remove data from the buffers up to the change point.
                del self.SDataBuffer[:srcCP]
                del self.SWindow[:srcCP]
            elif srcCP == 0:
                # srcCP == 0 (window size hit max or avg error below the
                # cutoff): keep only the newest CUSHION instances.
                surplus = max(len(self.SDataBuffer) - Properties.CUSHION, 0)
                del self.SDataBuffer[:surplus]
                del self.SWindow[:surplus]

            Properties.logger.info('Instances left in source sliding window : ' + str(len(self.SDataBuffer)))
            Properties.logger.info('Instances left in target sliding window : ' + str(len(self.TDataBuffer)))

            # Updating source Ensemble
            Properties.logger.info('Updating source ensemble weights')
            srcEnsemble.updateWeight(self.SDataBuffer, True)

            Properties.logger.info('Training a model for source stream')
            srcEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, True)
            Properties.logger.info('Source Ensemble')
            Properties.logger.info(srcEnsemble.getEnsembleSummary())

        if trgCP != -1:
            self.__saveResult(7777777.0, datasetName + '_confidence')
            Properties.logger.info('-------------------------- T A R G E T D R I F T ------------------------------------')
            Properties.logger.info('Drift found on target stream.')
            Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP))

            if trgCP > 0:
                # Remove data from the buffers up to the change point.
                del self.TDataBuffer[:trgCP]
                del self.TWindow[:trgCP]
                del self.TPredictWindow[:trgCP]
            elif trgCP == 0:
                # trgCP == 0: keep only the newest CUSHION instances.
                surplus = max(len(self.TDataBuffer) - Properties.CUSHION, 0)
                del self.TDataBuffer[:surplus]
                del self.TWindow[:surplus]
                del self.TPredictWindow[:surplus]

            Properties.logger.info('Instances left in source sliding window : ' + str(len(self.SDataBuffer)))
            Properties.logger.info('Instances left in target sliding window : ' + str(len(self.TDataBuffer)))

            Properties.logger.info('Updating target ensemble weights')
            trgEnsemble.updateWeight(self.TDataBuffer, False)

            Properties.logger.info('Training a model for target stream')
            trgEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, False)
            Properties.logger.info('Target Ensemble')
            Properties.logger.info(trgEnsemble.getEnsembleSummary())

        # Line break in the progress output every 100 instances.
        if seen % 100 == 0:
            print('')

    Properties.logger.info('Done !!')
"""
Baseline skmm (single target model with initial train only)
"""
def start_skmm(self, datasetName):
    """Run the skmm baseline: one Model trained once on the initial data.

    Walks the source stream by position, reading the target instance at
    the same position, buffers both, tests the target instance with the
    model and appends the running target accuracy to the result file for
    datasetName.
    """
    # Get initial data buffer
    self.SInitialDataBuffer = self.source.initialData
    self.TInitialDataBuffer = self.target.initialData

    # The model is trained here and never retrained.
    baseline = Model()
    baseline.train(self.SInitialDataBuffer, self.TInitialDataBuffer, Properties.MAXVAR)

    hits = 0
    Properties.logger.info('Starting skmm baseline ...')
    for processed, sourceInstance in enumerate(self.source.data, 1):
        print('.', end="")

        # Source Stream
        self.SDataBuffer.append(sourceInstance)

        # Target Stream (same position as the source instance)
        targetInstance = self.target.data[processed - 1]
        self.TDataBuffer.append(targetInstance)

        # Test the single target instance.
        outcome = baseline.test([targetInstance], Properties.MAXVAR)

        # Running target accuracy (last field of the instance is the label).
        if outcome[0][0] == targetInstance[-1]:
            hits += 1
        self.__saveResult(float(hits) / processed, datasetName)

        # Line break in the progress output every 100 instances.
        if processed % 100 == 0:
            print('')

    Properties.logger.info('Done !!')
"""
Baseline mkmm (single target model trained periodically)
"""
def start_mkmm(self, datasetName):
    """Run the mkmm baseline: one Model that is retrained periodically.

    Walks the source stream by position, reading the target instance at
    the same position, buffers both, tests the target instance with the
    current model and appends the running target accuracy to the result
    file for datasetName. After every Properties.MAX_WINDOW_SIZE
    processed instances a fresh Model is trained on the buffered data and
    both buffers are reset.
    """
    # Get initial data buffer
    self.SInitialDataBuffer = self.source.initialData
    self.TInitialDataBuffer = self.target.initialData

    # Initialize Model
    model = Model()
    model.train(self.SInitialDataBuffer, self.TInitialDataBuffer, Properties.MAXVAR)

    trueTargetNum = 0

    # Previously logged 'skmm' here (copy-paste from start_skmm).
    Properties.logger.info('Starting mkmm baseline ...')
    for dataIndex in range(len(self.source.data)):
        seen = dataIndex + 1
        print('.', end="")

        # Source Stream
        sdata = self.source.data[dataIndex]
        self.SDataBuffer.append(sdata)

        # Target Stream
        tdata = self.target.data[dataIndex]
        self.TDataBuffer.append(tdata)

        # Test the single target instance with the current model.
        resTarget = model.test([tdata], Properties.MAXVAR)

        # Running target accuracy (last field of tdata is the label).
        if resTarget[0][0] == tdata[-1]:
            trueTargetNum += 1
        self.__saveResult(float(trueTargetNum) / seen, datasetName)

        # Line break in the progress output every 100 instances.
        if seen % 100 == 0:
            print('')

        # Periodic retraining on the data buffered since the last reset.
        if seen % Properties.MAX_WINDOW_SIZE == 0:
            model = Model()
            model.train(self.SDataBuffer, self.TDataBuffer, Properties.MAXVAR)
            self.SDataBuffer = []
            self.TDataBuffer = []

    Properties.logger.info('Done !!')
"""
Baseline srconly using an ensemble of only source classifiers.
Target labels predicted from this ensemble using its target weights.
"""
def start_srconly(self, datasetName):
    """Run the srconly baseline: a single ensemble holding only source models.

    Source and target instances are read in lockstep by position. Both
    are evaluated by the same ensemble (with the source flag True for the
    source instance and False for the target instance). After each pair
    the Java drift detector checks both windows; a reported change point
    (anything but -1) trims the matching buffers, updates the ensemble
    weights and generates a new model. In both drift branches the new
    model is generated with the source flag set to True.

    Running target accuracy is appended to the result file for
    datasetName and running mean confidence to the one for
    datasetName + '_confidence', which also receives the marker values
    5555555.0 (source drift) and 7777777.0 (target drift).
    """
    # Get initial data buffer
    self.SInitialDataBuffer = self.source.initialData
    self.TInitialDataBuffer = self.target.initialData

    # Initialize Ensembles
    srcEnsemble = Ensemble(Properties.ENSEMBLE_SIZE)

    Properties.logger.info('Initializing Ensemble ...')
    # source model
    srcEnsemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, True)
    Properties.logger.info('Source Ensemble')
    Properties.logger.info(srcEnsemble.getEnsembleSummary())

    trueTargetNum = 0
    targetConfSum = 0

    Properties.logger.info('Starting srconly-MDC ...')
    for dataIndex in range(len(self.source.data)):
        seen = dataIndex + 1
        print('.', end="")

        # Source Stream
        sdata = self.source.data[dataIndex]
        self.SDataBuffer.append(sdata)
        resSource = srcEnsemble.evaluateEnsemble(sdata, True)
        self.SWindow.append(resSource[0])

        # Target Stream
        tdata = self.target.data[dataIndex]
        self.TDataBuffer.append(tdata)
        resTarget = srcEnsemble.evaluateEnsemble(tdata, False)
        conf = resTarget[1]  # confidence

        # Keep the confidence inside [0.1, 0.995]; extreme values can make
        # the beta probability zero during change detection.
        self.TWindow.append(min(max(conf, 0.1), 0.995))
        self.TPredictWindow.append(resTarget[0])

        # Running target accuracy (last field of tdata is the label).
        if resTarget[0] == tdata[-1]:
            trueTargetNum += 1
        self.__saveResult(float(trueTargetNum) / seen, datasetName)

        # Running mean of the raw (unclamped) confidence.
        targetConfSum += conf
        self.__saveResult(float(targetConfSum) / seen, datasetName + '_confidence')

        # Drift detection
        srcCP = self.__detectDriftJava(self.SWindow, 0)
        trgCP = self.__detectDriftJava(self.TWindow, 1)

        if srcCP != -1:
            self.__saveResult(5555555.0, datasetName + '_confidence')
            Properties.logger.info('-------------------------- S O U R C E D R I F T ------------------------------------')
            Properties.logger.info('\nDrift found on source stream.')
            Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP))

            if srcCP > 0:
                # Remove data from the buffers up to the change point.
                del self.SDataBuffer[:srcCP]
                del self.SWindow[:srcCP]
            elif srcCP == 0:
                # srcCP == 0 (window size hit max or avg error below the
                # cutoff): keep only the newest CUSHION instances.
                surplus = max(len(self.SDataBuffer) - Properties.CUSHION, 0)
                del self.SDataBuffer[:surplus]
                del self.SWindow[:surplus]

            Properties.logger.info('Instances left in source sliding window : ' + str(len(self.SDataBuffer)))
            Properties.logger.info('Instances left in target sliding window : ' + str(len(self.TDataBuffer)))

            # Updating source Ensemble
            Properties.logger.info('Updating source ensemble weights')
            srcEnsemble.updateWeight(self.SDataBuffer, True)

            Properties.logger.info('Training a model for source stream')
            srcEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, True)
            Properties.logger.info('Source Ensemble')
            Properties.logger.info(srcEnsemble.getEnsembleSummary())

        if trgCP != -1:
            self.__saveResult(7777777.0, datasetName + '_confidence')
            Properties.logger.info('-------------------------- T A R G E T D R I F T ------------------------------------')
            Properties.logger.info('Drift found on target stream.')
            Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP))

            if trgCP > 0:
                # Remove data from the buffers up to the change point.
                del self.TDataBuffer[:trgCP]
                del self.TWindow[:trgCP]
                del self.TPredictWindow[:trgCP]
            elif trgCP == 0:
                # trgCP == 0: keep only the newest CUSHION instances.
                surplus = max(len(self.TDataBuffer) - Properties.CUSHION, 0)
                del self.TDataBuffer[:surplus]
                del self.TWindow[:surplus]
                del self.TPredictWindow[:surplus]

            Properties.logger.info('Instances left in source sliding window : ' + str(len(self.SDataBuffer)))
            Properties.logger.info('Instances left in target sliding window : ' + str(len(self.TDataBuffer)))

            Properties.logger.info('Updating target ensemble weights')
            srcEnsemble.updateWeight(self.TDataBuffer, False)

            # The replacement model is still generated with the source
            # flag (True), even though the log lines mention "target".
            Properties.logger.info('Training a model for target stream')
            srcEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, True)
            Properties.logger.info('Target Ensemble')
            Properties.logger.info(srcEnsemble.getEnsembleSummary())

        # Line break in the progress output every 100 instances.
        if seen % 100 == 0:
            print('')

    Properties.logger.info('Done !!')
"""
Baseline trgonly using an ensemble of only target classifiers.
Target labels predicted from this ensemble using its target weights.
Source drift is computed using source-weighted ensemble prediction.
"""
def start_trgonly(self, datasetName):
    """Run the trgonly baseline: a single ensemble holding only target models.

    Source and target instances are read in lockstep by position. Both
    are evaluated by the same ensemble (with the source flag True for the
    source instance and False for the target instance). After each pair
    the Java drift detector checks both windows; a reported change point
    (anything but -1) trims the matching buffers, updates the ensemble
    weights and generates a new model. In both drift branches the new
    model is generated with the source flag set to False.

    Running target accuracy is appended to the result file for
    datasetName and running mean confidence to the one for
    datasetName + '_confidence', which also receives the marker values
    5555555.0 (source drift) and 7777777.0 (target drift).
    """
    # Get initial data buffer
    self.SInitialDataBuffer = self.source.initialData
    self.TInitialDataBuffer = self.target.initialData

    # Initialize Ensembles
    trgEnsemble = Ensemble(Properties.ENSEMBLE_SIZE)

    Properties.logger.info('Initializing Ensemble ...')
    # target model
    trgEnsemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, False)
    Properties.logger.info('Target Ensemble')
    Properties.logger.info(trgEnsemble.getEnsembleSummary())

    trueTargetNum = 0
    targetConfSum = 0

    Properties.logger.info('Starting trgonly-MDC ...')
    for dataIndex in range(len(self.source.data)):
        seen = dataIndex + 1
        print('.', end="")

        # Source Stream
        sdata = self.source.data[dataIndex]
        self.SDataBuffer.append(sdata)
        resSource = trgEnsemble.evaluateEnsemble(sdata, True)
        self.SWindow.append(resSource[0])

        # Target Stream
        tdata = self.target.data[dataIndex]
        self.TDataBuffer.append(tdata)
        resTarget = trgEnsemble.evaluateEnsemble(tdata, False)
        conf = resTarget[1]  # confidence

        # Keep the confidence inside [0.1, 0.995]; extreme values can make
        # the beta probability zero during change detection.
        self.TWindow.append(min(max(conf, 0.1), 0.995))
        self.TPredictWindow.append(resTarget[0])

        # Running target accuracy (last field of tdata is the label).
        if resTarget[0] == tdata[-1]:
            trueTargetNum += 1
        self.__saveResult(float(trueTargetNum) / seen, datasetName)

        # Running mean of the raw (unclamped) confidence.
        targetConfSum += conf
        self.__saveResult(float(targetConfSum) / seen, datasetName + '_confidence')

        # Drift detection
        srcCP = self.__detectDriftJava(self.SWindow, 0)
        trgCP = self.__detectDriftJava(self.TWindow, 1)

        if srcCP != -1:
            self.__saveResult(5555555.0, datasetName + '_confidence')
            Properties.logger.info('-------------------------- S O U R C E D R I F T ------------------------------------')
            Properties.logger.info('\nDrift found on source stream.')
            Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP))

            if srcCP > 0:
                # Remove data from the buffers up to the change point.
                del self.SDataBuffer[:srcCP]
                del self.SWindow[:srcCP]
            elif srcCP == 0:
                # srcCP == 0 (window size hit max or avg error below the
                # cutoff): keep only the newest CUSHION instances.
                surplus = max(len(self.SDataBuffer) - Properties.CUSHION, 0)
                del self.SDataBuffer[:surplus]
                del self.SWindow[:surplus]

            Properties.logger.info('Instances left in source sliding window : ' + str(len(self.SDataBuffer)))
            Properties.logger.info('Instances left in target sliding window : ' + str(len(self.TDataBuffer)))

            # Updating source Ensemble
            Properties.logger.info('Updating source ensemble weights')
            trgEnsemble.updateWeight(self.SDataBuffer, True)

            # The replacement model is still generated with the target
            # flag (False), even though the log lines mention "source".
            Properties.logger.info('Training a model for source stream')
            trgEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, False)
            Properties.logger.info('Source Ensemble')
            Properties.logger.info(trgEnsemble.getEnsembleSummary())

        if trgCP != -1:
            self.__saveResult(7777777.0, datasetName + '_confidence')
            Properties.logger.info('-------------------------- T A R G E T D R I F T ------------------------------------')
            Properties.logger.info('Drift found on target stream.')
            Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP))

            if trgCP > 0:
                # Remove data from the buffers up to the change point.
                del self.TDataBuffer[:trgCP]
                del self.TWindow[:trgCP]
                del self.TPredictWindow[:trgCP]
            elif trgCP == 0:
                # trgCP == 0: keep only the newest CUSHION instances.
                surplus = max(len(self.TDataBuffer) - Properties.CUSHION, 0)
                del self.TDataBuffer[:surplus]
                del self.TWindow[:surplus]
                del self.TPredictWindow[:surplus]

            Properties.logger.info('Instances left in source sliding window : ' + str(len(self.SDataBuffer)))
            Properties.logger.info('Instances left in target sliding window : ' + str(len(self.TDataBuffer)))

            Properties.logger.info('Updating target ensemble weights')
            trgEnsemble.updateWeight(self.TDataBuffer, False)

            Properties.logger.info('Training a model for target stream')
            trgEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, False)
            Properties.logger.info('Target Ensemble')
            Properties.logger.info(trgEnsemble.getEnsembleSummary())

        # Line break in the progress output every 100 instances.
        if seen % 100 == 0:
            print('')

    Properties.logger.info('Done !!')