from __future__ import division
from __future__ import print_function

import math as m

import numpy as np


class Kliep(object):
    """Gaussian-kernel importance weighting between a source and a target sample.

    Data matrices passed to the kernel helpers hold one instance per column
    (shape ``(n_features, n_instances)``).  Kernel matrices hold one row per
    instance and one column per reference point.

    Attributes:
        kliepDefSigma: default Gaussian kernel width.
        kliepParEta: learning rate used by ``updateAlpha``.
        kliepParLambda: regularisation factor used by ``updateAlpha``.
        kliepParB: maximum number of reference points (kernel centres).
        kliepParThreshold: change score above which ``changeDetection``
            reports a change.
    """

    # Floor applied to weights before taking logarithms, so that log() never
    # receives zero or negative values.
    _MIN_WEIGHT = 0.00001

    def __init__(self, kliepParEta, kliepParLambda, kliepParB,
                 kliepParThreshold, kliepDefSigma=0.01):
        self.kliepDefSigma = kliepDefSigma
        self.kliepParEta = kliepParEta
        self.kliepParLambda = kliepParLambda
        self.kliepParB = kliepParB
        self.kliepParThreshold = kliepParThreshold

    def pdf_gaussian(self, x, mu):
        """Evaluate a Gaussian density with mean ``mu`` and width ``kliepDefSigma``.

        Args:
            x: array whose last axis indexes the points to evaluate.
            mu: mean, tiled along the last axis of ``x``.

        Returns:
            Array of density values, same shape as the broadcast of ``x``.
        """
        nx = np.shape(x)[-1]
        sigma = self.kliepDefSigma
        tmp = (x - np.tile(mu, (1, nx))) / (np.sqrt(2) * np.tile(sigma, (1, nx)))
        # (2*pi)^(-1/2); true division is guaranteed by the __future__ import.
        norm = m.pow((2 * m.pi), (-1 / 2))
        return np.exp(-np.power(tmp, 2, dtype='float64')) * (norm / sigma)

    def kernel_Gaussian(self, x, c):
        """Gaussian kernel matrix for one-dimensional data.

        Args:
            x: row matrix ``(1, nx)`` of points.  A 1-D array must be given
                an extra axis by the caller before it is passed in.
            c: row matrix ``(1, nc)`` of kernel centres.

        Returns:
            ``(nx, nc)`` matrix with entry ``exp(-(x_i - c_j)^2 / (2 sigma^2))``.
        """
        nx = np.shape(x)[-1]
        nc = np.shape(c)[-1]
        x2 = np.power(x, 2, dtype='float64')
        c2 = np.power(c, 2, dtype='float64')
        # (x - c)^2 expanded as c^2 + x^2 - 2xc, laid out as an (nx, nc) grid.
        distance2 = np.tile(c2, (nx, 1)) + np.tile(x2.T, (1, nc)) - (2 * x.T * c)
        return np.exp(-distance2 / (2 * m.pow(self.kliepDefSigma, 2)),
                      dtype='float64')

    def kernel_Gaussian_mdim(self, x, c):
        """Gaussian kernel matrix for multi-dimensional data, default width.

        In ``x`` and ``c`` each column represents one data point.

        Returns:
            ``(nx, nc)`` kernel matrix computed with ``kliepDefSigma``.
        """
        return self.kernel_Gaussian_mdim_choose_sigma(x, c, self.kliepDefSigma)

    def kernel_Gaussian_mdim_choose_sigma(self, x, c, sigma):
        """Gaussian kernel matrix for multi-dimensional data, explicit width.

        Args:
            x: ``(d, nx)`` matrix, one data point per column.
            c: ``(d, nc)`` matrix, one kernel centre per column.
            sigma: kernel width.

        Returns:
            ``(nx, nc)`` matrix of ``exp(-||x_i - c_j||^2 / (2 sigma^2))``;
            an empty ``(0, nc)`` matrix when ``x`` has no columns.
        """
        dx, nx = np.shape(x)[0], np.shape(x)[-1]
        dc, nc = np.shape(c)[0], np.shape(c)[-1]
        if nx == 0:
            return np.empty((0, nc), dtype='float64')
        # Extracting a column yields a 1-D array; restore it to a (d, 1)
        # column before measuring its distance to every centre.
        rows = [self.distance(x[:, i][np.newaxis].T, dx, 1, c, dc, nc)
                for i in range(nx)]
        # Stack once instead of growing the matrix with repeated np.append.
        distance2 = np.vstack(rows)
        return np.exp(-distance2 / (2 * m.pow(sigma, 2)), dtype='float64')

    def distance(self, x_col_i, x_col_i_nrow, x_col_i_ncol, c, c_nrow, c_ncol):
        """Squared Euclidean distance from one instance to every column of ``c``.

        Args:
            x_col_i: the instance as a ``(d, 1)`` column matrix.
            x_col_i_nrow, x_col_i_ncol, c_nrow: unused; kept so existing
                callers keep working.
            c: selected reference points, one per column.
            c_ncol: number of columns in ``c``.

        Returns:
            1-D array of length ``c_ncol``; element ``i`` is the squared
            distance between ``x_col_i`` and the ``i``-th column of ``c``.
        """
        dist_tmp = np.power(np.tile(x_col_i, (1, c_ncol)) - c, 2, dtype='float64')
        # Column-wise sum collapses the feature axis.
        return np.sum(dist_tmp, axis=0, dtype='float64')

    def KLIEP_projection(self, alpha, Xte, meanDistSrcData, c):
        """Project ``alpha`` onto the constraint set and score it.

        Args:
            alpha: ``(nc, 1)`` coefficient vector.
            Xte: kernel matrix of the target sample, one row per instance.
            meanDistSrcData: ``(nc, 1)`` column-wise mean of the source
                kernel matrix.
            c: ``meanDistSrcData.T . meanDistSrcData`` (a 1x1 matrix).

        Returns:
            Tuple ``(alpha, Xte_alpha, score)``: the projected coefficients,
            the floored target weights ``Xte . alpha`` and their mean log.
        """
        alpha = self.KLIEP_projection_wo_score(alpha, meanDistSrcData, c)
        Xte_alpha = np.dot(Xte, alpha)
        Xte_alpha[(Xte_alpha - 0) < self._MIN_WEIGHT] = self._MIN_WEIGHT
        log_xte_alpha = np.log(Xte_alpha, dtype='float64')
        score = np.mean(log_xte_alpha, dtype='float64')
        return alpha, Xte_alpha, score

    def KLIEP_projection_wo_score(self, alpha, meanDistSrcData, c):
        """Project ``alpha`` onto the constraint set without computing a score.

        The result is non-negative and rescaled so that
        ``meanDistSrcData.T . alpha`` equals one (whenever that product is
        non-zero before rescaling).  The input array is not modified.
        """
        b_alpha = np.dot(meanDistSrcData.T, alpha)
        alpha = alpha + meanDistSrcData * (1 - b_alpha) * np.linalg.pinv(c, rcond=1e-20)
        alpha[alpha < 0] = 0
        b_alpha_new = np.dot(meanDistSrcData.T, alpha)
        return alpha * np.linalg.pinv(b_alpha_new, rcond=1e-20)

    def KLIEP_learning(self, mean_X_de, X_nu):
        """Fit the coefficient vector by projected gradient ascent.

        Args:
            mean_X_de: ``(nc, 1)`` column-wise mean of the source kernel matrix.
            X_nu: target kernel matrix with ``nc`` columns.

        Returns:
            ``(nc, 1)`` coefficient vector.
        """
        nc = np.shape(X_nu)[-1]
        max_iteration = 100
        # Step sizes 10^3, 10^2, ..., 10^-3, tried from coarse to fine.
        epsilon_list = np.power(10.0, np.arange(3, -4, -1, dtype='float64'))
        c = np.dot(mean_X_de.T, mean_X_de)
        alpha = np.ones((nc, 1))
        alpha, X_nu_alpha, score = self.KLIEP_projection(alpha, X_nu, mean_X_de, c)
        for epsilon in epsilon_list:
            # NOTE(review): range(1, max_iteration) performs max_iteration - 1
            # steps; kept as-is to preserve results -- confirm intent.
            for _ in range(1, max_iteration):
                alpha_tmp = alpha + (epsilon * np.dot(X_nu.T, (1 / X_nu_alpha)))
                alpha_new, X_nu_alpha_new, score_new = self.KLIEP_projection(
                    alpha_tmp, X_nu, mean_X_de, c)
                # Stop using this step size as soon as it no longer improves.
                if (score_new - score) <= 0:
                    break
                score = score_new
                alpha = alpha_new
                X_nu_alpha = X_nu_alpha_new
        return alpha

    def KLIEP(self, srcData, trgData):
        """Learn coefficients for weighting source data towards target data.

        The last ``min(kliepParB, n_target)`` target columns are used as
        reference points.

        Args:
            srcData: ``(d, n_src)`` source instances, one per column.
            trgData: ``(d, n_trg)`` target instances, one per column.

        Returns:
            Tuple ``(alphah, kernelMatSrcData, kernelMatTrgData, refPoints)``.
        """
        nColTrgData = np.shape(trgData)[-1]
        b = min(self.kliepParB, nColTrgData)
        refPoints = trgData[:, -b:]
        kernelMatSrcData = self.kernel_Gaussian_mdim(srcData, refPoints)
        kernelMatTrgData = self.kernel_Gaussian_mdim(trgData, refPoints)
        meanDistSrcData = self.colWiseMeanTransposed(kernelMatSrcData)
        alphah = self.KLIEP_learning(meanDistSrcData, kernelMatTrgData)
        return alphah, kernelMatSrcData, kernelMatTrgData, refPoints

    def chooseSigma(self, srcData, trgData, fold=5):
        """Search for a kernel width by cross-validation on the target data.

        Starting from 10, the width is decreased in steps of ``10**epsilon``
        (``epsilon`` = 0, then -1), at most nine steps per step size, for as
        long as the cross-validated mean log weight improves.  Reference
        points and fold assignments are drawn with ``np.random``.

        Args:
            srcData: ``(d, n_src)`` source instances, one per column.
            trgData: ``(d, n_trg)`` target instances, one per column.
            fold: number of cross-validation folds.

        Returns:
            The selected kernel width.  ``kliepDefSigma`` is not modified.
        """
        nColTrgData = np.shape(trgData)[-1]
        print("Choose Sigma")

        # Choose the Gaussian kernel centres at random among target points.
        b = min(self.kliepParB, nColTrgData)
        rand_index = np.random.permutation(nColTrgData)
        refPoints = trgData[:, rand_index[0:b].tolist()]

        # Fold label (1..fold) for each position of a shuffled index vector.
        cv_split = np.floor(
            np.divide(np.multiply(range(0, nColTrgData), fold), nColTrgData)) + 1

        sigma = 10
        score = -float("inf")
        epsilon_list = range(int(m.log10(sigma)) - 1, -2, -1)
        for epsilon in epsilon_list:
            for iteration in range(1, 10, 1):
                sigma_new = sigma - m.pow(10, epsilon)
                print("sigma = ", sigma, " epsilon=", epsilon, "sigma_new=", sigma_new)

                cv_index = np.random.permutation(nColTrgData)
                kernelMatSrcData = self.kernel_Gaussian_mdim_choose_sigma(
                    srcData, refPoints, sigma_new)
                kernelMatTrgData = self.kernel_Gaussian_mdim_choose_sigma(
                    trgData, refPoints, sigma_new)
                meanDistSrcData = self.colWiseMeanTransposed(kernelMatSrcData)

                score_new = 0
                for i in range(1, fold + 1, 1):
                    train_rows = cv_index[cv_split != i].tolist()
                    test_rows = cv_index[cv_split == i].tolist()
                    alpha_cv = self.KLIEP_learning(
                        meanDistSrcData, kernelMatTrgData[train_rows, :])
                    wh_cv = np.dot(kernelMatTrgData[test_rows, :], alpha_cv)
                    score_new = score_new + (
                        np.mean(np.log(wh_cv), dtype='float64') / fold)

                # No improvement: give up on this step size, try a finer one.
                if (score_new - score) <= 0:
                    break
                score = score_new
                sigma = sigma_new
                print("score=", score, " sigma=", sigma, "epsilon=", epsilon,
                      "iteration=", iteration)
        print("Sigma = ", str(sigma))
        return sigma

    def changeDetection(self, trgData, refPointsOld, alphahOld, refPointsNew,
                        alphahNew, kernelMatTrgDataNew=None):
        """Compare two weight models on the same target data.

        Args:
            trgData: target instances, one per column; a 1-D array is
                treated as a single row of one-dimensional instances.
            refPointsOld, alphahOld: reference points and coefficients of
                the old model.
            refPointsNew, alphahNew: reference points and coefficients of
                the new model.
            kernelMatTrgDataNew: optional precomputed kernel matrix of
                ``trgData`` against ``refPointsNew``.

        Returns:
            Tuple ``(changed, changeScore, kernelMatTrgDataNew)`` where
            ``changeScore`` is the summed log ratio of new to old weights and
            ``changed`` is ``changeScore > kliepParThreshold``.
        """
        if len(np.shape(trgData)) == 1:
            trgData = trgData[np.newaxis]
        if kernelMatTrgDataNew is None:
            kernelMatTrgDataNew = self.kernel_Gaussian_mdim(trgData, refPointsNew)
        kernelMatTrgDataOld = self.kernel_Gaussian_mdim(trgData, refPointsOld)

        weightTrgDataNew = self.calcInstanceWeights(kernelMatTrgDataNew, alphahNew)
        weightTrgDataNew[(weightTrgDataNew - 0) < self._MIN_WEIGHT] = self._MIN_WEIGHT
        weightTrgDataOld = self.calcInstanceWeights(kernelMatTrgDataOld, alphahOld)
        weightTrgDataOld[(weightTrgDataOld - 0) < self._MIN_WEIGHT] = self._MIN_WEIGHT

        l_ratios = weightTrgDataNew / weightTrgDataOld
        lnWeightTrgData = np.log(l_ratios, dtype='float64')
        changeScore = np.sum(lnWeightTrgData, dtype='float64')
        return changeScore > self.kliepParThreshold, changeScore, kernelMatTrgDataNew

    def updateAlpha(self, srcData, trgData, newTrgPoint, refPoints, alphah,
                    kernelMatSrcData=None):
        """Update the coefficients after one new target point arrives.

        The existing coefficients are shrunk by ``1 - eta * lambda``, the
        first one is dropped, a coefficient ``eta / c`` for the new point is
        appended, and the result is re-projected with ``satConstraints``.

        Args:
            srcData: instances from the source stream.
            trgData: instances from the target stream, including the new
                point.
            newTrgPoint: the new point; the last column of ``trgData``
                should match it.  A 1-D array is interpreted as a single
                point with one entry per feature.
            refPoints: current reference points, one per column.
            alphah: most recent coefficient column vector.
            kernelMatSrcData: optional precomputed source kernel matrix.

        Returns:
            Tuple ``(alphah, kernelMatSrcData)``.

        Raises:
            ValueError: if a 1-D ``newTrgPoint`` does not have one entry per
                row of ``refPoints``.
        """
        if len(np.shape(srcData)) == 1:
            srcData = srcData[np.newaxis]
        if len(np.shape(trgData)) == 1:
            trgData = trgData[np.newaxis]
        if newTrgPoint.ndim == 1:
            # One point = one column.  Adding a leading axis instead would
            # turn a d-dimensional point into d one-dimensional points and
            # break the shape of alphah below.
            if newTrgPoint.shape[0] != np.shape(refPoints)[0]:
                raise ValueError(
                    "newTrgPoint must have one entry per feature of refPoints")
            newTrgPoint = newTrgPoint.reshape(-1, 1)

        kernelNewTrgPoint = self.kernel_Gaussian_mdim(newTrgPoint, refPoints)
        # alphah is a column vector; each row of the kernel matrix holds the
        # kernel values of one data point.
        c = np.dot(kernelNewTrgPoint, alphah)

        tmp = 1 - (self.kliepParEta * self.kliepParLambda)
        alphah = alphah * tmp
        alphah = alphah[1:, :]
        alphah = np.append(alphah, self.kliepParEta / c, axis=0)
        alphah, kernelMatSrcData = self.satConstraints(
            srcData, trgData, refPoints, alphah, kernelMatSrcData)
        return alphah, kernelMatSrcData

    def satConstraints(self, srcData, trgData, refPoints, alphah,
                       kernelMatSrcData=None):
        """Re-project ``alphah`` so that it satisfies the constraints.

        Args:
            srcData: source instances, used only when ``kernelMatSrcData``
                is not supplied.
            trgData: unused; kept so existing callers keep working.
            refPoints: reference points, one per column.
            alphah: coefficient column vector to project.
            kernelMatSrcData: optional precomputed source kernel matrix.

        Returns:
            Tuple ``(alphah, kernelMatSrcData)``.
        """
        if kernelMatSrcData is None:
            kernelMatSrcData = self.kernel_Gaussian_mdim(srcData, refPoints)
        meanDistSrcData = self.colWiseMeanTransposed(kernelMatSrcData)
        c = np.dot(meanDistSrcData.T, meanDistSrcData)
        alphah = self.KLIEP_projection_wo_score(alphah, meanDistSrcData, c)
        return alphah, kernelMatSrcData

    def colWiseMeanTransposed(self, mat):
        """Return the column-wise mean of ``mat`` as a column vector."""
        return np.transpose(np.mean(mat, 0)[np.newaxis])

    def calcInstanceWeights(self, kernelMat, alphah):
        """Return the instance weights ``kernelMat . alphah`` as a row vector."""
        return np.dot(kernelMat, alphah).T