318 lines
12 KiB
Python
318 lines
12 KiB
Python
from __future__ import division
|
|
import numpy as np
|
|
import math as m
|
|
|
|
|
|
class Kliep(object):
|
|
|
|
def __init__(self, kliepParEta, kliepParLambda, kliepParB, kliepParThreshold, kliepDefSigma=0.01):
|
|
self.kliepDefSigma = kliepDefSigma
|
|
self.kliepParEta = kliepParEta
|
|
self.kliepParLambda = kliepParLambda
|
|
self.kliepParB = kliepParB
|
|
self.kliepParThreshold = kliepParThreshold
|
|
|
|
def pdf_gaussian(self, x, mu):
|
|
x_size = np.shape(x)
|
|
d = x_size[0]
|
|
nx = x_size[len(x_size) - 1]
|
|
|
|
tmp = (x - np.tile(mu, (1, nx)))/(np.sqrt(2) * np.tile(self.kliepDefSigma, (1, nx)))
|
|
denom = m.pow((2 * m.pi), (-1 / 2))
|
|
px = np.exp(-np.power(tmp, 2, dtype='float64'))*(denom / self.kliepDefSigma)
|
|
return px
|
|
|
|
def kernel_Gaussian(self, x, c):
|
|
x_size = np.shape(x)
|
|
dx = x_size[0]
|
|
nx = x_size[len(x_size) - 1]
|
|
|
|
c_size = np.shape(c)
|
|
dc = c_size[0]
|
|
nc = c_size[len(c_size) - 1]
|
|
|
|
x2 = np.power(x, 2, dtype=np.float)
|
|
c2 = np.power(c, 2, dtype=np.float)
|
|
# if the array is 1D, need to add an axis first before doing transpose.
|
|
distance2 = np.tile(c2, (nx, 1)) + np.tile(x2.T, (1, nc)) - (2 * x.T * c)
|
|
X = np.exp(-distance2 / (2 * m.pow(self.kliepDefSigma, 2)), dtype='float64')
|
|
return X
|
|
|
|
"""
|
|
- find kernel gaussians for multi dim x
|
|
- in x and c, each column represents one data point
|
|
"""
|
|
|
|
def kernel_Gaussian_mdim(self, x, c):
|
|
x_size = np.shape(x)
|
|
dx = x_size[0]
|
|
nx = x_size[len(x_size) - 1]
|
|
|
|
c_size = np.shape(c)
|
|
dc = c_size[0]
|
|
nc = c_size[len(c_size) - 1]
|
|
distance2 = None
|
|
|
|
for i in range(0, nx):
|
|
# though we extract a column, it become a row matrix in python.
|
|
x_col_i = x[:, i]
|
|
dist_x_col_i_c = self.distance(x_col_i[np.newaxis].T, dx, 1, c, dc, nc)
|
|
if distance2 is None:
|
|
# since X will have more rows, so while copying need to add a new axis also.
|
|
distance2 = dist_x_col_i_c[np.newaxis]
|
|
else:
|
|
distance2 = np.append(distance2, dist_x_col_i_c[np.newaxis], axis=0)
|
|
X = np.exp(-distance2 / (2 * m.pow(self.kliepDefSigma, 2)), dtype='float64')
|
|
return X
|
|
|
|
def kernel_Gaussian_mdim_choose_sigma(self, x, c, sigma):
|
|
x_size = np.shape(x)
|
|
dx = x_size[0]
|
|
nx = x_size[len(x_size) - 1]
|
|
|
|
c_size = np.shape(c)
|
|
dc = c_size[0]
|
|
nc = c_size[len(c_size) - 1]
|
|
distance2 = None
|
|
|
|
for i in range(0, nx):
|
|
# though we extract a column, it become a row matrix in python.
|
|
x_col_i = x[:, i]
|
|
dist_x_col_i_c = self.distance(x_col_i[np.newaxis].T, dx, 1, c, dc, nc)
|
|
if distance2 is None:
|
|
# since X will have more rows, so while copying need to add a new axis also.
|
|
distance2 = dist_x_col_i_c[np.newaxis]
|
|
else:
|
|
distance2 = np.append(distance2, dist_x_col_i_c[np.newaxis], axis=0)
|
|
X = np.exp(-distance2/(2*m.pow(sigma, 2)), dtype='float64')
|
|
return X
|
|
|
|
"""
|
|
x_col_i represents ith instance in row matrix format.
|
|
c represents the selected test points. ith column represents ith selected test point.
|
|
|
|
distance returns a row matrix, dimension 1*c_ncol, where (1,i) element is the distance between the instance
|
|
represented by x_col_i and ith instance in c, i.e., ith column in c
|
|
"""
|
|
|
|
def distance(self, x_col_i, x_col_i_nrow, x_col_i_ncol, c, c_nrow, c_ncol):
|
|
dist_tmp = np.power(np.tile(x_col_i, (1, c_ncol)) - c, 2, dtype='float64')
|
|
# need to do column-wise sum
|
|
dist_2 = np.sum(dist_tmp, axis=0, dtype='float64')
|
|
return dist_2
|
|
|
|
def KLIEP_projection(self, alpha, Xte, meanDistSrcData, c):
|
|
# b_alpha = np.sum(b*alpha)
|
|
b_alpha = np.dot(meanDistSrcData.T, alpha)
|
|
alpha = alpha + meanDistSrcData * (1 - b_alpha) * np.linalg.pinv(c, rcond=1e-20)
|
|
# alpha = np.max(0,alpha[np.newaxis])
|
|
alpha[alpha < 0] = 0
|
|
b_alpha_new = np.dot(meanDistSrcData.T, alpha)
|
|
alpha = alpha * np.linalg.pinv(b_alpha_new, rcond=1e-20)
|
|
Xte_alpha = np.dot(Xte, alpha)
|
|
Xte_alpha[(Xte_alpha-0)<0.00001] = 0.00001
|
|
#Xte_alpha_no_zeros = np.array([(1/100) if (h - 0) < 0.00001 else h for h in Xte_alpha])
|
|
log_xte_alpha = np.log(Xte_alpha, dtype='float64')
|
|
score = np.mean(log_xte_alpha, dtype='float64')
|
|
return alpha, Xte_alpha, score
|
|
|
|
def KLIEP_projection_wo_score(self, alpha, meanDistSrcData, c):
|
|
b_alpha = np.dot(meanDistSrcData.T, alpha)
|
|
alpha = alpha + meanDistSrcData * (1 - b_alpha) * np.linalg.pinv(c, rcond=1e-20)
|
|
# alpha = np.max(0,alpha[np.newaxis])
|
|
alpha[alpha < 0] = 0
|
|
b_alpha_new = np.dot(meanDistSrcData.T, alpha)
|
|
alpha = alpha * np.linalg.pinv(b_alpha_new, rcond=1e-20)
|
|
return alpha
|
|
|
|
def KLIEP_learning(self, mean_X_de, X_nu):
|
|
X_nu_size = np.shape(X_nu)
|
|
n_nu = X_nu_size[0]
|
|
nc = X_nu_size[len(X_nu_size) - 1]
|
|
|
|
max_iteration = 100
|
|
epsilon_list = np.power(10, range(3, -4, -1), dtype='float64')
|
|
# c = sum(np.power(mean_X_de, 2, dtype=np.float))
|
|
c = np.dot(mean_X_de.T, mean_X_de)
|
|
alpha = np.ones((nc, 1))
|
|
|
|
[alpha, X_nu_alpha, score] = self.KLIEP_projection(alpha, X_nu, mean_X_de, c)
|
|
|
|
for epsilon in epsilon_list:
|
|
for iteration in range(1, max_iteration):
|
|
alpha_tmp = alpha + (epsilon * np.dot(X_nu.T, (1 / X_nu_alpha)))
|
|
[alpha_new, X_nu_alpha_new, score_new] = self.KLIEP_projection(alpha_tmp, X_nu, mean_X_de, c)
|
|
if (score_new - score) <= 0:
|
|
break
|
|
score = score_new
|
|
alpha = alpha_new
|
|
X_nu_alpha = X_nu_alpha_new
|
|
return alpha
|
|
|
|
def KLIEP(self, srcData, trgData):
|
|
srcDataSize = np.shape(srcData)
|
|
nRowSrcData = srcDataSize[0]
|
|
nColSrcData = srcDataSize[len(srcDataSize) - 1]
|
|
|
|
trgDataSize = np.shape(trgData)
|
|
nRowTrgData = trgDataSize[0]
|
|
nColTrgData = trgDataSize[len(trgDataSize) - 1]
|
|
|
|
b = min(self.kliepParB, nColTrgData)
|
|
|
|
#rand_index = np.random.permutation(nColTrgData)
|
|
# rand_index = genfromtxt('rand_index.csv', delimiter=',')-1
|
|
#refPoints = trgData[:, rand_index[0:b].tolist()]
|
|
refPoints = trgData[:, -b:]
|
|
|
|
######### Computing the final solution wh_x_de
|
|
kernelMatSrcData = self.kernel_Gaussian_mdim(srcData, refPoints)
|
|
kernelMatTrgData = self.kernel_Gaussian_mdim(trgData, refPoints)
|
|
meanDistSrcData = np.transpose(np.mean(kernelMatSrcData, 0)[np.newaxis])
|
|
alphah = self.KLIEP_learning(meanDistSrcData, kernelMatTrgData)
|
|
# wh_x_nu = np.transpose(np.dot(X_nu, alphah))
|
|
#weightTrgData = np.dot(kernelMatTrgData, alphah)
|
|
|
|
return alphah, kernelMatSrcData, kernelMatTrgData, refPoints
|
|
|
|
|
|
def chooseSigma(self, srcData, trgData, fold=5):
|
|
srcDataSize = np.shape(srcData)
|
|
nRowSrcData = srcDataSize[0]
|
|
nColSrcData = srcDataSize[len(srcDataSize) - 1]
|
|
|
|
trgDataSize = np.shape(trgData)
|
|
nRowTrgData = trgDataSize[0]
|
|
nColTrgData = trgDataSize[len(trgDataSize) - 1]
|
|
|
|
print "Choose Sigma"
|
|
####### Choosing Gaussian kernel center `x_ce'
|
|
# rand_index = np.random.permutation(n_nu)
|
|
b = min(self.kliepParB, nColTrgData)
|
|
|
|
# undo after finishing debug
|
|
# x_ce = np.array(x_nu)
|
|
# np.random.shuffle(x_ce)
|
|
rand_index = np.random.permutation(nColTrgData)
|
|
# rand_index = genfromtxt('rand_index.csv', delimiter=',')-1
|
|
refPoints = trgData[:, rand_index[0:b].tolist()]
|
|
|
|
####### Searching Gaussian kernel width `sigma_chosen'
|
|
sigma = 10
|
|
score = -float("inf")
|
|
epsilon_list = range(int(m.log10(sigma)) - 1, -2, -1)
|
|
for epsilon in epsilon_list:
|
|
for iteration in range(1, 10, 1):
|
|
sigma_new = sigma - m.pow(10, epsilon)
|
|
print "sigma = ", sigma, " epsilon=", epsilon, "sigma_new=", sigma_new
|
|
# undo after finishing debug
|
|
cv_index = np.random.permutation(nColTrgData)
|
|
# cv_index = genfromtxt('cv_index' + str(epsilon) + '_' + str(iteration) + '.csv', delimiter=',')-1
|
|
|
|
cv_split = np.floor(np.divide(np.multiply(range(0, nColTrgData), fold), nColTrgData)) + 1
|
|
score_new = 0
|
|
|
|
kernelMatSrcData = self.kernel_Gaussian_mdim_choose_sigma(srcData, refPoints, sigma_new)
|
|
kernelMatTrgData = self.kernel_Gaussian_mdim_choose_sigma(trgData, refPoints, sigma_new)
|
|
# axis = 0 means column-wise mean
|
|
meanDistSrcData = np.transpose(np.mean(kernelMatSrcData, axis=0)[np.newaxis])
|
|
for i in range(1, fold + 1, 1):
|
|
alpha_cv = self.KLIEP_learning(meanDistSrcData, kernelMatTrgData[cv_index[cv_split != i].tolist(), :])
|
|
wh_cv = np.dot(kernelMatTrgData[cv_index[cv_split == i].tolist(), :], alpha_cv)
|
|
score_new = score_new + (np.mean(np.log(wh_cv), dtype=np.float)/fold)
|
|
|
|
if (score_new - score) <= 0:
|
|
break
|
|
score = score_new
|
|
sigma = sigma_new
|
|
print "score=", score, " sigma=", sigma, "epsilon=", epsilon, "iteration=", iteration
|
|
|
|
print "Sigma = ", str(sigma)
|
|
return sigma
|
|
|
|
def changeDetection(self, trgData, refPointsOld, alphahOld, refPointsNew, alphahNew, kernelMatTrgDataNew=None):
|
|
if len(np.shape(trgData)) == 1:
|
|
trgData = trgData[np.newaxis]
|
|
|
|
if kernelMatTrgDataNew is None:
|
|
kernelMatTrgDataNew = self.kernel_Gaussian_mdim(trgData, refPointsNew)
|
|
kernelMatTrgDataOld = self.kernel_Gaussian_mdim(trgData, refPointsOld)
|
|
|
|
weightTrgDataNew = self.calcInstanceWeights(kernelMatTrgDataNew, alphahNew)
|
|
weightTrgDataNew[(weightTrgDataNew - 0) < 0.00001] = 0.00001
|
|
#weightTrgDataNew_no_zeros = np.array([float(0.0001) if (h-0)<0.00001 else h for h in weightTrgDataNew[0]])
|
|
|
|
weightTrgDataOld = self.calcInstanceWeights(kernelMatTrgDataOld, alphahOld)
|
|
weightTrgDataOld[(weightTrgDataOld - 0) < 0.00001] = 0.00001
|
|
#weightTrgDataOld_no_zeros = np.array([float(0.0001) if (h - 0) < 0.00001 else h for h in weightTrgDataOld[0]])
|
|
|
|
l_ratios = weightTrgDataNew/weightTrgDataOld
|
|
|
|
lnWeightTrgData = np.log(l_ratios, dtype='float64')
|
|
changeScore = np.sum(lnWeightTrgData, dtype='float64')
|
|
#print "ChangeScore=", changeScore
|
|
return changeScore > self.kliepParThreshold, changeScore, kernelMatTrgDataNew
|
|
|
|
|
|
"""
|
|
updateAlpha parameters:
|
|
srcData - contains instances from src stream
|
|
trgData - contains instances from trg stream, including the new point
|
|
newTrgPoint - is the new point, last column of trgData should match with newTrgPoint
|
|
alphah - most recent set of alpha
|
|
"""
|
|
def updateAlpha(self, srcData, trgData, newTrgPoint, refPoints, alphah, kernelMatSrcData=None):
|
|
if len(np.shape(srcData)) == 1:
|
|
srcData = srcData[np.newaxis]
|
|
if len(np.shape(trgData)) == 1:
|
|
trgData = trgData[np.newaxis]
|
|
|
|
# calculate c
|
|
trgDataSize = np.shape(trgData)
|
|
nRowTrgData = trgDataSize[0]
|
|
nColTrgData = trgDataSize[len(trgDataSize) - 1]
|
|
|
|
if newTrgPoint.ndim == 1:
|
|
newTrgPoint = newTrgPoint[np.newaxis]
|
|
|
|
kernelNewTrgPoint = self.kernel_Gaussian_mdim(newTrgPoint, refPoints)
|
|
# alphah is a column vector, each row of kernel_x_new represents distances for one data point
|
|
c = np.dot(kernelNewTrgPoint, alphah)
|
|
|
|
# update alpha values
|
|
tmp = 1 - (self.kliepParEta * self.kliepParLambda)
|
|
alphah = alphah * tmp
|
|
alphah = alphah[1:, :]
|
|
alphah = np.append(alphah, self.kliepParEta/c, axis=0)
|
|
alphah, kernelMatSrcData = self.satConstraints(srcData, trgData, refPoints, alphah, kernelMatSrcData)
|
|
|
|
return alphah, kernelMatSrcData
|
|
|
|
|
|
def satConstraints(self, srcData, trgData, refPoints, alphah, kernelMatSrcData=None):
|
|
trgDataSize = np.shape(trgData)
|
|
nRowTrgData = trgDataSize[0]
|
|
nColTrgData = trgDataSize[len(trgDataSize) - 1]
|
|
|
|
if kernelMatSrcData is None:
|
|
kernelMatSrcData = self.kernel_Gaussian_mdim(srcData, refPoints)
|
|
meanDistSrcData = self.colWiseMeanTransposed(kernelMatSrcData)
|
|
# c = sum(np.power(mean_X_de, 2, dtype=np.float))
|
|
c = np.dot(meanDistSrcData.T, meanDistSrcData)
|
|
alphah = self.KLIEP_projection_wo_score(alphah, meanDistSrcData, c)
|
|
|
|
return alphah, kernelMatSrcData
|
|
|
|
|
|
"""
|
|
returns transpose of matrix resulting from taking column wise mean of mat.
|
|
"""
|
|
def colWiseMeanTransposed(self, mat):
|
|
return np.transpose(np.mean(mat, 0)[np.newaxis])
|
|
|
|
|
|
"""
|
|
Returns instance weights as a row vector
|
|
"""
|
|
def calcInstanceWeights(self, kernelMat, alphah):
|
|
return np.dot(kernelMat, alphah).T |