Add files via upload
This commit is contained in:
parent
58b086d088
commit
128e7ddda7
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2021 ACDC-paper-double-review
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,17 @@
|
|||
# Multistream Classification
|
||||
|
||||
Classification (class label prediction) over two non-stationary data streams, one with labeled data (source) and the other with unlabeled data (target). Covariate shift is assumed between the source and target streams.
|
||||
|
||||
The problem is to predict the class labels of data on the target stream using labeled data from the source stream; both streams may undergo concept drift asynchronously. More details are in the publication at [CIKM 2016](http://www.utdallas.edu/~swarup.chandra/papers/multistream_cikm16.pdf).
|
||||
|
||||
# Environment
|
||||
|
||||
1. Java code for change point detection is based on [this](http://www.aaai.org/ocs/index.php/AAAI/AAAI16/paper/download/12335/11786) paper.
|
||||
2. We use the instance weighted libSVM code from [here](https://www.csie.ntu.edu.tw/~cjlin/libsvm/).
|
||||
3. config.properties file specifies data path and other configurable items.
|
||||
4. Python v2.7
|
||||
|
||||
# Execution
|
||||
```
|
||||
$ python multistream.py <dataset_name>
|
||||
```
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,173 @@
|
|||
from properties import Properties
|
||||
import math, numpy as np
|
||||
from scipy.stats import beta, binom
|
||||
from decimal import Decimal
|
||||
import sys, random, time
|
||||
|
||||
|
||||
class ChangeDetection(object):
    """Change point detection over a dynamic sliding window.

    Every admissible split index ``k`` of the window is scored and the best
    score is compared against ``-log(sensitivity)``.  Target windows hold
    classifier confidences and are modelled with beta distributions; source
    windows hold accuracy indicators and are modelled with binomials.

    gamma: exponent used to size the cushion, ``floor(N ** gamma)``, i.e. the
        number of elements at each end of a window of size N that are never
        used as split points.
    sensitivity: its negative natural log is the detection threshold.
    maxWindowSize: window length above which an update is always requested.
    """

    def __init__(self, gamma, sensitivity, maxWindowSize):
        self.gamma = gamma
        self.sensitivity = sensitivity
        self.maxWindowSize = maxWindowSize

    # ------------------------------------------------------------------
    # Beta distribution parameter estimation (method of moments)
    # ------------------------------------------------------------------
    def __calcBetaDistAlpha(self, window, sampleMean, sampleVar):
        """Return the alpha estimate for ``window``.

        ``sampleMean`` / ``sampleVar`` may be passed as -1 to have them
        computed from ``window``.
        """
        if sampleMean == -1:
            sampleMean = np.mean(window)
        if sampleVar == -1:
            sampleVar = np.var(window)
        c = (sampleMean * (1 - sampleMean) / sampleVar) - 1
        return sampleMean * c

    def __calcBetaDistBeta(self, window, alphaChange, sampleMean):
        """Return the beta estimate that pairs with ``alphaChange``.

        ``sampleMean`` may be passed as -1 to have it computed from ``window``.
        """
        if sampleMean == -1:
            sampleMean = np.mean(window)
        return alphaChange * ((1.0 / sampleMean) - 1)

    def __estimateBetaParams(self, window):
        """Return ``(alpha, beta)`` estimated from the mean and variance of ``window``."""
        sampleMean = np.mean(window)
        sampleVar = np.var(window)
        alpha = self.__calcBetaDistAlpha(window, sampleMean, sampleVar)
        return alpha, self.__calcBetaDistBeta(window, alpha, sampleMean)

    # ------------------------------------------------------------------
    # Shared scan machinery
    # ------------------------------------------------------------------
    def __needsForcedUpdate(self, slidingWindow, N, cushion):
        """Return True when the classifier must be updated regardless of the scan.

        That is the case when the window exceeds ``maxWindowSize``, or when it
        is longer than two cushions and its mean is at or below
        ``Properties.CONFCUTOFF``.
        """
        if N > self.maxWindowSize:
            Properties.logger.info('Current target Window Size is: ' + str(N) + ', which exceeds max limit, so update classifier')
            return True
        if N > 2*cushion and np.mean(slidingWindow) <= Properties.CONFCUTOFF:
            Properties.logger.info('Current target Window Size is: ' + str(N))
            Properties.logger.info('But overall confidence fell below ' + str(Properties.CONFCUTOFF) + ', so update classifier')
            return True
        return False

    def __findBestSplit(self, slidingWindow, N, cushion, scoreFn):
        """Scan split points and return ``(w, kAtMaxW)``.

        Only splits whose post-split mean is at most 90% of the pre-split mean
        are scored.  ``w`` starts at 0.0 and ``kAtMaxW`` at -1, so a split is
        recorded only if its score is strictly positive.
        """
        w = 0.0
        kAtMaxW = -1
        for k in np.arange(cushion, N - cushion + 1):
            xbar0 = np.mean(slidingWindow[:k])
            xbar1 = np.mean(slidingWindow[k:])
            if xbar1 <= 0.9*xbar0:
                skn = scoreFn(slidingWindow, k, N, xbar0, xbar1)
                if skn > w:
                    w = skn
                    kAtMaxW = k
        return w, kAtMaxW

    def __betaScore(self, slidingWindow, k, N, xbar0, xbar1):
        """Score split ``k`` of a confidence window using beta densities."""
        alphaPreChange, betaPreChange = self.__estimateBetaParams(slidingWindow[:k])
        # The post-change variance was previously taken from np.mean(), which
        # forced a negative alpha; it is now the actual variance.
        alphaPostChange, betaPostChange = self.__estimateBetaParams(slidingWindow[k:])
        try:
            swin = np.array(slidingWindow[k:], dtype=float)
            denom = beta.pdf(swin, alphaPreChange, betaPreChange)
            # Avoid dividing by a zero density.
            nor_denom = np.where(denom == 0, 0.001, denom)
            # NOTE(review): the post-change density is evaluated at
            # x / pdf_pre(x) rather than dividing pdf_post(x) by pdf_pre(x);
            # kept as in the original implementation - confirm intent.
            nor_swin = swin / nor_denom
            post = beta.pdf(nor_swin, alphaPostChange, betaPostChange)
            # Decimal keeps ln(0) as -Infinity instead of raising.
            return sum([Decimal(float(p)).ln() for p in post])
        except Exception:
            print(str(sys.exc_info()[1]))
            raise Exception('Error in calculating skn')

    def __binomScore(self, slidingWindow, k, N, xbar0, xbar1):
        """Score split ``k`` of an accuracy window using binomial masses."""
        try:
            swin = np.array(slidingWindow[k:N], dtype=float)
            denom = binom.pmf(swin, k, xbar0)
            # Avoid dividing by a zero probability.
            nor_denom = np.where(denom == 0, 0.001, denom)
            # NOTE(review): same argument-scaling form as the beta score;
            # kept as in the original implementation - confirm intent.
            nor_swin = swin / nor_denom
            post = binom.pmf(nor_swin, N - k, xbar1)
            return sum([Decimal(float(p)).ln() for p in post])
        except Exception:
            print(str(sys.exc_info()[1]))
            raise Exception('Error in calculating skn')

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def detectTargetChange(self, slidingWindow):
        """Detect a change in the target classifier's confidence window.

        input: The dynamic sliding window containing confidence of target classifier
        output: -1 if no change found, otherwise the change point
                (0 when an update is forced by window size or low mean confidence)
        """
        N = len(slidingWindow)
        cushion = max(Properties.CUSHION, int(math.floor(N ** self.gamma)))

        # If the window is too long, or its mean confidence is at or below
        # Properties.CONFCUTOFF, the classifier must be retrained.
        if self.__needsForcedUpdate(slidingWindow, N, cushion):
            return 0

        threshold = -math.log(self.sensitivity)
        w, kAtMaxW = self.__findBestSplit(slidingWindow, N, cushion, self.__betaScore)

        estimatedChangePoint = -1
        if w >= threshold and kAtMaxW != -1:
            estimatedChangePoint = kAtMaxW
            Properties.logger.info('Estimated change point is ' + str(estimatedChangePoint) + ', detected at ' + str(N))
        return estimatedChangePoint

    def detectSourceChange(self, slidingWindow):
        """Detect a change in the source classifier's accuracy window.

        input: The dynamic sliding window containing accuracy of source classifier
               (entries should be 1=accurate, 0=erroneous)
        output: -1 if no change found, otherwise the change point
                (0 when an update is forced by window size or low mean accuracy)
        """
        N = len(slidingWindow)
        cushion = max(Properties.CUSHION, int(math.floor(N ** self.gamma)))

        # If the window is too long, or its mean is at or below
        # Properties.CONFCUTOFF, the classifier must be retrained.
        if self.__needsForcedUpdate(slidingWindow, N, cushion):
            return 0

        threshold = -math.log(self.sensitivity)
        w, kAtMaxW = self.__findBestSplit(slidingWindow, N, cushion, self.__binomScore)

        estimatedChangePoint = -1
        if w >= threshold and kAtMaxW != -1:
            estimatedChangePoint = kAtMaxW
            Properties.logger.info('Estimated change point is ' + str(estimatedChangePoint) + ', detected at: ' + str(N))
            Properties.logger.info('Value of w: ' + str(w) + ', Value of Threshold: ' + str(threshold))
        return estimatedChangePoint
|
|
@ -0,0 +1,14 @@
|
|||
baseDir=
|
||||
srcfileAppend=_source.csv
|
||||
trgfileAppend=_target.csv
|
||||
gamma=0.5
|
||||
cushion=100
|
||||
sensitivity=0.01
|
||||
ensemble_size=10
|
||||
confthreshold=0.9
|
||||
confcutoff=0.5
|
||||
maxWindowSize=1000
|
||||
initialDataSize=10
|
||||
output_file_name=result.out
|
||||
logfile=multistream.log
|
||||
tempDir=temp/
|
|
@ -0,0 +1,143 @@
|
|||
import math
|
||||
from model import Model
|
||||
from properties import Properties
|
||||
|
||||
|
||||
class Ensemble(object):
    """Bounded collection of models with source/target weighted voting."""

    def __init__(self, ensemble_size):
        self.models = []
        self.size = ensemble_size

    def updateWeight(self, data, isSource):
        """Recompute the weight of every model in the ensemble on ``data``."""
        for member in self.models:
            member.computeModelWeight(data, isSource, Properties.MAXVAR)

    def __addModel(self, model):
        """Insert ``model`` and return the index of the slot it occupies.

        While the ensemble is below capacity the model is appended; once it is
        full the least desirable model is overwritten.
        """
        if len(self.models) < self.size:
            self.models.append(model)
            return len(self.models) - 1

        # replace least desirable model
        slot = self.__getLeastDesirableModel()
        Properties.logger.info('Least desirable model removed at ' + str(slot))
        self.models[slot] = model
        return slot

    def __getLeastDesirableModel(self):
        """Return the index of the model to evict when the ensemble is full.

        Models are ranked by ascending target weight and by descending source
        weight; the two rankings are walked in lock-step and the first target
        ranked index that differs from the source ranked index at the same
        position is returned.  If the rankings agree everywhere, the model
        with the smallest target weight is returned.
        """
        positions = range(len(self.models))
        bySource = sorted(positions, key=lambda p: self.models[p].sweight, reverse=True)
        byTarget = sorted(positions, key=lambda p: self.models[p].tweight)

        for sIdx, tIdx in zip(bySource, byTarget):
            if tIdx != sIdx:
                return tIdx

        return byTarget[0]

    def generateNewModel(self, sourceData, targetData, isSource):
        """Train a new model, weight it on both streams, and add it to the ensemble.

        With ``isSource`` the model is trained on source data only; otherwise
        it is trained on source and target data together.
        Raises Exception when either data collection is empty.
        """
        model = Model()

        if len(sourceData) == 0 or len(targetData) == 0:
            raise Exception('Source or Target stream should have some elements')

        # Create new model
        if isSource:
            Properties.logger.info('Source model creation')
            trainTarget = None
        else:
            Properties.logger.info('Target model creation')
            trainTarget = targetData
        model.train(sourceData, trainTarget, Properties.MAXVAR)

        # compute source and target weight
        Properties.logger.info('Computing model weights')
        for stream, streamIsSource in ((sourceData, True), (targetData, False)):
            model.computeModelWeight(stream, streamIsSource, Properties.MAXVAR)

        # update ensemble
        slot = self.__addModel(model)
        Properties.logger.info('Ensemble updated at ' + str(slot))

    def evaluateEnsemble(self, dataInstance, isSource):
        """Combine the per-model results for one data instance.

        Each model's predicted label is ``int(result[0][0])``.  For source
        data the vote of a model is its ``sweight``; for target data it is
        ``result[0][1]``.

        Returns ``[1, -1]`` / ``[0, -1]`` for source data depending on whether
        the winning label equals ``dataInstance[-1]``, and
        ``[label, winning_sum / number_of_models]`` for target data.
        """
        votes = {}
        for member in self.models:
            # test data instance in each model
            outcome = member.test([dataInstance], Properties.MAXVAR)
            label = int(outcome[0][0])
            share = member.sweight if isSource else outcome[0][1]
            try:
                votes[label] += share
            except KeyError:
                votes[label] = share

        # get maximum voted sum class label; on ties the last label in
        # iteration order wins, and 0.0 is used if nothing equals the maximum
        sumMax = max(votes.values())
        leaders = [label for label in votes if votes[label] == sumMax]
        classMax = leaders[-1] if leaders else 0.0

        if not isSource:
            # for target data
            return [classMax, sumMax/len(self.models)]

        # for source data, check true vs predicted class label
        return [1 if classMax == dataInstance[-1] else 0, -1]

    def getEnsembleSummary(self):
        """Return a multi-line text summary of the models and their weights."""
        lines = ['************************* E N S E M B L E S U M M A R Y ************************\n']
        lines.append('Ensemble has currently ' + str(len(self.models)) + ' models.\n')
        for number, member in enumerate(self.models, 1):
            lines.append('Model' + str(number) + ': weights<' + str(member.sweight) + ', ' + str(member.tweight) + '>\n')
        return ''.join(lines)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,111 @@
|
|||
name: MSC
|
||||
channels:
|
||||
- conda-forge
|
||||
- omnia
|
||||
- defaults
|
||||
dependencies:
|
||||
- apipkg=1.5=py27_0
|
||||
- attrs=19.3.0=py_0
|
||||
- backports=1.0=py_2
|
||||
- backports.shutil_get_terminal_size=1.0.0=py27_2
|
||||
- backports.shutil_which=3.5.2=py27_0
|
||||
- backports_abc=0.5=py27h0ec6b72_0
|
||||
- blas=1.0=mkl
|
||||
- bleach=3.1.0=py27_0
|
||||
- ca-certificates=2019.11.27=0
|
||||
- certifi=2019.11.28=py27_0
|
||||
- colorama=0.4.3=py_0
|
||||
- configparser=4.0.2=py27_0
|
||||
- cvxopt=1.1.8=py27_0
|
||||
- decorator=4.4.1=py_0
|
||||
- defusedxml=0.6.0=py_0
|
||||
- entrypoints=0.3=py27_0
|
||||
- enum34=1.1.6=py27_1
|
||||
- execnet=1.7.1=py_0
|
||||
- functools32=3.2.3.2=py27_1
|
||||
- futures=3.3.0=py27_0
|
||||
- icc_rt=2019.0.0=h0cc432a_1
|
||||
- icu=58.2=h2aa20d9_1
|
||||
- intel-openmp=2019.4=245
|
||||
- ipaddress=1.0.23=py_0
|
||||
- ipykernel=4.10.0=py27_0
|
||||
- ipython=5.8.0=py27_0
|
||||
- ipython_genutils=0.2.0=py27_0
|
||||
- ipywidgets=7.5.1=py_0
|
||||
- jinja2=2.10.3=py_0
|
||||
- jpeg=9b=ha175dff_2
|
||||
- jsonschema=3.0.2=py27_0
|
||||
- jupyter=1.0.0=py27_7
|
||||
- jupyter_client=5.3.4=py27_0
|
||||
- jupyter_console=5.2.0=py27_1
|
||||
- jupyter_core=4.6.1=py27_0
|
||||
- jupyterlab=0.33.11=py27_0
|
||||
- jupyterlab_launcher=0.11.2=py27h28b3542_0
|
||||
- libpng=1.6.37=h7a46e7a_0
|
||||
- libsodium=1.0.16=h8b3e59e_0
|
||||
- libsvm=323=0
|
||||
- m2w64-gcc-libgfortran=5.3.0=6
|
||||
- m2w64-gcc-libs=5.3.0=7
|
||||
- m2w64-gcc-libs-core=5.3.0=7
|
||||
- m2w64-gmp=6.1.0=2
|
||||
- m2w64-libwinpthread-git=5.0.0.4634.697f757=2
|
||||
- markupsafe=1.1.1=py27h0c8e037_0
|
||||
- mistune=0.8.4=py27h0c8e037_0
|
||||
- mkl=2019.4=245
|
||||
- mkl-service=2.3.0=py27h0b88c2a_0
|
||||
- mkl_fft=1.0.15=py27h44c1dab_0
|
||||
- msys2-conda-epoch=20160418=1
|
||||
- nbconvert=5.6.1=py27_0
|
||||
- nbformat=4.4.0=py27_0
|
||||
- notebook=5.7.8=py27_0
|
||||
- numpy=1.16.5=py27h5fc8d92_0
|
||||
- numpy-base=1.16.5=py27hb1d0314_0
|
||||
- openssl=1.0.2u=h0c8e037_0
|
||||
- pandas=0.24.2=py27hc56fc5f_0
|
||||
- pandoc=2.2.3.2=0
|
||||
- pandocfilters=1.4.2=py27_1
|
||||
- pathlib2=2.3.5=py27_0
|
||||
- pickleshare=0.7.5=py27_0
|
||||
- pip=19.3.1=py27_0
|
||||
- prometheus_client=0.7.1=py_0
|
||||
- prompt_toolkit=1.0.15=py27h3a8ec6a_0
|
||||
- py4j=0.10.8.1=py_0
|
||||
- pygments=2.5.2=py_0
|
||||
- pyqt=5.6.0=py27h6e61f57_6
|
||||
- pyrsistent=0.15.6=py27h0c8e037_0
|
||||
- python=2.7.17=h930f6bb_0
|
||||
- python-dateutil=2.8.1=py_0
|
||||
- pytz=2019.3=py_0
|
||||
- pywin32=227=py27h0c8e037_0
|
||||
- pywinpty=0.5.5=py27_1000
|
||||
- pyzmq=18.1.0=py27hc56fc5f_0
|
||||
- qt=5.6.2=vc9hc26998b_12
|
||||
- qtconsole=4.6.0=py_1
|
||||
- scandir=1.10.0=py27h0c8e037_0
|
||||
- scikit-learn=0.20.3=py27hf381715_0
|
||||
- scipy=1.2.1=py27h4c3ab11_0
|
||||
- send2trash=1.5.0=py27_0
|
||||
- setuptools=44.0.0=py27_0
|
||||
- simplegeneric=0.8.1=py27_2
|
||||
- singledispatch=3.4.0.3=py27h3f9d112_0
|
||||
- sip=4.18.1=py27hc56fc5f_2
|
||||
- six=1.13.0=py27_0
|
||||
- sqlite=3.30.1=h0c8e037_0
|
||||
- subprocess32=3.5.4=py27h0c8e037_0
|
||||
- terminado=0.8.3=py27_0
|
||||
- testpath=0.4.4=py_0
|
||||
- tornado=5.1.1=py27h0c8e037_0
|
||||
- traitlets=4.3.3=py27_0
|
||||
- vc=9=h7299396_1
|
||||
- vs2008_runtime=9.00.30729.1=hfaea7d5_1
|
||||
- wcwidth=0.1.7=py27_0
|
||||
- webencodings=0.5.1=py27_1
|
||||
- wheel=0.33.6=py27_0
|
||||
- widgetsnbextension=3.5.1=py27_0
|
||||
- win_unicode_console=0.5=py27hc037021_0
|
||||
- wincertstore=0.2=py27hf04cefb_0
|
||||
- winpty=0.4.3=4
|
||||
- zeromq=4.3.1=h2880e7c_3
|
||||
- zlib=1.2.11=h3cc03e0_3
|
||||
prefix: C:\Users\ivsuc\Miniconda3\envs\MSC
|
||||
|
|
@ -0,0 +1,500 @@
|
|||
#!/usr/bin/env python
|
||||
__all__ = ['find_parameters']
|
||||
|
||||
import os, sys, traceback, getpass, time, re
|
||||
from threading import Thread
|
||||
from subprocess import *
|
||||
|
||||
if sys.version_info[0] < 3:
|
||||
from Queue import Queue
|
||||
else:
|
||||
from queue import Queue
|
||||
|
||||
# Hosts on which jobs are run through a telnet login; find_parameters()
# prompts for a password only when this list is non-empty.
telnet_workers = []
|
||||
# Presumably hosts served by SSHWorker - usage is outside this view; confirm.
ssh_workers = []
|
||||
# Presumably the number of LocalWorker threads to start - usage is outside
# this view; confirm.
nr_local_worker = 1
|
||||
|
||||
class GridOption:
    """Settings for a grid search over log2(C) and log2(gamma).

    Defaults are set in ``__init__`` and then overridden by ``options``,
    which may be a whitespace-separated string or a list of tokens.
    """

    def __init__(self, dataset_pathname, options):
        here = os.path.dirname(__file__)
        if sys.platform == 'win32':
            # example for windows
            self.svmtrain_pathname = os.path.join(here, r'libsvm-weights-3.20\windows\svm-train.exe')
            # svmtrain_pathname = r'c:\Program Files\libsvm\windows\svm-train.exe'
            self.gnuplot_pathname = r'c:\tmp\gnuplot\binary\pgnuplot.exe'
        else:
            self.svmtrain_pathname = os.path.join(here, 'libsvm-weights-3.20/svm-train')
            self.gnuplot_pathname = '/usr/bin/gnuplot'

        self.fold = 5
        self.c_begin, self.c_end, self.c_step = -5, 15, 2
        self.g_begin, self.g_end, self.g_step = 3, -15, -2
        self.grid_with_c, self.grid_with_g = True, True

        self.dataset_pathname = dataset_pathname
        self.dataset_title = os.path.split(dataset_pathname)[1]
        self.out_pathname = '{0}.out'.format(self.dataset_title)
        self.png_pathname = '{0}.png'.format(self.dataset_title)
        self.pass_through_string = ' '
        self.resume_pathname = None
        self.parse_options(options)

    def parse_options(self, options):
        """Apply command-line style ``options`` and validate the resulting paths.

        Unrecognised tokens are collected into ``pass_through_string``.
        Raises ValueError for ``-c`` / ``-g`` or when both grids are disabled,
        and IOError when svm-train, the dataset, or the resume file is missing.
        A missing gnuplot executable only disables plotting.
        """
        if type(options) == str:
            options = options.split()

        # flag -> grid axis whose range (or 'null') follows the flag
        grid_flags = {'-log2c': 'c', '-log2g': 'g'}
        # flag -> attribute that takes the following token verbatim
        value_flags = {'-v': 'fold', '-svmtrain': 'svmtrain_pathname', '-png': 'png_pathname'}
        # flag -> attribute that takes the following token, with 'null' meaning None
        nullable_flags = {'-gnuplot': 'gnuplot_pathname', '-out': 'out_pathname'}

        extras = []
        pos = 0
        while pos < len(options):
            flag = options[pos]
            if flag in grid_flags:
                pos += 1
                axis = grid_flags[flag]
                if options[pos] == 'null':
                    setattr(self, 'grid_with_' + axis, False)
                else:
                    begin, end, step = map(float, options[pos].split(','))
                    setattr(self, axis + '_begin', begin)
                    setattr(self, axis + '_end', end)
                    setattr(self, axis + '_step', step)
            elif flag in value_flags:
                pos += 1
                setattr(self, value_flags[flag], options[pos])
            elif flag in ('-c', '-g'):
                raise ValueError('Use -log2c and -log2g.')
            elif flag in nullable_flags:
                pos += 1
                chosen = None if options[pos] == 'null' else options[pos]
                setattr(self, nullable_flags[flag], chosen)
            elif flag == '-resume':
                # the file name is optional: fall back to <dataset>.out
                is_last = pos == (len(options) - 1)
                if is_last or options[pos + 1].startswith('-'):
                    self.resume_pathname = self.dataset_title + '.out'
                else:
                    pos += 1
                    self.resume_pathname = options[pos]
            else:
                extras.append(flag)
            pos += 1

        self.pass_through_string = ' '.join(extras)

        if not os.path.exists(self.svmtrain_pathname):
            raise IOError('svm-train executable not found')
        if not os.path.exists(self.dataset_pathname):
            raise IOError('dataset not found')
        if self.resume_pathname and not os.path.exists(self.resume_pathname):
            raise IOError('file for resumption not found')
        if not (self.grid_with_c or self.grid_with_g):
            raise ValueError('-log2c and -log2g should not be null simultaneously')
        if self.gnuplot_pathname and not os.path.exists(self.gnuplot_pathname):
            sys.stderr.write('gnuplot executable not found\n')
            self.gnuplot_pathname = None
|
||||
|
||||
def redraw(db,best_param,gnuplot,options,tofile=False):
    """Send a contour plot of the results collected so far to gnuplot.

    db: list of (log2c, log2g, rate) entries; it is sorted in place.
    best_param: (best_log2c, best_log2g, best_rate) shown in the plot labels.
    gnuplot: writable binary stream connected to gnuplot.
    options: object providing c_begin/c_end, g_begin/g_end, dataset_title
        and png_pathname.
    tofile: write a PNG to options.png_pathname instead of drawing on screen.

    Nothing is written when db is empty or when all entries share the same
    c, the same g, or the same rate.
    """
    if len(db) == 0:
        return
    contour_base = round(max(row[2] for row in db)) - 3
    contour_step = 0.5

    best_log2c, best_log2g, best_rate = best_param

    # if newly obtained c, g, or cv values are the same,
    # then stop redrawing the contour.
    reference = db[0]
    for column in (0, 1, 2):
        if all(row[column] == reference[column] for row in db):
            return

    def send(command):
        gnuplot.write(command.encode())

    if tofile:
        send("set term png transparent small linewidth 2 medium enhanced\n")
        send("set output \"{0}\"\n".format(options.png_pathname.replace('\\', '\\\\')))
    elif sys.platform == 'win32':
        send("set term windows\n")
    else:
        send("set term x11\n")

    send("set xlabel \"log2(C)\"\n")
    send("set ylabel \"log2(gamma)\"\n")
    send("set xrange [{0}:{1}]\n".format(options.c_begin, options.c_end))
    send("set yrange [{0}:{1}]\n".format(options.g_begin, options.g_end))
    send("set contour\n")
    send("set cntrparam levels incremental {0},{1},100\n".format(contour_base, contour_step))
    send("unset surface\n")
    send("unset ztics\n")
    send("set view 0,0\n")
    send("set title \"{0}\"\n".format(options.dataset_title))
    send("unset label\n")
    send("set label \"Best log2(C) = {0} log2(gamma) = {1} accuracy = {2}%\" at screen 0.5,0.85 center\n".format(best_log2c, best_log2g, best_rate))
    send("set label \"C = {0} gamma = {1}\" at screen 0.5,0.8 center\n".format(2**best_log2c, 2**best_log2g))
    send("set key at screen 0.9,0.9\n")
    send("splot \"-\" with lines\n")

    # ascending c, and descending g within the same c
    db.sort(key=lambda row: (row[0], -row[1]))

    current_c = db[0][0]
    for row in db:
        if row[0] != current_c:
            # a blank line separates the data of consecutive c values
            send("\n")
            current_c = row[0]
        send("{0[0]} {0[1]} {0[2]}\n".format(row))
    send("e\n")
    send("\n")  # force gnuplot back to prompt when term set failure
    gnuplot.flush()
|
||||
|
||||
|
||||
def calculate_jobs(options):
    """Build the ordered list of (log2c, log2g) jobs for the grid search.

    options: object providing c_begin/c_end/c_step, g_begin/g_end/g_step,
        grid_with_c, grid_with_g and resume_pathname.

    Returns ``(jobs, resumed_jobs)``.  ``jobs`` is a list of lines, each a
    list of (log2c, log2g) pairs, ordered so that the grid is refined
    progressively from its middle.  ``resumed_jobs`` maps (log2c, log2g) to
    the rate read from ``options.resume_pathname`` (empty when no file is
    given); a missing coordinate is stored as None.

    Raises ValueError when a grid step is zero.
    """

    def range_f(begin, end, step):
        # like range, but works on non-integer too
        if step == 0:
            # a zero step would otherwise never reach `end`
            raise ValueError('grid step must not be zero')
        seq = []
        while (step > 0 and begin <= end) or (step < 0 and begin >= end):
            seq.append(begin)
            begin = begin + step
        return seq

    def permute_sequence(seq):
        # middle element first, then the permuted halves interleaved
        n = len(seq)
        if n <= 1:
            return seq

        mid = n // 2
        left = permute_sequence(seq[:mid])
        right = permute_sequence(seq[mid+1:])

        ret = [seq[mid]]
        for idx in range(max(len(left), len(right))):
            if idx < len(left):
                ret.append(left[idx])
            if idx < len(right):
                ret.append(right[idx])
        return ret

    c_seq = permute_sequence(range_f(options.c_begin, options.c_end, options.c_step))
    g_seq = permute_sequence(range_f(options.g_begin, options.g_end, options.g_step))

    if not options.grid_with_c:
        c_seq = [None]
    if not options.grid_with_g:
        g_seq = [None]

    nr_c = float(len(c_seq))
    nr_g = float(len(g_seq))
    i, j = 0, 0
    jobs = []

    while i < nr_c or j < nr_g:
        if i/nr_c < j/nr_g:
            # increase C resolution
            jobs.append([(c_seq[i], g_seq[k]) for k in range(0, j)])
            i = i + 1
        else:
            # increase g resolution
            jobs.append([(c_seq[k], g_seq[j]) for k in range(0, i)])
            j = j + 1

    resumed_jobs = {}

    if options.resume_pathname is None:
        return jobs, resumed_jobs

    # `with` guarantees the resume file is closed once parsed
    with open(options.resume_pathname, 'r') as resume_file:
        for line in resume_file:
            line = line.strip()
            rst = re.findall(r'rate=([0-9.]+)', line)
            if not rst:
                continue
            rate = float(rst[0])

            c, g = None, None
            rst = re.findall(r'log2c=([0-9.-]+)', line)
            if rst:
                c = float(rst[0])
            rst = re.findall(r'log2g=([0-9.-]+)', line)
            if rst:
                g = float(rst[0])

            resumed_jobs[(c, g)] = rate

    return jobs, resumed_jobs
|
||||
|
||||
|
||||
class WorkerStopToken:
    """Marker used to notify the worker to stop or if a worker is dead."""
|
||||
|
||||
class Worker(Thread):
    """Thread that takes (log2c, log2g) jobs from a queue and reports rates.

    Subclasses supply ``run_one(c, g)``, which returns the rate for one job
    or None when no rate could be obtained.
    """

    def __init__(self,name,job_queue,result_queue,options):
        Thread.__init__(self)
        self.name = name
        self.job_queue = job_queue
        self.result_queue = result_queue
        self.options = options

    def run(self):
        """Process jobs until a stop token arrives or a job fails."""
        while True:
            (cexp, gexp) = self.job_queue.get()
            if cexp is WorkerStopToken:
                # hand the token back so that other workers see it too
                self.job_queue.put((cexp, gexp))
                # print('worker {0} stop.'.format(self.name))
                break
            try:
                c = None if cexp is None else 2.0**cexp
                g = None if gexp is None else 2.0**gexp
                rate = self.run_one(c, g)
                if rate is None:
                    raise RuntimeError('get no rate')
            except:
                # we failed, let others do that and we just quit
                failure = sys.exc_info()
                traceback.print_exception(failure[0], failure[1], failure[2])

                self.job_queue.put((cexp, gexp))
                sys.stderr.write('worker {0} quit.\n'.format(self.name))
                break
            else:
                self.result_queue.put((self.name, cexp, gexp, rate))

    def get_cmd(self,c,g):
        """Return the svm-train command line for one (C, gamma) pair."""
        opts = self.options
        pieces = ['"' + opts.svmtrain_pathname + '"']
        if opts.grid_with_c:
            pieces.append(' -c {0} '.format(c))
        if opts.grid_with_g:
            pieces.append(' -g {0} '.format(g))
        pieces.append(' -v {0} {1} {2} '.format(
            opts.fold, opts.pass_through_string, opts.dataset_pathname))
        return ''.join(pieces)
|
||||
|
||||
class LocalWorker(Worker):
    """Worker that runs svm-train as a subprocess on the local machine."""

    def run_one(self,c,g):
        """Run one cross-validation and return its rate.

        Returns the number in front of the trailing character of the last
        token of the first output line containing 'Cross', or None when no
        such line is printed.
        """
        cmdline = self.get_cmd(c,g)
        proc = Popen(cmdline,shell=True,stdout=PIPE,stderr=PIPE,stdin=PIPE)
        # communicate() drains both pipes and waits for the child, so a chatty
        # stderr cannot block it and the finished process is reaped.
        stdout_data, _ = proc.communicate()
        for line in stdout_data.splitlines():
            if str(line).find('Cross') != -1:
                return float(line.split()[-1][0:-1])
        return None
|
||||
|
||||
class SSHWorker(Worker):
    """Worker that runs svm-train on a remote host through ``ssh``."""

    def __init__(self,name,job_queue,result_queue,host,options):
        Worker.__init__(self,name,job_queue,result_queue,options)
        self.host = host
        # the remote command changes into this directory first
        self.cwd = os.getcwd()

    def run_one(self,c,g):
        """Run one cross-validation on ``self.host`` and return its rate.

        Returns the number in front of the trailing character of the last
        token of the first output line containing 'Cross', or None when no
        such line is printed.
        """
        cmdline = 'ssh -x -t -t {0} "cd {1}; {2}"'.format\
            (self.host,self.cwd,self.get_cmd(c,g))
        proc = Popen(cmdline,shell=True,stdout=PIPE,stderr=PIPE,stdin=PIPE)
        # communicate() drains both pipes and waits for the child, so a chatty
        # stderr cannot block it and the finished process is reaped.
        stdout_data, _ = proc.communicate()
        for line in stdout_data.splitlines():
            if str(line).find('Cross') != -1:
                return float(line.split()[-1][0:-1])
        return None
|
||||
|
||||
class TelnetWorker(Worker):
    """Worker that runs svm-train on a remote host over a telnet session."""

    def __init__(self,name,job_queue,result_queue,host,username,password,options):
        Worker.__init__(self,name,job_queue,result_queue,options)
        self.host = host
        self.username = username
        self.password = password

    @staticmethod
    def _as_bytes(text):
        # telnetlib exchanges bytes; on Python 2 str already is bytes
        return text if isinstance(text, bytes) else text.encode()

    def run(self):
        """Log in, change to the current working directory, then process jobs."""
        # imported lazily so the module loads where telnetlib is unavailable
        import telnetlib
        self.tn = tn = telnetlib.Telnet(self.host)
        tn.read_until(b'login: ')
        tn.write(self._as_bytes(self.username + '\n'))
        tn.read_until(b'Password: ')
        tn.write(self._as_bytes(self.password + '\n'))

        # XXX: how to know whether login is successful?
        tn.read_until(self._as_bytes(self.username))
        #
        print('login ok', self.host)
        tn.write(self._as_bytes('cd '+os.getcwd()+'\n'))
        Worker.run(self)
        tn.write(b'exit\n')

    def run_one(self,c,g):
        """Send one svm-train command and return the rate it reports.

        Returns None when the matched output holds no line containing 'Cross'.
        """
        cmdline = self.get_cmd(c,g)
        self.tn.write(self._as_bytes(cmdline+'\n'))
        (idx,matchm,output) = self.tn.expect([b'Cross.*\n'])
        if not isinstance(output, str):
            # bytes on Python 3; decode so the text handling below applies
            output = output.decode(errors='replace')
        for line in output.split('\n'):
            if str(line).find('Cross') != -1:
                return float(line.split()[-1][0:-1])
        return None
|
||||
|
||||
def find_parameters(dataset_pathname, options=''):
    """Grid-search c and/or g by cross validation and return the best setting.

    dataset_pathname -- dataset path, forwarded to ``GridOption``
    options          -- grid/svm options, forwarded to ``GridOption``

    Jobs from ``calculate_jobs`` are handed to telnet, ssh and local workers
    through a shared queue.  Results are collected in job order; each new
    result is written to ``options.out_pathname`` (when set) and, when gnuplot
    is enabled and both c and g are searched, the plot is redrawn after every
    line of jobs.

    Returns ``(best_rate, best_param)`` where ``best_param`` maps ``'c'``
    and/or ``'g'`` to ``2.0**`` the best exponent found.
    """

    def update_param(c,g,rate,best_c,best_g,best_rate,worker,resumed):
        """Fold one result into the running best, report it, and log it.

        A result replaces the best when its rate is higher, or when the rate
        ties, g is the same and c is smaller.  Results that were resumed from
        an earlier run are printed but not written to the output file again.
        Returns the updated ``(best_c, best_g, best_rate)``.
        """
        if (rate > best_rate) or (rate==best_rate and g==best_g and c<best_c):
            best_rate,best_c,best_g = rate,c,g
        stdout_str = '[{0}] {1} {2} (best '.format\
            (worker,' '.join(str(x) for x in [c,g] if x is not None),rate)
        output_str = ''
        if c is not None:
            stdout_str += 'c={0}, '.format(2.0**best_c)
            output_str += 'log2c={0} '.format(c)
        if g is not None:
            stdout_str += 'g={0}, '.format(2.0**best_g)
            output_str += 'log2g={0} '.format(g)
        stdout_str += 'rate={0})'.format(best_rate)
        print(stdout_str)
        if options.out_pathname and not resumed:
            output_str += 'rate={0}\n'.format(rate)
            result_file.write(output_str)
            result_file.flush()

        return best_c,best_g,best_rate

    options = GridOption(dataset_pathname, options)

    if options.gnuplot_pathname:
        gnuplot = Popen(options.gnuplot_pathname,stdin = PIPE,stdout=PIPE,stderr=PIPE).stdin
    else:
        gnuplot = None

    # put jobs in queue

    jobs,resumed_jobs = calculate_jobs(options)
    job_queue = Queue(0)
    result_queue = Queue(0)

    for (c,g) in resumed_jobs:
        result_queue.put(('resumed',c,g,resumed_jobs[(c,g)]))

    for line in jobs:
        for (c,g) in line:
            if (c,g) not in resumed_jobs:
                job_queue.put((c,g))

    # hack the queue to become a stack --
    # this is important when some thread
    # failed and re-put a job. If we still
    # use FIFO, the job will be put
    # into the end of the queue, and the graph
    # will only be updated in the end

    job_queue._put = job_queue.queue.appendleft

    # fire telnet workers

    if telnet_workers:
        username = getpass.getuser()
        password = getpass.getpass()
        for host in telnet_workers:
            worker = TelnetWorker(host,job_queue,result_queue,
                     host,username,password,options)
            worker.start()

    # fire ssh workers

    if ssh_workers:
        for host in ssh_workers:
            worker = SSHWorker(host,job_queue,result_queue,host,options)
            worker.start()

    # fire local workers

    for i in range(nr_local_worker):
        worker = LocalWorker('local',job_queue,result_queue,options)
        worker.start()

    # gather results

    done_jobs = {}

    # Append when resuming so earlier results are kept; otherwise start fresh.
    result_file = None
    if options.out_pathname:
        if options.resume_pathname:
            result_file = open(options.out_pathname, 'a')
        else:
            result_file = open(options.out_pathname, 'w')

    db = []
    best_rate = -1
    best_c,best_g = None,None

    # try/finally so the output file is closed even if gathering fails.
    try:
        for (c,g) in resumed_jobs:
            rate = resumed_jobs[(c,g)]
            best_c,best_g,best_rate = update_param(c,g,rate,best_c,best_g,best_rate,'resumed',True)

        for line in jobs:
            for (c,g) in line:
                # Results arrive in any order; keep draining the queue until
                # the job we are waiting for has been reported.
                while (c,g) not in done_jobs:
                    (worker,c1,g1,rate1) = result_queue.get()
                    done_jobs[(c1,g1)] = rate1
                    if (c1,g1) not in resumed_jobs:
                        best_c,best_g,best_rate = update_param(c1,g1,rate1,best_c,best_g,best_rate,worker,False)
                db.append((c,g,done_jobs[(c,g)]))
            if gnuplot and options.grid_with_c and options.grid_with_g:
                redraw(db,[best_c, best_g, best_rate],gnuplot,options)
                redraw(db,[best_c, best_g, best_rate],gnuplot,options,True)
    finally:
        if result_file is not None:
            result_file.close()

    job_queue.put((WorkerStopToken,None))
    best_param, best_cg = {}, []
    if best_c is not None:
        best_param['c'] = 2.0**best_c
        best_cg += [2.0**best_c]
    if best_g is not None:
        best_param['g'] = 2.0**best_g
        best_cg += [2.0**best_g]
    print('{0} {1}'.format(' '.join(map(str,best_cg)), best_rate))

    return best_rate, best_param
|
||||
|
||||
|
||||
if __name__ == '__main__':

    def exit_with_help():
        """Print the command-line usage text and exit with status 1."""
        usage_text = """\
Usage: grid.py [grid_options] [svm_options] dataset

grid_options :
-log2c {begin,end,step | "null"} : set the range of c (default -5,15,2)
begin,end,step -- c_range = 2^{begin,...,begin+k*step,...,end}
"null" -- do not grid with c
-log2g {begin,end,step | "null"} : set the range of g (default 3,-15,-2)
begin,end,step -- g_range = 2^{begin,...,begin+k*step,...,end}
"null" -- do not grid with g
-v n : n-fold cross validation (default 5)
-svmtrain pathname : set svm executable path and name
-gnuplot {pathname | "null"} :
pathname -- set gnuplot executable path and name
"null" -- do not plot
-out {pathname | "null"} : (default dataset.out)
pathname -- set output file path and name
"null" -- do not output file
-png pathname : set graphic output file path and name (default dataset.png)
-resume [pathname] : resume the grid task using an existing output file (default pathname is dataset.out)
This is experimental. Try this option only if some parameters have been checked for the SAME data.

svm_options : additional options for svm-train"""
        print(usage_text)
        sys.exit(1)

    # At least the dataset path must be given.
    if len(sys.argv) < 2:
        exit_with_help()

    # The dataset is the last argument; everything between the script name
    # and the dataset is passed through as options.
    dataset_pathname = sys.argv[-1]
    options = sys.argv[1:-1]
    try:
        find_parameters(dataset_pathname, options)
    except (IOError,ValueError) as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write('Try "grid.py" for more information.\n')
        sys.exit(1)
|
|
@ -0,0 +1,133 @@
|
|||
import math, numpy, sklearn.metrics.pairwise as sk, sys
|
||||
from sklearn import linear_model
|
||||
from cvxopt import matrix, solvers
|
||||
|
||||
#DENSITY ESTIMATION
|
||||
#KMM solving the quadratic programming problem to get betas (weights) for each training instance
|
||||
def kmm(Xtrain, Xtest, sigma):
    """Solve the KMM quadratic program and return one weight per training row.

    Xtrain -- training instances, one row per instance
    Xtest  -- test instances, one row per instance
    sigma  -- passed as the third positional argument of
              ``sklearn.metrics.pairwise.rbf_kernel``

    The weights are constrained to lie in [0, 1000] and their sum to lie
    within ``n_tr * eps`` of ``n_tr``.  Progress messages are printed to
    stdout.  Returns the solver's ``'x'`` entry as a list.
    """
    n_tr = len(Xtrain)
    n_te = len(Xtest)

    #calculate Kernel
    print('Computing kernel for training data ...')
    K_ns = sk.rbf_kernel(Xtrain, Xtrain, sigma)
    #make it symmetric
    K = 0.5*(K_ns + K_ns.transpose())

    #calculate kappa
    print('Computing kernel for kappa ...')
    kappa_r = sk.rbf_kernel(Xtrain, Xtest, sigma)
    ones = numpy.ones(shape=(n_te, 1))
    kappa = numpy.dot(kappa_r, ones)
    kappa = -(float(n_tr)/float(n_te)) * kappa

    #calculate eps
    eps = (math.sqrt(n_tr) - 1)/math.sqrt(n_tr)

    #constraints, stacked as G*beta <= h:
    #  sum(beta) <= n_tr*(eps+1), -sum(beta) <= n_tr*(eps-1),
    #  -beta <= 0, beta <= 1000
    A0 = numpy.ones(shape=(1,n_tr))
    A1 = -numpy.ones(shape=(1,n_tr))
    A = numpy.vstack([A0, A1, -numpy.eye(n_tr), numpy.eye(n_tr)])
    b = numpy.array([[n_tr*(eps+1), n_tr*(eps-1)]])
    b = numpy.vstack([b.T, -numpy.zeros(shape=(n_tr,1)), numpy.ones(shape=(n_tr,1))*1000])

    print('Solving quadratic program for beta ...')
    P = matrix(K, tc='d')
    q = matrix(kappa, tc='d')
    G = matrix(A, tc='d')
    h = matrix(b, tc='d')
    beta = solvers.qp(P,q,G,h)
    return list(beta['x'])
|
||||
|
||||
|
||||
#KMM PARAMETER TUNING
|
||||
#Train a linear regression model with Lasso (L1 regularization).
|
||||
#Model parameter selection via cross validation
|
||||
#Predict the target (Beta) for a given test dataset
|
||||
def regression(XTrain, betaTrain, XTest):
    """Fit a cross-validated Lasso on (XTrain, betaTrain) and predict for XTest.

    The regularisation strength is chosen by 10-fold cross validation from a
    fixed list of candidates.  Returns the predictions as a list.
    """
    alpha_candidates = [0.001,0.005,0.01,0.05,0.1,0.5,1,5,10]
    lasso = linear_model.LassoCV(cv=10, alphas=alpha_candidates)
    lasso.fit(XTrain, betaTrain)
    return list(lasso.predict(XTest))
|
||||
|
||||
|
||||
#KMM PARAMETER TUNING
|
||||
#Compute J score for parameter tuning of KMM
|
||||
def computeJ(betaTrain, betaTest):
    """Return the J score: mean of squared betaTrain minus twice the mean of betaTest."""
    squared_train_sum = sum(w ** 2 for w in betaTrain)
    test_sum = sum(betaTest)
    train_term = (1/float(len(betaTrain)))*squared_train_sum
    test_term = (2/float(len(betaTest)))*test_sum
    return train_term - test_term
|
||||
|
||||
|
||||
#I/O OPERATIONS
|
||||
#Read input csv file
|
||||
def getData(filename):
    """Read a comma-separated file of numbers into a list of float rows.

    Each non-blank line becomes one ``list`` of floats.  Blank lines (such as
    a trailing empty line) are skipped.  Raises ``ValueError`` if a field is
    not a valid float.
    """
    data = []
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            # Build a real list: on Python 3 map() would return a lazy iterator.
            data.append([float(field) for field in line.split(",")])
    return data
|
||||
|
||||
|
||||
#I/O OPERATIONS
|
||||
#Write Output to file
|
||||
def writeFile(filename, data):
    """Write each item of data to filename, one per line.

    Nothing is written, and the file is not opened, when data is empty.
    """
    if len(data) > 0:
        with open(filename, 'w') as out:
            out.writelines(str(item) + '\n' for item in data)
|
||||
|
||||
|
||||
#MAIN ALGORITHM
|
||||
#compute beta
|
||||
def getBeta(traindata, testdata, gammab):
    """Return the KMM training weights for the candidate with the lowest J score.

    For every value in gammab the training weights are computed with kmm(),
    test weights are predicted with regression(), and the pair is scored with
    computeJ().  The first candidate is always taken; a later one replaces it
    only when its score is strictly lower.  Returns an empty list when gammab
    is empty.
    """
    lowest_j = 0
    chosen = []

    for gamma in gammab:
        train_weights = kmm(traindata, testdata, gamma)
        test_weights = regression(traindata, train_weights, testdata)
        score = computeJ(train_weights, test_weights)

        # An empty selection means nothing has been chosen yet.
        if len(chosen) == 0 or lowest_j > score:
            lowest_j = score
            chosen = list(train_weights)

    return chosen
|
||||
|
||||
|
||||
#MAIN METHOD
|
||||
def main():
    """Command-line entry point: compute KMM weights and write them to a file.

    Expects three arguments: training file, test file and output file.  With
    any other argument count a usage message is printed and nothing is done.
    """
    if len(sys.argv) != 4:
        print('Incorrect number of arguments.')
        print('Arg: training_file, test_file, output_file.')
        return

    traindata = getData(sys.argv[1])
    testdata = getData(sys.argv[2])
    # Candidate kernel parameters; the first depends on the training set size.
    gammab = [1/float(len(traindata)),0.0001,0.0005,0.001,0.005,0.01,0.05,0.1,0.5,1,5,10]
    print('Got training and test data.')

    beta = getBeta(traindata, testdata, gammab)

    writeFile(sys.argv[3], beta)


if __name__ == '__main__':
    main()
|
|
@ -0,0 +1,31 @@
|
|||
|
||||
Copyright (c) 2000-2014 Chih-Chung Chang and Chih-Jen Lin
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither name of copyright holders nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,25 @@
|
|||
# Build settings: compiler, flags, shared-library version, host OS name.
CXX ?= g++
CFLAGS = -Wall -Wconversion -O3 -fPIC
SHVER = 2
OS = $(shell uname)

# These targets do not produce a file of the same name; declaring them phony
# keeps a stray file called "all", "lib" or "clean" from disabling them.
.PHONY: all lib clean

all: svm-train svm-predict svm-scale

# Shared library; the linker flags differ between macOS (Darwin) and others.
lib: svm.o
	if [ "$(OS)" = "Darwin" ]; then \
		SHARED_LIB_FLAG="-dynamiclib -Wl,-install_name,libsvm.so.$(SHVER)"; \
	else \
		SHARED_LIB_FLAG="-shared -Wl,-soname,libsvm.so.$(SHVER)"; \
	fi; \
	$(CXX) $${SHARED_LIB_FLAG} svm.o -o libsvm.so.$(SHVER)

svm-predict: svm-predict.c svm.o
	$(CXX) $(CFLAGS) svm-predict.c svm.o -o svm-predict -lm
svm-train: svm-train.c svm.o
	$(CXX) $(CFLAGS) svm-train.c svm.o -o svm-train -lm
svm-scale: svm-scale.c
	$(CXX) $(CFLAGS) svm-scale.c -o svm-scale
svm.o: svm.cpp svm.h
	$(CXX) $(CFLAGS) -c svm.cpp
clean:
	rm -f *~ svm.o svm-train svm-predict svm-scale libsvm.so.$(SHVER)
|
|
@ -0,0 +1,33 @@
|
|||
#You must ensure nmake.exe, cl.exe, link.exe are in system path.
#VCVARS32.bat
#Under dosbox prompt
#nmake -f Makefile.win

##########################################
# Compiler, compile flags, and the directory that receives the built files.
CXX = cl.exe
CFLAGS = /nologo /O2 /EHsc /I. /D _WIN32 /D _CRT_SECURE_NO_DEPRECATE
TARGET = windows

# Default target: the command-line tools, svm-toy, and the DLL.
all: $(TARGET)\svm-train.exe $(TARGET)\svm-predict.exe $(TARGET)\svm-scale.exe $(TARGET)\svm-toy.exe lib

$(TARGET)\svm-predict.exe: svm.h svm-predict.c svm.obj
	$(CXX) $(CFLAGS) svm-predict.c svm.obj -Fe$(TARGET)\svm-predict.exe

$(TARGET)\svm-train.exe: svm.h svm-train.c svm.obj
	$(CXX) $(CFLAGS) svm-train.c svm.obj -Fe$(TARGET)\svm-train.exe

$(TARGET)\svm-scale.exe: svm.h svm-scale.c
	$(CXX) $(CFLAGS) svm-scale.c -Fe$(TARGET)\svm-scale.exe

# svm-toy additionally links the Windows GUI libraries.
$(TARGET)\svm-toy.exe: svm.h svm.obj svm-toy\windows\svm-toy.cpp
	$(CXX) $(CFLAGS) svm-toy\windows\svm-toy.cpp svm.obj user32.lib gdi32.lib comdlg32.lib -Fe$(TARGET)\svm-toy.exe

svm.obj: svm.cpp svm.h
	$(CXX) $(CFLAGS) -c svm.cpp

# DLL build; exported symbols are listed in svm.def.
lib: svm.cpp svm.h svm.def
	$(CXX) $(CFLAGS) -LD svm.cpp -Fe$(TARGET)\libsvm -link -DEF:svm.def

# Removes object files here and every file in $(TARGET); the leading "-"
# tells nmake to ignore errors from erase.
clean:
	-erase /Q *.obj $(TARGET)\.
|
||||
|
|
@ -0,0 +1,771 @@
|
|||
Libsvm is a simple, easy-to-use, and efficient software for SVM
|
||||
classification and regression. It solves C-SVM classification, nu-SVM
|
||||
classification, one-class-SVM, epsilon-SVM regression, and nu-SVM
|
||||
regression. It also provides an automatic model selection tool for
|
||||
C-SVM classification. This document explains the use of libsvm.
|
||||
|
||||
Libsvm is available at
|
||||
http://www.csie.ntu.edu.tw/~cjlin/libsvm
|
||||
Please read the COPYRIGHT file before using libsvm.
|
||||
|
||||
Table of Contents
|
||||
=================
|
||||
|
||||
- Quick Start
|
||||
- Installation and Data Format
|
||||
- `svm-train' Usage
|
||||
- `svm-predict' Usage
|
||||
- `svm-scale' Usage
|
||||
- Tips on Practical Use
|
||||
- Examples
|
||||
- Precomputed Kernels
|
||||
- Library Usage
|
||||
- Java Version
|
||||
- Building Windows Binaries
|
||||
- Additional Tools: Sub-sampling, Parameter Selection, Format checking, etc.
|
||||
- MATLAB/OCTAVE Interface
|
||||
- Python Interface
|
||||
- Additional Information
|
||||
|
||||
Quick Start
|
||||
===========
|
||||
|
||||
If you are new to SVM and if the data is not large, please go to
|
||||
`tools' directory and use easy.py after installation. It does
|
||||
everything automatically -- from data scaling to parameter selection.
|
||||
|
||||
Usage: easy.py training_file [testing_file]
|
||||
|
||||
More information about parameter selection can be found in
|
||||
`tools/README.'
|
||||
|
||||
Installation and Data Format
|
||||
============================
|
||||
|
||||
On Unix systems, type `make' to build the `svm-train' and `svm-predict'
|
||||
programs. Run them without arguments to show their usage.
|
||||
|
||||
On other systems, consult `Makefile' to build them (e.g., see
|
||||
'Building Windows binaries' in this file) or use the pre-built
|
||||
binaries (Windows binaries are in the directory `windows').
|
||||
|
||||
The format of training and testing data file is:
|
||||
|
||||
<label> <index1>:<value1> <index2>:<value2> ...
|
||||
.
|
||||
.
|
||||
.
|
||||
|
||||
Each line contains an instance and is ended by a '\n' character. For
|
||||
classification, <label> is an integer indicating the class label
|
||||
(multi-class is supported). For regression, <label> is the target
|
||||
value which can be any real number. For one-class SVM, it's not used
|
||||
so can be any number. The pair <index>:<value> gives a feature
|
||||
(attribute) value: <index> is an integer starting from 1 and <value>
|
||||
is a real number. The only exception is the precomputed kernel, where
|
||||
<index> starts from 0; see the section of precomputed kernels. Indices
|
||||
must be in ASCENDING order. Labels in the testing file are only used
|
||||
to calculate accuracy or errors. If they are unknown, just fill the
|
||||
first column with any numbers.
|
||||
|
||||
A sample classification data included in this package is
|
||||
`heart_scale'. To check if your data is in a correct form, use
|
||||
`tools/checkdata.py' (details in `tools/README').
|
||||
|
||||
Type `svm-train heart_scale', and the program will read the training
|
||||
data and output the model file `heart_scale.model'. If you have a test
|
||||
set called heart_scale.t, then type `svm-predict heart_scale.t
|
||||
heart_scale.model output' to see the prediction accuracy. The `output'
|
||||
file contains the predicted class labels.
|
||||
|
||||
For classification, if training data are in only one class (i.e., all
|
||||
labels are the same), then `svm-train' issues a warning message:
|
||||
`Warning: training data in only one class. See README for details,'
|
||||
which means the training data is very unbalanced. The label in the
|
||||
training data is directly returned when testing.
|
||||
|
||||
There are some other useful programs in this package.
|
||||
|
||||
svm-scale:
|
||||
|
||||
This is a tool for scaling input data file.
|
||||
|
||||
svm-toy:
|
||||
|
||||
This is a simple graphical interface which shows how SVM
|
||||
separates data in a plane. You can click in the window to
|
||||
draw data points. Use "change" button to choose class
|
||||
1, 2 or 3 (i.e., up to three classes are supported), "load"
|
||||
button to load data from a file, "save" button to save data to
|
||||
a file, "run" button to obtain an SVM model, and "clear"
|
||||
button to clear the window.
|
||||
|
||||
You can enter options in the bottom of the window, the syntax of
|
||||
options is the same as `svm-train'.
|
||||
|
||||
Note that "load" and "save" consider dense data format both in
|
||||
classification and the regression cases. For classification,
|
||||
each data point has one label (the color) that must be 1, 2,
|
||||
or 3 and two attributes (x-axis and y-axis values) in
|
||||
[0,1). For regression, each data point has one target value
|
||||
(y-axis) and one attribute (x-axis values) in [0, 1).
|
||||
|
||||
Type `make' in respective directories to build them.
|
||||
|
||||
You need Qt library to build the Qt version.
|
||||
(available from http://www.trolltech.com)
|
||||
|
||||
You need GTK+ library to build the GTK version.
|
||||
(available from http://www.gtk.org)
|
||||
|
||||
The pre-built Windows binaries are in the `windows'
|
||||
directory. We use Visual C++ on a 32-bit machine, so the
|
||||
maximal cache size is 2GB.
|
||||
|
||||
`svm-train' Usage
|
||||
=================
|
||||
|
||||
Usage: svm-train [options] training_set_file [model_file]
|
||||
options:
|
||||
-s svm_type : set type of SVM (default 0)
|
||||
0 -- C-SVC (multi-class classification)
|
||||
1 -- nu-SVC (multi-class classification)
|
||||
2 -- one-class SVM
|
||||
3 -- epsilon-SVR (regression)
|
||||
4 -- nu-SVR (regression)
|
||||
-t kernel_type : set type of kernel function (default 2)
|
||||
0 -- linear: u'*v
|
||||
1 -- polynomial: (gamma*u'*v + coef0)^degree
|
||||
2 -- radial basis function: exp(-gamma*|u-v|^2)
|
||||
3 -- sigmoid: tanh(gamma*u'*v + coef0)
|
||||
4 -- precomputed kernel (kernel values in training_set_file)
|
||||
-d degree : set degree in kernel function (default 3)
|
||||
-g gamma : set gamma in kernel function (default 1/num_features)
|
||||
-r coef0 : set coef0 in kernel function (default 0)
|
||||
-c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)
|
||||
-n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)
|
||||
-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
|
||||
-m cachesize : set cache memory size in MB (default 100)
|
||||
-e epsilon : set tolerance of termination criterion (default 0.001)
|
||||
-h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)
|
||||
-b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)
|
||||
-wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)
|
||||
-v n: n-fold cross validation mode
|
||||
-q : quiet mode (no outputs)
|
||||
|
||||
|
||||
The num_features in the -g option means the number of attributes in the input data.
|
||||
|
||||
option -v randomly splits the data into n parts and calculates cross
|
||||
validation accuracy/mean squared error on them.
|
||||
|
||||
See libsvm FAQ for the meaning of outputs.
|
||||
|
||||
`svm-predict' Usage
|
||||
===================
|
||||
|
||||
Usage: svm-predict [options] test_file model_file output_file
|
||||
options:
|
||||
-b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0); for one-class SVM only 0 is supported
|
||||
|
||||
model_file is the model file generated by svm-train.
|
||||
test_file is the test data you want to predict.
|
||||
svm-predict will produce output in the output_file.
|
||||
|
||||
`svm-scale' Usage
|
||||
=================
|
||||
|
||||
Usage: svm-scale [options] data_filename
|
||||
options:
|
||||
-l lower : x scaling lower limit (default -1)
|
||||
-u upper : x scaling upper limit (default +1)
|
||||
-y y_lower y_upper : y scaling limits (default: no y scaling)
|
||||
-s save_filename : save scaling parameters to save_filename
|
||||
-r restore_filename : restore scaling parameters from restore_filename
|
||||
|
||||
See 'Examples' in this file for examples.
|
||||
|
||||
Tips on Practical Use
|
||||
=====================
|
||||
|
||||
* Scale your data. For example, scale each attribute to [0,1] or [-1,+1].
|
||||
* For C-SVC, consider using the model selection tool in the tools directory.
|
||||
* nu in nu-SVC/one-class-SVM/nu-SVR approximates the fraction of training
|
||||
errors and support vectors.
|
||||
* If data for classification are unbalanced (e.g. many positive and
|
||||
few negative), try different penalty parameters C by -wi (see
|
||||
examples below).
|
||||
* Specify larger cache size (i.e., larger -m) for huge problems.
|
||||
|
||||
Examples
|
||||
========
|
||||
|
||||
> svm-scale -l -1 -u 1 -s range train > train.scale
|
||||
> svm-scale -r range test > test.scale
|
||||
|
||||
Scale each feature of the training data to be in [-1,1]. Scaling
|
||||
factors are stored in the file range and then used for scaling the
|
||||
test data.
|
||||
|
||||
> svm-train -s 0 -c 5 -t 2 -g 0.5 -e 0.1 data_file
|
||||
|
||||
Train a classifier with RBF kernel exp(-0.5|u-v|^2), C=5, and
|
||||
stopping tolerance 0.1.
|
||||
|
||||
> svm-train -s 3 -p 0.1 -t 0 data_file
|
||||
|
||||
Solve SVM regression with linear kernel u'v and epsilon=0.1
|
||||
in the loss function.
|
||||
|
||||
> svm-train -c 10 -w1 1 -w-2 5 -w4 2 data_file
|
||||
|
||||
Train a classifier with penalty 10 = 1 * 10 for class 1, penalty 50 =
|
||||
5 * 10 for class -2, and penalty 20 = 2 * 10 for class 4.
|
||||
|
||||
> svm-train -s 0 -c 100 -g 0.1 -v 5 data_file
|
||||
|
||||
Do five-fold cross validation for the classifier using
|
||||
the parameters C = 100 and gamma = 0.1
|
||||
|
||||
> svm-train -s 0 -b 1 data_file
|
||||
> svm-predict -b 1 test_file data_file.model output_file
|
||||
|
||||
Obtain a model with probability information and predict test data with
|
||||
probability estimates
|
||||
|
||||
Precomputed Kernels
|
||||
===================
|
||||
|
||||
Users may precompute kernel values and input them as training and
|
||||
testing files. Then libsvm does not need the original
|
||||
training/testing sets.
|
||||
|
||||
Assume there are L training instances x1, ..., xL.
|
||||
Let K(x, y) be the kernel
|
||||
value of two instances x and y. The input formats
|
||||
are:
|
||||
|
||||
New training instance for xi:
|
||||
|
||||
<label> 0:i 1:K(xi,x1) ... L:K(xi,xL)
|
||||
|
||||
New testing instance for any x:
|
||||
|
||||
<label> 0:? 1:K(x,x1) ... L:K(x,xL)
|
||||
|
||||
That is, in the training file the first column must be the "ID" of
|
||||
xi. In testing, ? can be any value.
|
||||
|
||||
All kernel values including ZEROs must be explicitly provided. Any
|
||||
permutation or random subsets of the training/testing files are also
|
||||
valid (see examples below).
|
||||
|
||||
Note: the format is slightly different from the precomputed kernel
|
||||
package released in libsvmtools earlier.
|
||||
|
||||
Examples:
|
||||
|
||||
Assume the original training data has three four-feature
|
||||
instances and testing data has one instance:
|
||||
|
||||
15 1:1 2:1 3:1 4:1
|
||||
45 2:3 4:3
|
||||
25 3:1
|
||||
|
||||
15 1:1 3:1
|
||||
|
||||
If the linear kernel is used, we have the following new
|
||||
training/testing sets:
|
||||
|
||||
15 0:1 1:4 2:6 3:1
|
||||
45 0:2 1:6 2:18 3:0
|
||||
25 0:3 1:1 2:0 3:1
|
||||
|
||||
15 0:? 1:2 2:0 3:1
|
||||
|
||||
? can be any value.
|
||||
|
||||
Any subset of the above training file is also valid. For example,
|
||||
|
||||
25 0:3 1:1 2:0 3:1
|
||||
45 0:2 1:6 2:18 3:0
|
||||
|
||||
implies that the kernel matrix is
|
||||
|
||||
[K(2,2) K(2,3)] = [18 0]
|
||||
[K(3,2) K(3,3)] = [0 1]
|
||||
|
||||
Library Usage
|
||||
=============
|
||||
|
||||
These functions and structures are declared in the header file
|
||||
`svm.h'. You need to #include "svm.h" in your C/C++ source files and
|
||||
link your program with `svm.cpp'. You can see `svm-train.c' and
|
||||
`svm-predict.c' for examples showing how to use them. We define
|
||||
LIBSVM_VERSION and declare `extern int libsvm_version; ' in svm.h, so
|
||||
you can check the version number.
|
||||
|
||||
Before you classify test data, you need to construct an SVM model
|
||||
(`svm_model') using training data. A model can also be saved in
|
||||
a file for later use. Once an SVM model is available, you can use it
|
||||
to classify new data.
|
||||
|
||||
- Function: struct svm_model *svm_train(const struct svm_problem *prob,
|
||||
const struct svm_parameter *param);
|
||||
|
||||
This function constructs and returns an SVM model according to
|
||||
the given training data and parameters.
|
||||
|
||||
struct svm_problem describes the problem:
|
||||
|
||||
struct svm_problem
|
||||
{
|
||||
int l;
|
||||
double *y;
|
||||
struct svm_node **x;
|
||||
};
|
||||
|
||||
where `l' is the number of training data, and `y' is an array containing
|
||||
their target values. (integers in classification, real numbers in
|
||||
regression) `x' is an array of pointers, each of which points to a sparse
|
||||
representation (array of svm_node) of one training vector.
|
||||
|
||||
For example, if we have the following training data:
|
||||
|
||||
LABEL ATTR1 ATTR2 ATTR3 ATTR4 ATTR5
|
||||
----- ----- ----- ----- ----- -----
|
||||
1 0 0.1 0.2 0 0
|
||||
2 0 0.1 0.3 -1.2 0
|
||||
1 0.4 0 0 0 0
|
||||
2 0 0.1 0 1.4 0.5
|
||||
3 -0.1 -0.2 0.1 1.1 0.1
|
||||
|
||||
then the components of svm_problem are:
|
||||
|
||||
l = 5
|
||||
|
||||
y -> 1 2 1 2 3
|
||||
|
||||
x -> [ ] -> (2,0.1) (3,0.2) (-1,?)
|
||||
[ ] -> (2,0.1) (3,0.3) (4,-1.2) (-1,?)
|
||||
[ ] -> (1,0.4) (-1,?)
|
||||
[ ] -> (2,0.1) (4,1.4) (5,0.5) (-1,?)
|
||||
[ ] -> (1,-0.1) (2,-0.2) (3,0.1) (4,1.1) (5,0.1) (-1,?)
|
||||
|
||||
where (index,value) is stored in the structure `svm_node':
|
||||
|
||||
struct svm_node
|
||||
{
|
||||
int index;
|
||||
double value;
|
||||
};
|
||||
|
||||
index = -1 indicates the end of one vector. Note that indices must
|
||||
be in ASCENDING order.
|
||||
|
||||
struct svm_parameter describes the parameters of an SVM model:
|
||||
|
||||
struct svm_parameter
|
||||
{
|
||||
int svm_type;
|
||||
int kernel_type;
|
||||
int degree; /* for poly */
|
||||
double gamma; /* for poly/rbf/sigmoid */
|
||||
double coef0; /* for poly/sigmoid */
|
||||
|
||||
/* these are for training only */
|
||||
double cache_size; /* in MB */
|
||||
double eps; /* stopping criteria */
|
||||
double C; /* for C_SVC, EPSILON_SVR, and NU_SVR */
|
||||
int nr_weight; /* for C_SVC */
|
||||
int *weight_label; /* for C_SVC */
|
||||
double* weight; /* for C_SVC */
|
||||
double nu; /* for NU_SVC, ONE_CLASS, and NU_SVR */
|
||||
double p; /* for EPSILON_SVR */
|
||||
int shrinking; /* use the shrinking heuristics */
|
||||
int probability; /* do probability estimates */
|
||||
};
|
||||
|
||||
svm_type can be one of C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR.
|
||||
|
||||
C_SVC: C-SVM classification
|
||||
NU_SVC: nu-SVM classification
|
||||
ONE_CLASS: one-class-SVM
|
||||
EPSILON_SVR: epsilon-SVM regression
|
||||
NU_SVR: nu-SVM regression
|
||||
|
||||
kernel_type can be one of LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED.
|
||||
|
||||
LINEAR: u'*v
|
||||
POLY: (gamma*u'*v + coef0)^degree
|
||||
RBF: exp(-gamma*|u-v|^2)
|
||||
SIGMOID: tanh(gamma*u'*v + coef0)
|
||||
PRECOMPUTED: kernel values in training_set_file
|
||||
|
||||
cache_size is the size of the kernel cache, specified in megabytes.
|
||||
C is the cost of constraints violation.
|
||||
eps is the stopping criterion. (we usually use 0.00001 in nu-SVC,
|
||||
0.001 in others). nu is the parameter in nu-SVM, nu-SVR, and
|
||||
one-class-SVM. p is the epsilon in epsilon-insensitive loss function
|
||||
of epsilon-SVM regression. shrinking = 1 means shrinking is conducted;
|
||||
= 0 otherwise. probability = 1 means model with probability
|
||||
information is obtained; = 0 otherwise.
|
||||
|
||||
nr_weight, weight_label, and weight are used to change the penalty
|
||||
for some classes (If the weight for a class is not changed, it is
|
||||
set to 1). This is useful for training classifier using unbalanced
|
||||
input data or with asymmetric misclassification cost.
|
||||
|
||||
nr_weight is the number of elements in the array weight_label and
|
||||
weight. Each weight[i] corresponds to weight_label[i], meaning that
|
||||
the penalty of class weight_label[i] is scaled by a factor of weight[i].
|
||||
|
||||
If you do not want to change penalty for any of the classes,
|
||||
just set nr_weight to 0.
|
||||
|
||||
*NOTE* Because svm_model contains pointers to svm_problem, you can
|
||||
not free the memory used by svm_problem if you are still using the
|
||||
svm_model produced by svm_train().
|
||||
|
||||
*NOTE* To avoid wrong parameters, svm_check_parameter() should be
|
||||
called before svm_train().
|
||||
|
||||
struct svm_model stores the model obtained from the training procedure.
|
||||
It is not recommended to directly access entries in this structure.
|
||||
Programmers should use the interface functions to get the values.
|
||||
|
||||
struct svm_model
|
||||
{
|
||||
struct svm_parameter param; /* parameter */
|
||||
int nr_class; /* number of classes, = 2 in regression/one class svm */
|
||||
int l; /* total #SV */
|
||||
struct svm_node **SV; /* SVs (SV[l]) */
|
||||
double **sv_coef; /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
|
||||
double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */
|
||||
double *probA; /* pairwise probability information */
|
||||
double *probB;
|
||||
int *sv_indices; /* sv_indices[0,...,nSV-1] are values in [1,...,num_traning_data] to indicate SVs in the training set */
|
||||
|
||||
/* for classification only */
|
||||
|
||||
int *label; /* label of each class (label[k]) */
|
||||
int *nSV; /* number of SVs for each class (nSV[k]) */
|
||||
/* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
|
||||
/* XXX */
|
||||
int free_sv; /* 1 if svm_model is created by svm_load_model*/
|
||||
/* 0 if svm_model is created by svm_train */
|
||||
};
|
||||
|
||||
param describes the parameters used to obtain the model.
|
||||
|
||||
nr_class is the number of classes. It is 2 for regression and one-class SVM.
|
||||
|
||||
l is the number of support vectors. SV and sv_coef are support
|
||||
vectors and the corresponding coefficients, respectively. Assume there are
|
||||
k classes. For data in class j, the corresponding sv_coef includes (k-1) y*alpha vectors,
|
||||
where alpha's are solutions of the following two class problems:
|
||||
1 vs j, 2 vs j, ..., j-1 vs j, j vs j+1, j vs j+2, ..., j vs k
|
||||
and y=1 for the first j-1 vectors, while y=-1 for the remaining k-j
|
||||
vectors. For example, if there are 4 classes, sv_coef and SV are like:
|
||||
|
||||
+-+-+-+--------------------+
|
||||
|1|1|1| |
|
||||
|v|v|v| SVs from class 1 |
|
||||
|2|3|4| |
|
||||
+-+-+-+--------------------+
|
||||
|1|2|2| |
|
||||
|v|v|v| SVs from class 2 |
|
||||
|2|3|4| |
|
||||
+-+-+-+--------------------+
|
||||
|1|2|3| |
|
||||
|v|v|v| SVs from class 3 |
|
||||
|3|3|4| |
|
||||
+-+-+-+--------------------+
|
||||
|1|2|3| |
|
||||
|v|v|v| SVs from class 4 |
|
||||
|4|4|4| |
|
||||
+-+-+-+--------------------+
|
||||
|
||||
See svm_train() for an example of assigning values to sv_coef.
|
||||
|
||||
rho is the bias term (-b). probA and probB are parameters used in
|
||||
probability outputs. If there are k classes, there are k*(k-1)/2
|
||||
binary problems as well as rho, probA, and probB values. They are
|
||||
aligned in the order of binary problems:
|
||||
1 vs 2, 1 vs 3, ..., 1 vs k, 2 vs 3, ..., 2 vs k, ..., k-1 vs k.
|
||||
|
||||
sv_indices[0,...,nSV-1] are values in [1,...,num_training_data] to
|
||||
indicate support vectors in the training set.
|
||||
|
||||
label contains labels in the training data.
|
||||
|
||||
nSV is the number of support vectors in each class.
|
||||
|
||||
free_sv is a flag used to determine whether the space of SV should
|
||||
be released in svm_free_model_content(struct svm_model*) and
|
||||
svm_free_and_destroy_model(struct svm_model**). If the model is
|
||||
generated by svm_train(), then SV points to data in svm_problem
|
||||
and should not be removed. For example, free_sv is 0 if svm_model
|
||||
is created by svm_train, but is 1 if created by svm_load_model.
|
||||
|
||||
- Function: double svm_predict(const struct svm_model *model,
|
||||
const struct svm_node *x);
|
||||
|
||||
This function does classification or regression on a test vector x
|
||||
given a model.
|
||||
|
||||
For a classification model, the predicted class for x is returned.
|
||||
For a regression model, the function value of x calculated using
|
||||
the model is returned. For a one-class model, +1 or -1 is
|
||||
returned.
|
||||
|
||||
- Function: void svm_cross_validation(const struct svm_problem *prob,
|
||||
const struct svm_parameter *param, int nr_fold, double *target);
|
||||
|
||||
This function conducts cross validation. Data are separated into
|
||||
nr_fold folds. Under given parameters, sequentially each fold is
|
||||
validated using the model from training the remaining. Predicted
|
||||
labels (of all prob's instances) in the validation process are
|
||||
stored in the array called target.
|
||||
|
||||
The format of prob is the same as that for svm_train().
|
||||
|
||||
- Function: int svm_get_svm_type(const struct svm_model *model);
|
||||
|
||||
This function gives svm_type of the model. Possible values of
|
||||
svm_type are defined in svm.h.
|
||||
|
||||
- Function: int svm_get_nr_class(const svm_model *model);
|
||||
|
||||
For a classification model, this function gives the number of
|
||||
classes. For a regression or a one-class model, 2 is returned.
|
||||
|
||||
- Function: void svm_get_labels(const svm_model *model, int* label)
|
||||
|
||||
For a classification model, this function outputs the name of
|
||||
labels into an array called label. For regression and one-class
|
||||
models, label is unchanged.
|
||||
|
||||
- Function: void svm_get_sv_indices(const struct svm_model *model, int *sv_indices)
|
||||
|
||||
This function outputs indices of support vectors into an array called sv_indices.
|
||||
The size of sv_indices is the number of support vectors and can be obtained by calling svm_get_nr_sv.
|
||||
Each sv_indices[i] is in the range of [1, ..., num_training_data].
|
||||
|
||||
- Function: int svm_get_nr_sv(const struct svm_model *model)
|
||||
|
||||
This function gives the total number of support vectors.
|
||||
|
||||
- Function: double svm_get_svr_probability(const struct svm_model *model);
|
||||
|
||||
For a regression model with probability information, this function
|
||||
outputs a value sigma > 0. For test data, we consider the
|
||||
probability model: target value = predicted value + z, z: Laplace
|
||||
distribution e^(-|z|/sigma)/(2sigma)
|
||||
|
||||
If the model is not for svr or does not contain required
|
||||
information, 0 is returned.
|
||||
|
||||
- Function: double svm_predict_values(const svm_model *model,
|
||||
const svm_node *x, double* dec_values)
|
||||
|
||||
This function gives decision values on a test vector x given a
|
||||
model, and returns the predicted label (classification) or
|
||||
the function value (regression).
|
||||
|
||||
For a classification model with nr_class classes, this function
|
||||
gives nr_class*(nr_class-1)/2 decision values in the array
|
||||
dec_values, where nr_class can be obtained from the function
|
||||
svm_get_nr_class. The order is label[0] vs. label[1], ...,
|
||||
label[0] vs. label[nr_class-1], label[1] vs. label[2], ...,
|
||||
label[nr_class-2] vs. label[nr_class-1], where label can be
|
||||
obtained from the function svm_get_labels. The returned value is
|
||||
the predicted class for x. Note that when nr_class = 1, this
|
||||
function does not give any decision value.
|
||||
|
||||
For a regression model, dec_values[0] and the returned value are
|
||||
both the function value of x calculated using the model. For a
|
||||
one-class model, dec_values[0] is the decision value of x, while
|
||||
the returned value is +1/-1.
|
||||
|
||||
- Function: double svm_predict_probability(const struct svm_model *model,
|
||||
const struct svm_node *x, double* prob_estimates);
|
||||
|
||||
This function does classification or regression on a test vector x
|
||||
given a model with probability information.
|
||||
|
||||
For a classification model with probability information, this
|
||||
function gives nr_class probability estimates in the array
|
||||
prob_estimates. nr_class can be obtained from the function
|
||||
svm_get_nr_class. The class with the highest probability is
|
||||
returned. For regression/one-class SVM, the array prob_estimates
|
||||
is unchanged and the returned value is the same as that of
|
||||
svm_predict.
|
||||
|
||||
- Function: const char *svm_check_parameter(const struct svm_problem *prob,
|
||||
const struct svm_parameter *param);
|
||||
|
||||
This function checks whether the parameters are within the feasible
|
||||
range of the problem. This function should be called before calling
|
||||
svm_train() and svm_cross_validation(). It returns NULL if the
|
||||
parameters are feasible, otherwise an error message is returned.
|
||||
|
||||
- Function: int svm_check_probability_model(const struct svm_model *model);
|
||||
|
||||
This function checks whether the model contains required
|
||||
information to do probability estimates. If so, it returns
|
||||
+1. Otherwise, 0 is returned. This function should be called
|
||||
before calling svm_get_svr_probability and
|
||||
svm_predict_probability.
|
||||
|
||||
- Function: int svm_save_model(const char *model_file_name,
|
||||
const struct svm_model *model);
|
||||
|
||||
This function saves a model to a file; returns 0 on success, or -1
|
||||
if an error occurs.
|
||||
|
||||
- Function: struct svm_model *svm_load_model(const char *model_file_name);
|
||||
|
||||
This function returns a pointer to the model read from the file,
|
||||
or a null pointer if the model could not be loaded.
|
||||
|
||||
- Function: void svm_free_model_content(struct svm_model *model_ptr);
|
||||
|
||||
This function frees the memory used by the entries in a model structure.
|
||||
|
||||
- Function: void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr);
|
||||
|
||||
This function frees the memory used by a model and destroys the model
|
||||
structure. It is equivalent to svm_destroy_model, which
|
||||
is deprecated after version 3.0.
|
||||
|
||||
- Function: void svm_destroy_param(struct svm_parameter *param);
|
||||
|
||||
This function frees the memory used by a parameter set.
|
||||
|
||||
- Function: void svm_set_print_string_function(void (*print_func)(const char *));
|
||||
|
||||
Users can specify their output format by a function. Use
|
||||
svm_set_print_string_function(NULL);
|
||||
for default printing to stdout.
|
||||
|
||||
Java Version
|
||||
============
|
||||
|
||||
The pre-compiled java class archive `libsvm.jar' and its source files are
|
||||
in the java directory. To run the programs, use
|
||||
|
||||
java -classpath libsvm.jar svm_train <arguments>
|
||||
java -classpath libsvm.jar svm_predict <arguments>
|
||||
java -classpath libsvm.jar svm_toy
|
||||
java -classpath libsvm.jar svm_scale <arguments>
|
||||
|
||||
Note that you need Java 1.5 (5.0) or above to run it.
|
||||
|
||||
You may need to add Java runtime library (like classes.zip) to the classpath.
|
||||
You may need to increase maximum Java heap size.
|
||||
|
||||
Library usages are similar to the C version. These functions are available:
|
||||
|
||||
public class svm {
|
||||
public static final int LIBSVM_VERSION=320;
|
||||
public static svm_model svm_train(svm_problem prob, svm_parameter param);
|
||||
public static void svm_cross_validation(svm_problem prob, svm_parameter param, int nr_fold, double[] target);
|
||||
public static int svm_get_svm_type(svm_model model);
|
||||
public static int svm_get_nr_class(svm_model model);
|
||||
public static void svm_get_labels(svm_model model, int[] label);
|
||||
public static void svm_get_sv_indices(svm_model model, int[] indices);
|
||||
public static int svm_get_nr_sv(svm_model model);
|
||||
public static double svm_get_svr_probability(svm_model model);
|
||||
public static double svm_predict_values(svm_model model, svm_node[] x, double[] dec_values);
|
||||
public static double svm_predict(svm_model model, svm_node[] x);
|
||||
public static double svm_predict_probability(svm_model model, svm_node[] x, double[] prob_estimates);
|
||||
public static void svm_save_model(String model_file_name, svm_model model) throws IOException
|
||||
public static svm_model svm_load_model(String model_file_name) throws IOException
|
||||
public static String svm_check_parameter(svm_problem prob, svm_parameter param);
|
||||
public static int svm_check_probability_model(svm_model model);
|
||||
public static void svm_set_print_string_function(svm_print_interface print_func);
|
||||
}
|
||||
|
||||
The library is in the "libsvm" package.
|
||||
Note that in the Java version, svm_node[] is not ended with a node whose index = -1.
|
||||
|
||||
Users can specify their output format by
|
||||
|
||||
your_print_func = new svm_print_interface()
|
||||
{
|
||||
public void print(String s)
|
||||
{
|
||||
// your own format
|
||||
}
|
||||
};
|
||||
svm.svm_set_print_string_function(your_print_func);
|
||||
|
||||
Building Windows Binaries
|
||||
=========================
|
||||
|
||||
Windows binaries are in the directory `windows'. To build them via
|
||||
Visual C++, use the following steps:
|
||||
|
||||
1. Open a DOS command box (or Visual Studio Command Prompt) and change
|
||||
to libsvm directory. If environment variables of VC++ have not been
|
||||
set, type
|
||||
|
||||
"C:\Program Files\Microsoft Visual Studio 10.0\VC\bin\vcvars32.bat"
|
||||
|
||||
You may have to modify the above command according to which version of
|
||||
VC++ or where it is installed.
|
||||
|
||||
2. Type
|
||||
|
||||
nmake -f Makefile.win clean all
|
||||
|
||||
3. (optional) To build shared library libsvm.dll, type
|
||||
|
||||
nmake -f Makefile.win lib
|
||||
|
||||
4. (optional) To build 64-bit windows binaries, you must
|
||||
(1) Run vcvars64.bat instead of vcvars32.bat. Note that
|
||||
vcvars64.bat is located at "C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin\amd64\"
|
||||
(2) Change CFLAGS in Makefile.win: /D _WIN32 to /D _WIN64
|
||||
|
||||
Another way is to build them from Visual C++ environment. See details
|
||||
in libsvm FAQ.
|
||||
|
||||
- Additional Tools: Sub-sampling, Parameter Selection, Format checking, etc.
|
||||
============================================================================
|
||||
|
||||
See the README file in the tools directory.
|
||||
|
||||
MATLAB/OCTAVE Interface
|
||||
=======================
|
||||
|
||||
Please check the file README in the directory `matlab'.
|
||||
|
||||
Python Interface
|
||||
================
|
||||
|
||||
See the README file in python directory.
|
||||
|
||||
Additional Information
|
||||
======================
|
||||
|
||||
If you find LIBSVM helpful, please cite it as
|
||||
|
||||
Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support
|
||||
vector machines. ACM Transactions on Intelligent Systems and
|
||||
Technology, 2:27:1--27:27, 2011. Software available at
|
||||
http://www.csie.ntu.edu.tw/~cjlin/libsvm
|
||||
|
||||
LIBSVM implementation document is available at
|
||||
http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf
|
||||
|
||||
For any questions and comments, please email cjlin@csie.ntu.edu.tw
|
||||
|
||||
Acknowledgments:
|
||||
This work was supported in part by the National Science
|
||||
Council of Taiwan via the grant NSC 89-2213-E-002-013.
|
||||
The authors thank their group members and users
|
||||
for many helpful discussions and comments. They are listed in
|
||||
http://www.csie.ntu.edu.tw/~cjlin/libsvm/acknowledgements
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
Usage:
|
||||
use '-W weight_file' to assign weights for each instance.
|
||||
Please make sure all weights are non-negative.
|
||||
|
||||
Example:
|
||||
$ ./svm-train -W heart_scale.wgt heart_scale
|
||||
|
|
@ -0,0 +1,270 @@
|
|||
+1 1:0.708333 2:1 3:1 4:-0.320755 5:-0.105023 6:-1 7:1 8:-0.419847 9:-1 10:-0.225806 12:1 13:-1
|
||||
-1 1:0.583333 2:-1 3:0.333333 4:-0.603774 5:1 6:-1 7:1 8:0.358779 9:-1 10:-0.483871 12:-1 13:1
|
||||
+1 1:0.166667 2:1 3:-0.333333 4:-0.433962 5:-0.383562 6:-1 7:-1 8:0.0687023 9:-1 10:-0.903226 11:-1 12:-1 13:1
|
||||
-1 1:0.458333 2:1 3:1 4:-0.358491 5:-0.374429 6:-1 7:-1 8:-0.480916 9:1 10:-0.935484 12:-0.333333 13:1
|
||||
-1 1:0.875 2:-1 3:-0.333333 4:-0.509434 5:-0.347032 6:-1 7:1 8:-0.236641 9:1 10:-0.935484 11:-1 12:-0.333333 13:-1
|
||||
-1 1:0.5 2:1 3:1 4:-0.509434 5:-0.767123 6:-1 7:-1 8:0.0534351 9:-1 10:-0.870968 11:-1 12:-1 13:1
|
||||
+1 1:0.125 2:1 3:0.333333 4:-0.320755 5:-0.406393 6:1 7:1 8:0.0839695 9:1 10:-0.806452 12:-0.333333 13:0.5
|
||||
+1 1:0.25 2:1 3:1 4:-0.698113 5:-0.484018 6:-1 7:1 8:0.0839695 9:1 10:-0.612903 12:-0.333333 13:1
|
||||
+1 1:0.291667 2:1 3:1 4:-0.132075 5:-0.237443 6:-1 7:1 8:0.51145 9:-1 10:-0.612903 12:0.333333 13:1
|
||||
+1 1:0.416667 2:-1 3:1 4:0.0566038 5:0.283105 6:-1 7:1 8:0.267176 9:-1 10:0.290323 12:1 13:1
|
||||
-1 1:0.25 2:1 3:1 4:-0.226415 5:-0.506849 6:-1 7:-1 8:0.374046 9:-1 10:-0.83871 12:-1 13:1
|
||||
-1 2:1 3:1 4:-0.0943396 5:-0.543379 6:-1 7:1 8:-0.389313 9:1 10:-1 11:-1 12:-1 13:1
|
||||
-1 1:-0.375 2:1 3:0.333333 4:-0.132075 5:-0.502283 6:-1 7:1 8:0.664122 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:0.333333 2:1 3:-1 4:-0.245283 5:-0.506849 6:-1 7:-1 8:0.129771 9:-1 10:-0.16129 12:0.333333 13:-1
|
||||
-1 1:0.166667 2:-1 3:1 4:-0.358491 5:-0.191781 6:-1 7:1 8:0.343511 9:-1 10:-1 11:-1 12:-0.333333 13:-1
|
||||
-1 1:0.75 2:-1 3:1 4:-0.660377 5:-0.894977 6:-1 7:-1 8:-0.175573 9:-1 10:-0.483871 12:-1 13:-1
|
||||
+1 1:-0.291667 2:1 3:1 4:-0.132075 5:-0.155251 6:-1 7:-1 8:-0.251908 9:1 10:-0.419355 12:0.333333 13:1
|
||||
+1 2:1 3:1 4:-0.132075 5:-0.648402 6:1 7:1 8:0.282443 9:1 11:1 12:-1 13:1
|
||||
-1 1:0.458333 2:1 3:-1 4:-0.698113 5:-0.611872 6:-1 7:1 8:0.114504 9:1 10:-0.419355 12:-1 13:-1
|
||||
-1 1:-0.541667 2:1 3:-1 4:-0.132075 5:-0.666667 6:-1 7:-1 8:0.633588 9:1 10:-0.548387 11:-1 12:-1 13:1
|
||||
+1 1:0.583333 2:1 3:1 4:-0.509434 5:-0.52968 6:-1 7:1 8:-0.114504 9:1 10:-0.16129 12:0.333333 13:1
|
||||
-1 1:-0.208333 2:1 3:-0.333333 4:-0.320755 5:-0.456621 6:-1 7:1 8:0.664122 9:-1 10:-0.935484 12:-1 13:-1
|
||||
-1 1:-0.416667 2:1 3:1 4:-0.603774 5:-0.191781 6:-1 7:-1 8:0.679389 9:-1 10:-0.612903 12:-1 13:-1
|
||||
-1 1:-0.25 2:1 3:1 4:-0.660377 5:-0.643836 6:-1 7:-1 8:0.0992366 9:-1 10:-0.967742 11:-1 12:-1 13:-1
|
||||
-1 1:0.0416667 2:-1 3:-0.333333 4:-0.283019 5:-0.260274 6:1 7:1 8:0.343511 9:1 10:-1 11:-1 12:-0.333333 13:-1
|
||||
-1 1:-0.208333 2:-1 3:0.333333 4:-0.320755 5:-0.319635 6:-1 7:-1 8:0.0381679 9:-1 10:-0.935484 11:-1 12:-1 13:-1
|
||||
-1 1:-0.291667 2:-1 3:1 4:-0.169811 5:-0.465753 6:-1 7:1 8:0.236641 9:1 10:-1 12:-1 13:-1
|
||||
-1 1:-0.0833333 2:-1 3:0.333333 4:-0.509434 5:-0.228311 6:-1 7:1 8:0.312977 9:-1 10:-0.806452 11:-1 12:-1 13:-1
|
||||
+1 1:0.208333 2:1 3:0.333333 4:-0.660377 5:-0.525114 6:-1 7:1 8:0.435115 9:-1 10:-0.193548 12:-0.333333 13:1
|
||||
-1 1:0.75 2:-1 3:0.333333 4:-0.698113 5:-0.365297 6:1 7:1 8:-0.0992366 9:-1 10:-1 11:-1 12:-0.333333 13:-1
|
||||
+1 1:0.166667 2:1 3:0.333333 4:-0.358491 5:-0.52968 6:-1 7:1 8:0.206107 9:-1 10:-0.870968 12:-0.333333 13:1
|
||||
-1 1:0.541667 2:1 3:1 4:0.245283 5:-0.534247 6:-1 7:1 8:0.0229008 9:-1 10:-0.258065 11:-1 12:-1 13:0.5
|
||||
-1 1:-0.666667 2:-1 3:0.333333 4:-0.509434 5:-0.593607 6:-1 7:-1 8:0.51145 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:0.25 2:1 3:1 4:0.433962 5:-0.086758 6:-1 7:1 8:0.0534351 9:1 10:0.0967742 11:1 12:-1 13:1
|
||||
+1 1:-0.125 2:1 3:1 4:-0.0566038 5:-0.6621 6:-1 7:1 8:-0.160305 9:1 10:-0.709677 12:-1 13:1
|
||||
+1 1:-0.208333 2:1 3:1 4:-0.320755 5:-0.406393 6:1 7:1 8:0.206107 9:1 10:-1 11:-1 12:0.333333 13:1
|
||||
+1 1:0.333333 2:1 3:1 4:-0.132075 5:-0.630137 6:-1 7:1 8:0.0229008 9:1 10:-0.387097 11:-1 12:-0.333333 13:1
|
||||
+1 1:0.25 2:1 3:-1 4:0.245283 5:-0.328767 6:-1 7:1 8:-0.175573 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:-0.458333 2:1 3:0.333333 4:-0.320755 5:-0.753425 6:-1 7:-1 8:0.206107 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:-0.208333 2:1 3:1 4:-0.471698 5:-0.561644 6:-1 7:1 8:0.755725 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:-0.541667 2:1 3:1 4:0.0943396 5:-0.557078 6:-1 7:-1 8:0.679389 9:-1 10:-1 11:-1 12:-1 13:1
|
||||
-1 1:0.375 2:-1 3:1 4:-0.433962 5:-0.621005 6:-1 7:-1 8:0.40458 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:-0.375 2:1 3:0.333333 4:-0.320755 5:-0.511416 6:-1 7:-1 8:0.648855 9:1 10:-0.870968 11:-1 12:-1 13:-1
|
||||
-1 1:-0.291667 2:1 3:-0.333333 4:-0.867925 5:-0.675799 6:1 7:-1 8:0.29771 9:-1 10:-1 11:-1 12:-1 13:1
|
||||
+1 1:0.25 2:1 3:0.333333 4:-0.396226 5:-0.579909 6:1 7:-1 8:-0.0381679 9:-1 10:-0.290323 12:-0.333333 13:0.5
|
||||
-1 1:0.208333 2:1 3:0.333333 4:-0.132075 5:-0.611872 6:1 7:1 8:0.435115 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:-0.166667 2:1 3:0.333333 4:-0.54717 5:-0.894977 6:-1 7:1 8:-0.160305 9:-1 10:-0.741935 11:-1 12:1 13:-1
|
||||
+1 1:-0.375 2:1 3:1 4:-0.698113 5:-0.675799 6:-1 7:1 8:0.618321 9:-1 10:-1 11:-1 12:-0.333333 13:-1
|
||||
+1 1:0.541667 2:1 3:-0.333333 4:0.245283 5:-0.452055 6:-1 7:-1 8:-0.251908 9:1 10:-1 12:1 13:0.5
|
||||
+1 1:0.5 2:-1 3:1 4:0.0566038 5:-0.547945 6:-1 7:1 8:-0.343511 9:-1 10:-0.677419 12:1 13:1
|
||||
+1 1:-0.458333 2:1 3:1 4:-0.207547 5:-0.136986 6:-1 7:-1 8:-0.175573 9:1 10:-0.419355 12:-1 13:0.5
|
||||
-1 1:-0.0416667 2:1 3:-0.333333 4:-0.358491 5:-0.639269 6:1 7:-1 8:0.725191 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:0.5 2:-1 3:0.333333 4:-0.132075 5:0.328767 6:1 7:1 8:0.312977 9:-1 10:-0.741935 11:-1 12:-0.333333 13:-1
|
||||
-1 1:0.416667 2:-1 3:-0.333333 4:-0.132075 5:-0.684932 6:-1 7:-1 8:0.648855 9:-1 10:-1 11:-1 12:0.333333 13:-1
|
||||
-1 1:-0.333333 2:-1 3:-0.333333 4:-0.320755 5:-0.506849 6:-1 7:1 8:0.587786 9:-1 10:-0.806452 12:-1 13:-1
|
||||
-1 1:-0.5 2:-1 3:-0.333333 4:-0.792453 5:-0.671233 6:-1 7:-1 8:0.480916 9:-1 10:-1 11:-1 12:-0.333333 13:-1
|
||||
+1 1:0.333333 2:1 3:1 4:-0.169811 5:-0.817352 6:-1 7:1 8:-0.175573 9:1 10:0.16129 12:-0.333333 13:-1
|
||||
-1 1:0.291667 2:-1 3:0.333333 4:-0.509434 5:-0.762557 6:1 7:-1 8:-0.618321 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:0.25 2:-1 3:1 4:0.509434 5:-0.438356 6:-1 7:-1 8:0.0992366 9:1 10:-1 12:-1 13:-1
|
||||
+1 1:0.375 2:1 3:-0.333333 4:-0.509434 5:-0.292237 6:-1 7:1 8:-0.51145 9:-1 10:-0.548387 12:-0.333333 13:1
|
||||
-1 1:0.166667 2:1 3:0.333333 4:0.0566038 5:-1 6:1 7:-1 8:0.557252 9:-1 10:-0.935484 11:-1 12:-0.333333 13:1
|
||||
+1 1:-0.0833333 2:-1 3:1 4:-0.320755 5:-0.182648 6:-1 7:-1 8:0.0839695 9:1 10:-0.612903 12:-1 13:1
|
||||
-1 1:-0.375 2:1 3:0.333333 4:-0.509434 5:-0.543379 6:-1 7:-1 8:0.496183 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:0.291667 2:-1 3:-1 4:0.0566038 5:-0.479452 6:-1 7:-1 8:0.526718 9:-1 10:-0.709677 11:-1 12:-1 13:-1
|
||||
-1 1:0.416667 2:1 3:-1 4:-0.0377358 5:-0.511416 6:1 7:1 8:0.206107 9:-1 10:-0.258065 11:1 12:-1 13:0.5
|
||||
+1 1:0.166667 2:1 3:1 4:0.0566038 5:-0.315068 6:-1 7:1 8:-0.374046 9:1 10:-0.806452 12:-0.333333 13:0.5
|
||||
-1 1:-0.0833333 2:1 3:1 4:-0.132075 5:-0.383562 6:-1 7:1 8:0.755725 9:1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:0.208333 2:-1 3:-0.333333 4:-0.207547 5:-0.118721 6:1 7:1 8:0.236641 9:-1 10:-1 11:-1 12:0.333333 13:-1
|
||||
-1 1:-0.375 2:-1 3:0.333333 4:-0.54717 5:-0.47032 6:-1 7:-1 8:0.19084 9:-1 10:-0.903226 12:-0.333333 13:-1
|
||||
+1 1:-0.25 2:1 3:0.333333 4:-0.735849 5:-0.465753 6:-1 7:-1 8:0.236641 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:0.333333 2:1 3:1 4:-0.509434 5:-0.388128 6:-1 7:-1 8:0.0534351 9:1 10:0.16129 12:-0.333333 13:1
|
||||
-1 1:0.166667 2:-1 3:1 4:-0.509434 5:0.0410959 6:-1 7:-1 8:0.40458 9:1 10:-0.806452 11:-1 12:-1 13:-1
|
||||
-1 1:0.708333 2:1 3:-0.333333 4:0.169811 5:-0.456621 6:-1 7:1 8:0.0992366 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:0.958333 2:-1 3:0.333333 4:-0.132075 5:-0.675799 6:-1 8:-0.312977 9:-1 10:-0.645161 12:-1 13:-1
|
||||
-1 1:0.583333 2:-1 3:1 4:-0.773585 5:-0.557078 6:-1 7:-1 8:0.0839695 9:-1 10:-0.903226 11:-1 12:0.333333 13:-1
|
||||
+1 1:-0.333333 2:1 3:1 4:-0.0943396 5:-0.164384 6:-1 7:1 8:0.160305 9:1 10:-1 12:1 13:1
|
||||
-1 1:-0.333333 2:1 3:1 4:-0.811321 5:-0.625571 6:-1 7:1 8:0.175573 9:1 10:-0.0322581 12:-1 13:-1
|
||||
-1 1:-0.583333 2:-1 3:0.333333 4:-1 5:-0.666667 6:-1 7:-1 8:0.648855 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:-0.458333 2:-1 3:0.333333 4:-0.509434 5:-0.621005 6:-1 7:-1 8:0.557252 9:-1 10:-1 12:-1 13:-1
|
||||
-1 1:0.125 2:1 3:-0.333333 4:-0.509434 5:-0.497717 6:-1 7:-1 8:0.633588 9:-1 10:-0.741935 11:-1 12:-1 13:-1
|
||||
+1 1:0.208333 2:1 3:1 4:-0.0188679 5:-0.579909 6:-1 7:-1 8:-0.480916 9:-1 10:-0.354839 12:-0.333333 13:1
|
||||
+1 1:-0.75 2:1 3:1 4:-0.509434 5:-0.671233 6:-1 7:-1 8:-0.0992366 9:1 10:-0.483871 12:-1 13:1
|
||||
+1 1:0.208333 2:1 3:1 4:0.0566038 5:-0.342466 6:-1 7:1 8:-0.389313 9:1 10:-0.741935 11:-1 12:-1 13:1
|
||||
-1 1:-0.5 2:1 3:0.333333 4:-0.320755 5:-0.598174 6:-1 7:1 8:0.480916 9:-1 10:-0.354839 12:-1 13:-1
|
||||
-1 1:0.166667 2:1 3:1 4:-0.698113 5:-0.657534 6:-1 7:-1 8:-0.160305 9:1 10:-0.516129 12:-1 13:0.5
|
||||
-1 1:-0.458333 2:1 3:-1 4:0.0188679 5:-0.461187 6:-1 7:1 8:0.633588 9:-1 10:-0.741935 11:-1 12:0.333333 13:-1
|
||||
-1 1:0.375 2:1 3:-0.333333 4:-0.358491 5:-0.625571 6:1 7:1 8:0.0534351 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:0.25 2:1 3:-1 4:0.584906 5:-0.342466 6:-1 7:1 8:0.129771 9:-1 10:0.354839 11:1 12:-1 13:1
|
||||
-1 1:-0.5 2:-1 3:-0.333333 4:-0.396226 5:-0.178082 6:-1 7:-1 8:0.40458 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:-0.125 2:1 3:1 4:0.0566038 5:-0.465753 6:-1 7:1 8:-0.129771 9:-1 10:-0.16129 12:-1 13:1
|
||||
-1 1:0.25 2:1 3:-0.333333 4:-0.132075 5:-0.56621 6:-1 7:-1 8:0.419847 9:1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:0.333333 2:-1 3:1 4:-0.320755 5:-0.0684932 6:-1 7:1 8:0.496183 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:0.0416667 2:1 3:1 4:-0.433962 5:-0.360731 6:-1 7:1 8:-0.419847 9:1 10:-0.290323 12:-0.333333 13:1
|
||||
+1 1:0.0416667 2:1 3:1 4:-0.698113 5:-0.634703 6:-1 7:1 8:-0.435115 9:1 10:-1 12:-0.333333 13:-1
|
||||
+1 1:-0.0416667 2:1 3:1 4:-0.415094 5:-0.607306 6:-1 7:-1 8:0.480916 9:-1 10:-0.677419 11:-1 12:0.333333 13:1
|
||||
+1 1:-0.25 2:1 3:1 4:-0.698113 5:-0.319635 6:-1 7:1 8:-0.282443 9:1 10:-0.677419 12:-0.333333 13:-1
|
||||
-1 1:0.541667 2:1 3:1 4:-0.509434 5:-0.196347 6:-1 7:1 8:0.221374 9:-1 10:-0.870968 12:-1 13:-1
|
||||
+1 1:0.208333 2:1 3:1 4:-0.886792 5:-0.506849 6:-1 7:-1 8:0.29771 9:-1 10:-0.967742 11:-1 12:-0.333333 13:1
|
||||
-1 1:0.458333 2:-1 3:0.333333 4:-0.132075 5:-0.146119 6:-1 7:-1 8:-0.0534351 9:-1 10:-0.935484 11:-1 12:-1 13:1
|
||||
-1 1:-0.125 2:-1 3:-0.333333 4:-0.509434 5:-0.461187 6:-1 7:-1 8:0.389313 9:-1 10:-0.645161 11:-1 12:-1 13:-1
|
||||
-1 1:-0.375 2:-1 3:0.333333 4:-0.735849 5:-0.931507 6:-1 7:-1 8:0.587786 9:-1 10:-0.806452 12:-1 13:-1
|
||||
+1 1:0.583333 2:1 3:1 4:-0.509434 5:-0.493151 6:-1 7:-1 8:-1 9:-1 10:-0.677419 12:-1 13:-1
|
||||
-1 1:-0.166667 2:-1 3:1 4:-0.320755 5:-0.347032 6:-1 7:-1 8:0.40458 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:0.166667 2:1 3:1 4:0.339623 5:-0.255708 6:1 7:1 8:-0.19084 9:-1 10:-0.677419 12:1 13:1
|
||||
+1 1:0.416667 2:1 3:1 4:-0.320755 5:-0.415525 6:-1 7:1 8:0.160305 9:-1 10:-0.548387 12:-0.333333 13:1
|
||||
+1 1:-0.208333 2:1 3:1 4:-0.433962 5:-0.324201 6:-1 7:1 8:0.450382 9:-1 10:-0.83871 12:-1 13:1
|
||||
-1 1:-0.0833333 2:1 3:0.333333 4:-0.886792 5:-0.561644 6:-1 7:-1 8:0.0992366 9:1 10:-0.612903 12:-1 13:-1
|
||||
+1 1:0.291667 2:-1 3:1 4:0.0566038 5:-0.39726 6:-1 7:1 8:0.312977 9:-1 10:-0.16129 12:0.333333 13:1
|
||||
+1 1:0.25 2:1 3:1 4:-0.132075 5:-0.767123 6:-1 7:-1 8:0.389313 9:1 10:-1 11:-1 12:-0.333333 13:1
|
||||
-1 1:-0.333333 2:-1 3:-0.333333 4:-0.660377 5:-0.844749 6:-1 7:-1 8:0.0229008 9:-1 10:-1 12:-1 13:-1
|
||||
+1 1:0.0833333 2:-1 3:1 4:0.622642 5:-0.0821918 6:-1 8:-0.29771 9:1 10:0.0967742 12:-1 13:-1
|
||||
-1 1:-0.5 2:1 3:-0.333333 4:-0.698113 5:-0.502283 6:-1 7:-1 8:0.251908 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:0.291667 2:-1 3:1 4:0.207547 5:-0.182648 6:-1 7:1 8:0.374046 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:0.0416667 2:-1 3:0.333333 4:-0.226415 5:-0.187215 6:1 7:-1 8:0.51145 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:-0.458333 2:1 3:-0.333333 4:-0.509434 5:-0.228311 6:-1 7:-1 8:0.389313 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:-0.166667 2:-1 3:-0.333333 4:-0.245283 5:-0.3379 6:-1 7:-1 8:0.389313 9:-1 10:-1 12:-1 13:-1
|
||||
+1 1:-0.291667 2:1 3:1 4:-0.509434 5:-0.438356 6:-1 7:1 8:0.114504 9:-1 10:-0.741935 11:-1 12:-1 13:1
|
||||
+1 1:0.125 2:-1 3:1 4:1 5:-0.260274 6:1 7:1 8:-0.0534351 9:1 10:0.290323 11:1 12:0.333333 13:1
|
||||
-1 1:0.541667 2:-1 3:-1 4:0.0566038 5:-0.543379 6:-1 7:-1 8:-0.343511 9:-1 10:-0.16129 11:1 12:-1 13:-1
|
||||
+1 1:0.125 2:1 3:1 4:-0.320755 5:-0.283105 6:1 7:1 8:-0.51145 9:1 10:-0.483871 11:1 12:-1 13:1
|
||||
+1 1:-0.166667 2:1 3:0.333333 4:-0.509434 5:-0.716895 6:-1 7:-1 8:0.0381679 9:-1 10:-0.354839 12:1 13:1
|
||||
+1 1:0.0416667 2:1 3:1 4:-0.471698 5:-0.269406 6:-1 7:1 8:-0.312977 9:1 10:0.0322581 12:0.333333 13:-1
|
||||
+1 1:0.166667 2:1 3:1 4:0.0943396 5:-0.324201 6:-1 7:-1 8:-0.740458 9:1 10:-0.612903 12:-0.333333 13:1
|
||||
-1 1:0.5 2:-1 3:0.333333 4:0.245283 5:0.0684932 6:-1 7:1 8:0.221374 9:-1 10:-0.741935 11:-1 12:-1 13:-1
|
||||
-1 1:0.0416667 2:1 3:0.333333 4:-0.415094 5:-0.328767 6:-1 7:1 8:0.236641 9:-1 10:-0.83871 11:1 12:-0.333333 13:-1
|
||||
-1 1:0.0416667 2:-1 3:0.333333 4:0.245283 5:-0.657534 6:-1 7:-1 8:0.40458 9:-1 10:-1 11:-1 12:-0.333333 13:-1
|
||||
+1 1:0.375 2:1 3:1 4:-0.509434 5:-0.356164 6:-1 7:-1 8:-0.572519 9:1 10:-0.419355 12:0.333333 13:1
|
||||
-1 1:-0.0416667 2:-1 3:0.333333 4:-0.207547 5:-0.680365 6:-1 7:1 8:0.496183 9:-1 10:-0.967742 12:-1 13:-1
|
||||
-1 1:-0.0416667 2:1 3:-0.333333 4:-0.245283 5:-0.657534 6:-1 7:-1 8:0.328244 9:-1 10:-0.741935 11:-1 12:-0.333333 13:-1
|
||||
+1 1:0.291667 2:1 3:1 4:-0.566038 5:-0.525114 6:1 7:-1 8:0.358779 9:1 10:-0.548387 11:-1 12:0.333333 13:1
|
||||
+1 1:0.416667 2:-1 3:1 4:-0.735849 5:-0.347032 6:-1 7:-1 8:0.496183 9:1 10:-0.419355 12:0.333333 13:-1
|
||||
+1 1:0.541667 2:1 3:1 4:-0.660377 5:-0.607306 6:-1 7:1 8:-0.0687023 9:1 10:-0.967742 11:-1 12:-0.333333 13:-1
|
||||
-1 1:-0.458333 2:1 3:1 4:-0.132075 5:-0.543379 6:-1 7:-1 8:0.633588 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:0.458333 2:1 3:1 4:-0.509434 5:-0.452055 6:-1 7:1 8:-0.618321 9:1 10:-0.290323 11:1 12:-0.333333 13:-1
|
||||
-1 1:0.0416667 2:1 3:0.333333 4:0.0566038 5:-0.515982 6:-1 7:1 8:0.435115 9:-1 10:-0.483871 11:-1 12:-1 13:1
|
||||
-1 1:-0.291667 2:-1 3:0.333333 4:-0.0943396 5:-0.767123 6:-1 7:1 8:0.358779 9:1 10:-0.548387 11:1 12:-1 13:-1
|
||||
-1 1:0.583333 2:-1 3:0.333333 4:0.0943396 5:-0.310502 6:-1 7:-1 8:0.541985 9:-1 10:-1 11:-1 12:-0.333333 13:-1
|
||||
+1 1:0.125 2:1 3:1 4:-0.415094 5:-0.438356 6:1 7:1 8:0.114504 9:1 10:-0.612903 12:-0.333333 13:-1
|
||||
-1 1:-0.791667 2:-1 3:-0.333333 4:-0.54717 5:-0.616438 6:-1 7:-1 8:0.847328 9:-1 10:-0.774194 11:-1 12:-1 13:-1
|
||||
-1 1:0.166667 2:1 3:1 4:-0.283019 5:-0.630137 6:-1 7:-1 8:0.480916 9:1 10:-1 11:-1 12:-1 13:1
|
||||
+1 1:0.458333 2:1 3:1 4:-0.0377358 5:-0.607306 6:-1 7:1 8:-0.0687023 9:-1 10:-0.354839 12:0.333333 13:0.5
|
||||
-1 1:0.25 2:1 3:1 4:-0.169811 5:-0.3379 6:-1 7:1 8:0.694656 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:-0.125 2:1 3:0.333333 4:-0.132075 5:-0.511416 6:-1 7:-1 8:0.40458 9:-1 10:-0.806452 12:-0.333333 13:1
|
||||
-1 1:-0.0833333 2:1 3:-1 4:-0.415094 5:-0.60274 6:-1 7:1 8:-0.175573 9:1 10:-0.548387 11:-1 12:-0.333333 13:-1
|
||||
+1 1:0.0416667 2:1 3:-0.333333 4:0.849057 5:-0.283105 6:-1 7:1 8:0.89313 9:-1 10:-1 11:-1 12:-0.333333 13:1
|
||||
+1 2:1 3:1 4:-0.45283 5:-0.287671 6:-1 7:-1 8:-0.633588 9:1 10:-0.354839 12:0.333333 13:1
|
||||
+1 1:-0.0416667 2:1 3:1 4:-0.660377 5:-0.525114 6:-1 7:-1 8:0.358779 9:-1 10:-1 11:-1 12:-0.333333 13:-1
|
||||
+1 1:-0.541667 2:1 3:1 4:-0.698113 5:-0.812785 6:-1 7:1 8:-0.343511 9:1 10:-0.354839 12:-1 13:1
|
||||
+1 1:0.208333 2:1 3:0.333333 4:-0.283019 5:-0.552511 6:-1 7:1 8:0.557252 9:-1 10:0.0322581 11:-1 12:0.333333 13:1
|
||||
-1 1:-0.5 2:-1 3:0.333333 4:-0.660377 5:-0.351598 6:-1 7:1 8:0.541985 9:1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:-0.5 2:1 3:0.333333 4:-0.660377 5:-0.43379 6:-1 7:-1 8:0.648855 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:-0.125 2:-1 3:0.333333 4:-0.509434 5:-0.575342 6:-1 7:-1 8:0.328244 9:-1 10:-0.483871 12:-1 13:-1
|
||||
-1 1:0.0416667 2:-1 3:0.333333 4:-0.735849 5:-0.356164 6:-1 7:1 8:0.465649 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:0.458333 2:-1 3:1 4:-0.320755 5:-0.191781 6:-1 7:-1 8:-0.221374 9:-1 10:-0.354839 12:0.333333 13:-1
|
||||
-1 1:-0.0833333 2:-1 3:0.333333 4:-0.320755 5:-0.406393 6:-1 7:1 8:0.19084 9:-1 10:-0.83871 11:-1 12:-1 13:-1
|
||||
-1 1:-0.291667 2:-1 3:-0.333333 4:-0.792453 5:-0.643836 6:-1 7:-1 8:0.541985 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:0.0833333 2:1 3:1 4:-0.132075 5:-0.584475 6:-1 7:-1 8:-0.389313 9:1 10:0.806452 11:1 12:-1 13:1
|
||||
-1 1:-0.333333 2:1 3:-0.333333 4:-0.358491 5:-0.16895 6:-1 7:1 8:0.51145 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:0.125 2:1 3:-1 4:-0.509434 5:-0.694064 6:-1 7:1 8:0.389313 9:-1 10:-0.387097 12:-1 13:1
|
||||
+1 1:0.541667 2:-1 3:1 4:0.584906 5:-0.534247 6:1 7:-1 8:0.435115 9:1 10:-0.677419 12:0.333333 13:1
|
||||
+1 1:-0.625 2:1 3:-1 4:-0.509434 5:-0.520548 6:-1 7:-1 8:0.694656 9:1 10:0.225806 12:-1 13:1
|
||||
+1 1:0.375 2:-1 3:1 4:0.0566038 5:-0.461187 6:-1 7:-1 8:0.267176 9:1 10:-0.548387 12:-1 13:-1
|
||||
-1 1:0.0833333 2:1 3:-0.333333 4:-0.320755 5:-0.378995 6:-1 7:-1 8:0.282443 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:0.208333 2:1 3:1 4:-0.358491 5:-0.392694 6:-1 7:1 8:-0.0992366 9:1 10:-0.0322581 12:0.333333 13:1
|
||||
-1 1:-0.416667 2:1 3:1 4:-0.698113 5:-0.611872 6:-1 7:-1 8:0.374046 9:-1 10:-1 11:-1 12:-1 13:1
|
||||
-1 1:0.458333 2:-1 3:1 4:0.622642 5:-0.0913242 6:-1 7:-1 8:0.267176 9:1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:-0.125 2:-1 3:1 4:-0.698113 5:-0.415525 6:-1 7:1 8:0.343511 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 2:1 3:0.333333 4:-0.320755 5:-0.675799 6:1 7:1 8:0.236641 9:-1 10:-0.612903 11:1 12:-1 13:-1
|
||||
-1 1:-0.333333 2:-1 3:1 4:-0.169811 5:-0.497717 6:-1 7:1 8:0.236641 9:1 10:-0.935484 12:-1 13:-1
|
||||
+1 1:0.5 2:1 3:-1 4:-0.169811 5:-0.287671 6:1 7:1 8:0.572519 9:-1 10:-0.548387 12:-0.333333 13:-1
|
||||
-1 1:0.666667 2:1 3:-1 4:0.245283 5:-0.506849 6:1 7:1 8:-0.0839695 9:-1 10:-0.967742 12:-0.333333 13:-1
|
||||
+1 1:0.666667 2:1 3:0.333333 4:-0.132075 5:-0.415525 6:-1 7:1 8:0.145038 9:-1 10:-0.354839 12:1 13:1
|
||||
+1 1:0.583333 2:1 3:1 4:-0.886792 5:-0.210046 6:-1 7:1 8:-0.175573 9:1 10:-0.709677 12:0.333333 13:-1
|
||||
-1 1:0.625 2:-1 3:0.333333 4:-0.509434 5:-0.611872 6:-1 7:1 8:-0.328244 9:-1 10:-0.516129 12:-1 13:-1
|
||||
-1 1:-0.791667 2:1 3:-1 4:-0.54717 5:-0.744292 6:-1 7:1 8:0.572519 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:0.375 2:-1 3:1 4:-0.169811 5:-0.232877 6:1 7:-1 8:-0.465649 9:-1 10:-0.387097 12:1 13:-1
|
||||
+1 1:-0.0833333 2:1 3:1 4:-0.132075 5:-0.214612 6:-1 7:-1 8:-0.221374 9:1 10:0.354839 12:1 13:1
|
||||
+1 1:-0.291667 2:1 3:0.333333 4:0.0566038 5:-0.520548 6:-1 7:-1 8:0.160305 9:-1 10:0.16129 12:-1 13:-1
|
||||
+1 1:0.583333 2:1 3:1 4:-0.415094 5:-0.415525 6:1 7:-1 8:0.40458 9:-1 10:-0.935484 12:0.333333 13:1
|
||||
-1 1:-0.125 2:1 3:0.333333 4:-0.339623 5:-0.680365 6:-1 7:-1 8:0.40458 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:-0.458333 2:1 3:0.333333 4:-0.509434 5:-0.479452 6:1 7:-1 8:0.877863 9:-1 10:-0.741935 11:1 12:-1 13:1
|
||||
+1 1:0.125 2:-1 3:1 4:-0.245283 5:0.292237 6:-1 7:1 8:0.206107 9:1 10:-0.387097 12:0.333333 13:1
|
||||
+1 1:-0.5 2:1 3:1 4:-0.698113 5:-0.789954 6:-1 7:1 8:0.328244 9:-1 10:-1 11:-1 12:-1 13:1
|
||||
-1 1:-0.458333 2:-1 3:1 4:-0.849057 5:-0.365297 6:-1 7:1 8:-0.221374 9:-1 10:-0.806452 12:-1 13:-1
|
||||
-1 2:1 3:0.333333 4:-0.320755 5:-0.452055 6:1 7:1 8:0.557252 9:-1 10:-1 11:-1 12:1 13:-1
|
||||
-1 1:-0.416667 2:1 3:0.333333 4:-0.320755 5:-0.136986 6:-1 7:-1 8:0.389313 9:-1 10:-0.387097 11:-1 12:-0.333333 13:-1
|
||||
+1 1:0.125 2:1 3:1 4:-0.283019 5:-0.73516 6:-1 7:1 8:-0.480916 9:1 10:-0.322581 12:-0.333333 13:0.5
|
||||
-1 1:-0.0416667 2:1 3:1 4:-0.735849 5:-0.511416 6:1 7:-1 8:0.160305 9:-1 10:-0.967742 11:-1 12:1 13:1
|
||||
-1 1:0.375 2:-1 3:1 4:-0.132075 5:0.223744 6:-1 7:1 8:0.312977 9:-1 10:-0.612903 12:-1 13:-1
|
||||
+1 1:0.708333 2:1 3:0.333333 4:0.245283 5:-0.347032 6:-1 7:-1 8:-0.374046 9:1 10:-0.0645161 12:-0.333333 13:1
|
||||
-1 1:0.0416667 2:1 3:1 4:-0.132075 5:-0.484018 6:-1 7:-1 8:0.358779 9:-1 10:-0.612903 11:-1 12:-1 13:-1
|
||||
+1 1:0.708333 2:1 3:1 4:-0.0377358 5:-0.780822 6:-1 7:-1 8:-0.175573 9:1 10:-0.16129 11:1 12:-1 13:1
|
||||
-1 1:0.0416667 2:1 3:-0.333333 4:-0.735849 5:-0.164384 6:-1 7:-1 8:0.29771 9:-1 10:-1 11:-1 12:-1 13:1
|
||||
+1 1:-0.75 2:1 3:1 4:-0.396226 5:-0.287671 6:-1 7:1 8:0.29771 9:1 10:-1 11:-1 12:-1 13:1
|
||||
-1 1:-0.208333 2:1 3:0.333333 4:-0.433962 5:-0.410959 6:1 7:-1 8:0.587786 9:-1 10:-1 11:-1 12:0.333333 13:-1
|
||||
-1 1:0.0833333 2:-1 3:-0.333333 4:-0.226415 5:-0.43379 6:-1 7:1 8:0.374046 9:-1 10:-0.548387 12:-1 13:-1
|
||||
-1 1:0.208333 2:-1 3:1 4:-0.886792 5:-0.442922 6:-1 7:1 8:-0.221374 9:-1 10:-0.677419 12:-1 13:-1
|
||||
-1 1:0.0416667 2:-1 3:0.333333 4:-0.698113 5:-0.598174 6:-1 7:-1 8:0.328244 9:-1 10:-0.483871 12:-1 13:-1
|
||||
-1 1:0.666667 2:-1 3:-1 4:-0.132075 5:-0.484018 6:-1 7:-1 8:0.221374 9:-1 10:-0.419355 11:-1 12:0.333333 13:-1
|
||||
+1 1:1 2:1 3:1 4:-0.415094 5:-0.187215 6:-1 7:1 8:0.389313 9:1 10:-1 11:-1 12:1 13:-1
|
||||
-1 1:0.625 2:1 3:0.333333 4:-0.54717 5:-0.310502 6:-1 7:-1 8:0.221374 9:-1 10:-0.677419 11:-1 12:-0.333333 13:1
|
||||
+1 1:0.208333 2:1 3:1 4:-0.415094 5:-0.205479 6:-1 7:1 8:0.526718 9:-1 10:-1 11:-1 12:0.333333 13:1
|
||||
+1 1:0.291667 2:1 3:1 4:-0.415094 5:-0.39726 6:-1 7:1 8:0.0687023 9:1 10:-0.0967742 12:-0.333333 13:1
|
||||
+1 1:-0.0833333 2:1 3:1 4:-0.132075 5:-0.210046 6:-1 7:-1 8:0.557252 9:1 10:-0.483871 11:-1 12:-1 13:1
|
||||
+1 1:0.0833333 2:1 3:1 4:0.245283 5:-0.255708 6:-1 7:1 8:0.129771 9:1 10:-0.741935 12:-0.333333 13:1
|
||||
-1 1:-0.0416667 2:1 3:-1 4:0.0943396 5:-0.214612 6:1 7:-1 8:0.633588 9:-1 10:-0.612903 12:-1 13:1
|
||||
-1 1:0.291667 2:-1 3:0.333333 4:-0.849057 5:-0.123288 6:-1 7:-1 8:0.358779 9:-1 10:-1 11:-1 12:-0.333333 13:-1
|
||||
-1 1:0.208333 2:1 3:0.333333 4:-0.792453 5:-0.479452 6:-1 7:1 8:0.267176 9:1 10:-0.806452 12:-1 13:1
|
||||
+1 1:0.458333 2:1 3:0.333333 4:-0.415094 5:-0.164384 6:-1 7:-1 8:-0.0839695 9:1 10:-0.419355 12:-1 13:1
|
||||
-1 1:-0.666667 2:1 3:0.333333 4:-0.320755 5:-0.43379 6:-1 7:-1 8:0.770992 9:-1 10:0.129032 11:1 12:-1 13:-1
|
||||
+1 1:0.25 2:1 3:-1 4:0.433962 5:-0.260274 6:-1 7:1 8:0.343511 9:-1 10:-0.935484 12:-1 13:1
|
||||
-1 1:-0.0833333 2:1 3:0.333333 4:-0.415094 5:-0.456621 6:1 7:1 8:0.450382 9:-1 10:-0.225806 12:-1 13:-1
|
||||
-1 1:-0.416667 2:-1 3:0.333333 4:-0.471698 5:-0.60274 6:-1 7:-1 8:0.435115 9:-1 10:-0.935484 12:-1 13:-1
|
||||
+1 1:0.208333 2:1 3:1 4:-0.358491 5:-0.589041 6:-1 7:1 8:-0.0839695 9:1 10:-0.290323 12:1 13:1
|
||||
-1 1:-1 2:1 3:-0.333333 4:-0.320755 5:-0.643836 6:-1 7:1 8:1 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:-0.5 2:-1 3:-0.333333 4:-0.320755 5:-0.643836 6:-1 7:1 8:0.541985 9:-1 10:-0.548387 11:-1 12:-1 13:-1
|
||||
-1 1:0.416667 2:-1 3:0.333333 4:-0.226415 5:-0.424658 6:-1 7:1 8:0.541985 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:-0.0833333 2:1 3:0.333333 4:-1 5:-0.538813 6:-1 7:-1 8:0.267176 9:1 10:-1 11:-1 12:-0.333333 13:1
|
||||
-1 1:0.0416667 2:1 3:0.333333 4:-0.509434 5:-0.39726 6:-1 7:1 8:0.160305 9:-1 10:-0.870968 12:-1 13:1
|
||||
-1 1:-0.375 2:1 3:-0.333333 4:-0.509434 5:-0.570776 6:-1 7:-1 8:0.51145 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:0.0416667 2:1 3:1 4:-0.698113 5:-0.484018 6:-1 7:-1 8:-0.160305 9:1 10:-0.0967742 12:-0.333333 13:1
|
||||
+1 1:0.5 2:1 3:1 4:-0.226415 5:-0.415525 6:-1 7:1 8:-0.145038 9:-1 10:-0.0967742 12:-0.333333 13:1
|
||||
-1 1:0.166667 2:1 3:0.333333 4:0.0566038 5:-0.808219 6:-1 7:-1 8:0.572519 9:-1 10:-0.483871 11:-1 12:-1 13:-1
|
||||
+1 1:0.416667 2:1 3:1 4:-0.320755 5:-0.0684932 6:1 7:1 8:-0.0687023 9:1 10:-0.419355 11:-1 12:1 13:1
|
||||
-1 1:-0.75 2:-1 3:1 4:-0.169811 5:-0.739726 6:-1 7:-1 8:0.694656 9:-1 10:-0.548387 11:-1 12:-1 13:-1
|
||||
-1 1:-0.5 2:1 3:-0.333333 4:-0.226415 5:-0.648402 6:-1 7:-1 8:-0.0687023 9:-1 10:-1 12:-1 13:0.5
|
||||
+1 1:0.375 2:-1 3:0.333333 4:-0.320755 5:-0.374429 6:-1 7:-1 8:-0.603053 9:-1 10:-0.612903 12:-0.333333 13:1
|
||||
+1 1:-0.416667 2:-1 3:1 4:-0.283019 5:-0.0182648 6:1 7:1 8:-0.00763359 9:1 10:-0.0322581 12:-1 13:1
|
||||
-1 1:0.208333 2:-1 3:-1 4:0.0566038 5:-0.283105 6:1 7:1 8:0.389313 9:-1 10:-0.677419 11:-1 12:-1 13:-1
|
||||
-1 1:-0.0416667 2:1 3:-1 4:-0.54717 5:-0.726027 6:-1 7:1 8:0.816794 9:-1 10:-1 12:-1 13:0.5
|
||||
+1 1:0.333333 2:-1 3:1 4:-0.0377358 5:-0.173516 6:-1 7:1 8:0.145038 9:1 10:-0.677419 12:-1 13:1
|
||||
+1 1:-0.583333 2:1 3:1 4:-0.54717 5:-0.575342 6:-1 7:-1 8:0.0534351 9:-1 10:-0.612903 12:-1 13:1
|
||||
-1 1:-0.333333 2:1 3:1 4:-0.603774 5:-0.388128 6:-1 7:1 8:0.740458 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:-0.0416667 2:1 3:1 4:-0.358491 5:-0.410959 6:-1 7:-1 8:0.374046 9:1 10:-1 11:-1 12:-0.333333 13:1
|
||||
-1 1:0.375 2:1 3:0.333333 4:-0.320755 5:-0.520548 6:-1 7:-1 8:0.145038 9:-1 10:-0.419355 12:1 13:1
|
||||
+1 1:0.375 2:-1 3:1 4:0.245283 5:-0.826484 6:-1 7:1 8:0.129771 9:-1 10:1 11:1 12:1 13:1
|
||||
-1 2:-1 3:1 4:-0.169811 5:-0.506849 6:-1 7:1 8:0.358779 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:-0.416667 2:1 3:1 4:-0.509434 5:-0.767123 6:-1 7:1 8:-0.251908 9:1 10:-0.193548 12:-1 13:1
|
||||
-1 1:-0.25 2:1 3:0.333333 4:-0.169811 5:-0.401826 6:-1 7:1 8:0.29771 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:-0.0416667 2:1 3:-0.333333 4:-0.509434 5:-0.0913242 6:-1 7:-1 8:0.541985 9:-1 10:-0.935484 11:-1 12:-1 13:-1
|
||||
+1 1:0.625 2:1 3:0.333333 4:0.622642 5:-0.324201 6:1 7:1 8:0.206107 9:1 10:-0.483871 12:-1 13:1
|
||||
-1 1:-0.583333 2:1 3:0.333333 4:-0.132075 5:-0.109589 6:-1 7:1 8:0.694656 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 2:-1 3:1 4:-0.320755 5:-0.369863 6:-1 7:1 8:0.0992366 9:-1 10:-0.870968 12:-1 13:-1
|
||||
+1 1:0.375 2:-1 3:1 4:-0.132075 5:-0.351598 6:-1 7:1 8:0.358779 9:-1 10:0.16129 11:1 12:0.333333 13:-1
|
||||
-1 1:-0.0833333 2:-1 3:0.333333 4:-0.132075 5:-0.16895 6:-1 7:1 8:0.0839695 9:-1 10:-0.516129 11:-1 12:-0.333333 13:-1
|
||||
+1 1:0.291667 2:1 3:1 4:-0.320755 5:-0.420091 6:-1 7:-1 8:0.114504 9:1 10:-0.548387 11:-1 12:-0.333333 13:1
|
||||
+1 1:0.5 2:1 3:1 4:-0.698113 5:-0.442922 6:-1 7:1 8:0.328244 9:-1 10:-0.806452 11:-1 12:0.333333 13:0.5
|
||||
-1 1:0.5 2:-1 3:0.333333 4:0.150943 5:-0.347032 6:-1 7:-1 8:0.175573 9:-1 10:-0.741935 11:-1 12:-1 13:-1
|
||||
+1 1:0.291667 2:1 3:0.333333 4:-0.132075 5:-0.730594 6:-1 7:1 8:0.282443 9:-1 10:-0.0322581 12:-1 13:-1
|
||||
+1 1:0.291667 2:1 3:1 4:-0.0377358 5:-0.287671 6:-1 7:1 8:0.0839695 9:1 10:-0.0967742 12:0.333333 13:1
|
||||
+1 1:0.0416667 2:1 3:1 4:-0.509434 5:-0.716895 6:-1 7:-1 8:-0.358779 9:-1 10:-0.548387 12:-0.333333 13:1
|
||||
-1 1:-0.375 2:1 3:-0.333333 4:-0.320755 5:-0.575342 6:-1 7:1 8:0.78626 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:-0.375 2:1 3:1 4:-0.660377 5:-0.251142 6:-1 7:1 8:0.251908 9:-1 10:-1 11:-1 12:-0.333333 13:-1
|
||||
-1 1:-0.0833333 2:1 3:0.333333 4:-0.698113 5:-0.776256 6:-1 7:-1 8:-0.206107 9:-1 10:-0.806452 11:-1 12:-1 13:-1
|
||||
-1 1:0.25 2:1 3:0.333333 4:0.0566038 5:-0.607306 6:1 7:-1 8:0.312977 9:-1 10:-0.483871 11:-1 12:-1 13:-1
|
||||
-1 1:0.75 2:-1 3:-0.333333 4:0.245283 5:-0.196347 6:-1 7:-1 8:0.389313 9:-1 10:-0.870968 11:-1 12:0.333333 13:-1
|
||||
-1 1:0.333333 2:1 3:0.333333 4:0.0566038 5:-0.465753 6:1 7:-1 8:0.00763359 9:1 10:-0.677419 12:-1 13:-1
|
||||
+1 1:0.0833333 2:1 3:1 4:-0.283019 5:0.0365297 6:-1 7:-1 8:-0.0687023 9:1 10:-0.612903 12:-0.333333 13:1
|
||||
+1 1:0.458333 2:1 3:0.333333 4:-0.132075 5:-0.0456621 6:-1 7:-1 8:0.328244 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
-1 1:-0.416667 2:1 3:1 4:0.0566038 5:-0.447489 6:-1 7:-1 8:0.526718 9:-1 10:-0.516129 11:-1 12:-1 13:-1
|
||||
-1 1:0.208333 2:-1 3:0.333333 4:-0.509434 5:-0.0228311 6:-1 7:-1 8:0.541985 9:-1 10:-1 11:-1 12:-1 13:-1
|
||||
+1 1:0.291667 2:1 3:1 4:-0.320755 5:-0.634703 6:-1 7:1 8:-0.0687023 9:1 10:-0.225806 12:0.333333 13:1
|
||||
+1 1:0.208333 2:1 3:-0.333333 4:-0.509434 5:-0.278539 6:-1 7:1 8:0.358779 9:-1 10:-0.419355 12:-1 13:-1
|
||||
-1 1:-0.166667 2:1 3:-0.333333 4:-0.320755 5:-0.360731 6:-1 7:-1 8:0.526718 9:-1 10:-0.806452 11:-1 12:-1 13:-1
|
||||
+1 1:-0.208333 2:1 3:-0.333333 4:-0.698113 5:-0.52968 6:-1 7:-1 8:0.480916 9:-1 10:-0.677419 11:1 12:-1 13:1
|
||||
-1 1:-0.0416667 2:1 3:0.333333 4:0.471698 5:-0.666667 6:1 7:-1 8:0.389313 9:-1 10:-0.83871 11:-1 12:-1 13:1
|
||||
-1 1:-0.375 2:1 3:-0.333333 4:-0.509434 5:-0.374429 6:-1 7:-1 8:0.557252 9:-1 10:-1 11:-1 12:-1 13:1
|
||||
-1 1:0.125 2:-1 3:-0.333333 4:-0.132075 5:-0.232877 6:-1 7:1 8:0.251908 9:-1 10:-0.580645 12:-1 13:-1
|
||||
-1 1:0.166667 2:1 3:1 4:-0.132075 5:-0.69863 6:-1 7:-1 8:0.175573 9:-1 10:-0.870968 12:-1 13:0.5
|
||||
+1 1:0.583333 2:1 3:1 4:0.245283 5:-0.269406 6:-1 7:1 8:-0.435115 9:1 10:-0.516129 12:1 13:-1
|
|
@ -0,0 +1,270 @@
|
|||
20
|
||||
10
|
||||
5.5
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,45 @@
|
|||
# This Makefile is used under Linux
#
# Builds the MATLAB MEX interface of LIBSVM (svmpredict, svmtrain,
# libsvmread, libsvmwrite). Override MATLABDIR on the command line if MATLAB
# is not installed in the default location, e.g.
#   make MATLABDIR=/opt/matlab

MATLABDIR ?= /usr/local/matlab
# for Mac
# MATLABDIR ?= /opt/local/matlab

CXX ?= g++
#CXX = g++-4.1
CFLAGS = -Wall -Wconversion -O3 -fPIC -I$(MATLABDIR)/extern/include -I..

MEX = $(MATLABDIR)/bin/mex
MEX_OPTION = CC="$(CXX)" CXX="$(CXX)" CFLAGS="$(CFLAGS)" CXXFLAGS="$(CFLAGS)"
# comment the following line if you use MATLAB on 32-bit computer
MEX_OPTION += -largeArrayDims
# Simply expanded (:=) so the mexext helper runs once, not on every reference.
MEX_EXT := $(shell $(MATLABDIR)/bin/mexext)

# These targets do not name files; keep them working even if a file with the
# same name appears in this directory.
.PHONY: all matlab octave binary clean

all:	matlab

matlab:	binary

octave:
	@echo "please type make under Octave"

binary: svmpredict.$(MEX_EXT) svmtrain.$(MEX_EXT) libsvmread.$(MEX_EXT) libsvmwrite.$(MEX_EXT)

svmpredict.$(MEX_EXT):     svmpredict.c ../svm.h ../svm.o svm_model_matlab.o
	$(MEX) $(MEX_OPTION) svmpredict.c ../svm.o svm_model_matlab.o

svmtrain.$(MEX_EXT):       svmtrain.c ../svm.h ../svm.o svm_model_matlab.o
	$(MEX) $(MEX_OPTION) svmtrain.c ../svm.o svm_model_matlab.o

libsvmread.$(MEX_EXT):	libsvmread.c
	$(MEX) $(MEX_OPTION) libsvmread.c

libsvmwrite.$(MEX_EXT):	libsvmwrite.c
	$(MEX) $(MEX_OPTION) libsvmwrite.c

svm_model_matlab.o:     svm_model_matlab.c ../svm.h
	$(CXX) $(CFLAGS) -c svm_model_matlab.c

# Use $(MAKE) rather than a literal "make" so command-line flags and the
# jobserver are passed on to the sub-make.
../svm.o: ../svm.cpp ../svm.h
	$(MAKE) -C .. svm.o

clean:
	rm -f *~ *.o *.mex* *.obj ../svm.o
|
|
@ -0,0 +1,245 @@
|
|||
-----------------------------------------
|
||||
--- MATLAB/OCTAVE interface of LIBSVM ---
|
||||
-----------------------------------------
|
||||
|
||||
Table of Contents
|
||||
=================
|
||||
|
||||
- Introduction
|
||||
- Installation
|
||||
- Usage
|
||||
- Returned Model Structure
|
||||
- Other Utilities
|
||||
- Examples
|
||||
- Additional Information
|
||||
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
This tool provides a simple interface to LIBSVM, a library for support vector
|
||||
machines (http://www.csie.ntu.edu.tw/~cjlin/libsvm). It is very easy to use as
|
||||
the usage and the way of specifying parameters are the same as that of LIBSVM.
|
||||
|
||||
Installation
|
||||
============
|
||||
|
||||
On Windows systems, pre-built binary files are already in the
|
||||
directory '..\windows', so no installation is needed. Currently we
|
||||
provide binary files only for 64bit MATLAB on Windows. If you would
|
||||
like to re-build the package, please rely on the following steps.
|
||||
|
||||
We recommend using make.m on both MATLAB and OCTAVE. Just type 'make'
|
||||
to build 'libsvmread.mex', 'libsvmwrite.mex', 'svmtrain.mex', and
|
||||
'svmpredict.mex'.
|
||||
|
||||
On MATLAB or Octave:
|
||||
|
||||
>> make
|
||||
|
||||
If make.m does not work on MATLAB (especially for Windows), try 'mex
|
||||
-setup' to choose a suitable compiler for mex. Make sure your compiler
|
||||
is accessible and workable. Then type 'make' to start the
|
||||
installation.
|
||||
|
||||
Example:
|
||||
|
||||
matlab>> mex -setup
|
||||
(ps: MATLAB will show the following messages to setup default compiler.)
|
||||
Please choose your compiler for building external interface (MEX) files:
|
||||
Would you like mex to locate installed compilers [y]/n? y
|
||||
Select a compiler:
|
||||
[1] Microsoft Visual C/C++ version 7.1 in C:\Program Files\Microsoft Visual Studio
|
||||
[0] None
|
||||
Compiler: 1
|
||||
Please verify your choices:
|
||||
Compiler: Microsoft Visual C/C++ 7.1
|
||||
Location: C:\Program Files\Microsoft Visual Studio
|
||||
Are these correct?([y]/n): y
|
||||
|
||||
matlab>> make
|
||||
|
||||
On Unix systems, if neither make.m nor 'mex -setup' works, please use
|
||||
Makefile and type 'make' in a command window. Note that we assume
|
||||
your MATLAB is installed in '/usr/local/matlab'. If not, please change
|
||||
MATLABDIR in Makefile.
|
||||
|
||||
Example:
|
||||
linux> make
|
||||
|
||||
To use octave, type 'make octave':
|
||||
|
||||
Example:
|
||||
linux> make octave
|
||||
|
||||
For a list of supported/compatible compilers for MATLAB, please check
|
||||
the following page:
|
||||
|
||||
http://www.mathworks.com/support/compilers/current_release/
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
||||
matlab> model = svmtrain(training_label_vector, training_instance_matrix [, 'libsvm_options']);
|
||||
|
||||
-training_label_vector:
|
||||
An m by 1 vector of training labels (type must be double).
|
||||
-training_instance_matrix:
|
||||
An m by n matrix of m training instances with n features.
|
||||
It can be dense or sparse (type must be double).
|
||||
-libsvm_options:
|
||||
A string of training options in the same format as that of LIBSVM.
|
||||
|
||||
matlab> [predicted_label, accuracy, decision_values/prob_estimates] = svmpredict(testing_label_vector, testing_instance_matrix, model [, 'libsvm_options']);
|
||||
matlab> [predicted_label] = svmpredict(testing_label_vector, testing_instance_matrix, model [, 'libsvm_options']);
|
||||
|
||||
-testing_label_vector:
|
||||
An m by 1 vector of prediction labels. If labels of test
|
||||
data are unknown, simply use any random values. (type must be double)
|
||||
-testing_instance_matrix:
|
||||
An m by n matrix of m testing instances with n features.
|
||||
It can be dense or sparse. (type must be double)
|
||||
-model:
|
||||
The output of svmtrain.
|
||||
-libsvm_options:
|
||||
A string of testing options in the same format as that of LIBSVM.
|
||||
|
||||
Returned Model Structure
|
||||
========================
|
||||
|
||||
The 'svmtrain' function returns a model which can be used for future
|
||||
prediction. It is a structure and is organized as [Parameters, nr_class,
|
||||
totalSV, rho, Label, sv_indices, ProbA, ProbB, nSV, sv_coef, SVs]:
|
||||
|
||||
-Parameters: parameters
|
||||
-nr_class: number of classes; = 2 for regression/one-class svm
|
||||
-totalSV: total #SV
|
||||
-rho: -b of the decision function(s) wx+b
|
||||
-Label: label of each class; empty for regression/one-class SVM
|
||||
-sv_indices: values in [1,...,num_training_data] to indicate SVs in the training set
|
||||
-ProbA: pairwise probability information; empty if -b 0 or in one-class SVM
|
||||
-ProbB: pairwise probability information; empty if -b 0 or in one-class SVM
|
||||
-nSV: number of SVs for each class; empty for regression/one-class SVM
|
||||
-sv_coef: coefficients for SVs in decision functions
|
||||
-SVs: support vectors
|
||||
|
||||
If you do not use the option '-b 1', ProbA and ProbB are empty
|
||||
matrices. If the '-v' option is specified, cross validation is
|
||||
conducted and the returned model is just a scalar: cross-validation
|
||||
accuracy for classification and mean-squared error for regression.
|
||||
|
||||
More details about this model can be found in LIBSVM FAQ
|
||||
(http://www.csie.ntu.edu.tw/~cjlin/libsvm/faq.html) and LIBSVM
|
||||
implementation document
|
||||
(http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf).
|
||||
|
||||
Result of Prediction
|
||||
====================
|
||||
|
||||
The function 'svmpredict' has three outputs. The first one,
|
||||
predicted_label, is a vector of predicted labels. The second output,
|
||||
accuracy, is a vector including accuracy (for classification), mean
|
||||
squared error, and squared correlation coefficient (for regression).
|
||||
The third is a matrix containing decision values or probability
|
||||
estimates (if '-b 1' is specified). If k is the number of classes
|
||||
in training data, for decision values, each row includes results of
|
||||
predicting k(k-1)/2 binary-class SVMs. For classification, k = 1 is a
|
||||
special case. Decision value +1 is returned for each testing instance,
|
||||
instead of an empty vector. For probabilities, each row contains k values
|
||||
indicating the probability that the testing instance is in each class.
|
||||
Note that the order of classes here is the same as 'Label' field
|
||||
in the model structure.
|
||||
|
||||
Other Utilities
|
||||
===============
|
||||
|
||||
A matlab function libsvmread reads files in LIBSVM format:
|
||||
|
||||
[label_vector, instance_matrix] = libsvmread('data.txt');
|
||||
|
||||
Two outputs are labels and instances, which can then be used as inputs
|
||||
of svmtrain or svmpredict.
|
||||
|
||||
A matlab function libsvmwrite writes a Matlab matrix to a file in LIBSVM format:
|
||||
|
||||
libsvmwrite('data.txt', label_vector, instance_matrix)
|
||||
|
||||
The instance_matrix must be a sparse matrix. (type must be double)
|
||||
For 32bit and 64bit MATLAB on Windows, pre-built binary files are ready
|
||||
in the directory `..\windows', but in future releases, we will only
|
||||
include 64bit MATLAB binary files.
|
||||
|
||||
These codes are prepared by Rong-En Fan and Kai-Wei Chang from National
|
||||
Taiwan University.
|
||||
|
||||
Examples
|
||||
========
|
||||
|
||||
Train and test on the provided data heart_scale:
|
||||
|
||||
matlab> [heart_scale_label, heart_scale_inst] = libsvmread('../heart_scale');
|
||||
matlab> model = svmtrain(heart_scale_label, heart_scale_inst, '-c 1 -g 0.07');
|
||||
matlab> [predict_label, accuracy, dec_values] = svmpredict(heart_scale_label, heart_scale_inst, model); % test the training data
|
||||
|
||||
For probability estimates, you need '-b 1' for training and testing:
|
||||
|
||||
matlab> [heart_scale_label, heart_scale_inst] = libsvmread('../heart_scale');
|
||||
matlab> model = svmtrain(heart_scale_label, heart_scale_inst, '-c 1 -g 0.07 -b 1');
|
||||
matlab> [heart_scale_label, heart_scale_inst] = libsvmread('../heart_scale');
|
||||
matlab> [predict_label, accuracy, prob_estimates] = svmpredict(heart_scale_label, heart_scale_inst, model, '-b 1');
|
||||
|
||||
To use precomputed kernel, you must include sample serial number as
|
||||
the first column of the training and testing data (assume your kernel
|
||||
matrix is K, # of instances is n):
|
||||
|
||||
matlab> K1 = [(1:n)', K]; % include sample serial number as first column
|
||||
matlab> model = svmtrain(label_vector, K1, '-t 4');
|
||||
matlab> [predict_label, accuracy, dec_values] = svmpredict(label_vector, K1, model); % test the training data
|
||||
|
||||
We give the following detailed example by splitting heart_scale into
|
||||
150 training and 120 testing data. Constructing a linear kernel
|
||||
matrix and then using the precomputed kernel gives exactly the same
|
||||
testing error as using the LIBSVM built-in linear kernel.
|
||||
|
||||
matlab> [heart_scale_label, heart_scale_inst] = libsvmread('../heart_scale');
|
||||
matlab>
|
||||
matlab> % Split Data
|
||||
matlab> train_data = heart_scale_inst(1:150,:);
|
||||
matlab> train_label = heart_scale_label(1:150,:);
|
||||
matlab> test_data = heart_scale_inst(151:270,:);
|
||||
matlab> test_label = heart_scale_label(151:270,:);
|
||||
matlab>
|
||||
matlab> % Linear Kernel
|
||||
matlab> model_linear = svmtrain(train_label, train_data, '-t 0');
|
||||
matlab> [predict_label_L, accuracy_L, dec_values_L] = svmpredict(test_label, test_data, model_linear);
|
||||
matlab>
|
||||
matlab> % Precomputed Kernel
|
||||
matlab> model_precomputed = svmtrain(train_label, [(1:150)', train_data*train_data'], '-t 4');
|
||||
matlab> [predict_label_P, accuracy_P, dec_values_P] = svmpredict(test_label, [(1:120)', test_data*train_data'], model_precomputed);
|
||||
matlab>
|
||||
matlab> accuracy_L % Display the accuracy using linear kernel
|
||||
matlab> accuracy_P % Display the accuracy using precomputed kernel
|
||||
|
||||
Note that for testing, you can put anything in the
|
||||
testing_label_vector. For more details of precomputed kernels, please
|
||||
read the section ``Precomputed Kernels'' in the README of the LIBSVM
|
||||
package.
|
||||
|
||||
Additional Information
|
||||
======================
|
||||
|
||||
This interface was initially written by Jun-Cheng Chen, Kuan-Jen Peng,
|
||||
Chih-Yuan Yang and Chih-Huai Cheng from Department of Computer
|
||||
Science, National Taiwan University. The current version was prepared
|
||||
by Rong-En Fan and Ting-Fan Wu. If you find this tool useful, please
|
||||
cite LIBSVM as follows
|
||||
|
||||
Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support
|
||||
vector machines. ACM Transactions on Intelligent Systems and
|
||||
Technology, 2:27:1--27:27, 2011. Software available at
|
||||
http://www.csie.ntu.edu.tw/~cjlin/libsvm
|
||||
|
||||
For any question, please contact Chih-Jen Lin <cjlin@csie.ntu.edu.tw>,
|
||||
or check the FAQ page:
|
||||
|
||||
http://www.csie.ntu.edu.tw/~cjlin/libsvm/faq.html#/Q10:_MATLAB_interface
|
|
@ -0,0 +1,39 @@
|
|||
Introduction
|
||||
============
|
||||
|
||||
This tool provides a simple interface to LIBSVM with instance weight support
|
||||
|
||||
Installation
|
||||
============
|
||||
|
||||
Please check README for details.
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
||||
matlab> model = svmtrain(training_weight_vector, training_label_vector, training_instance_matrix, 'libsvm_options')
|
||||
|
||||
-training_weight_vector:
|
||||
An m by 1 vector of training weights. (type must be double)
|
||||
-training_label_vector:
|
||||
An m by 1 vector of training labels. (type must be double)
|
||||
-training_instance_matrix:
|
||||
An m by n matrix of m training instances with n features. (type must be double)
|
||||
-libsvm_options:
|
||||
A string of training options in the same format as that of LIBSVM.
|
||||
|
||||
Examples
|
||||
========
|
||||
|
||||
Train and test on the provided data heart_scale:
|
||||
|
||||
matlab> [heart_scale_label, heart_scale_inst] = libsvmread('../heart_scale');
|
||||
matlab> heart_scale_weight = load('../heart_scale.wgt');
|
||||
matlab> model = svmtrain(heart_scale_weight, heart_scale_label, heart_scale_inst, '-c 1');
|
||||
matlab> [predict_label, accuracy, dec_values] = svmpredict(heart_scale_label, heart_scale_inst, model); % test the training data
|
||||
|
||||
Train and test without weights:
|
||||
|
||||
matlab> model = svmtrain([], heart_scale_label, heart_scale_inst, '-c 1');
|
||||
|
||||
|
|
@ -0,0 +1,212 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "mex.h"
|
||||
|
||||
#ifdef MX_API_VER
|
||||
#if MX_API_VER < 0x07030000
|
||||
typedef int mwIndex;
|
||||
#endif
|
||||
#endif
|
||||
#ifndef max
|
||||
#define max(x,y) (((x)>(y))?(x):(y))
|
||||
#endif
|
||||
#ifndef min
|
||||
#define min(x,y) (((x)<(y))?(x):(y))
|
||||
#endif
|
||||
|
||||
// Print the one-line usage message of libsvmread to the MATLAB console.
// Despite the name, this function only prints; it does not terminate anything.
void exit_with_help()
{
	static const char usage[] =
		"Usage: [label_vector, instance_matrix] = libsvmread('filename');\n";

	mexPrintf("%s", usage);
}
|
||||
|
||||
// Assign an empty (0x0) real double matrix to each of the nlhs output slots,
// so every requested output holds a value when the caller returns early.
static void fake_answer(int nlhs, mxArray *plhs[])
{
	int out = 0;

	while(out < nlhs)
	{
		plhs[out] = mxCreateDoubleMatrix(0, 0, mxREAL);
		++out;
	}
}
|
||||
|
||||
static char *line;
|
||||
static int max_line_len;
|
||||
|
||||
static char* readline(FILE *input)
|
||||
{
|
||||
int len;
|
||||
|
||||
if(fgets(line,max_line_len,input) == NULL)
|
||||
return NULL;
|
||||
|
||||
while(strrchr(line,'\n') == NULL)
|
||||
{
|
||||
max_line_len *= 2;
|
||||
line = (char *) realloc(line, max_line_len);
|
||||
len = (int) strlen(line);
|
||||
if(fgets(line+len,max_line_len-len,input) == NULL)
|
||||
break;
|
||||
}
|
||||
return line;
|
||||
}
|
||||
|
||||
// read in a problem (in libsvm format)
|
||||
void read_problem(const char *filename, int nlhs, mxArray *plhs[])
|
||||
{
|
||||
int max_index, min_index, inst_max_index;
|
||||
size_t elements, k, i, l=0;
|
||||
FILE *fp = fopen(filename,"r");
|
||||
char *endptr;
|
||||
mwIndex *ir, *jc;
|
||||
double *labels, *samples;
|
||||
|
||||
if(fp == NULL)
|
||||
{
|
||||
mexPrintf("can't open input file %s\n",filename);
|
||||
fake_answer(nlhs, plhs);
|
||||
return;
|
||||
}
|
||||
|
||||
max_line_len = 1024;
|
||||
line = (char *) malloc(max_line_len*sizeof(char));
|
||||
|
||||
max_index = 0;
|
||||
min_index = 1; // our index starts from 1
|
||||
elements = 0;
|
||||
while(readline(fp) != NULL)
|
||||
{
|
||||
char *idx, *val;
|
||||
// features
|
||||
int index = 0;
|
||||
|
||||
inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has <index> start from 0
|
||||
strtok(line," \t"); // label
|
||||
while (1)
|
||||
{
|
||||
idx = strtok(NULL,":"); // index:value
|
||||
val = strtok(NULL," \t");
|
||||
if(val == NULL)
|
||||
break;
|
||||
|
||||
errno = 0;
|
||||
index = (int) strtol(idx,&endptr,10);
|
||||
if(endptr == idx || errno != 0 || *endptr != '\0' || index <= inst_max_index)
|
||||
{
|
||||
mexPrintf("Wrong input format at line %d\n",l+1);
|
||||
fake_answer(nlhs, plhs);
|
||||
return;
|
||||
}
|
||||
else
|
||||
inst_max_index = index;
|
||||
|
||||
min_index = min(min_index, index);
|
||||
elements++;
|
||||
}
|
||||
max_index = max(max_index, inst_max_index);
|
||||
l++;
|
||||
}
|
||||
rewind(fp);
|
||||
|
||||
// y
|
||||
plhs[0] = mxCreateDoubleMatrix(l, 1, mxREAL);
|
||||
// x^T
|
||||
if (min_index <= 0)
|
||||
plhs[1] = mxCreateSparse(max_index-min_index+1, l, elements, mxREAL);
|
||||
else
|
||||
plhs[1] = mxCreateSparse(max_index, l, elements, mxREAL);
|
||||
|
||||
labels = mxGetPr(plhs[0]);
|
||||
samples = mxGetPr(plhs[1]);
|
||||
ir = mxGetIr(plhs[1]);
|
||||
jc = mxGetJc(plhs[1]);
|
||||
|
||||
k=0;
|
||||
for(i=0;i<l;i++)
|
||||
{
|
||||
char *idx, *val, *label;
|
||||
jc[i] = k;
|
||||
|
||||
readline(fp);
|
||||
|
||||
label = strtok(line," \t\n");
|
||||
if(label == NULL)
|
||||
{
|
||||
mexPrintf("Empty line at line %d\n",i+1);
|
||||
fake_answer(nlhs, plhs);
|
||||
return;
|
||||
}
|
||||
labels[i] = strtod(label,&endptr);
|
||||
if(endptr == label || *endptr != '\0')
|
||||
{
|
||||
mexPrintf("Wrong input format at line %d\n",i+1);
|
||||
fake_answer(nlhs, plhs);
|
||||
return;
|
||||
}
|
||||
|
||||
// features
|
||||
while(1)
|
||||
{
|
||||
idx = strtok(NULL,":");
|
||||
val = strtok(NULL," \t");
|
||||
if(val == NULL)
|
||||
break;
|
||||
|
||||
ir[k] = (mwIndex) (strtol(idx,&endptr,10) - min_index); // precomputed kernel has <index> start from 0
|
||||
|
||||
errno = 0;
|
||||
samples[k] = strtod(val,&endptr);
|
||||
if (endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
|
||||
{
|
||||
mexPrintf("Wrong input format at line %d\n",i+1);
|
||||
fake_answer(nlhs, plhs);
|
||||
return;
|
||||
}
|
||||
++k;
|
||||
}
|
||||
}
|
||||
jc[l] = k;
|
||||
|
||||
fclose(fp);
|
||||
free(line);
|
||||
|
||||
{
|
||||
mxArray *rhs[1], *lhs[1];
|
||||
rhs[0] = plhs[1];
|
||||
if(mexCallMATLAB(1, lhs, 1, rhs, "transpose"))
|
||||
{
|
||||
mexPrintf("Error: cannot transpose problem\n");
|
||||
fake_answer(nlhs, plhs);
|
||||
return;
|
||||
}
|
||||
plhs[1] = lhs[0];
|
||||
}
|
||||
}
|
||||
|
||||
// MEX entry point: [label_vector, instance_matrix] = libsvmread('filename')
//
// Requires exactly one input and two outputs; otherwise prints the usage
// text and returns empty outputs.  The file name is copied into a fixed
// buffer whose real size is passed to mxGetString, so an over-long or
// non-string argument is rejected instead of overrunning the stack buffer.
void mexFunction( int nlhs, mxArray *plhs[],
        int nrhs, const mxArray *prhs[] )
{
    char filename[256];

    if(nrhs != 1 || nlhs != 2)
    {
        exit_with_help();
        fake_answer(nlhs, plhs);
        return;
    }

    // mxGetString returns nonzero when the argument is not a string or does
    // not fit into the buffer
    if(mxGetString(prhs[0], filename, sizeof(filename)) != 0)
    {
        mexPrintf("Error: filename must be a string shorter than %d characters\n", (int)sizeof(filename));
        fake_answer(nlhs, plhs);
        return;
    }

    read_problem(filename, nlhs, plhs);
}
|
||||
|
|
@ -0,0 +1,119 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "mex.h"
|
||||
|
||||
#ifdef MX_API_VER
|
||||
#if MX_API_VER < 0x07030000
|
||||
typedef int mwIndex;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Print the calling convention of libsvmwrite to the MATLAB console. */
void exit_with_help()
{
    static const char usage[] =
        "Usage: libsvmwrite('filename', label_vector, instance_matrix);\n";

    mexPrintf(usage);
}
|
||||
|
||||
/* Fill each of the nlhs requested outputs with an empty 0-by-0 real matrix. */
static void fake_answer(int nlhs, mxArray *plhs[])
{
    int slot = 0;

    while(slot < nlhs)
    {
        plhs[slot] = mxCreateDoubleMatrix(0, 0, mxREAL);
        ++slot;
    }
}
|
||||
|
||||
void libsvmwrite(const char *filename, const mxArray *label_vec, const mxArray *instance_mat)
|
||||
{
|
||||
FILE *fp = fopen(filename,"w");
|
||||
mwIndex *ir, *jc, k, low, high;
|
||||
size_t i, l, label_vector_row_num;
|
||||
double *samples, *labels;
|
||||
mxArray *instance_mat_col; // instance sparse matrix in column format
|
||||
|
||||
if(fp ==NULL)
|
||||
{
|
||||
mexPrintf("can't open output file %s\n",filename);
|
||||
return;
|
||||
}
|
||||
|
||||
// transpose instance matrix
|
||||
{
|
||||
mxArray *prhs[1], *plhs[1];
|
||||
prhs[0] = mxDuplicateArray(instance_mat);
|
||||
if(mexCallMATLAB(1, plhs, 1, prhs, "transpose"))
|
||||
{
|
||||
mexPrintf("Error: cannot transpose instance matrix\n");
|
||||
return;
|
||||
}
|
||||
instance_mat_col = plhs[0];
|
||||
mxDestroyArray(prhs[0]);
|
||||
}
|
||||
|
||||
// the number of instance
|
||||
l = mxGetN(instance_mat_col);
|
||||
label_vector_row_num = mxGetM(label_vec);
|
||||
|
||||
if(label_vector_row_num!=l)
|
||||
{
|
||||
mexPrintf("Length of label vector does not match # of instances.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
// each column is one instance
|
||||
labels = mxGetPr(label_vec);
|
||||
samples = mxGetPr(instance_mat_col);
|
||||
ir = mxGetIr(instance_mat_col);
|
||||
jc = mxGetJc(instance_mat_col);
|
||||
|
||||
for(i=0;i<l;i++)
|
||||
{
|
||||
fprintf(fp,"%g", labels[i]);
|
||||
|
||||
low = jc[i], high = jc[i+1];
|
||||
for(k=low;k<high;k++)
|
||||
fprintf(fp," %lu:%g", (size_t)ir[k]+1, samples[k]);
|
||||
|
||||
fprintf(fp,"\n");
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
return;
|
||||
}
|
||||
|
||||
// MEX entry point: libsvmwrite('filename', label_vector, instance_matrix)
//
// Takes exactly three inputs and produces no outputs.  The label vector and
// instance matrix must be double, and the instance matrix must be sparse.
// The file name is copied with the real buffer size, so an over-long or
// non-string first argument is rejected instead of overrunning the buffer.
void mexFunction( int nlhs, mxArray *plhs[],
        int nrhs, const mxArray *prhs[] )
{
    char filename[256];

    if(nlhs > 0)
    {
        exit_with_help();
        fake_answer(nlhs, plhs);
        return;
    }

    // Transform the input Matrix to libsvm format
    if(nrhs != 3)
    {
        exit_with_help();
        return;
    }

    if(!mxIsDouble(prhs[1]) || !mxIsDouble(prhs[2]))
    {
        mexPrintf("Error: label vector and instance matrix must be double\n");
        return;
    }

    // mxGetString returns nonzero when the argument is not a string or does
    // not fit into the buffer
    if(mxGetString(prhs[0], filename, sizeof(filename)) != 0)
    {
        mexPrintf("Error: filename must be a string shorter than %d characters\n", (int)sizeof(filename));
        return;
    }

    if(!mxIsSparse(prhs[2]))
    {
        mexPrintf("Instance_matrix must be sparse\n");
        return;
    }

    libsvmwrite(filename, prhs[1], prhs[2]);
}
|
|
@ -0,0 +1,21 @@
|
|||
% This make.m is for MATLAB and OCTAVE under Windows, Mac, and Unix
%
% Builds the four MEX files (libsvmread, libsvmwrite, svmtrain, svmpredict).
% If any mex call fails, the underlying error message is printed before the
% pointer to the README, so the cause of the failure is not lost.

try
    Type = ver;
    % This part is for OCTAVE
    if(strcmp(Type(1).Name, 'Octave') == 1)
        mex libsvmread.c
        mex libsvmwrite.c
        mex svmtrain.c ../svm.cpp svm_model_matlab.c
        mex svmpredict.c ../svm.cpp svm_model_matlab.c
    % This part is for MATLAB
    % Add -largeArrayDims on 64-bit machines of MATLAB
    else
        mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims libsvmread.c
        mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims libsvmwrite.c
        mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims svmtrain.c ../svm.cpp svm_model_matlab.c
        mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims svmpredict.c ../svm.cpp svm_model_matlab.c
    end
catch err
    % report what actually went wrong instead of swallowing it
    fprintf('Error: %s\n', err.message);
    fprintf('If make.m fails, please check README about detailed instructions.\n');
end
|
|
@ -0,0 +1,374 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "../svm.h"
|
||||
|
||||
#include "mex.h"
|
||||
|
||||
#ifdef MX_API_VER
|
||||
#if MX_API_VER < 0x07030000
|
||||
typedef int mwIndex;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define NUM_OF_RETURN_FIELD 11
|
||||
|
||||
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
|
||||
|
||||
/*
 * Field names of the MATLAB model struct, in the order in which
 * model_to_matlab_structure() fills them and matlab_matrix_to_model()
 * reads them back (by position).
 */
static const char *field_names[] = {
    "Parameters", "nr_class", "totalSV", "rho",
    "Label", "sv_indices",
    "ProbA", "ProbB",
    "nSV", "sv_coef", "SVs"
};
|
||||
|
||||
const char *model_to_matlab_structure(mxArray *plhs[], int num_of_feature, struct svm_model *model)
|
||||
{
|
||||
int i, j, n;
|
||||
double *ptr;
|
||||
mxArray *return_model, **rhs;
|
||||
int out_id = 0;
|
||||
|
||||
rhs = (mxArray **)mxMalloc(sizeof(mxArray *)*NUM_OF_RETURN_FIELD);
|
||||
|
||||
// Parameters
|
||||
rhs[out_id] = mxCreateDoubleMatrix(5, 1, mxREAL);
|
||||
ptr = mxGetPr(rhs[out_id]);
|
||||
ptr[0] = model->param.svm_type;
|
||||
ptr[1] = model->param.kernel_type;
|
||||
ptr[2] = model->param.degree;
|
||||
ptr[3] = model->param.gamma;
|
||||
ptr[4] = model->param.coef0;
|
||||
out_id++;
|
||||
|
||||
// nr_class
|
||||
rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
||||
ptr = mxGetPr(rhs[out_id]);
|
||||
ptr[0] = model->nr_class;
|
||||
out_id++;
|
||||
|
||||
// total SV
|
||||
rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
||||
ptr = mxGetPr(rhs[out_id]);
|
||||
ptr[0] = model->l;
|
||||
out_id++;
|
||||
|
||||
// rho
|
||||
n = model->nr_class*(model->nr_class-1)/2;
|
||||
rhs[out_id] = mxCreateDoubleMatrix(n, 1, mxREAL);
|
||||
ptr = mxGetPr(rhs[out_id]);
|
||||
for(i = 0; i < n; i++)
|
||||
ptr[i] = model->rho[i];
|
||||
out_id++;
|
||||
|
||||
// Label
|
||||
if(model->label)
|
||||
{
|
||||
rhs[out_id] = mxCreateDoubleMatrix(model->nr_class, 1, mxREAL);
|
||||
ptr = mxGetPr(rhs[out_id]);
|
||||
for(i = 0; i < model->nr_class; i++)
|
||||
ptr[i] = model->label[i];
|
||||
}
|
||||
else
|
||||
rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL);
|
||||
out_id++;
|
||||
|
||||
// sv_indices
|
||||
if(model->sv_indices)
|
||||
{
|
||||
rhs[out_id] = mxCreateDoubleMatrix(model->l, 1, mxREAL);
|
||||
ptr = mxGetPr(rhs[out_id]);
|
||||
for(i = 0; i < model->l; i++)
|
||||
ptr[i] = model->sv_indices[i];
|
||||
}
|
||||
else
|
||||
rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL);
|
||||
out_id++;
|
||||
|
||||
// probA
|
||||
if(model->probA != NULL)
|
||||
{
|
||||
rhs[out_id] = mxCreateDoubleMatrix(n, 1, mxREAL);
|
||||
ptr = mxGetPr(rhs[out_id]);
|
||||
for(i = 0; i < n; i++)
|
||||
ptr[i] = model->probA[i];
|
||||
}
|
||||
else
|
||||
rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL);
|
||||
out_id ++;
|
||||
|
||||
// probB
|
||||
if(model->probB != NULL)
|
||||
{
|
||||
rhs[out_id] = mxCreateDoubleMatrix(n, 1, mxREAL);
|
||||
ptr = mxGetPr(rhs[out_id]);
|
||||
for(i = 0; i < n; i++)
|
||||
ptr[i] = model->probB[i];
|
||||
}
|
||||
else
|
||||
rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL);
|
||||
out_id++;
|
||||
|
||||
// nSV
|
||||
if(model->nSV)
|
||||
{
|
||||
rhs[out_id] = mxCreateDoubleMatrix(model->nr_class, 1, mxREAL);
|
||||
ptr = mxGetPr(rhs[out_id]);
|
||||
for(i = 0; i < model->nr_class; i++)
|
||||
ptr[i] = model->nSV[i];
|
||||
}
|
||||
else
|
||||
rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL);
|
||||
out_id++;
|
||||
|
||||
// sv_coef
|
||||
rhs[out_id] = mxCreateDoubleMatrix(model->l, model->nr_class-1, mxREAL);
|
||||
ptr = mxGetPr(rhs[out_id]);
|
||||
for(i = 0; i < model->nr_class-1; i++)
|
||||
for(j = 0; j < model->l; j++)
|
||||
ptr[(i*(model->l))+j] = model->sv_coef[i][j];
|
||||
out_id++;
|
||||
|
||||
// SVs
|
||||
{
|
||||
int ir_index, nonzero_element;
|
||||
mwIndex *ir, *jc;
|
||||
mxArray *pprhs[1], *pplhs[1];
|
||||
|
||||
if(model->param.kernel_type == PRECOMPUTED)
|
||||
{
|
||||
nonzero_element = model->l;
|
||||
num_of_feature = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
nonzero_element = 0;
|
||||
for(i = 0; i < model->l; i++) {
|
||||
j = 0;
|
||||
while(model->SV[i][j].index != -1)
|
||||
{
|
||||
nonzero_element++;
|
||||
j++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// SV in column, easier accessing
|
||||
rhs[out_id] = mxCreateSparse(num_of_feature, model->l, nonzero_element, mxREAL);
|
||||
ir = mxGetIr(rhs[out_id]);
|
||||
jc = mxGetJc(rhs[out_id]);
|
||||
ptr = mxGetPr(rhs[out_id]);
|
||||
jc[0] = ir_index = 0;
|
||||
for(i = 0;i < model->l; i++)
|
||||
{
|
||||
if(model->param.kernel_type == PRECOMPUTED)
|
||||
{
|
||||
// make a (1 x model->l) matrix
|
||||
ir[ir_index] = 0;
|
||||
ptr[ir_index] = model->SV[i][0].value;
|
||||
ir_index++;
|
||||
jc[i+1] = jc[i] + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
int x_index = 0;
|
||||
while (model->SV[i][x_index].index != -1)
|
||||
{
|
||||
ir[ir_index] = model->SV[i][x_index].index - 1;
|
||||
ptr[ir_index] = model->SV[i][x_index].value;
|
||||
ir_index++, x_index++;
|
||||
}
|
||||
jc[i+1] = jc[i] + x_index;
|
||||
}
|
||||
}
|
||||
// transpose back to SV in row
|
||||
pprhs[0] = rhs[out_id];
|
||||
if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose"))
|
||||
return "cannot transpose SV matrix";
|
||||
rhs[out_id] = pplhs[0];
|
||||
out_id++;
|
||||
}
|
||||
|
||||
/* Create a struct matrix contains NUM_OF_RETURN_FIELD fields */
|
||||
return_model = mxCreateStructMatrix(1, 1, NUM_OF_RETURN_FIELD, field_names);
|
||||
|
||||
/* Fill struct matrix with input arguments */
|
||||
for(i = 0; i < NUM_OF_RETURN_FIELD; i++)
|
||||
mxSetField(return_model,0,field_names[i],mxDuplicateArray(rhs[i]));
|
||||
/* return */
|
||||
plhs[0] = return_model;
|
||||
mxFree(rhs);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct svm_model *matlab_matrix_to_model(const mxArray *matlab_struct, const char **msg)
|
||||
{
|
||||
int i, j, n, num_of_fields;
|
||||
double *ptr;
|
||||
int id = 0;
|
||||
struct svm_node *x_space;
|
||||
struct svm_model *model;
|
||||
mxArray **rhs;
|
||||
|
||||
num_of_fields = mxGetNumberOfFields(matlab_struct);
|
||||
if(num_of_fields != NUM_OF_RETURN_FIELD)
|
||||
{
|
||||
*msg = "number of return field is not correct";
|
||||
return NULL;
|
||||
}
|
||||
rhs = (mxArray **) mxMalloc(sizeof(mxArray *)*num_of_fields);
|
||||
|
||||
for(i=0;i<num_of_fields;i++)
|
||||
rhs[i] = mxGetFieldByNumber(matlab_struct, 0, i);
|
||||
|
||||
model = Malloc(struct svm_model, 1);
|
||||
model->rho = NULL;
|
||||
model->probA = NULL;
|
||||
model->probB = NULL;
|
||||
model->label = NULL;
|
||||
model->sv_indices = NULL;
|
||||
model->nSV = NULL;
|
||||
model->free_sv = 1; // XXX
|
||||
|
||||
ptr = mxGetPr(rhs[id]);
|
||||
model->param.svm_type = (int)ptr[0];
|
||||
model->param.kernel_type = (int)ptr[1];
|
||||
model->param.degree = (int)ptr[2];
|
||||
model->param.gamma = ptr[3];
|
||||
model->param.coef0 = ptr[4];
|
||||
id++;
|
||||
|
||||
ptr = mxGetPr(rhs[id]);
|
||||
model->nr_class = (int)ptr[0];
|
||||
id++;
|
||||
|
||||
ptr = mxGetPr(rhs[id]);
|
||||
model->l = (int)ptr[0];
|
||||
id++;
|
||||
|
||||
// rho
|
||||
n = model->nr_class * (model->nr_class-1)/2;
|
||||
model->rho = (double*) malloc(n*sizeof(double));
|
||||
ptr = mxGetPr(rhs[id]);
|
||||
for(i=0;i<n;i++)
|
||||
model->rho[i] = ptr[i];
|
||||
id++;
|
||||
|
||||
// label
|
||||
if(mxIsEmpty(rhs[id]) == 0)
|
||||
{
|
||||
model->label = (int*) malloc(model->nr_class*sizeof(int));
|
||||
ptr = mxGetPr(rhs[id]);
|
||||
for(i=0;i<model->nr_class;i++)
|
||||
model->label[i] = (int)ptr[i];
|
||||
}
|
||||
id++;
|
||||
|
||||
// sv_indices
|
||||
if(mxIsEmpty(rhs[id]) == 0)
|
||||
{
|
||||
model->sv_indices = (int*) malloc(model->l*sizeof(int));
|
||||
ptr = mxGetPr(rhs[id]);
|
||||
for(i=0;i<model->l;i++)
|
||||
model->sv_indices[i] = (int)ptr[i];
|
||||
}
|
||||
id++;
|
||||
|
||||
// probA
|
||||
if(mxIsEmpty(rhs[id]) == 0)
|
||||
{
|
||||
model->probA = (double*) malloc(n*sizeof(double));
|
||||
ptr = mxGetPr(rhs[id]);
|
||||
for(i=0;i<n;i++)
|
||||
model->probA[i] = ptr[i];
|
||||
}
|
||||
id++;
|
||||
|
||||
// probB
|
||||
if(mxIsEmpty(rhs[id]) == 0)
|
||||
{
|
||||
model->probB = (double*) malloc(n*sizeof(double));
|
||||
ptr = mxGetPr(rhs[id]);
|
||||
for(i=0;i<n;i++)
|
||||
model->probB[i] = ptr[i];
|
||||
}
|
||||
id++;
|
||||
|
||||
// nSV
|
||||
if(mxIsEmpty(rhs[id]) == 0)
|
||||
{
|
||||
model->nSV = (int*) malloc(model->nr_class*sizeof(int));
|
||||
ptr = mxGetPr(rhs[id]);
|
||||
for(i=0;i<model->nr_class;i++)
|
||||
model->nSV[i] = (int)ptr[i];
|
||||
}
|
||||
id++;
|
||||
|
||||
// sv_coef
|
||||
ptr = mxGetPr(rhs[id]);
|
||||
model->sv_coef = (double**) malloc((model->nr_class-1)*sizeof(double));
|
||||
for( i=0 ; i< model->nr_class -1 ; i++ )
|
||||
model->sv_coef[i] = (double*) malloc((model->l)*sizeof(double));
|
||||
for(i = 0; i < model->nr_class - 1; i++)
|
||||
for(j = 0; j < model->l; j++)
|
||||
model->sv_coef[i][j] = ptr[i*(model->l)+j];
|
||||
id++;
|
||||
|
||||
// SV
|
||||
{
|
||||
int sr, elements;
|
||||
int num_samples;
|
||||
mwIndex *ir, *jc;
|
||||
mxArray *pprhs[1], *pplhs[1];
|
||||
|
||||
// transpose SV
|
||||
pprhs[0] = rhs[id];
|
||||
if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose"))
|
||||
{
|
||||
svm_free_and_destroy_model(&model);
|
||||
*msg = "cannot transpose SV matrix";
|
||||
return NULL;
|
||||
}
|
||||
rhs[id] = pplhs[0];
|
||||
|
||||
sr = (int)mxGetN(rhs[id]);
|
||||
|
||||
ptr = mxGetPr(rhs[id]);
|
||||
ir = mxGetIr(rhs[id]);
|
||||
jc = mxGetJc(rhs[id]);
|
||||
|
||||
num_samples = (int)mxGetNzmax(rhs[id]);
|
||||
|
||||
elements = num_samples + sr;
|
||||
|
||||
model->SV = (struct svm_node **) malloc(sr * sizeof(struct svm_node *));
|
||||
x_space = (struct svm_node *)malloc(elements * sizeof(struct svm_node));
|
||||
|
||||
// SV is in column
|
||||
for(i=0;i<sr;i++)
|
||||
{
|
||||
int low = (int)jc[i], high = (int)jc[i+1];
|
||||
int x_index = 0;
|
||||
model->SV[i] = &x_space[low+i];
|
||||
for(j=low;j<high;j++)
|
||||
{
|
||||
model->SV[i][x_index].index = (int)ir[j] + 1;
|
||||
model->SV[i][x_index].value = ptr[j];
|
||||
x_index++;
|
||||
}
|
||||
model->SV[i][x_index].index = -1;
|
||||
}
|
||||
|
||||
id++;
|
||||
}
|
||||
mxFree(rhs);
|
||||
|
||||
return model;
|
||||
}
|
|
@ -0,0 +1,2 @@
|
|||
const char *model_to_matlab_structure(mxArray *plhs[], int num_of_feature, struct svm_model *model);
|
||||
struct svm_model *matlab_matrix_to_model(const mxArray *matlab_struct, const char **error_message);
|
|
@ -0,0 +1,370 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "../svm.h"
|
||||
|
||||
#include "mex.h"
|
||||
#include "svm_model_matlab.h"
|
||||
|
||||
#ifdef MX_API_VER
|
||||
#if MX_API_VER < 0x07030000
|
||||
typedef int mwIndex;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define CMD_LEN 2048
|
||||
|
||||
/*
 * printf-compatible no-op installed as `info` in quiet mode (-q).
 * Ignores its arguments and always returns 0; the earlier version fell off
 * the end of a non-void function.
 */
int print_null(const char *s,...)
{
    (void)s;
    return 0;
}
|
||||
int (*info)(const char *fmt,...) = &mexPrintf;
|
||||
|
||||
void read_sparse_instance(const mxArray *prhs, int index, struct svm_node *x)
|
||||
{
|
||||
int i, j, low, high;
|
||||
mwIndex *ir, *jc;
|
||||
double *samples;
|
||||
|
||||
ir = mxGetIr(prhs);
|
||||
jc = mxGetJc(prhs);
|
||||
samples = mxGetPr(prhs);
|
||||
|
||||
// each column is one instance
|
||||
j = 0;
|
||||
low = (int)jc[index], high = (int)jc[index+1];
|
||||
for(i=low;i<high;i++)
|
||||
{
|
||||
x[j].index = (int)ir[i] + 1;
|
||||
x[j].value = samples[i];
|
||||
j++;
|
||||
}
|
||||
x[j].index = -1;
|
||||
}
|
||||
|
||||
/* Give every requested output (nlhs of them) an empty 0-by-0 real matrix. */
static void fake_answer(int nlhs, mxArray *plhs[])
{
    int remaining = nlhs;
    mxArray **slot = plhs;

    while(remaining-- > 0)
        *slot++ = mxCreateDoubleMatrix(0, 0, mxREAL);
}
|
||||
|
||||
/*
 * Predict every row of the testing instance matrix with `model`.
 *
 * nlhs, plhs: MEX outputs.  plhs[0] receives the predicted labels; when
 *             nlhs == 3, plhs[1] receives [accuracy; mean squared error;
 *             squared correlation coefficient] and plhs[2] the probability
 *             estimates or decision values.
 * prhs:       prhs[0] is the label column vector, prhs[1] the instance
 *             matrix (dense or sparse, one instance per row).
 * model:      model to predict with (not freed here).
 * predict_probability: nonzero to output probability estimates.
 *
 * On a size mismatch or a failed conversion, all outputs become empty
 * matrices.  Temporary arrays and buffers are released before returning,
 * and the decision-value buffer is allocated once rather than per instance.
 */
void predict(int nlhs, mxArray *plhs[], const mxArray *prhs[], struct svm_model *model, const int predict_probability)
{
    int label_vector_row_num, label_vector_col_num;
    int feature_number, testing_instance_number;
    int instance_index;
    double *ptr_instance, *ptr_label, *ptr_predict_label;
    double *ptr_prob_estimates, *ptr_dec_values, *ptr;
    struct svm_node *x;
    mxArray *converted = NULL; // transposed (sparse) or full (precomputed) copy of prhs[1]
    mxArray *tplhs[3]; // temporary storage for plhs[]

    int correct = 0;
    int total = 0;
    double error = 0;
    double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

    int svm_type=svm_get_svm_type(model);
    int nr_class=svm_get_nr_class(model);
    double *prob_estimates=NULL;
    double *dec_values=NULL;
    // these SVM types produce a single decision value per instance
    int single_value = (svm_type == ONE_CLASS ||
                        svm_type == EPSILON_SVR ||
                        svm_type == NU_SVR);

    // prhs[1] = testing instance matrix
    feature_number = (int)mxGetN(prhs[1]);
    testing_instance_number = (int)mxGetM(prhs[1]);
    label_vector_row_num = (int)mxGetM(prhs[0]);
    label_vector_col_num = (int)mxGetN(prhs[0]);

    if(label_vector_row_num!=testing_instance_number)
    {
        mexPrintf("Length of label vector does not match # of instances.\n");
        fake_answer(nlhs, plhs);
        return;
    }
    if(label_vector_col_num!=1)
    {
        mexPrintf("label (1st argument) should be a vector (# of column is 1).\n");
        fake_answer(nlhs, plhs);
        return;
    }

    ptr_instance = mxGetPr(prhs[1]);
    ptr_label    = mxGetPr(prhs[0]);

    // sparse input: precomputed kernels need a dense matrix, everything else
    // is read column-wise from the transposed sparse matrix
    if(mxIsSparse(prhs[1]))
    {
        mxArray *rhs[1], *lhs[1];
        int precomputed = (model->param.kernel_type == PRECOMPUTED);

        // mexCallMATLAB takes non-const inputs, so work on a copy
        rhs[0] = mxDuplicateArray(prhs[1]);
        if(mexCallMATLAB(1, lhs, 1, rhs, precomputed ? "full" : "transpose"))
        {
            if(precomputed)
                mexPrintf("Error: cannot full testing instance matrix\n");
            else
                mexPrintf("Error: cannot transpose testing instance matrix\n");
            mxDestroyArray(rhs[0]);
            fake_answer(nlhs, plhs);
            return;
        }
        mxDestroyArray(rhs[0]);
        converted = lhs[0];
        if(precomputed)
            ptr_instance = mxGetPr(converted);
    }

    if(predict_probability)
    {
        if(svm_type==NU_SVR || svm_type==EPSILON_SVR)
            info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g\n",svm_get_svr_probability(model));
        else
            prob_estimates = (double *) malloc(nr_class*sizeof(double));
    }
    else if(!single_value)
        dec_values = (double *) malloc(sizeof(double) * nr_class*(nr_class-1)/2);

    tplhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL);
    if(predict_probability)
    {
        // prob estimates are in plhs[2]
        if(svm_type==C_SVC || svm_type==NU_SVC)
            tplhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL);
        else
            tplhs[2] = mxCreateDoubleMatrix(0, 0, mxREAL);
    }
    else
    {
        // decision values are in plhs[2]
        if(single_value ||
           nr_class == 1) // if only one class in training data, decision values are still returned.
            tplhs[2] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL);
        else
            tplhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class*(nr_class-1)/2, mxREAL);
    }

    ptr_predict_label = mxGetPr(tplhs[0]);
    ptr_prob_estimates = mxGetPr(tplhs[2]);
    ptr_dec_values = mxGetPr(tplhs[2]);
    x = (struct svm_node*)malloc((feature_number+1)*sizeof(struct svm_node) );
    for(instance_index=0;instance_index<testing_instance_number;instance_index++)
    {
        int i;
        double target_label, predict_label;

        target_label = ptr_label[instance_index];

        if(mxIsSparse(prhs[1]) && model->param.kernel_type != PRECOMPUTED) // prhs[1]^T is still sparse
            read_sparse_instance(converted, instance_index, x);
        else
        {
            // dense, column-major: element (instance, feature i)
            for(i=0;i<feature_number;i++)
            {
                x[i].index = i+1;
                x[i].value = ptr_instance[(size_t)testing_instance_number*i+instance_index];
            }
            x[feature_number].index = -1;
        }

        if(predict_probability)
        {
            if(svm_type==C_SVC || svm_type==NU_SVC)
            {
                predict_label = svm_predict_probability(model, x, prob_estimates);
                ptr_predict_label[instance_index] = predict_label;
                for(i=0;i<nr_class;i++)
                    ptr_prob_estimates[instance_index + i * testing_instance_number] = prob_estimates[i];
            } else {
                predict_label = svm_predict(model,x);
                ptr_predict_label[instance_index] = predict_label;
            }
        }
        else
        {
            if(single_value)
            {
                double res;
                predict_label = svm_predict_values(model, x, &res);
                ptr_dec_values[instance_index] = res;
            }
            else
            {
                predict_label = svm_predict_values(model, x, dec_values);
                if(nr_class == 1)
                    ptr_dec_values[instance_index] = 1;
                else
                    for(i=0;i<(nr_class*(nr_class-1))/2;i++)
                        ptr_dec_values[instance_index + i * testing_instance_number] = dec_values[i];
            }
            ptr_predict_label[instance_index] = predict_label;
        }

        // running sums for accuracy, MSE and the squared correlation coefficient
        if(predict_label == target_label)
            ++correct;
        error += (predict_label-target_label)*(predict_label-target_label);
        sump += predict_label;
        sumt += target_label;
        sumpp += predict_label*predict_label;
        sumtt += target_label*target_label;
        sumpt += predict_label*target_label;
        ++total;
    }
    if(svm_type==NU_SVR || svm_type==EPSILON_SVR)
    {
        info("Mean squared error = %g (regression)\n",error/total);
        info("Squared correlation coefficient = %g (regression)\n",
            ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
            ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
            );
    }
    else
        info("Accuracy = %g%% (%d/%d) (classification)\n",
            (double)correct/total*100,correct,total);

    // return accuracy, mean squared error, squared correlation coefficient
    tplhs[1] = mxCreateDoubleMatrix(3, 1, mxREAL);
    ptr = mxGetPr(tplhs[1]);
    ptr[0] = (double)correct/total*100;
    ptr[1] = error/total;
    ptr[2] = ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
             ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt));

    free(x);
    free(prob_estimates); // free(NULL) is a no-op
    free(dec_values);
    if(converted != NULL)
        mxDestroyArray(converted);

    plhs[0] = tplhs[0];
    if(nlhs == 3)
    {
        plhs[1] = tplhs[1];
        plhs[2] = tplhs[2];
    }
    else
    {
        // the caller did not ask for these
        mxDestroyArray(tplhs[1]);
        mxDestroyArray(tplhs[2]);
    }
}
|
||||
|
||||
/* Print the svmpredict usage text (call forms, options, return values) to the MATLAB console. */
void exit_with_help()
{
    static const char usage[] =
        "Usage: [predicted_label, accuracy, decision_values/prob_estimates] = svmpredict(testing_label_vector, testing_instance_matrix, model, 'libsvm_options')\n"
        " [predicted_label] = svmpredict(testing_label_vector, testing_instance_matrix, model, 'libsvm_options')\n"
        "Parameters:\n"
        " model: SVM model structure from svmtrain.\n"
        " libsvm_options:\n"
        " -b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0); one-class SVM not supported yet\n"
        " -q : quiet mode (no outputs)\n"
        "Returns:\n"
        " predicted_label: SVM prediction output vector.\n"
        " accuracy: a vector with accuracy, mean squared error, squared correlation coefficient.\n"
        " prob_estimates: If selected, probability estimate vector.\n";

    mexPrintf(usage);
}
|
||||
|
||||
// MEX entry point:
//   [predicted_label, accuracy, decision_values/prob_estimates] =
//       svmpredict(testing_label_vector, testing_instance_matrix, model, 'libsvm_options')
//
// Validates the argument counts and types, parses the optional option
// string (-b 0|1, -q), rebuilds the model from the MATLAB struct and calls
// predict().  On any error all requested outputs become empty matrices.
//
// The option string is copied with the real buffer size and the number of
// tokens is capped, so over-long input is rejected instead of overrunning
// cmd[] or argv[].
void mexFunction( int nlhs, mxArray *plhs[],
        int nrhs, const mxArray *prhs[] )
{
    int prob_estimate_flag = 0;
    struct svm_model *model;
    const char *error_msg;
    info = &mexPrintf;

    if(nlhs == 2 || nlhs > 3 || nrhs > 4 || nrhs < 3)
    {
        exit_with_help();
        fake_answer(nlhs, plhs);
        return;
    }

    if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1])) {
        mexPrintf("Error: label vector and instance matrix must be double\n");
        fake_answer(nlhs, plhs);
        return;
    }

    if(!mxIsStruct(prhs[2]))
    {
        mexPrintf("model file should be a struct array\n");
        fake_answer(nlhs, plhs);
        return;
    }

    // parse options
    if(nrhs==4)
    {
        int i, argc = 1;
        char cmd[CMD_LEN], *argv[CMD_LEN/2];

        // put options in argv[]; nonzero means not a string or too long
        if(mxGetString(prhs[3], cmd, sizeof(cmd)) != 0)
        {
            mexPrintf("Error: options must be a string shorter than %d characters\n", CMD_LEN);
            fake_answer(nlhs, plhs);
            return;
        }

        // tokens go to argv[1..argc-1]; argv[argc] holds the NULL terminator
        argv[argc] = strtok(cmd, " ");
        while(argv[argc] != NULL)
        {
            if(argc >= CMD_LEN/2 - 1)
            {
                mexPrintf("Error: too many options\n");
                fake_answer(nlhs, plhs);
                return;
            }
            argv[++argc] = strtok(NULL, " ");
        }

        for(i=1;i<argc;i++)
        {
            if(argv[i][0] != '-') break;
            // every option except -q needs a value after it
            if((++i>=argc) && argv[i-1][1] != 'q')
            {
                exit_with_help();
                fake_answer(nlhs, plhs);
                return;
            }
            switch(argv[i-1][1])
            {
                case 'b':
                    prob_estimate_flag = atoi(argv[i]);
                    break;
                case 'q':
                    i--; // -q takes no value: step back onto the flag
                    info = &print_null;
                    break;
                default:
                    mexPrintf("Unknown option: -%c\n", argv[i-1][1]);
                    exit_with_help();
                    fake_answer(nlhs, plhs);
                    return;
            }
        }
    }

    model = matlab_matrix_to_model(prhs[2], &error_msg);
    if (model == NULL)
    {
        mexPrintf("Error: can't read model: %s\n", error_msg);
        fake_answer(nlhs, plhs);
        return;
    }

    if(prob_estimate_flag)
    {
        if(svm_check_probability_model(model)==0)
        {
            mexPrintf("Model does not support probabiliy estimates\n");
            fake_answer(nlhs, plhs);
            svm_free_and_destroy_model(&model);
            return;
        }
    }
    else
    {
        if(svm_check_probability_model(model)!=0)
            info("Model supports probability estimates, but disabled in predicton.\n");
    }

    predict(nlhs, plhs, prhs, model, prob_estimate_flag);
    // destroy model
    svm_free_and_destroy_model(&model);

    return;
}
|
|
@ -0,0 +1,524 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "../svm.h"
|
||||
|
||||
#include "mex.h"
|
||||
#include "svm_model_matlab.h"
|
||||
|
||||
#ifdef MX_API_VER
|
||||
#if MX_API_VER < 0x07030000
|
||||
typedef int mwIndex;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define CMD_LEN 2048
|
||||
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
|
||||
|
||||
/* Print callback that discards its message. */
void print_null(const char *s)
{
    (void)s;
}
|
||||
/*
 * Print callback that writes the message to the MATLAB console.
 * The message is passed as data to a fixed "%s" format, so any '%' it
 * contains is printed literally instead of being read as a conversion.
 */
void print_string_matlab(const char *s)
{
    mexPrintf("%s", s);
}
|
||||
|
||||
/* Print the svmtrain usage text and the list of libsvm options to the MATLAB console. */
void exit_with_help()
{
    static const char usage[] =
        "Usage: model = svmtrain(training_weight_vector, training_label_vector, training_instance_matrix, 'libsvm_options');\n"
        "libsvm_options:\n"
        "-s svm_type : set type of SVM (default 0)\n"
        " 0 -- C-SVC (multi-class classification)\n"
        " 1 -- nu-SVC (multi-class classification)\n"
        " 2 -- one-class SVM\n"
        " 3 -- epsilon-SVR (regression)\n"
        " 4 -- nu-SVR (regression)\n"
        "-t kernel_type : set type of kernel function (default 2)\n"
        " 0 -- linear: u'*v\n"
        " 1 -- polynomial: (gamma*u'*v + coef0)^degree\n"
        " 2 -- radial basis function: exp(-gamma*|u-v|^2)\n"
        " 3 -- sigmoid: tanh(gamma*u'*v + coef0)\n"
        " 4 -- precomputed kernel (kernel values in training_instance_matrix)\n"
        "-d degree : set degree in kernel function (default 3)\n"
        "-g gamma : set gamma in kernel function (default 1/num_features)\n"
        "-r coef0 : set coef0 in kernel function (default 0)\n"
        "-c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)\n"
        "-n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)\n"
        "-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n"
        "-m cachesize : set cache memory size in MB (default 100)\n"
        "-e epsilon : set tolerance of termination criterion (default 0.001)\n"
        "-h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)\n"
        "-b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)\n"
        "-wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)\n"
        "-v n : n-fold cross validation mode\n"
        "-q : quiet mode (no outputs)\n";

    mexPrintf(usage);
}
|
||||
|
||||
// svm arguments
|
||||
struct svm_parameter param; // set by parse_command_line
|
||||
struct svm_problem prob; // set by read_problem
|
||||
struct svm_model *model;
|
||||
struct svm_node *x_space;
|
||||
int cross_validation;
|
||||
int nr_fold;
|
||||
|
||||
|
||||
double do_cross_validation()
|
||||
{
|
||||
int i;
|
||||
int total_correct = 0;
|
||||
double total_error = 0;
|
||||
double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
|
||||
double *target = Malloc(double,prob.l);
|
||||
double retval = 0.0;
|
||||
|
||||
svm_cross_validation(&prob,¶m,nr_fold,target);
|
||||
if(param.svm_type == EPSILON_SVR ||
|
||||
param.svm_type == NU_SVR)
|
||||
{
|
||||
for(i=0;i<prob.l;i++)
|
||||
{
|
||||
double y = prob.y[i];
|
||||
double v = target[i];
|
||||
total_error += (v-y)*(v-y);
|
||||
sumv += v;
|
||||
sumy += y;
|
||||
sumvv += v*v;
|
||||
sumyy += y*y;
|
||||
sumvy += v*y;
|
||||
}
|
||||
mexPrintf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
|
||||
mexPrintf("Cross Validation Squared correlation coefficient = %g\n",
|
||||
((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
|
||||
((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
|
||||
);
|
||||
retval = total_error/prob.l;
|
||||
}
|
||||
else
|
||||
{
|
||||
for(i=0;i<prob.l;i++)
|
||||
if(target[i] == prob.y[i])
|
||||
++total_correct;
|
||||
mexPrintf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
|
||||
retval = 100.0*total_correct/prob.l;
|
||||
}
|
||||
free(target);
|
||||
return retval;
|
||||
}
|
||||
|
||||
// nrhs should be 4
|
||||
int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
|
||||
{
|
||||
int i, argc = 1;
|
||||
char cmd[CMD_LEN];
|
||||
char *argv[CMD_LEN/2];
|
||||
void (*print_func)(const char *) = print_string_matlab; // default printing to stdout
|
||||
|
||||
// default values
|
||||
param.svm_type = C_SVC;
|
||||
param.kernel_type = RBF;
|
||||
param.degree = 3;
|
||||
param.gamma = 0; // 1/num_features
|
||||
param.coef0 = 0;
|
||||
param.nu = 0.5;
|
||||
param.cache_size = 100;
|
||||
param.C = 1;
|
||||
param.eps = 1e-3;
|
||||
param.p = 0.1;
|
||||
param.shrinking = 1;
|
||||
param.probability = 0;
|
||||
param.nr_weight = 0;
|
||||
param.weight_label = NULL;
|
||||
param.weight = NULL;
|
||||
cross_validation = 0;
|
||||
|
||||
if(nrhs <= 1)
|
||||
return 1;
|
||||
|
||||
if(nrhs > 3)
|
||||
{
|
||||
// put options in argv[]
|
||||
mxGetString(prhs[3], cmd, mxGetN(prhs[3]) + 1);
|
||||
if((argv[argc] = strtok(cmd, " ")) != NULL)
|
||||
while((argv[++argc] = strtok(NULL, " ")) != NULL)
|
||||
;
|
||||
}
|
||||
|
||||
// parse options
|
||||
for(i=1;i<argc;i++)
|
||||
{
|
||||
if(argv[i][0] != '-') break;
|
||||
++i;
|
||||
if(i>=argc && argv[i-1][1] != 'q') // since option -q has no parameter
|
||||
return 1;
|
||||
switch(argv[i-1][1])
|
||||
{
|
||||
case 's':
|
||||
param.svm_type = atoi(argv[i]);
|
||||
break;
|
||||
case 't':
|
||||
param.kernel_type = atoi(argv[i]);
|
||||
break;
|
||||
case 'd':
|
||||
param.degree = atoi(argv[i]);
|
||||
break;
|
||||
case 'g':
|
||||
param.gamma = atof(argv[i]);
|
||||
break;
|
||||
case 'r':
|
||||
param.coef0 = atof(argv[i]);
|
||||
break;
|
||||
case 'n':
|
||||
param.nu = atof(argv[i]);
|
||||
break;
|
||||
case 'm':
|
||||
param.cache_size = atof(argv[i]);
|
||||
break;
|
||||
case 'c':
|
||||
param.C = atof(argv[i]);
|
||||
break;
|
||||
case 'e':
|
||||
param.eps = atof(argv[i]);
|
||||
break;
|
||||
case 'p':
|
||||
param.p = atof(argv[i]);
|
||||
break;
|
||||
case 'h':
|
||||
param.shrinking = atoi(argv[i]);
|
||||
break;
|
||||
case 'b':
|
||||
param.probability = atoi(argv[i]);
|
||||
break;
|
||||
case 'q':
|
||||
print_func = &print_null;
|
||||
i--;
|
||||
break;
|
||||
case 'v':
|
||||
cross_validation = 1;
|
||||
nr_fold = atoi(argv[i]);
|
||||
if(nr_fold < 2)
|
||||
{
|
||||
mexPrintf("n-fold cross validation: n must >= 2\n");
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
case 'w':
|
||||
++param.nr_weight;
|
||||
param.weight_label = (int *)realloc(param.weight_label,sizeof(int)*param.nr_weight);
|
||||
param.weight = (double *)realloc(param.weight,sizeof(double)*param.nr_weight);
|
||||
param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]);
|
||||
param.weight[param.nr_weight-1] = atof(argv[i]);
|
||||
break;
|
||||
default:
|
||||
mexPrintf("Unknown option -%c\n", argv[i-1][1]);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
svm_set_print_string_function(print_func);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// read in a problem from MATLAB arrays (weight vector, label vector, dense instance matrix)
|
||||
int read_problem_dense(const mxArray *weight_vec, const mxArray *label_vec, const mxArray *instance_mat)
|
||||
{
|
||||
size_t i, j, k, l;
|
||||
size_t elements, max_index, sc, label_vector_row_num, weight_vector_row_num;
|
||||
double *samples, *labels, *weights;
|
||||
|
||||
prob.x = NULL;
|
||||
prob.y = NULL;
|
||||
prob.W = NULL;
|
||||
x_space = NULL;
|
||||
|
||||
weights = mxGetPr(weight_vec);
|
||||
labels = mxGetPr(label_vec);
|
||||
samples = mxGetPr(instance_mat);
|
||||
sc = mxGetN(instance_mat);
|
||||
|
||||
elements = 0;
|
||||
// the number of instance
|
||||
l = mxGetM(instance_mat);
|
||||
prob.l = (int)l;
|
||||
weight_vector_row_num = mxGetM(weight_vec);
|
||||
label_vector_row_num = mxGetM(label_vec);
|
||||
|
||||
if(weight_vector_row_num == 0)
|
||||
mexPrintf("Warning: treat each instance with weight 1.0\n");
|
||||
else if(weight_vector_row_num!=prob.l)
|
||||
{
|
||||
mexPrintf("Length of weight vector does not match # of instances.\n");
|
||||
return -1;
|
||||
}
|
||||
if(label_vector_row_num!=l)
|
||||
{
|
||||
mexPrintf("Length of label vector does not match # of instances.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(param.kernel_type == PRECOMPUTED)
|
||||
elements = l * (sc + 1);
|
||||
else
|
||||
{
|
||||
for(i = 0; i < l; i++)
|
||||
{
|
||||
for(k = 0; k < sc; k++)
|
||||
if(samples[k * l + i] != 0)
|
||||
elements++;
|
||||
// count the '-1' element
|
||||
elements++;
|
||||
}
|
||||
}
|
||||
|
||||
prob.y = Malloc(double,l);
|
||||
prob.x = Malloc(struct svm_node *,l);
|
||||
prob.W = Malloc(double,l);
|
||||
x_space = Malloc(struct svm_node, elements);
|
||||
|
||||
max_index = sc;
|
||||
j = 0;
|
||||
for(i = 0; i < l; i++)
|
||||
{
|
||||
prob.x[i] = &x_space[j];
|
||||
prob.y[i] = labels[i];
|
||||
prob.W[i] = 1;
|
||||
if(weight_vector_row_num == prob.l)
|
||||
prob.W[i] *= (double) weights[i];
|
||||
|
||||
for(k = 0; k < sc; k++)
|
||||
{
|
||||
if(param.kernel_type == PRECOMPUTED || samples[k * l + i] != 0)
|
||||
{
|
||||
x_space[j].index = (int)k + 1;
|
||||
x_space[j].value = samples[k * l + i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
x_space[j++].index = -1;
|
||||
}
|
||||
|
||||
if(param.gamma == 0 && max_index > 0)
|
||||
param.gamma = (double)(1.0/max_index);
|
||||
|
||||
if(param.kernel_type == PRECOMPUTED)
|
||||
for(i=0;i<l;i++)
|
||||
{
|
||||
if((int)prob.x[i][0].value <= 0 || (int)prob.x[i][0].value > (int)max_index)
|
||||
{
|
||||
mexPrintf("Wrong input format: sample_serial_number out of range\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int read_problem_sparse(const mxArray *weight_vec, const mxArray *label_vec, const mxArray *instance_mat)
|
||||
{
|
||||
mwIndex *ir, *jc, low, high, k;
|
||||
// using size_t due to the output type of matlab functions
|
||||
size_t i, j, l, elements, max_index, label_vector_row_num, weight_vector_row_num;
|
||||
mwSize num_samples;
|
||||
double *samples, *labels, *weights;
|
||||
mxArray *instance_mat_col; // transposed instance sparse matrix
|
||||
|
||||
prob.x = NULL;
|
||||
prob.y = NULL;
|
||||
prob.W = NULL;
|
||||
x_space = NULL;
|
||||
|
||||
// transpose instance matrix
|
||||
{
|
||||
mxArray *prhs[1], *plhs[1];
|
||||
prhs[0] = mxDuplicateArray(instance_mat);
|
||||
if(mexCallMATLAB(1, plhs, 1, prhs, "transpose"))
|
||||
{
|
||||
mexPrintf("Error: cannot transpose training instance matrix\n");
|
||||
return -1;
|
||||
}
|
||||
instance_mat_col = plhs[0];
|
||||
mxDestroyArray(prhs[0]);
|
||||
}
|
||||
|
||||
// each column is one instance
|
||||
weights = mxGetPr(weight_vec);
|
||||
labels = mxGetPr(label_vec);
|
||||
samples = mxGetPr(instance_mat_col);
|
||||
ir = mxGetIr(instance_mat_col);
|
||||
jc = mxGetJc(instance_mat_col);
|
||||
|
||||
num_samples = mxGetNzmax(instance_mat_col);
|
||||
|
||||
// the number of instance
|
||||
l = mxGetN(instance_mat_col);
|
||||
prob.l = (int) l;
|
||||
label_vector_row_num = mxGetM(label_vec);
|
||||
weight_vector_row_num = mxGetM(weight_vec);
|
||||
|
||||
if(weight_vector_row_num == 0)
|
||||
mexPrintf("Warning: treat each instance with weight 1.0\n");
|
||||
else if(weight_vector_row_num!=prob.l)
|
||||
{
|
||||
mexPrintf("Length of weight vector does not match # of instances.\n");
|
||||
return -1;
|
||||
}
|
||||
if(label_vector_row_num!=l)
|
||||
{
|
||||
mexPrintf("Length of label vector does not match # of instances.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
elements = num_samples + l;
|
||||
max_index = mxGetM(instance_mat_col);
|
||||
|
||||
prob.y = Malloc(double,l);
|
||||
prob.x = Malloc(struct svm_node *,l);
|
||||
prob.W = Malloc(double,l);
|
||||
x_space = Malloc(struct svm_node, elements);
|
||||
|
||||
j = 0;
|
||||
for(i=0;i<l;i++)
|
||||
{
|
||||
prob.x[i] = &x_space[j];
|
||||
prob.y[i] = labels[i];
|
||||
prob.W[i] = 1;
|
||||
if(weight_vector_row_num == prob.l)
|
||||
prob.W[i] *= (double) weights[i];
|
||||
low = jc[i], high = jc[i+1];
|
||||
for(k=low;k<high;k++)
|
||||
{
|
||||
x_space[j].index = (int)ir[k] + 1;
|
||||
x_space[j].value = samples[k];
|
||||
j++;
|
||||
}
|
||||
x_space[j++].index = -1;
|
||||
}
|
||||
|
||||
if(param.gamma == 0 && max_index > 0)
|
||||
param.gamma = (double)(1.0/max_index);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Give every requested output an empty (0x0) real double matrix, so the
// MEX call still returns something after an error.
static void fake_answer(int nlhs, mxArray *plhs[])
{
	int slot = 0;
	while(slot < nlhs)
	{
		plhs[slot] = mxCreateDoubleMatrix(0, 0, mxREAL);
		slot++;
	}
}
|
||||
|
||||
// Interface function of matlab
|
||||
// now assume prhs[0]: weight, prhs[1]: label, prhs[2]: features, prhs[3]: options (optional)
|
||||
// MEX entry point: train an SVM, or run cross validation when -v is given.
//
// Expected right-hand sides (3 or 4 of them):
//   prhs[0] weight vector, prhs[1] label vector, prhs[2] instance matrix,
//   prhs[3] option string (optional).
// On success plhs[0] is the model structure, or the cross-validation score
// when -v is given. On any error the help text or an error message is
// printed and every requested output is set to an empty matrix.
void mexFunction( int nlhs, mxArray *plhs[],
		int nrhs, const mxArray *prhs[] )
{
	const char *error_msg;

	// fix random seed to have same results for each run
	// (for cross validation and probability estimation)
	srand(1);

	if(nlhs > 1)
	{
		exit_with_help();
		fake_answer(nlhs, plhs);
		return;
	}

	// Transform the input Matrix to libsvm format
	if(nrhs > 2 && nrhs < 5)
	{
		int err;

		// all three data arguments are read through mxGetPr, so each must be double
		if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1]) || !mxIsDouble(prhs[2]))
		{
			mexPrintf("Error: weight vector, label vector and instance matrix must be double\n");
			fake_answer(nlhs, plhs);
			return;
		}

		// weights and labels are indexed as dense arrays
		if(mxIsSparse(prhs[0]) || mxIsSparse(prhs[1]))
		{
			mexPrintf("Error: weight vector and label vector should not be in sparse format\n");
			fake_answer(nlhs, plhs);
			return;
		}

		if(parse_command_line(nrhs, prhs, NULL))
		{
			exit_with_help();
			svm_destroy_param(&param);
			fake_answer(nlhs, plhs);
			return;
		}

		if(mxIsSparse(prhs[2]))
		{
			if(param.kernel_type == PRECOMPUTED)
			{
				// precomputed kernel requires dense matrix, so we make one
				mxArray *rhs[1], *lhs[1];

				rhs[0] = mxDuplicateArray(prhs[2]);
				if(mexCallMATLAB(1, lhs, 1, rhs, "full"))
				{
					mexPrintf("Error: cannot generate a full training instance matrix\n");
					svm_destroy_param(&param);
					fake_answer(nlhs, plhs);
					return;
				}
				err = read_problem_dense(prhs[0], prhs[1], lhs[0]);
				mxDestroyArray(lhs[0]);
				mxDestroyArray(rhs[0]);
			}
			else
				err = read_problem_sparse(prhs[0], prhs[1], prhs[2]);
		}
		else
			err = read_problem_dense(prhs[0], prhs[1], prhs[2]);

		// svmtrain's original code
		// only validate a problem that was actually built: after a read
		// failure prob.y / prob.x may still be NULL
		error_msg = err ? NULL : svm_check_parameter(&prob, &param);

		if(err || error_msg)
		{
			if (error_msg != NULL)
				mexPrintf("Error: %s\n", error_msg);
			svm_destroy_param(&param);
			free(prob.y);
			free(prob.x);
			free(prob.W);
			free(x_space);
			fake_answer(nlhs, plhs);
			return;
		}

		if(cross_validation)
		{
			double *ptr;
			plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);
			ptr = mxGetPr(plhs[0]);
			ptr[0] = do_cross_validation();
		}
		else
		{
			int nr_feat = (int)mxGetN(prhs[2]);
			model = svm_train(&prob, &param);
			error_msg = model_to_matlab_structure(plhs, nr_feat, model);
			if(error_msg)
				mexPrintf("Error: can't convert libsvm model to matrix structure: %s\n", error_msg);
			svm_free_and_destroy_model(&model);
		}
		svm_destroy_param(&param);
		free(prob.y);
		free(prob.x);
		free(prob.W);
		free(x_space);
	}
	else
	{
		exit_with_help();
		fake_answer(nlhs, plhs);
		return;
	}
}
|
|
@ -0,0 +1,4 @@
|
|||
# Build the LIBSVM shared library in the parent directory.
# "all" must be a target (not a variable assignment) so that `make all` works.
all: lib

lib:
	$(MAKE) -C .. lib

.PHONY: all lib
|
|
@ -0,0 +1,367 @@
|
|||
----------------------------------
|
||||
--- Python interface of LIBSVM ---
|
||||
----------------------------------
|
||||
|
||||
Table of Contents
|
||||
=================
|
||||
|
||||
- Introduction
|
||||
- Installation
|
||||
- Quick Start
|
||||
- Design Description
|
||||
- Data Structures
|
||||
- Utility Functions
|
||||
- Additional Information
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
Python (http://www.python.org/) is a programming language suitable for rapid
|
||||
development. This tool provides a simple Python interface to LIBSVM, a library
|
||||
for support vector machines (http://www.csie.ntu.edu.tw/~cjlin/libsvm). The
|
||||
interface is very easy to use as the usage is the same as that of LIBSVM. The
|
||||
interface is developed with the built-in Python library "ctypes."
|
||||
|
||||
Installation
|
||||
============
|
||||
|
||||
On Unix systems, type
|
||||
|
||||
> make
|
||||
|
||||
The interface needs only LIBSVM shared library, which is generated by
|
||||
the above command. We assume that the shared library is on the LIBSVM
|
||||
main directory or in the system path.
|
||||
|
||||
For windows, the shared library libsvm.dll for 32-bit python is ready
|
||||
in the directory `..\windows'. You can also copy it to the system
|
||||
directory (e.g., `C:\WINDOWS\system32\' for Windows XP). To regenerate
|
||||
the shared library, please follow the instruction of building windows
|
||||
binaries in LIBSVM README.
|
||||
|
||||
Quick Start
|
||||
===========
|
||||
|
||||
There are two levels of usage. The high-level one uses utility functions
|
||||
in svmutil.py and the usage is the same as the LIBSVM MATLAB interface.
|
||||
|
||||
>>> from svmutil import *
|
||||
# Read data in LIBSVM format
|
||||
>>> y, x = svm_read_problem('../heart_scale')
|
||||
>>> m = svm_train(y[:200], x[:200], '-c 4')
|
||||
>>> p_label, p_acc, p_val = svm_predict(y[200:], x[200:], m)
|
||||
|
||||
# Construct problem in python format
|
||||
# Dense data
|
||||
>>> y, x = [1,-1], [[1,0,1], [-1,0,-1]]
|
||||
# Sparse data
|
||||
>>> y, x = [1,-1], [{1:1, 3:1}, {1:-1,3:-1}]
|
||||
>>> prob = svm_problem(y, x)
|
||||
>>> param = svm_parameter('-t 0 -c 4 -b 1')
|
||||
>>> m = svm_train(prob, param)
|
||||
|
||||
# Precomputed kernel data (-t 4)
|
||||
# Dense data
|
||||
>>> y, x = [1,-1], [[1, 2, -2], [2, -2, 2]]
|
||||
# Sparse data
|
||||
>>> y, x = [1,-1], [{0:1, 1:2, 2:-2}, {0:2, 1:-2, 2:2}]
|
||||
# isKernel=True must be set for precomputed kernel
|
||||
>>> prob = svm_problem(y, x, isKernel=True)
|
||||
>>> param = svm_parameter('-t 4 -c 4 -b 1')
|
||||
>>> m = svm_train(prob, param)
|
||||
# For the format of precomputed kernel, please read LIBSVM README.
|
||||
|
||||
|
||||
# Other utility functions
|
||||
>>> svm_save_model('heart_scale.model', m)
|
||||
>>> m = svm_load_model('heart_scale.model')
|
||||
>>> p_label, p_acc, p_val = svm_predict(y, x, m, '-b 1')
|
||||
>>> ACC, MSE, SCC = evaluations(y, p_label)
|
||||
|
||||
# Getting online help
|
||||
>>> help(svm_train)
|
||||
|
||||
The low-level use directly calls C interfaces imported by svm.py. Note that
|
||||
all arguments and return values are in ctypes format. You need to handle them
|
||||
carefully.
|
||||
|
||||
>>> from svm import *
|
||||
>>> prob = svm_problem([1,-1], [{1:1, 3:1}, {1:-1,3:-1}])
|
||||
>>> param = svm_parameter('-c 4')
|
||||
>>> m = libsvm.svm_train(prob, param) # m is a ctype pointer to an svm_model
|
||||
# Convert a Python-format instance to svm_nodearray, a ctypes structure
|
||||
>>> x0, max_idx = gen_svm_nodearray({1:1, 3:1})
|
||||
>>> label = libsvm.svm_predict(m, x0)
|
||||
|
||||
Design Description
|
||||
==================
|
||||
|
||||
There are two files svm.py and svmutil.py, which respectively correspond to
|
||||
low-level and high-level use of the interface.
|
||||
|
||||
In svm.py, we adopt the Python built-in library "ctypes," so that
|
||||
Python can directly access C structures and interface functions defined
|
||||
in svm.h.
|
||||
|
||||
While advanced users can use structures/functions in svm.py, to
|
||||
avoid handling ctypes structures, in svmutil.py we provide some easy-to-use
|
||||
functions. The usage is similar to LIBSVM MATLAB interface.
|
||||
|
||||
Data Structures
|
||||
===============
|
||||
|
||||
Four data structures derived from svm.h are svm_node, svm_problem, svm_parameter,
|
||||
and svm_model. They all contain fields with the same names in svm.h. Access
|
||||
these fields carefully because you directly use a C structure instead of a
|
||||
Python object. For svm_model, accessing the field directly is not recommended.
|
||||
Programmers should use the interface functions or methods of svm_model class
|
||||
in Python to get the values. The following description introduces additional
|
||||
fields and methods.
|
||||
|
||||
Before using the data structures, execute the following command to load the
|
||||
LIBSVM shared library:
|
||||
|
||||
>>> from svm import *
|
||||
|
||||
- class svm_node:
|
||||
|
||||
Construct an svm_node.
|
||||
|
||||
>>> node = svm_node(idx, val)
|
||||
|
||||
idx: an integer indicates the feature index.
|
||||
|
||||
val: a float indicates the feature value.
|
||||
|
||||
Show the index and the value of a node.
|
||||
|
||||
>>> print(node)
|
||||
|
||||
- Function: gen_svm_nodearray(xi [,feature_max=None [,isKernel=False]])
|
||||
|
||||
Generate a feature vector from a Python list/tuple or a dictionary:
|
||||
|
||||
>>> xi, max_idx = gen_svm_nodearray({1:1, 3:1, 5:-2})
|
||||
|
||||
xi: the returned svm_nodearray (a ctypes structure)
|
||||
|
||||
max_idx: the maximal feature index of xi
|
||||
|
||||
feature_max: if feature_max is assigned, features with indices larger than
|
||||
feature_max are removed.
|
||||
|
||||
isKernel: if isKernel == True, the list index starts from 0 for precomputed
|
||||
kernel. Otherwise, the list index starts from 1. The default
|
||||
value is False.
|
||||
|
||||
- class svm_problem:
|
||||
|
||||
Construct an svm_problem instance
|
||||
|
||||
>>> prob = svm_problem(y, x)
|
||||
|
||||
y: a Python list/tuple of l labels (type must be int/double).
|
||||
|
||||
x: a Python list/tuple of l data instances. Each element of x must be
|
||||
an instance of list/tuple/dictionary type.
|
||||
|
||||
Note that if your x contains sparse data (i.e., dictionary), the internal
|
||||
ctypes data format is still sparse.
|
||||
|
||||
For pre-computed kernel, the isKernel flag should be set to True:
|
||||
|
||||
>>> prob = svm_problem(y, x, isKernel=True)
|
||||
|
||||
Please read LIBSVM README for more details of pre-computed kernel.
|
||||
|
||||
- class svm_parameter:
|
||||
|
||||
Construct an svm_parameter instance
|
||||
|
||||
>>> param = svm_parameter('training_options')
|
||||
|
||||
If 'training_options' is empty, LIBSVM default values are applied.
|
||||
|
||||
Set param to LIBSVM default values.
|
||||
|
||||
>>> param.set_to_default_values()
|
||||
|
||||
Parse a string of options.
|
||||
|
||||
>>> param.parse_options('training_options')
|
||||
|
||||
Show values of parameters.
|
||||
|
||||
>>> print(param)
|
||||
|
||||
- class svm_model:
|
||||
|
||||
There are two ways to obtain an instance of svm_model:
|
||||
|
||||
>>> model = svm_train(y, x)
|
||||
>>> model = svm_load_model('model_file_name')
|
||||
|
||||
Note that the returned structure of interface functions
|
||||
libsvm.svm_train and libsvm.svm_load_model is a ctypes pointer of
|
||||
svm_model, which is different from the svm_model object returned
|
||||
by svm_train and svm_load_model in svmutil.py. We provide a
|
||||
function toPyModel for the conversion:
|
||||
|
||||
>>> model_ptr = libsvm.svm_train(prob, param)
|
||||
>>> model = toPyModel(model_ptr)
|
||||
|
||||
If you obtain a model in a way other than the above approaches,
|
||||
handle it carefully to avoid memory leak or segmentation fault.
|
||||
|
||||
Some interface functions to access LIBSVM models are wrapped as
|
||||
members of the class svm_model:
|
||||
|
||||
>>> svm_type = model.get_svm_type()
|
||||
>>> nr_class = model.get_nr_class()
|
||||
>>> svr_probability = model.get_svr_probability()
|
||||
>>> class_labels = model.get_labels()
|
||||
>>> sv_indices = model.get_sv_indices()
|
||||
>>> nr_sv = model.get_nr_sv()
|
||||
>>> is_prob_model = model.is_probability_model()
|
||||
>>> support_vector_coefficients = model.get_sv_coef()
|
||||
>>> support_vectors = model.get_SV()
|
||||
|
||||
Utility Functions
|
||||
=================
|
||||
|
||||
To use utility functions, type
|
||||
|
||||
>>> from svmutil import *
|
||||
|
||||
The above command loads
|
||||
svm_train() : train an SVM model
|
||||
svm_predict() : predict testing data
|
||||
svm_read_problem() : read the data from a LIBSVM-format file.
|
||||
svm_load_model() : load a LIBSVM model.
|
||||
svm_save_model() : save model to a file.
|
||||
evaluations() : evaluate prediction results.
|
||||
|
||||
- Function: svm_train
|
||||
|
||||
There are three ways to call svm_train()
|
||||
|
||||
>>> model = svm_train(y, x [, 'training_options'])
|
||||
>>> model = svm_train(prob [, 'training_options'])
|
||||
>>> model = svm_train(prob, param)
|
||||
|
||||
y: a list/tuple of l training labels (type must be int/double).
|
||||
|
||||
x: a list/tuple of l training instances. The feature vector of
|
||||
each training instance is an instance of list/tuple or dictionary.
|
||||
|
||||
training_options: a string in the same form as that for LIBSVM command
|
||||
mode.
|
||||
|
||||
prob: an svm_problem instance generated by calling
|
||||
svm_problem(y, x).
|
||||
For pre-computed kernel, you should use
|
||||
svm_problem(y, x, isKernel=True)
|
||||
|
||||
param: an svm_parameter instance generated by calling
|
||||
svm_parameter('training_options')
|
||||
|
||||
model: the returned svm_model instance. See svm.h for details of this
|
||||
structure. If '-v' is specified, cross validation is
|
||||
conducted and the returned model is just a scalar: cross-validation
|
||||
accuracy for classification and mean-squared error for regression.
|
||||
|
||||
To train the same data many times with different
|
||||
parameters, the second and the third ways should be faster.
|
||||
|
||||
Examples:
|
||||
|
||||
>>> y, x = svm_read_problem('../heart_scale')
|
||||
>>> prob = svm_problem(y, x)
|
||||
>>> param = svm_parameter('-s 3 -c 5 -h 0')
|
||||
>>> m = svm_train(y, x, '-c 5')
|
||||
>>> m = svm_train(prob, '-t 2 -c 5')
|
||||
>>> m = svm_train(prob, param)
|
||||
>>> CV_ACC = svm_train(y, x, '-v 3')
|
||||
|
||||
- Function: svm_predict
|
||||
|
||||
To predict testing data with a model, use
|
||||
|
||||
>>> p_labs, p_acc, p_vals = svm_predict(y, x, model [,'predicting_options'])
|
||||
|
||||
y: a list/tuple of l true labels (type must be int/double). It is used
|
||||
for calculating the accuracy. Use [0]*len(x) if true labels are
|
||||
unavailable.
|
||||
|
||||
x: a list/tuple of l predicting instances. The feature vector of
|
||||
each predicting instance is an instance of list/tuple or dictionary.
|
||||
|
||||
predicting_options: a string of predicting options in the same format as
|
||||
that of LIBSVM.
|
||||
|
||||
model: an svm_model instance.
|
||||
|
||||
p_labels: a list of predicted labels
|
||||
|
||||
p_acc: a tuple including accuracy (for classification), mean
|
||||
squared error, and squared correlation coefficient (for
|
||||
regression).
|
||||
|
||||
p_vals: a list of decision values or probability estimates (if '-b 1'
|
||||
is specified). If k is the number of classes in training data,
|
||||
for decision values, each element includes results of predicting
|
||||
k(k-1)/2 binary-class SVMs. For classification, k = 1 is a
|
||||
special case. Decision value [+1] is returned for each testing
|
||||
instance, instead of an empty list.
|
||||
For probabilities, each element contains k values indicating
|
||||
the probability that the testing instance is in each class.
|
||||
Note that the order of classes is the same as the 'model.label'
|
||||
field in the model structure.
|
||||
|
||||
Example:
|
||||
|
||||
>>> m = svm_train(y, x, '-c 5')
|
||||
>>> p_labels, p_acc, p_vals = svm_predict(y, x, m)
|
||||
|
||||
- Functions: svm_read_problem/svm_load_model/svm_save_model
|
||||
|
||||
See the usage by examples:
|
||||
|
||||
>>> y, x = svm_read_problem('data.txt')
|
||||
>>> m = svm_load_model('model_file')
|
||||
>>> svm_save_model('model_file', m)
|
||||
|
||||
- Function: evaluations
|
||||
|
||||
Calculate some evaluations using the true values (ty) and predicted
|
||||
values (pv):
|
||||
|
||||
>>> (ACC, MSE, SCC) = evaluations(ty, pv)
|
||||
|
||||
ty: a list of true values.
|
||||
|
||||
pv: a list of predicted values.
|
||||
|
||||
ACC: accuracy.
|
||||
|
||||
MSE: mean squared error.
|
||||
|
||||
SCC: squared correlation coefficient.
|
||||
|
||||
|
||||
Additional Information
|
||||
======================
|
||||
|
||||
This interface was written by Hsiang-Fu Yu from Department of Computer
|
||||
Science, National Taiwan University. If you find this tool useful, please
|
||||
cite LIBSVM as follows
|
||||
|
||||
Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support
|
||||
vector machines. ACM Transactions on Intelligent Systems and
|
||||
Technology, 2:27:1--27:27, 2011. Software available at
|
||||
http://www.csie.ntu.edu.tw/~cjlin/libsvm
|
||||
|
||||
For any question, please contact Chih-Jen Lin <cjlin@csie.ntu.edu.tw>,
|
||||
or check the FAQ page:
|
||||
|
||||
http://www.csie.ntu.edu.tw/~cjlin/libsvm/faq.html
|
|
@ -0,0 +1,63 @@
|
|||
Introduction
|
||||
============
|
||||
|
||||
This tool provides a Python interface to LIBSVM with instance weight support
|
||||
|
||||
Installation
|
||||
============
|
||||
|
||||
Please check README for details.
|
||||
|
||||
USAGE
|
||||
=====
|
||||
|
||||
The usage is basically the same as the version without supporting
|
||||
instance weights. We only show differences below.
|
||||
|
||||
- Function: svm_train
|
||||
|
||||
There are three ways to call svm_train()
|
||||
|
||||
>>> model = svm_train(W, y, x [, 'training_options'])
|
||||
>>> model = svm_train(prob [, 'training_options'])
|
||||
>>> model = svm_train(prob, param)
|
||||
|
||||
W: a list/tuple of l training weights (type must be double).
|
||||
Use [] if no weights.
|
||||
|
||||
y: a list/tuple of l training labels (type must be int/double).
|
||||
|
||||
x: a list/tuple of l training instances. The feature vector of
|
||||
each training instance is an instance of list/tuple or dictionary.
|
||||
|
||||
training_options: a string in the same form as that for LIBSVM command
|
||||
mode.
|
||||
|
||||
prob: an svm_problem instance generated by calling
|
||||
svm_problem(W, y, x).
|
||||
|
||||
param: an svm_parameter instance generated by calling
|
||||
svm_parameter('training_options')
|
||||
|
||||
model: the returned svm_model instance. See svm.h for details of this
|
||||
structure. If '-v' is specified, cross validation is
|
||||
conducted and the returned model is just a scalar: cross-validation
|
||||
accuracy for classification and mean-squared error for regression.
|
||||
|
||||
To train the same data many times with different
|
||||
parameters, the second and the third ways should be faster.
|
||||
|
||||
Examples:
|
||||
|
||||
>>> y, x = svm_read_problem('../heart_scale')
|
||||
>>> W = [1] * len(y)
|
||||
>>> W[0] = 10
|
||||
>>> prob = svm_problem(W, y, x)
|
||||
>>> param = svm_parameter('-s 3 -c 5 -h 0')
|
||||
>>> m = svm_train([], y, x, '-c 5')
|
||||
>>> m = svm_train(W, y, x)
|
||||
>>> m = svm_train(prob, '-t 2 -c 5')
|
||||
>>> m = svm_train(prob, param)
|
||||
>>> CV_ACC = svm_train(W, y, x, '-v 3')
|
||||
|
||||
|
|
@ -0,0 +1,335 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
from ctypes import *
|
||||
from ctypes.util import find_library
|
||||
from os import path
|
||||
import sys
|
||||
|
||||
__all__ = ['libsvm', 'svm_problem', 'svm_parameter',
|
||||
'toPyModel', 'gen_svm_nodearray', 'print_null', 'svm_node', 'C_SVC',
|
||||
'EPSILON_SVR', 'LINEAR', 'NU_SVC', 'NU_SVR', 'ONE_CLASS',
|
||||
'POLY', 'PRECOMPUTED', 'PRINT_STRING_FUN', 'RBF',
|
||||
'SIGMOID', 'c_double', 'svm_model']
|
||||
|
||||
# Load the LIBSVM shared library: first the copy expected relative to this
# file, then whatever the system library search can find.
try:
    dirname = path.dirname(path.abspath(__file__))
    if sys.platform == 'win32':
        libsvm = CDLL(path.join(dirname, r'..\windows\libsvm.dll'))
    else:
        libsvm = CDLL(path.join(dirname, '../libsvm.so.2'))
except Exception:
    # "except Exception" rather than a bare except, so KeyboardInterrupt and
    # SystemExit are not swallowed by the fallback.
    # For unix the prefix 'lib' is not considered.
    _libsvm_path = find_library('svm') or find_library('libsvm')
    if not _libsvm_path:
        raise Exception('LIBSVM library not found.')
    libsvm = CDLL(_libsvm_path)
|
||||
|
||||
# Values for svm_parameter.svm_type (C_SVC is the default).
C_SVC = 0
NU_SVC = 1
ONE_CLASS = 2
EPSILON_SVR = 3
NU_SVR = 4

# Values for svm_parameter.kernel_type (RBF is the default).
LINEAR = 0
POLY = 1
RBF = 2
SIGMOID = 3
PRECOMPUTED = 4
|
||||
|
||||
PRINT_STRING_FUN = CFUNCTYPE(None, c_char_p)
|
||||
def print_null(s):
    """Discard the message ``s``; installed as the print callback by option -q."""
    return None
|
||||
|
||||
def genFields(names, types):
    """Pair each field name with its type, in order, as a list of 2-tuples.

    The result is used as the ``_fields_`` of the ctypes structures below.
    """
    return [(field_name, field_type) for field_name, field_type in zip(names, types)]
|
||||
|
||||
def fillprototype(f, restype, argtypes):
    """Set the ``restype`` and ``argtypes`` attributes of ``f`` (result type first)."""
    f.restype, f.argtypes = restype, argtypes
|
||||
|
||||
class svm_node(Structure):
    """ctypes structure holding one (index, value) entry of a feature vector."""
    _names = ["index", "value"]
    _types = [c_int, c_double]
    _fields_ = genFields(_names, _types)

    def __str__(self):
        """Format the node as ``index:value``."""
        return '{:d}:{:g}'.format(self.index, self.value)
|
||||
|
||||
def gen_svm_nodearray(xi, feature_max=None, isKernel=None):
    """Convert one instance into a ctypes array of svm_node.

    xi: a dictionary {index: value}, or a list/tuple of values. For a
        list/tuple the first value gets index 1, or index 0 when isKernel
        is true.
    feature_max: if given (and non-zero), entries with an index larger than
        feature_max are dropped.
    isKernel: if true, zero values are kept; otherwise they are skipped.

    Returns (nodes, max_idx): the node array, sorted by index and terminated
    by a node with index -1, and the largest index stored (0 if none).

    Raises TypeError if xi is not a dictionary, list or tuple.
    """
    if isinstance(xi, dict):
        index_range = xi.keys()
    elif isinstance(xi, (list, tuple)):
        if not isKernel:
            # idx should start from 1; list() so that tuples are accepted too
            xi = [0] + list(xi)
        index_range = range(len(xi))
    else:
        raise TypeError('xi should be a dictionary, list or tuple')

    if feature_max:
        assert(isinstance(feature_max, int))

    # keep an index when it is within feature_max (if set) and, unless this
    # is a precomputed-kernel row, its value is non-zero
    indices = sorted(
        j for j in index_range
        if (not feature_max or j <= feature_max) and (isKernel or xi[j] != 0)
    )

    ret = (svm_node * (len(indices) + 1))()
    ret[-1].index = -1
    for idx, j in enumerate(indices):
        ret[idx].index = j
        ret[idx].value = xi[j]

    max_idx = indices[-1] if indices else 0
    return ret, max_idx
|
||||
|
||||
class svm_problem(Structure):
    """ctypes structure for a training problem with per-instance weights.

    Besides the structure fields (l, y, x, W), an instance keeps ``x_space``
    (the node arrays that ``x`` points to) and ``n`` (the largest feature
    index seen) as Python attributes.
    """
    _names = ["l", "y", "x", "W"]
    _types = [c_int, POINTER(c_double), POINTER(POINTER(svm_node)), POINTER(c_double)]
    _fields_ = genFields(_names, _types)

    def __init__(self, W, y, x, isKernel=None):
        """Build the problem from weights W, labels y and instances x.

        W may be empty, in which case every instance gets weight 1.
        Raises ValueError when the lengths of W, y and x do not agree.
        """
        if len(y) != len(x):
            raise ValueError("len(y) != len(x)")
        if len(W) not in (0, len(x)):
            raise ValueError("len(W) != len(x)")

        count = len(y)
        self.l = count
        weights = W if len(W) != 0 else [1] * count

        # convert every instance once; keep the arrays alive in x_space
        converted = [gen_svm_nodearray(inst, isKernel=isKernel) for inst in x]
        self.x_space = [nodes for nodes, _ in converted]
        self.n = max([0] + [top_idx for _, top_idx in converted])

        self.W = (c_double * count)()
        for pos, weight in enumerate(weights):
            self.W[pos] = weight

        self.y = (c_double * count)()
        for pos, label in enumerate(y):
            self.y[pos] = label

        self.x = (POINTER(svm_node) * count)()
        for pos, nodes in enumerate(self.x_space):
            self.x[pos] = nodes
|
||||
|
||||
class svm_parameter(Structure):
    """ctypes view of the ``svm_parameter`` struct, filled from option strings.

    Besides the struct fields, instances carry three plain Python attributes
    set by the parser: ``cross_validation``, ``nr_fold`` and ``print_func``.
    """

    _names = ["svm_type", "kernel_type", "degree", "gamma", "coef0",
              "cache_size", "eps", "C", "nr_weight", "weight_label", "weight",
              "nu", "p", "shrinking", "probability"]
    _types = [c_int, c_int, c_int, c_double, c_double,
              c_double, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double),
              c_double, c_double, c_int, c_int]
    _fields_ = genFields(_names, _types)

    def __init__(self, options=None):
        """Create a parameter set from `options` (a str or list; None means '')."""
        if options is None:
            options = ''
        self.parse_options(options)

    def __str__(self):
        """Return one ' name: value' line per struct field and extra attribute."""
        attrs = svm_parameter._names + list(self.__dict__.keys())
        text = ''.join(' %s: %s\n' % (attr, getattr(self, attr)) for attr in attrs)
        return text.strip()

    def set_to_default_values(self):
        """Reset every field and parser attribute to its default value."""
        self.svm_type = C_SVC
        self.kernel_type = RBF
        self.degree = 3
        self.gamma = 0
        self.coef0 = 0
        self.nu = 0.5
        self.cache_size = 100
        self.C = 1
        self.eps = 0.001
        self.p = 0.1
        self.shrinking = 1
        self.probability = 0
        self.nr_weight = 0
        self.weight_label = (c_int*0)()
        self.weight = (c_double*0)()
        self.cross_validation = False
        self.nr_fold = 0
        self.print_func = cast(None, PRINT_STRING_FUN)

    def parse_options(self, options):
        """Reset to defaults, then apply command-line style `options`.

        `options` is either a list of tokens or a whitespace-separated str.
        Raises TypeError for any other type, and ValueError for an unknown
        flag, a malformed number, or a '-v' fold count below 2.  A flag given
        without its value raises IndexError.
        """
        if isinstance(options, list):
            argv = options
        elif isinstance(options, str):
            argv = options.split()
        else:
            raise TypeError("arg 1 should be a list or a str.")
        self.set_to_default_values()
        self.print_func = cast(None, PRINT_STRING_FUN)
        weight_label = []
        weight = []

        # Flags that simply store one converted value into one struct field.
        scalar_options = {
            "-s": ("svm_type", int),
            "-t": ("kernel_type", int),
            "-d": ("degree", int),
            "-g": ("gamma", float),
            "-r": ("coef0", float),
            "-n": ("nu", float),
            "-m": ("cache_size", float),
            "-c": ("C", float),
            "-e": ("eps", float),
            "-p": ("p", float),
            "-h": ("shrinking", int),
            "-b": ("probability", int),
        }

        i = 0
        while i < len(argv):
            flag = argv[i]
            if flag in scalar_options:
                attr, convert = scalar_options[flag]
                i += 1
                setattr(self, attr, convert(argv[i]))
            elif flag == "-q":
                self.print_func = PRINT_STRING_FUN(print_null)
            elif flag == "-v":
                i += 1
                self.cross_validation = 1
                self.nr_fold = int(argv[i])
                if self.nr_fold < 2:
                    raise ValueError("n-fold cross validation: n must >= 2")
            elif flag.startswith("-w"):
                # "-w<label> <weight>": the class label is part of the flag.
                i += 1
                self.nr_weight += 1
                weight_label.append(int(flag[2:]))
                weight.append(float(argv[i]))
            else:
                raise ValueError("Wrong options")
            i += 1

        libsvm.svm_set_print_string_function(self.print_func)
        self.weight_label = (c_int*self.nr_weight)()
        self.weight = (c_double*self.nr_weight)()
        for k in range(self.nr_weight):
            self.weight[k] = weight[k]
            self.weight_label[k] = weight_label[k]
|
||||
|
||||
class svm_model(Structure):
    """ctypes view of the ``svm_model`` struct with convenience accessors."""

    _names = ['param', 'nr_class', 'l', 'SV', 'sv_coef', 'rho',
              'probA', 'probB', 'sv_indices', 'label', 'nSV', 'free_sv']
    _types = [svm_parameter, c_int, c_int, POINTER(POINTER(svm_node)),
              POINTER(POINTER(c_double)), POINTER(c_double),
              POINTER(c_double), POINTER(c_double), POINTER(c_int),
              POINTER(c_int), POINTER(c_int), c_int]
    _fields_ = genFields(_names, _types)

    def __init__(self):
        # Marks who created the object; __del__ only frees models marked 'C'.
        self.__createfrom__ = 'python'

    def __del__(self):
        # free memory created by C to avoid memory leak
        if hasattr(self, '__createfrom__') and self.__createfrom__ == 'C':
            libsvm.svm_free_and_destroy_model(pointer(self))

    def get_svm_type(self):
        """Return the result of libsvm.svm_get_svm_type for this model."""
        return libsvm.svm_get_svm_type(self)

    def get_nr_class(self):
        """Return the result of libsvm.svm_get_nr_class for this model."""
        return libsvm.svm_get_nr_class(self)

    def get_svr_probability(self):
        """Return the result of libsvm.svm_get_svr_probability for this model."""
        return libsvm.svm_get_svr_probability(self)

    def get_labels(self):
        """Return the class labels as a list of nr_class ints."""
        nr_class = self.get_nr_class()
        labels = (c_int * nr_class)()
        libsvm.svm_get_labels(self, labels)
        return labels[:nr_class]

    def get_sv_indices(self):
        """Return the support-vector indices as a list of ints."""
        total_sv = self.get_nr_sv()
        sv_indices = (c_int * total_sv)()
        libsvm.svm_get_sv_indices(self, sv_indices)
        return sv_indices[:total_sv]

    def get_nr_sv(self):
        """Return the result of libsvm.svm_get_nr_sv for this model."""
        return libsvm.svm_get_nr_sv(self)

    def is_probability_model(self):
        """Return True when libsvm.svm_check_probability_model reports 1."""
        return (libsvm.svm_check_probability_model(self) == 1)

    def get_sv_coef(self):
        """Return, per support vector, a tuple of its nr_class - 1 coefficients."""
        # range works on both Python 2 and 3; xrange is undefined on Python 3.
        return [tuple(self.sv_coef[j][i] for j in range(self.nr_class - 1))
                for i in range(self.l)]

    def get_SV(self):
        """Return the support vectors as a list of {index: value} dicts."""
        result = []
        for sparse_sv in self.SV[:self.l]:
            row = dict()
            i = 0
            # A node with index -1 terminates the array; it is a sentinel,
            # not a feature, so it must not be copied into the row.
            while sparse_sv[i].index != -1:
                row[sparse_sv[i].index] = sparse_sv[i].value
                i += 1
            result.append(row)
        return result
|
||||
|
||||
def toPyModel(model_ptr):
    """
    toPyModel(model_ptr) -> svm_model

    Dereference a ctypes POINTER(svm_model) and tag the resulting object as
    created by C.  Raises ValueError("Null pointer") for a null pointer.
    """
    if not model_ptr:
        raise ValueError("Null pointer")
    py_model = model_ptr.contents
    py_model.__createfrom__ = 'C'
    return py_model
|
||||
|
||||
# Result type and argument types of every libsvm entry point used from
# Python, registered in the order listed: (symbol name, restype, argtypes).
for _symbol, _restype, _argtypes in (
    ('svm_train', POINTER(svm_model), [POINTER(svm_problem), POINTER(svm_parameter)]),
    ('svm_cross_validation', None, [POINTER(svm_problem), POINTER(svm_parameter), c_int, POINTER(c_double)]),

    ('svm_save_model', c_int, [c_char_p, POINTER(svm_model)]),
    ('svm_load_model', POINTER(svm_model), [c_char_p]),

    ('svm_get_svm_type', c_int, [POINTER(svm_model)]),
    ('svm_get_nr_class', c_int, [POINTER(svm_model)]),
    ('svm_get_labels', None, [POINTER(svm_model), POINTER(c_int)]),
    ('svm_get_sv_indices', None, [POINTER(svm_model), POINTER(c_int)]),
    ('svm_get_nr_sv', c_int, [POINTER(svm_model)]),
    ('svm_get_svr_probability', c_double, [POINTER(svm_model)]),

    ('svm_predict_values', c_double, [POINTER(svm_model), POINTER(svm_node), POINTER(c_double)]),
    ('svm_predict', c_double, [POINTER(svm_model), POINTER(svm_node)]),
    ('svm_predict_probability', c_double, [POINTER(svm_model), POINTER(svm_node), POINTER(c_double)]),

    ('svm_free_model_content', None, [POINTER(svm_model)]),
    ('svm_free_and_destroy_model', None, [POINTER(POINTER(svm_model))]),
    ('svm_destroy_param', None, [POINTER(svm_parameter)]),

    ('svm_check_parameter', c_char_p, [POINTER(svm_problem), POINTER(svm_parameter)]),
    ('svm_check_probability_model', c_int, [POINTER(svm_model)]),
    ('svm_set_print_string_function', None, [PRINT_STRING_FUN]),
):
    fillprototype(getattr(libsvm, _symbol), _restype, _argtypes)
del _symbol, _restype, _argtypes
|
|
@ -0,0 +1,263 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import os
|
||||
import sys
|
||||
from svm import *
|
||||
from svm import __all__ as svm_all
|
||||
|
||||
|
||||
__all__ = ['evaluations', 'svm_load_model', 'svm_predict', 'svm_read_problem',
|
||||
'svm_save_model', 'svm_train'] + svm_all
|
||||
|
||||
sys.path = [os.path.dirname(os.path.abspath(__file__))] + sys.path
|
||||
|
||||
def svm_read_problem(data_file_name):
    """
    svm_read_problem(data_file_name) -> [y, x]

    Read LIBSVM-format data from data_file_name and return labels y
    and data instances x.

    Each line is "<label> <index>:<value> ...".  y is a list of floats and x
    a list of {int index: float value} dicts; a line holding only a label
    yields an empty dict.  A blank or malformed line raises ValueError.
    """
    prob_y = []
    prob_x = []
    # The context manager closes the file even when a line fails to parse.
    with open(data_file_name) as data_file:
        for line in data_file:
            line = line.split(None, 1)
            # In case an instance with all zero features
            if len(line) == 1:
                line += ['']
            label, features = line
            xi = {}
            for e in features.split():
                ind, val = e.split(":")
                xi[int(ind)] = float(val)
            prob_y.append(float(label))
            prob_x.append(xi)
    return (prob_y, prob_x)
|
||||
|
||||
def svm_load_model(model_file_name):
    """
    svm_load_model(model_file_name) -> model

    Load a LIBSVM model from model_file_name.  When libsvm returns a null
    pointer, a message is printed and None is returned instead.
    """
    model_ptr = libsvm.svm_load_model(model_file_name.encode())
    if model_ptr:
        return toPyModel(model_ptr)
    print("can't open model file %s" % model_file_name)
    return None
|
||||
|
||||
def svm_save_model(model_file_name, model):
    """
    svm_save_model(model_file_name, model) -> None

    Write the LIBSVM model `model` to the file model_file_name.
    The status code returned by libsvm is not inspected.
    """
    encoded_name = model_file_name.encode()
    libsvm.svm_save_model(encoded_name, model)
|
||||
|
||||
def evaluations(ty, pv):
    """
    evaluations(ty, pv) -> (ACC, MSE, SCC)

    Calculate accuracy, mean squared error and squared correlation coefficient
    using the true values (ty) and predicted values (pv).

    ACC is a percentage.  SCC is NaN when it is undefined (for example when
    either sequence is constant).  Raises ValueError when the lengths differ
    and ZeroDivisionError when both sequences are empty.
    """
    if len(ty) != len(pv):
        raise ValueError("len(ty) must equal to len(pv)")
    total_correct = 0
    # Float accumulators keep the divisions below true divisions on Python 2
    # even when every label is an int.
    total_error = 0.0
    sumv = sumy = sumvv = sumyy = sumvy = 0.0
    for v, y in zip(pv, ty):
        if y == v:
            total_correct += 1
        total_error += (v-y)*(v-y)
        sumv += v
        sumy += y
        sumvv += v*v
        sumyy += y*y
        sumvy += v*y
    l = float(len(ty))
    ACC = 100.0*total_correct/l
    MSE = total_error/l
    try:
        SCC = ((l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy))/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy))
    except ArithmeticError:
        # Only arithmetic failures (zero variance, overflow) mean "undefined";
        # anything else, e.g. KeyboardInterrupt, must propagate.
        SCC = float('nan')
    return (ACC, MSE, SCC)
|
||||
|
||||
def svm_train(arg1, arg2=None, arg3=None, arg4=None):
    """
    svm_train(W, y, x [, options]) -> model | ACC | MSE
    svm_train(prob [, options]) -> model | ACC | MSE
    svm_train(prob, param) -> model | ACC| MSE

    Train an SVM model from weighted data (W, y, x) or an svm_problem prob using
    'options' or an svm_parameter param.
    If '-v' is specified in 'options' (i.e., cross validation)
    either accuracy (ACC) or mean-squared error (MSE) is returned.
    Raises TypeError for unsupported argument types and ValueError when the
    input format or the parameters are rejected.
    options:
        -s svm_type : set type of SVM (default 0)
            0 -- C-SVC		(multi-class classification)
            1 -- nu-SVC		(multi-class classification)
            2 -- one-class SVM
            3 -- epsilon-SVR	(regression)
            4 -- nu-SVR		(regression)
        -t kernel_type : set type of kernel function (default 2)
            0 -- linear: u'*v
            1 -- polynomial: (gamma*u'*v + coef0)^degree
            2 -- radial basis function: exp(-gamma*|u-v|^2)
            3 -- sigmoid: tanh(gamma*u'*v + coef0)
            4 -- precomputed kernel (kernel values in training_set_file)
        -d degree : set degree in kernel function (default 3)
        -g gamma : set gamma in kernel function (default 1/num_features)
        -r coef0 : set coef0 in kernel function (default 0)
        -c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)
        -n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)
        -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
        -m cachesize : set cache memory size in MB (default 100)
        -e epsilon : set tolerance of termination criterion (default 0.001)
        -h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)
        -b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)
        -wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)
        -v n: n-fold cross validation mode
        -q : quiet mode (no outputs)
    """
    prob, param = None, None
    if isinstance(arg1, (list, tuple)):
        # Explicit check instead of assert, which is stripped under "python -O".
        if not isinstance(arg2, (list, tuple)) or not isinstance(arg3, list):
            raise TypeError("Wrong types for the arguments")
        W, y, x, options = arg1, arg2, arg3, arg4
        param = svm_parameter(options)
        prob = svm_problem(W, y, x, isKernel=(param.kernel_type == PRECOMPUTED))
    elif isinstance(arg1, svm_problem):
        prob = arg1
        if isinstance(arg2, svm_parameter):
            param = arg2
        else:
            param = svm_parameter(arg2)
    if prob is None or param is None:
        raise TypeError("Wrong types for the arguments")

    if param.kernel_type == PRECOMPUTED:
        # Every instance must start with a "0:<serial number>" node.
        for xi in prob.x_space:
            first = xi[0]
            if first.index != 0:
                raise ValueError('Wrong input format: first column must be 0:sample_serial_number')
            if first.value <= 0 or first.value > prob.n:
                raise ValueError('Wrong input format: sample_serial_number out of range')

    # gamma == 0 means "not set": fall back to 1 / (largest feature index).
    if param.gamma == 0 and prob.n > 0:
        param.gamma = 1.0 / prob.n
    libsvm.svm_set_print_string_function(param.print_func)
    err_msg = libsvm.svm_check_parameter(prob, param)
    if err_msg:
        # c_char_p results are bytes on Python 3; decode so the message does
        # not read "Error: b'...'".
        if not isinstance(err_msg, str):
            err_msg = err_msg.decode()
        raise ValueError('Error: %s' % err_msg)

    if param.cross_validation:
        l, nr_fold = prob.l, param.nr_fold
        target = (c_double * l)()
        libsvm.svm_cross_validation(prob, param, nr_fold, target)
        ACC, MSE, SCC = evaluations(prob.y[:l], target[:l])
        if param.svm_type in [EPSILON_SVR, NU_SVR]:
            print("Cross Validation Mean squared error = %g" % MSE)
            print("Cross Validation Squared correlation coefficient = %g" % SCC)
            return MSE
        else:
            print("Cross Validation Accuracy = %g%%" % ACC)
            return ACC
    else:
        m = libsvm.svm_train(prob, param)
        m = toPyModel(m)

        # If prob is destroyed, data including SVs pointed by m can remain.
        m.x_space = prob.x_space
        return m
|
||||
|
||||
def svm_predict(y, x, m, options=""):
    """
    svm_predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals)

    Predict data (y, x) with the SVM model m.
    options:
        -b probability_estimates: whether to predict probability estimates,
            0 or 1 (default 0); for one-class SVM only 0 is supported.
        -q : quiet mode (no outputs).

    The return tuple contains
    p_labels: a list of predicted labels
    p_acc: a tuple including  accuracy (for classification), mean-squared
           error, and squared correlation coefficient (for regression).
    p_vals: a list of decision values or probability estimates (if '-b 1'
            is specified). If k is the number of classes, for decision values,
            each element includes results of predicting k(k-1)/2 binary-class
            SVMs. For probabilities, each element contains k values indicating
            the probability that the testing instance is in each class.
            Note that the order of classes here is the same as 'model.label'
            field in the model structure.

    Raises ValueError for unknown options, when '-b 1' is requested from a
    model without probability information, or when len(y) != len(x).
    """

    def info(s):
        print(s)

    predict_probability = 0
    argv = options.split()
    i = 0
    while i < len(argv):
        if argv[i] == '-b':
            i += 1
            predict_probability = int(argv[i])
        elif argv[i] == '-q':
            info = print_null
        else:
            raise ValueError("Wrong options")
        i += 1

    svm_type = m.get_svm_type()
    is_prob_model = m.is_probability_model()
    nr_class = m.get_nr_class()
    is_kernel = (m.param.kernel_type == PRECOMPUTED)
    pred_labels = []
    pred_values = []

    if predict_probability:
        if not is_prob_model:
            raise ValueError("Model does not support probabiliy estimates")

        if svm_type in [NU_SVR, EPSILON_SVR]:
            info("Prob. model for test data: target value = predicted value + z,\n"
                 "z: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g" % m.get_svr_probability())
            nr_class = 0

        prob_estimates = (c_double * nr_class)()
        for xi in x:
            xi, idx = gen_svm_nodearray(xi, isKernel=is_kernel)
            label = libsvm.svm_predict_probability(m, xi, prob_estimates)
            values = prob_estimates[:nr_class]
            pred_labels += [label]
            pred_values += [values]
    else:
        if is_prob_model:
            info("Model supports probability estimates, but disabled in predicton.")
        # One-class and regression models produce a single decision value.
        # nu-SVC is a multi-class classifier and needs k(k-1)/2 slots like
        # C-SVC; giving it a 1-element buffer lets the C side write past it.
        if svm_type in (ONE_CLASS, EPSILON_SVR, NU_SVR):
            nr_classifier = 1
        else:
            nr_classifier = nr_class*(nr_class-1)//2
        dec_values = (c_double * nr_classifier)()
        for xi in x:
            xi, idx = gen_svm_nodearray(xi, isKernel=is_kernel)
            label = libsvm.svm_predict_values(m, xi, dec_values)
            if nr_class == 1:
                values = [1]
            else:
                values = dec_values[:nr_classifier]
            pred_labels += [label]
            pred_values += [values]

    ACC, MSE, SCC = evaluations(y, pred_labels)
    l = len(y)
    if svm_type in [EPSILON_SVR, NU_SVR]:
        info("Mean squared error = %g (regression)" % MSE)
        info("Squared correlation coefficient = %g (regression)" % SCC)
    else:
        # Round rather than truncate: l*ACC/100 can land just below the true
        # integer count because of floating-point error.
        info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(round(l*ACC/100)), l))

    return pred_labels, (ACC, MSE, SCC), pred_values
|
||||
|
||||
|
Binary file not shown.
|
@ -0,0 +1,239 @@
|
|||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include "svm.h"
|
||||
|
||||
/* printf-compatible sink that discards its arguments; installed by -q. */
int print_null(const char *s,...)
{
	(void)s;
	return 0;
}

/* Printer for informational messages; printf unless replaced by print_null. */
static int (*info)(const char *fmt,...) = &printf;

/* Node buffer for the instance being parsed, and its capacity in nodes. */
struct svm_node *x;
int max_nr_attr = 64;

/* Loaded model and the value given to the -b option. */
struct svm_model* model;
int predict_probability=0;

/* Line buffer shared with readline(), and its capacity in bytes. */
static char *line = NULL;
static int max_line_len;
|
||||
|
||||
static char* readline(FILE *input)
|
||||
{
|
||||
int len;
|
||||
|
||||
if(fgets(line,max_line_len,input) == NULL)
|
||||
return NULL;
|
||||
|
||||
while(strrchr(line,'\n') == NULL)
|
||||
{
|
||||
max_line_len *= 2;
|
||||
line = (char *) realloc(line,max_line_len);
|
||||
len = (int) strlen(line);
|
||||
if(fgets(line+len,max_line_len-len,input) == NULL)
|
||||
break;
|
||||
}
|
||||
return line;
|
||||
}
|
||||
|
||||
/* Report a malformed input line (1-based number) on stderr and exit with 1. */
void exit_input_error(int line_num)
{
	fprintf(stderr, "Wrong input format at line %d\n", line_num);
	exit(1);
}
|
||||
|
||||
void predict(FILE *input, FILE *output)
|
||||
{
|
||||
int correct = 0;
|
||||
int total = 0;
|
||||
double error = 0;
|
||||
double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;
|
||||
|
||||
int svm_type=svm_get_svm_type(model);
|
||||
int nr_class=svm_get_nr_class(model);
|
||||
double *prob_estimates=NULL;
|
||||
int j;
|
||||
|
||||
if(predict_probability)
|
||||
{
|
||||
if (svm_type==NU_SVR || svm_type==EPSILON_SVR)
|
||||
info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g\n",svm_get_svr_probability(model));
|
||||
else
|
||||
{
|
||||
int *labels=(int *) malloc(nr_class*sizeof(int));
|
||||
svm_get_labels(model,labels);
|
||||
prob_estimates = (double *) malloc(nr_class*sizeof(double));
|
||||
fprintf(output,"labels");
|
||||
for(j=0;j<nr_class;j++)
|
||||
fprintf(output," %d",labels[j]);
|
||||
fprintf(output,"\n");
|
||||
free(labels);
|
||||
}
|
||||
}
|
||||
|
||||
max_line_len = 1024;
|
||||
line = (char *)malloc(max_line_len*sizeof(char));
|
||||
while(readline(input) != NULL)
|
||||
{
|
||||
int i = 0;
|
||||
double target_label, predict_label;
|
||||
char *idx, *val, *label, *endptr;
|
||||
int inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has <index> start from 0
|
||||
|
||||
label = strtok(line," \t\n");
|
||||
if(label == NULL) // empty line
|
||||
exit_input_error(total+1);
|
||||
|
||||
target_label = strtod(label,&endptr);
|
||||
if(endptr == label || *endptr != '\0')
|
||||
exit_input_error(total+1);
|
||||
|
||||
while(1)
|
||||
{
|
||||
if(i>=max_nr_attr-1) // need one more for index = -1
|
||||
{
|
||||
max_nr_attr *= 2;
|
||||
x = (struct svm_node *) realloc(x,max_nr_attr*sizeof(struct svm_node));
|
||||
}
|
||||
|
||||
idx = strtok(NULL,":");
|
||||
val = strtok(NULL," \t");
|
||||
|
||||
if(val == NULL)
|
||||
break;
|
||||
errno = 0;
|
||||
x[i].index = (int) strtol(idx,&endptr,10);
|
||||
if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
|
||||
exit_input_error(total+1);
|
||||
else
|
||||
inst_max_index = x[i].index;
|
||||
|
||||
errno = 0;
|
||||
x[i].value = strtod(val,&endptr);
|
||||
if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
|
||||
exit_input_error(total+1);
|
||||
|
||||
++i;
|
||||
}
|
||||
x[i].index = -1;
|
||||
|
||||
if (predict_probability && (svm_type==C_SVC || svm_type==NU_SVC))
|
||||
{
|
||||
predict_label = svm_predict_probability(model,x,prob_estimates);
|
||||
fprintf(output,"%g",predict_label);
|
||||
for(j=0;j<nr_class;j++)
|
||||
fprintf(output," %g",prob_estimates[j]);
|
||||
fprintf(output,"\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
predict_label = svm_predict(model,x);
|
||||
fprintf(output,"%g\n",predict_label);
|
||||
}
|
||||
|
||||
if(predict_label == target_label)
|
||||
++correct;
|
||||
error += (predict_label-target_label)*(predict_label-target_label);
|
||||
sump += predict_label;
|
||||
sumt += target_label;
|
||||
sumpp += predict_label*predict_label;
|
||||
sumtt += target_label*target_label;
|
||||
sumpt += predict_label*target_label;
|
||||
++total;
|
||||
}
|
||||
if (svm_type==NU_SVR || svm_type==EPSILON_SVR)
|
||||
{
|
||||
info("Mean squared error = %g (regression)\n",error/total);
|
||||
info("Squared correlation coefficient = %g (regression)\n",
|
||||
((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
|
||||
((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
|
||||
);
|
||||
}
|
||||
else
|
||||
info("Accuracy = %g%% (%d/%d) (classification)\n",
|
||||
(double)correct/total*100,correct,total);
|
||||
if(predict_probability)
|
||||
free(prob_estimates);
|
||||
}
|
||||
|
||||
/* Print the svm-predict usage text to stdout and exit with status 1. */
void exit_with_help()
{
	printf(
		"Usage: svm-predict [options] test_file model_file output_file\n"
		"options:\n"
		"-b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0); for one-class SVM only 0 is supported\n"
		"-q : quiet mode (no outputs)\n"
	);
	exit(1);
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
FILE *input, *output;
|
||||
int i;
|
||||
// parse options
|
||||
for(i=1;i<argc;i++)
|
||||
{
|
||||
if(argv[i][0] != '-') break;
|
||||
++i;
|
||||
switch(argv[i-1][1])
|
||||
{
|
||||
case 'b':
|
||||
predict_probability = atoi(argv[i]);
|
||||
break;
|
||||
case 'q':
|
||||
info = &print_null;
|
||||
i--;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr,"Unknown option: -%c\n", argv[i-1][1]);
|
||||
exit_with_help();
|
||||
}
|
||||
}
|
||||
|
||||
if(i>=argc-2)
|
||||
exit_with_help();
|
||||
|
||||
input = fopen(argv[i],"r");
|
||||
if(input == NULL)
|
||||
{
|
||||
fprintf(stderr,"can't open input file %s\n",argv[i]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
output = fopen(argv[i+2],"w");
|
||||
if(output == NULL)
|
||||
{
|
||||
fprintf(stderr,"can't open output file %s\n",argv[i+2]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if((model=svm_load_model(argv[i+1]))==0)
|
||||
{
|
||||
fprintf(stderr,"can't open model file %s\n",argv[i+1]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
x = (struct svm_node *) malloc(max_nr_attr*sizeof(struct svm_node));
|
||||
if(predict_probability)
|
||||
{
|
||||
if(svm_check_probability_model(model)==0)
|
||||
{
|
||||
fprintf(stderr,"Model does not support probabiliy estimates\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(svm_check_probability_model(model)!=0)
|
||||
info("Model supports probability estimates, but disabled in prediction.\n");
|
||||
}
|
||||
|
||||
predict(input,output);
|
||||
svm_free_and_destroy_model(&model);
|
||||
free(x);
|
||||
free(line);
|
||||
fclose(input);
|
||||
fclose(output);
|
||||
return 0;
|
||||
}
|
Binary file not shown.
|
@ -0,0 +1,397 @@
|
|||
#include <float.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Print the svm-scale usage text to stdout and exit with status 1. */
void exit_with_help()
{
	printf(
		"Usage: svm-scale [options] data_filename\n"
		"options:\n"
		"-l lower : x scaling lower limit (default -1)\n"
		"-u upper : x scaling upper limit (default +1)\n"
		"-y y_lower y_upper : y scaling limits (default: no y scaling)\n"
		"-s save_filename : save scaling parameters to save_filename\n"
		"-r restore_filename : restore scaling parameters from restore_filename\n"
	);
	exit(1);
}
|
||||
|
||||
/* Line buffer shared with readline(), and its capacity in bytes. */
char *line = NULL;
int max_line_len = 1024;

/* Target range for features (-l / -u) and for labels (-y). */
double lower = -1.0;
double upper = 1.0;
double y_lower;
double y_upper;
int y_scaling = 0;

/* Observed per-feature and label extrema, indexed by feature index. */
double *feature_max;
double *feature_min;
double y_max = -DBL_MAX;
double y_min = DBL_MAX;
int max_index;
int min_index;

/* Counts of features seen in the input and of features emitted by output(). */
long int num_nonzeros = 0;
long int new_num_nonzeros = 0;

#define max(x,y) (((x)>(y))?(x):(y))
#define min(x,y) (((x)<(y))?(x):(y))

void output_target(double value);
void output(int index, double value);
char* readline(FILE *input);
int clean_up(FILE *fp_restore, FILE *fp, const char *msg);
|
||||
|
||||
int main(int argc,char **argv)
|
||||
{
|
||||
int i,index;
|
||||
FILE *fp, *fp_restore = NULL;
|
||||
char *save_filename = NULL;
|
||||
char *restore_filename = NULL;
|
||||
|
||||
for(i=1;i<argc;i++)
|
||||
{
|
||||
if(argv[i][0] != '-') break;
|
||||
++i;
|
||||
switch(argv[i-1][1])
|
||||
{
|
||||
case 'l': lower = atof(argv[i]); break;
|
||||
case 'u': upper = atof(argv[i]); break;
|
||||
case 'y':
|
||||
y_lower = atof(argv[i]);
|
||||
++i;
|
||||
y_upper = atof(argv[i]);
|
||||
y_scaling = 1;
|
||||
break;
|
||||
case 's': save_filename = argv[i]; break;
|
||||
case 'r': restore_filename = argv[i]; break;
|
||||
default:
|
||||
fprintf(stderr,"unknown option\n");
|
||||
exit_with_help();
|
||||
}
|
||||
}
|
||||
|
||||
if(!(upper > lower) || (y_scaling && !(y_upper > y_lower)))
|
||||
{
|
||||
fprintf(stderr,"inconsistent lower/upper specification\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if(restore_filename && save_filename)
|
||||
{
|
||||
fprintf(stderr,"cannot use -r and -s simultaneously\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if(argc != i+1)
|
||||
exit_with_help();
|
||||
|
||||
fp=fopen(argv[i],"r");
|
||||
|
||||
if(fp==NULL)
|
||||
{
|
||||
fprintf(stderr,"can't open file %s\n", argv[i]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
line = (char *) malloc(max_line_len*sizeof(char));
|
||||
|
||||
#define SKIP_TARGET\
|
||||
while(isspace(*p)) ++p;\
|
||||
while(!isspace(*p)) ++p;
|
||||
|
||||
#define SKIP_ELEMENT\
|
||||
while(*p!=':') ++p;\
|
||||
++p;\
|
||||
while(isspace(*p)) ++p;\
|
||||
while(*p && !isspace(*p)) ++p;
|
||||
|
||||
/* assumption: min index of attributes is 1 */
|
||||
/* pass 1: find out max index of attributes */
|
||||
max_index = 0;
|
||||
min_index = 1;
|
||||
|
||||
if(restore_filename)
|
||||
{
|
||||
int idx, c;
|
||||
|
||||
fp_restore = fopen(restore_filename,"r");
|
||||
if(fp_restore==NULL)
|
||||
{
|
||||
fprintf(stderr,"can't open file %s\n", restore_filename);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
c = fgetc(fp_restore);
|
||||
if(c == 'y')
|
||||
{
|
||||
readline(fp_restore);
|
||||
readline(fp_restore);
|
||||
readline(fp_restore);
|
||||
}
|
||||
readline(fp_restore);
|
||||
readline(fp_restore);
|
||||
|
||||
while(fscanf(fp_restore,"%d %*f %*f\n",&idx) == 1)
|
||||
max_index = max(idx,max_index);
|
||||
rewind(fp_restore);
|
||||
}
|
||||
|
||||
while(readline(fp)!=NULL)
|
||||
{
|
||||
char *p=line;
|
||||
|
||||
SKIP_TARGET
|
||||
|
||||
while(sscanf(p,"%d:%*f",&index)==1)
|
||||
{
|
||||
max_index = max(max_index, index);
|
||||
min_index = min(min_index, index);
|
||||
SKIP_ELEMENT
|
||||
num_nonzeros++;
|
||||
}
|
||||
}
|
||||
|
||||
if(min_index < 1)
|
||||
fprintf(stderr,
|
||||
"WARNING: minimal feature index is %d, but indices should start from 1\n", min_index);
|
||||
|
||||
rewind(fp);
|
||||
|
||||
feature_max = (double *)malloc((max_index+1)* sizeof(double));
|
||||
feature_min = (double *)malloc((max_index+1)* sizeof(double));
|
||||
|
||||
if(feature_max == NULL || feature_min == NULL)
|
||||
{
|
||||
fprintf(stderr,"can't allocate enough memory\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for(i=0;i<=max_index;i++)
|
||||
{
|
||||
feature_max[i]=-DBL_MAX;
|
||||
feature_min[i]=DBL_MAX;
|
||||
}
|
||||
|
||||
/* pass 2: find out min/max value */
|
||||
while(readline(fp)!=NULL)
|
||||
{
|
||||
char *p=line;
|
||||
int next_index=1;
|
||||
double target;
|
||||
double value;
|
||||
|
||||
if (sscanf(p,"%lf",&target) != 1)
|
||||
return clean_up(fp_restore, fp, "ERROR: failed to read labels\n");
|
||||
y_max = max(y_max,target);
|
||||
y_min = min(y_min,target);
|
||||
|
||||
SKIP_TARGET
|
||||
|
||||
while(sscanf(p,"%d:%lf",&index,&value)==2)
|
||||
{
|
||||
for(i=next_index;i<index;i++)
|
||||
{
|
||||
feature_max[i]=max(feature_max[i],0);
|
||||
feature_min[i]=min(feature_min[i],0);
|
||||
}
|
||||
|
||||
feature_max[index]=max(feature_max[index],value);
|
||||
feature_min[index]=min(feature_min[index],value);
|
||||
|
||||
SKIP_ELEMENT
|
||||
next_index=index+1;
|
||||
}
|
||||
|
||||
for(i=next_index;i<=max_index;i++)
|
||||
{
|
||||
feature_max[i]=max(feature_max[i],0);
|
||||
feature_min[i]=min(feature_min[i],0);
|
||||
}
|
||||
}
|
||||
|
||||
rewind(fp);
|
||||
|
||||
/* pass 2.5: save/restore feature_min/feature_max */
|
||||
|
||||
if(restore_filename)
|
||||
{
|
||||
/* fp_restore rewinded in finding max_index */
|
||||
int idx, c;
|
||||
double fmin, fmax;
|
||||
int next_index = 1;
|
||||
|
||||
if((c = fgetc(fp_restore)) == 'y')
|
||||
{
|
||||
if(fscanf(fp_restore, "%lf %lf\n", &y_lower, &y_upper) != 2 ||
|
||||
fscanf(fp_restore, "%lf %lf\n", &y_min, &y_max) != 2)
|
||||
return clean_up(fp_restore, fp, "ERROR: failed to read scaling parameters\n");
|
||||
y_scaling = 1;
|
||||
}
|
||||
else
|
||||
ungetc(c, fp_restore);
|
||||
|
||||
if (fgetc(fp_restore) == 'x')
|
||||
{
|
||||
if(fscanf(fp_restore, "%lf %lf\n", &lower, &upper) != 2)
|
||||
return clean_up(fp_restore, fp, "ERROR: failed to read scaling parameters\n");
|
||||
while(fscanf(fp_restore,"%d %lf %lf\n",&idx,&fmin,&fmax)==3)
|
||||
{
|
||||
for(i = next_index;i<idx;i++)
|
||||
if(feature_min[i] != feature_max[i])
|
||||
fprintf(stderr,
|
||||
"WARNING: feature index %d appeared in file %s was not seen in the scaling factor file %s.\n",
|
||||
i, argv[argc-1], restore_filename);
|
||||
|
||||
feature_min[idx] = fmin;
|
||||
feature_max[idx] = fmax;
|
||||
|
||||
next_index = idx + 1;
|
||||
}
|
||||
|
||||
for(i=next_index;i<=max_index;i++)
|
||||
if(feature_min[i] != feature_max[i])
|
||||
fprintf(stderr,
|
||||
"WARNING: feature index %d appeared in file %s was not seen in the scaling factor file %s.\n",
|
||||
i, argv[argc-1], restore_filename);
|
||||
}
|
||||
fclose(fp_restore);
|
||||
}
|
||||
|
||||
if(save_filename)
|
||||
{
|
||||
FILE *fp_save = fopen(save_filename,"w");
|
||||
if(fp_save==NULL)
|
||||
{
|
||||
fprintf(stderr,"can't open file %s\n", save_filename);
|
||||
exit(1);
|
||||
}
|
||||
if(y_scaling)
|
||||
{
|
||||
fprintf(fp_save, "y\n");
|
||||
fprintf(fp_save, "%.16g %.16g\n", y_lower, y_upper);
|
||||
fprintf(fp_save, "%.16g %.16g\n", y_min, y_max);
|
||||
}
|
||||
fprintf(fp_save, "x\n");
|
||||
fprintf(fp_save, "%.16g %.16g\n", lower, upper);
|
||||
for(i=1;i<=max_index;i++)
|
||||
{
|
||||
if(feature_min[i]!=feature_max[i])
|
||||
fprintf(fp_save,"%d %.16g %.16g\n",i,feature_min[i],feature_max[i]);
|
||||
}
|
||||
|
||||
if(min_index < 1)
|
||||
fprintf(stderr,
|
||||
"WARNING: scaling factors with indices smaller than 1 are not stored to the file %s.\n", save_filename);
|
||||
|
||||
fclose(fp_save);
|
||||
}
|
||||
|
||||
/* pass 3: scale */
|
||||
while(readline(fp)!=NULL)
|
||||
{
|
||||
char *p=line;
|
||||
int next_index=1;
|
||||
double target;
|
||||
double value;
|
||||
|
||||
if (sscanf(p,"%lf",&target) != 1)
|
||||
return clean_up(NULL, fp, "ERROR: failed to read labels\n");
|
||||
output_target(target);
|
||||
|
||||
SKIP_TARGET
|
||||
|
||||
while(sscanf(p,"%d:%lf",&index,&value)==2)
|
||||
{
|
||||
for(i=next_index;i<index;i++)
|
||||
output(i,0);
|
||||
|
||||
output(index,value);
|
||||
|
||||
SKIP_ELEMENT
|
||||
next_index=index+1;
|
||||
}
|
||||
|
||||
for(i=next_index;i<=max_index;i++)
|
||||
output(i,0);
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
if (new_num_nonzeros > num_nonzeros)
|
||||
fprintf(stderr,
|
||||
"WARNING: original #nonzeros %ld\n"
|
||||
" new #nonzeros %ld\n"
|
||||
"Use -l 0 if many original feature values are zeros\n",
|
||||
num_nonzeros, new_num_nonzeros);
|
||||
|
||||
free(line);
|
||||
free(feature_max);
|
||||
free(feature_min);
|
||||
fclose(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
char* readline(FILE *input)
|
||||
{
|
||||
int len;
|
||||
|
||||
if(fgets(line,max_line_len,input) == NULL)
|
||||
return NULL;
|
||||
|
||||
while(strrchr(line,'\n') == NULL)
|
||||
{
|
||||
max_line_len *= 2;
|
||||
line = (char *) realloc(line, max_line_len);
|
||||
len = (int) strlen(line);
|
||||
if(fgets(line+len,max_line_len-len,input) == NULL)
|
||||
break;
|
||||
}
|
||||
return line;
|
||||
}
|
||||
|
||||
void output_target(double value)
|
||||
{
|
||||
if(y_scaling)
|
||||
{
|
||||
if(value == y_min)
|
||||
value = y_lower;
|
||||
else if(value == y_max)
|
||||
value = y_upper;
|
||||
else value = y_lower + (y_upper-y_lower) *
|
||||
(value - y_min)/(y_max-y_min);
|
||||
}
|
||||
printf("%g ",value);
|
||||
}
|
||||
|
||||
void output(int index, double value)
|
||||
{
|
||||
/* skip single-valued attribute */
|
||||
if(feature_max[index] == feature_min[index])
|
||||
return;
|
||||
|
||||
if(value == feature_min[index])
|
||||
value = lower;
|
||||
else if(value == feature_max[index])
|
||||
value = upper;
|
||||
else
|
||||
value = lower + (upper-lower) *
|
||||
(value-feature_min[index])/
|
||||
(feature_max[index]-feature_min[index]);
|
||||
|
||||
if(value != 0)
|
||||
{
|
||||
printf("%d:%g ",index, value);
|
||||
new_num_nonzeros++;
|
||||
}
|
||||
}
|
||||
|
||||
int clean_up(FILE *fp_restore, FILE *fp, const char* msg)
|
||||
{
|
||||
fprintf(stderr, "%s", msg);
|
||||
free(line);
|
||||
free(feature_max);
|
||||
free(feature_min);
|
||||
fclose(fp);
|
||||
if (fp_restore)
|
||||
fclose(fp_restore);
|
||||
return -1;
|
||||
}
|
||||
|
Binary file not shown.
|
@ -0,0 +1,395 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include "svm.h"
|
||||
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
|
||||
|
||||
// No-op print callback: parse_command_line selects it for the -q option and
// hands it to svm_set_print_string_function, so the passed string is discarded.
void print_null(const char *s) {}
|
||||
|
||||
// Print the svm-train usage text to stdout and terminate with exit status 1.
void exit_with_help()
{
	static const char usage[] =
	"Usage: svm-train [options] training_set_file [model_file]\n"
	"options:\n"
	"-s svm_type : set type of SVM (default 0)\n"
	" 0 -- C-SVC (multi-class classification)\n"
	" 1 -- nu-SVC (multi-class classification)\n"
	" 2 -- one-class SVM\n"
	" 3 -- epsilon-SVR (regression)\n"
	" 4 -- nu-SVR (regression)\n"
	"-t kernel_type : set type of kernel function (default 2)\n"
	" 0 -- linear: u'*v\n"
	" 1 -- polynomial: (gamma*u'*v + coef0)^degree\n"
	" 2 -- radial basis function: exp(-gamma*|u-v|^2)\n"
	" 3 -- sigmoid: tanh(gamma*u'*v + coef0)\n"
	" 4 -- precomputed kernel (kernel values in training_set_file)\n"
	"-d degree : set degree in kernel function (default 3)\n"
	"-g gamma : set gamma in kernel function (default 1/num_features)\n"
	"-r coef0 : set coef0 in kernel function (default 0)\n"
	"-c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)\n"
	"-n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)\n"
	"-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n"
	"-m cachesize : set cache memory size in MB (default 100)\n"
	"-e epsilon : set tolerance of termination criterion (default 0.001)\n"
	"-h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)\n"
	"-b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)\n"
	"-wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)\n"
	"-v n: n-fold cross validation mode\n"
	"-q : quiet mode (no outputs)\n"
	"-W weight_file: set weight file\n";

	// the text holds no conversion specifications, so it is written verbatim
	fputs(usage, stdout);
	exit(1);
}
|
||||
|
||||
// Report a malformed input line on stderr and terminate with exit status 1.
// line_num is the number printed in the message; read_problem passes i+1
// for its 0-based instance counter i.
void exit_input_error(int line_num)
|
||||
{
|
||||
fprintf(stderr,"Wrong input format at line %d\n", line_num);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name);
|
||||
void read_problem(const char *filename);
|
||||
void do_cross_validation();
|
||||
|
||||
struct svm_parameter param; // set by parse_command_line
|
||||
struct svm_problem prob; // set by read_problem
|
||||
struct svm_model *model;
|
||||
struct svm_node *x_space;
|
||||
char *weight_file;
|
||||
int cross_validation;
|
||||
int nr_fold;
|
||||
|
||||
static char *line = NULL;
|
||||
static int max_line_len;
|
||||
|
||||
static char* readline(FILE *input)
|
||||
{
|
||||
int len;
|
||||
|
||||
if(fgets(line,max_line_len,input) == NULL)
|
||||
return NULL;
|
||||
|
||||
while(strrchr(line,'\n') == NULL)
|
||||
{
|
||||
max_line_len *= 2;
|
||||
line = (char *) realloc(line,max_line_len);
|
||||
len = (int) strlen(line);
|
||||
if(fgets(line+len,max_line_len-len,input) == NULL)
|
||||
break;
|
||||
}
|
||||
return line;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
char input_file_name[1024];
|
||||
char model_file_name[1024];
|
||||
const char *error_msg;
|
||||
|
||||
parse_command_line(argc, argv, input_file_name, model_file_name);
|
||||
read_problem(input_file_name);
|
||||
error_msg = svm_check_parameter(&prob,¶m);
|
||||
|
||||
if(error_msg)
|
||||
{
|
||||
fprintf(stderr,"ERROR: %s\n",error_msg);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if(cross_validation)
|
||||
{
|
||||
do_cross_validation();
|
||||
}
|
||||
else
|
||||
{
|
||||
model = svm_train(&prob,¶m);
|
||||
if(svm_save_model(model_file_name,model))
|
||||
{
|
||||
fprintf(stderr, "can't save model to file %s\n", model_file_name);
|
||||
exit(1);
|
||||
}
|
||||
svm_free_and_destroy_model(&model);
|
||||
}
|
||||
svm_destroy_param(¶m);
|
||||
free(prob.y);
|
||||
free(prob.x);
|
||||
free(x_space);
|
||||
free(line);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void do_cross_validation()
|
||||
{
|
||||
int i;
|
||||
int total_correct = 0;
|
||||
double total_error = 0;
|
||||
double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
|
||||
double *target = Malloc(double,prob.l);
|
||||
|
||||
svm_cross_validation(&prob,¶m,nr_fold,target);
|
||||
if(param.svm_type == EPSILON_SVR ||
|
||||
param.svm_type == NU_SVR)
|
||||
{
|
||||
for(i=0;i<prob.l;i++)
|
||||
{
|
||||
double y = prob.y[i];
|
||||
double v = target[i];
|
||||
total_error += (v-y)*(v-y);
|
||||
sumv += v;
|
||||
sumy += y;
|
||||
sumvv += v*v;
|
||||
sumyy += y*y;
|
||||
sumvy += v*y;
|
||||
}
|
||||
printf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
|
||||
printf("Cross Validation Squared correlation coefficient = %g\n",
|
||||
((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
|
||||
((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
for(i=0;i<prob.l;i++)
|
||||
if(target[i] == prob.y[i])
|
||||
++total_correct;
|
||||
printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
|
||||
}
|
||||
free(target);
|
||||
}
|
||||
|
||||
void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name)
|
||||
{
|
||||
int i;
|
||||
void (*print_func)(const char*) = NULL; // default printing to stdout
|
||||
|
||||
// default values
|
||||
param.svm_type = C_SVC;
|
||||
param.kernel_type = RBF;
|
||||
param.degree = 3;
|
||||
param.gamma = 0; // 1/num_features
|
||||
param.coef0 = 0;
|
||||
param.nu = 0.5;
|
||||
param.cache_size = 100;
|
||||
param.C = 1;
|
||||
param.eps = 1e-3;
|
||||
param.p = 0.1;
|
||||
param.shrinking = 1;
|
||||
param.probability = 0;
|
||||
param.nr_weight = 0;
|
||||
param.weight_label = NULL;
|
||||
param.weight = NULL;
|
||||
cross_validation = 0;
|
||||
|
||||
// parse options
|
||||
for(i=1;i<argc;i++)
|
||||
{
|
||||
if(argv[i][0] != '-') break;
|
||||
if(++i>=argc)
|
||||
exit_with_help();
|
||||
switch(argv[i-1][1])
|
||||
{
|
||||
case 's':
|
||||
param.svm_type = atoi(argv[i]);
|
||||
break;
|
||||
case 't':
|
||||
param.kernel_type = atoi(argv[i]);
|
||||
break;
|
||||
case 'd':
|
||||
param.degree = atoi(argv[i]);
|
||||
break;
|
||||
case 'g':
|
||||
param.gamma = atof(argv[i]);
|
||||
break;
|
||||
case 'r':
|
||||
param.coef0 = atof(argv[i]);
|
||||
break;
|
||||
case 'n':
|
||||
param.nu = atof(argv[i]);
|
||||
break;
|
||||
case 'm':
|
||||
param.cache_size = atof(argv[i]);
|
||||
break;
|
||||
case 'c':
|
||||
param.C = atof(argv[i]);
|
||||
break;
|
||||
case 'e':
|
||||
param.eps = atof(argv[i]);
|
||||
break;
|
||||
case 'p':
|
||||
param.p = atof(argv[i]);
|
||||
break;
|
||||
case 'h':
|
||||
param.shrinking = atoi(argv[i]);
|
||||
break;
|
||||
case 'b':
|
||||
param.probability = atoi(argv[i]);
|
||||
break;
|
||||
case 'q':
|
||||
print_func = &print_null;
|
||||
i--;
|
||||
break;
|
||||
case 'v':
|
||||
cross_validation = 1;
|
||||
nr_fold = atoi(argv[i]);
|
||||
if(nr_fold < 2)
|
||||
{
|
||||
fprintf(stderr,"n-fold cross validation: n must >= 2\n");
|
||||
exit_with_help();
|
||||
}
|
||||
break;
|
||||
case 'w':
|
||||
++param.nr_weight;
|
||||
param.weight_label = (int *)realloc(param.weight_label,sizeof(int)*param.nr_weight);
|
||||
param.weight = (double *)realloc(param.weight,sizeof(double)*param.nr_weight);
|
||||
param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]);
|
||||
param.weight[param.nr_weight-1] = atof(argv[i]);
|
||||
break;
|
||||
case 'W':
|
||||
weight_file = argv[i];
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr,"Unknown option: -%c\n", argv[i-1][1]);
|
||||
exit_with_help();
|
||||
}
|
||||
}
|
||||
|
||||
svm_set_print_string_function(print_func);
|
||||
|
||||
// determine filenames
|
||||
|
||||
if(i>=argc)
|
||||
exit_with_help();
|
||||
|
||||
strcpy(input_file_name, argv[i]);
|
||||
|
||||
if(i<argc-1)
|
||||
strcpy(model_file_name,argv[i+1]);
|
||||
else
|
||||
{
|
||||
char *p = strrchr(argv[i],'/');
|
||||
if(p==NULL)
|
||||
p = argv[i];
|
||||
else
|
||||
++p;
|
||||
sprintf(model_file_name,"%s.model",p);
|
||||
}
|
||||
}
|
||||
|
||||
// read in a problem (in svmlight format)
|
||||
|
||||
void read_problem(const char *filename)
|
||||
{
|
||||
int max_index, inst_max_index, i;
|
||||
size_t elements, j;
|
||||
FILE *fp = fopen(filename,"r");
|
||||
char *endptr;
|
||||
char *idx, *val, *label;
|
||||
|
||||
if(fp == NULL)
|
||||
{
|
||||
fprintf(stderr,"can't open input file %s\n",filename);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
prob.l = 0;
|
||||
elements = 0;
|
||||
|
||||
max_line_len = 1024;
|
||||
line = Malloc(char,max_line_len);
|
||||
while(readline(fp)!=NULL)
|
||||
{
|
||||
char *p = strtok(line," \t"); // label
|
||||
|
||||
// features
|
||||
while(1)
|
||||
{
|
||||
p = strtok(NULL," \t");
|
||||
if(p == NULL || *p == '\n') // check '\n' as ' ' may be after the last feature
|
||||
break;
|
||||
++elements;
|
||||
}
|
||||
++elements;
|
||||
++prob.l;
|
||||
}
|
||||
rewind(fp);
|
||||
|
||||
prob.y = Malloc(double,prob.l);
|
||||
prob.x = Malloc(struct svm_node *,prob.l);
|
||||
prob.W = Malloc(double,prob.l);
|
||||
x_space = Malloc(struct svm_node,elements);
|
||||
|
||||
max_index = 0;
|
||||
j=0;
|
||||
for(i=0;i<prob.l;i++)
|
||||
{
|
||||
inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has <index> start from 0
|
||||
readline(fp);
|
||||
prob.x[i] = &x_space[j];
|
||||
label = strtok(line," \t\n");
|
||||
if(label == NULL) // empty line
|
||||
exit_input_error(i+1);
|
||||
|
||||
prob.y[i] = strtod(label,&endptr);
|
||||
if(endptr == label || *endptr != '\0')
|
||||
exit_input_error(i+1);
|
||||
prob.W[i] = 1;
|
||||
|
||||
while(1)
|
||||
{
|
||||
idx = strtok(NULL,":");
|
||||
val = strtok(NULL," \t");
|
||||
|
||||
if(val == NULL)
|
||||
break;
|
||||
|
||||
errno = 0;
|
||||
x_space[j].index = (int) strtol(idx,&endptr,10);
|
||||
if(endptr == idx || errno != 0 || *endptr != '\0' || x_space[j].index <= inst_max_index)
|
||||
exit_input_error(i+1);
|
||||
else
|
||||
inst_max_index = x_space[j].index;
|
||||
|
||||
errno = 0;
|
||||
x_space[j].value = strtod(val,&endptr);
|
||||
if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
|
||||
exit_input_error(i+1);
|
||||
|
||||
++j;
|
||||
}
|
||||
|
||||
if(inst_max_index > max_index)
|
||||
max_index = inst_max_index;
|
||||
x_space[j++].index = -1;
|
||||
}
|
||||
|
||||
if(param.gamma == 0 && max_index > 0)
|
||||
param.gamma = 1.0/max_index;
|
||||
|
||||
if(param.kernel_type == PRECOMPUTED)
|
||||
for(i=0;i<prob.l;i++)
|
||||
{
|
||||
if (prob.x[i][0].index != 0)
|
||||
{
|
||||
fprintf(stderr,"Wrong input format: first column must be 0:sample_serial_number\n");
|
||||
exit(1);
|
||||
}
|
||||
if ((int)prob.x[i][0].value <= 0 || (int)prob.x[i][0].value > max_index)
|
||||
{
|
||||
fprintf(stderr,"Wrong input format: sample_serial_number out of range\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
|
||||
if(weight_file)
|
||||
{
|
||||
fp = fopen(weight_file,"r");
|
||||
for(i=0;i<prob.l;i++)
|
||||
fscanf(fp,"%lf",&prob.W[i]);
|
||||
fclose(fp);
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,21 @@
|
|||
LIBRARY libsvm
|
||||
EXPORTS
|
||||
svm_train @1
|
||||
svm_cross_validation @2
|
||||
svm_save_model @3
|
||||
svm_load_model @4
|
||||
svm_get_svm_type @5
|
||||
svm_get_nr_class @6
|
||||
svm_get_labels @7
|
||||
svm_get_svr_probability @8
|
||||
svm_predict_values @9
|
||||
svm_predict @10
|
||||
svm_predict_probability @11
|
||||
svm_free_model_content @12
|
||||
svm_free_and_destroy_model @13
|
||||
svm_destroy_param @14
|
||||
svm_check_parameter @15
|
||||
svm_check_probability_model @16
|
||||
svm_set_print_string_function @17
|
||||
svm_get_sv_indices @18
|
||||
svm_get_nr_sv @19
|
|
@ -0,0 +1,105 @@
|
|||
#ifndef _LIBSVM_H
|
||||
#define _LIBSVM_H
|
||||
|
||||
#define LIBSVM_VERSION 320
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern int libsvm_version;
|
||||
|
||||
/* One sparse feature entry of an instance: a feature index paired with its value. */
struct svm_node
|
||||
{
|
||||
int index; /* feature index; svm-train's read_problem writes -1 here to end an instance's node list */
|
||||
double value; /* feature value */
|
||||
};
|
||||
|
||||
/* A data set of l instances, as passed to svm_train, svm_cross_validation and svm_check_parameter. */
struct svm_problem
|
||||
{
|
||||
int l; /* number of instances */
|
||||
double *y; /* y[i] is the label/target value of instance i */
|
||||
struct svm_node **x; /* x[i] points to the first svm_node of instance i */
|
||||
double *W; /* instance weight */
|
||||
};
|
||||
|
||||
enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR }; /* svm_type */
|
||||
enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; /* kernel_type */
|
||||
|
||||
/* Kernel and solver settings passed to svm_train, svm_cross_validation and svm_check_parameter. */
struct svm_parameter
|
||||
{
|
||||
int svm_type; /* one of C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR */
|
||||
int kernel_type; /* one of LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED */
|
||||
int degree; /* for poly */
|
||||
double gamma; /* for poly/rbf/sigmoid */
|
||||
double coef0; /* for poly/sigmoid */
|
||||
|
||||
/* these are for training only */
|
||||
double cache_size; /* in MB */
|
||||
double eps; /* stopping criteria */
|
||||
double C; /* for C_SVC, EPSILON_SVR and NU_SVR */
|
||||
int nr_weight; /* for C_SVC */
|
||||
int *weight_label; /* for C_SVC */
|
||||
double* weight; /* for C_SVC */
|
||||
double nu; /* for NU_SVC, ONE_CLASS, and NU_SVR */
|
||||
double p; /* for EPSILON_SVR */
|
||||
int shrinking; /* use the shrinking heuristics */
|
||||
int probability; /* do probability estimates */
|
||||
};
|
||||
|
||||
//
|
||||
// svm_model
|
||||
//
|
||||
/* A model as returned by svm_train or svm_load_model. */
struct svm_model
|
||||
{
|
||||
struct svm_parameter param; /* parameter */
|
||||
int nr_class; /* number of classes, = 2 in regression/one class svm */
|
||||
int l; /* total #SV */
|
||||
struct svm_node **SV; /* SVs (SV[l]) */
|
||||
double **sv_coef; /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
|
||||
double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */
|
||||
double *probA; /* pairwise probability information */
|
||||
double *probB;
|
||||
int *sv_indices; /* sv_indices[0,...,nSV-1] are values in [1,...,num_training_data] to indicate SVs in the training set */
|
||||
|
||||
/* for classification only */
|
||||
|
||||
int *label; /* label of each class (label[k]) */
|
||||
int *nSV; /* number of SVs for each class (nSV[k]) */
|
||||
/* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
|
||||
/* XXX */
|
||||
int free_sv; /* 1 if svm_model is created by svm_load_model*/
|
||||
/* 0 if svm_model is created by svm_train */
|
||||
};
|
||||
|
||||
struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param);
|
||||
void svm_cross_validation(const struct svm_problem *prob, const struct svm_parameter *param, int nr_fold, double *target);
|
||||
|
||||
int svm_save_model(const char *model_file_name, const struct svm_model *model);
|
||||
struct svm_model *svm_load_model(const char *model_file_name);
|
||||
|
||||
int svm_get_svm_type(const struct svm_model *model);
|
||||
int svm_get_nr_class(const struct svm_model *model);
|
||||
void svm_get_labels(const struct svm_model *model, int *label);
|
||||
void svm_get_sv_indices(const struct svm_model *model, int *sv_indices);
|
||||
int svm_get_nr_sv(const struct svm_model *model);
|
||||
double svm_get_svr_probability(const struct svm_model *model);
|
||||
|
||||
double svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values);
|
||||
double svm_predict(const struct svm_model *model, const struct svm_node *x);
|
||||
double svm_predict_probability(const struct svm_model *model, const struct svm_node *x, double* prob_estimates);
|
||||
|
||||
void svm_free_model_content(struct svm_model *model_ptr);
|
||||
void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr);
|
||||
void svm_destroy_param(struct svm_parameter *param);
|
||||
|
||||
const char *svm_check_parameter(const struct svm_problem *prob, const struct svm_parameter *param);
|
||||
int svm_check_probability_model(const struct svm_model *model);
|
||||
|
||||
void svm_set_print_string_function(void (*print_func)(const char *));
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _LIBSVM_H */
|
Binary file not shown.
|
@ -0,0 +1,210 @@
|
|||
This directory includes some useful codes:
|
||||
|
||||
1. subset selection tools.
|
||||
2. parameter selection tools.
|
||||
3. LIBSVM format checking tools
|
||||
|
||||
Part I: Subset selection tools
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
Training large data is time consuming. Sometimes one should work on a
|
||||
smaller subset first. The python script subset.py randomly selects a
|
||||
specified number of samples. For classification data, we provide a
|
||||
stratified selection to ensure the same class distribution in the
|
||||
subset.
|
||||
|
||||
Usage: subset.py [options] dataset number [output1] [output2]
|
||||
|
||||
This script selects a subset of the given data set.
|
||||
|
||||
options:
|
||||
-s method : method of selection (default 0)
|
||||
0 -- stratified selection (classification only)
|
||||
1 -- random selection
|
||||
|
||||
output1 : the subset (optional)
|
||||
output2 : the rest of data (optional)
|
||||
|
||||
If output1 is omitted, the subset will be printed on the screen.
|
||||
|
||||
Example
|
||||
=======
|
||||
|
||||
> python subset.py heart_scale 100 file1 file2
|
||||
|
||||
From heart_scale 100 samples are randomly selected and stored in
|
||||
file1. All remaining instances are stored in file2.
|
||||
|
||||
|
||||
Part II: Parameter Selection Tools
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
grid.py is a parameter selection tool for C-SVM classification using
|
||||
the RBF (radial basis function) kernel. It uses cross validation (CV)
|
||||
technique to estimate the accuracy of each parameter combination in
|
||||
the specified range and helps you to decide the best parameters for
|
||||
your problem.
|
||||
|
||||
grid.py directly executes libsvm binaries (so no python binding is needed)
|
||||
for cross validation and then draws a contour of CV accuracy using gnuplot.
|
||||
You must have libsvm and gnuplot installed before using it. The package
|
||||
gnuplot is available at http://www.gnuplot.info/
|
||||
|
||||
On Mac OSX, the precompiled gnuplot file needs the library AquaTerm,
|
||||
which thus must be installed as well. In addition, this version of
|
||||
gnuplot does not support png, so you need to change "set term png
|
||||
transparent small" and use other image formats. For example, you may
|
||||
have "set term pbm small color".
|
||||
|
||||
Usage: grid.py [grid_options] [svm_options] dataset
|
||||
|
||||
grid_options :
|
||||
-log2c {begin,end,step | "null"} : set the range of c (default -5,15,2)
|
||||
begin,end,step -- c_range = 2^{begin,...,begin+k*step,...,end}
|
||||
"null" -- do not grid with c
|
||||
-log2g {begin,end,step | "null"} : set the range of g (default 3,-15,-2)
|
||||
begin,end,step -- g_range = 2^{begin,...,begin+k*step,...,end}
|
||||
"null" -- do not grid with g
|
||||
-v n : n-fold cross validation (default 5)
|
||||
-svmtrain pathname : set svm executable path and name
|
||||
-gnuplot {pathname | "null"} :
|
||||
pathname -- set gnuplot executable path and name
|
||||
"null" -- do not plot
|
||||
-out {pathname | "null"} : (default dataset.out)
|
||||
pathname -- set output file path and name
|
||||
"null" -- do not output file
|
||||
-png pathname : set graphic output file path and name (default dataset.png)
|
||||
-resume [pathname] : resume the grid task using an existing output file (default pathname is dataset.out)
|
||||
Use this option only if some parameters have been checked for the SAME data.
|
||||
|
||||
svm_options : additional options for svm-train
|
||||
|
||||
The program conducts v-fold cross validation using parameter C (and gamma)
|
||||
= 2^begin, 2^(begin+step), ..., 2^end.
|
||||
|
||||
You can specify where the libsvm executable and gnuplot are using the
|
||||
-svmtrain and -gnuplot parameters.
|
||||
|
||||
For windows users, please use pgnuplot.exe. If you are using gnuplot
|
||||
3.7.1, please upgrade to version 3.7.3 or higher. The version 3.7.1
|
||||
has a bug. If you use cygwin on windows, please use gnuplot-x11.
|
||||
|
||||
If the task is terminated accidentally or you would like to change the
|
||||
range of parameters, you can apply '-resume' to save time by re-using
|
||||
previous results. You may specify the output file of a previous run
|
||||
or use the default (i.e., dataset.out) without giving a name. Please
|
||||
note that the same condition must be used in two runs. For example,
|
||||
you cannot use '-v 10' earlier and resume the task with '-v 5'.
|
||||
|
||||
The value of some options can be "null." For example, `-log2c -1,0,1
|
||||
-log2g "null"' means that C=2^-1,2^0,2^1 and g=LIBSVM's default gamma
|
||||
value. That is, you do not conduct parameter selection on gamma.
|
||||
|
||||
Example
|
||||
=======
|
||||
|
||||
> python grid.py -log2c -5,5,1 -log2g -4,0,1 -v 5 -m 300 heart_scale
|
||||
|
||||
Users (in particular MS Windows users) may need to specify the path of
|
||||
executable files. You can either change paths in the beginning of
|
||||
grid.py or specify them in the command line. For example,
|
||||
|
||||
> grid.py -log2c -5,5,1 -svmtrain "c:\Program Files\libsvm\windows\svm-train.exe" -gnuplot c:\tmp\gnuplot\binary\pgnuplot.exe -v 10 heart_scale
|
||||
|
||||
Output: two files
|
||||
dataset.png: the CV accuracy contour plot generated by gnuplot
|
||||
dataset.out: the CV accuracy at each (log2(C),log2(gamma))
|
||||
|
||||
The following example saves running time by loading the output file of a previous run.
|
||||
|
||||
> python grid.py -log2c -7,7,1 -log2g -5,2,1 -v 5 -resume heart_scale.out heart_scale
|
||||
|
||||
Parallel grid search
|
||||
====================
|
||||
|
||||
You can conduct a parallel grid search by dispatching jobs to a
|
||||
cluster of computers which share the same file system. First, you add
|
||||
machine names in grid.py:
|
||||
|
||||
ssh_workers = ["linux1", "linux5", "linux5"]
|
||||
|
||||
and then setup your ssh so that the authentication works without
|
||||
asking a password.
|
||||
|
||||
The same machine (e.g., linux5 here) can be listed more than once if
|
||||
it has multiple CPUs or has more RAM. If the local machine is the
|
||||
best, you can also enlarge the nr_local_worker. For example:
|
||||
|
||||
nr_local_worker = 2
|
||||
|
||||
Example:
|
||||
|
||||
> python grid.py heart_scale
|
||||
[local] -1 -1 78.8889 (best c=0.5, g=0.5, rate=78.8889)
|
||||
[linux5] -1 -7 83.3333 (best c=0.5, g=0.0078125, rate=83.3333)
|
||||
[linux5] 5 -1 77.037 (best c=0.5, g=0.0078125, rate=83.3333)
|
||||
[linux1] 5 -7 83.3333 (best c=0.5, g=0.0078125, rate=83.3333)
|
||||
.
|
||||
.
|
||||
.
|
||||
|
||||
If -log2c, -log2g, or -v is not specified, default values are used.
|
||||
|
||||
If your system uses telnet instead of ssh, you list the computer names
|
||||
in telnet_workers.
|
||||
|
||||
Calling grid in Python
|
||||
======================
|
||||
|
||||
In addition to using grid.py as a command-line tool, you can use it as a
|
||||
Python module.
|
||||
|
||||
>>> rate, param = find_parameters(dataset, options)
|
||||
|
||||
You need to specify `dataset' and `options' (default ''). See the following example.
|
||||
|
||||
> python
|
||||
|
||||
>>> from grid import *
|
||||
>>> rate, param = find_parameters('../heart_scale', '-log2c -1,1,1 -log2g -1,1,1')
|
||||
[local] 0.0 0.0 rate=74.8148 (best c=1.0, g=1.0, rate=74.8148)
|
||||
[local] 0.0 -1.0 rate=77.037 (best c=1.0, g=0.5, rate=77.037)
|
||||
.
|
||||
.
|
||||
[local] -1.0 -1.0 rate=78.8889 (best c=0.5, g=0.5, rate=78.8889)
|
||||
.
|
||||
.
|
||||
>>> rate
|
||||
78.8889
|
||||
>>> param
|
||||
{'c': 0.5, 'g': 0.5}
|
||||
|
||||
|
||||
Part III: LIBSVM format checking tools
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
`svm-train' conducts only a simple check of the input data. To do a
|
||||
detailed check, we provide a python script `checkdata.py.'
|
||||
|
||||
Usage: checkdata.py dataset
|
||||
|
||||
Exit status (returned value): 1 if there are errors, 0 otherwise.
|
||||
|
||||
This tool is written by Rong-En Fan at National Taiwan University.
|
||||
|
||||
Example
|
||||
=======
|
||||
|
||||
> cat bad_data
|
||||
1 3:1 2:4
|
||||
> python checkdata.py bad_data
|
||||
line 1: feature indices must be in an ascending order, previous/current features 3:1 2:4
|
||||
Found 1 lines with error.
|
||||
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
#
|
||||
# A format checker for LIBSVM
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2007, Rong-En Fan
|
||||
#
|
||||
# All rights reserved.
|
||||
#
|
||||
# This program is distributed under the same license of the LIBSVM package.
|
||||
#
|
||||
|
||||
from sys import argv, exit
|
||||
import os.path
|
||||
|
||||
def err(line_no, msg):
    """Print *msg* prefixed with the number of the line it refers to."""
    report = "line {0}: {1}".format(line_no, msg)
    print(report)
|
||||
|
||||
# works like float() but does not accept nan and inf
|
||||
def my_float(x):
    """Convert *x* like float(), but raise ValueError for any nan/inf spelling."""
    lowered = x.lower()
    if "nan" in lowered or "inf" in lowered:
        raise ValueError

    return float(x)
|
||||
|
||||
def main():
    """Check the LIBSVM-format data set named on the command line.

    Every problem found is reported through err(). Returns 1 if at least
    one line has an error and 0 otherwise; exits with status 1 when the
    argument count is wrong or the data set does not exist.
    """
    if len(argv) != 2:
        print("Usage: {0} dataset".format(argv[0]))
        exit(1)

    dataset = argv[1]

    if not os.path.exists(dataset):
        print("dataset {0} not found".format(dataset))
        exit(1)

    error_line_count = 0
    # the with-block guarantees the file is closed even if a check raises
    with open(dataset, 'r') as data_file:
        for line_no, line in enumerate(data_file, 1):
            line_error = False

            # each line must end with a newline character
            if line[-1] != '\n':
                err(line_no, "missing a newline character in the end")
                line_error = True

            nodes = line.split()

            # check label
            if not nodes:
                err(line_no, "missing label, perhaps an empty line?")
                line_error = True
            else:
                label = nodes.pop(0)
                if ',' in label:
                    # multi-label format
                    try:
                        for single_label in label.split(','):
                            my_float(single_label)
                    except ValueError:
                        err(line_no, "label {0} is not a valid multi-label form".format(label))
                        line_error = True
                else:
                    try:
                        my_float(label)
                    except ValueError:
                        err(line_no, "label {0} is not a number".format(label))
                        line_error = True

            # check features
            prev_index = -1
            for i, node in enumerate(nodes):
                try:
                    (index, value) = node.split(':')
                    index = int(index)
                    value = my_float(value)
                except ValueError:
                    # wrong number of ':' parts, non-integer index or bad value;
                    # prev_index is deliberately left unchanged
                    err(line_no, "feature '{0}' not an <index>:<value> pair, <index> integer, <value> real number ".format(node))
                    line_error = True
                    continue

                # precomputed kernel's index starts from 0 and LIBSVM
                # checks it. Hence, don't treat index 0 as an error.
                if index < 0:
                    err(line_no, "feature index must be positive; wrong feature {0}".format(node))
                    line_error = True
                elif index <= prev_index:
                    err(line_no, "feature indices must be in an ascending order, previous/current features {0} {1}".format(nodes[i-1], node))
                    line_error = True
                prev_index = index

            if line_error:
                error_line_count += 1

    if error_line_count > 0:
        print("Found {0} lines with error.".format(error_line_count))
        return 1
    else:
        print("No error.")
        return 0
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit(main())
|
|
@ -0,0 +1,79 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
import os
|
||||
from subprocess import *
|
||||
|
||||
# A training file is mandatory; the testing file is optional.
if len(sys.argv) < 2:
    print('Usage: {0} training_file [testing_file]'.format(sys.argv[0]))
    raise SystemExit

# svm, grid, and gnuplot executable files

is_win32 = (sys.platform == 'win32')
if is_win32:
    # example for windows
    svmscale_exe = r"..\windows\svm-scale.exe"
    svmtrain_exe = r"..\windows\svm-train.exe"
    svmpredict_exe = r"..\windows\svm-predict.exe"
    gnuplot_exe = r"c:\tmp\gnuplot\binary\pgnuplot.exe"
    grid_py = r".\grid.py"
else:
    svmscale_exe = "../svm-scale"
    svmtrain_exe = "../svm-train"
    svmpredict_exe = "../svm-predict"
    grid_py = "./grid.py"
    gnuplot_exe = "/usr/bin/gnuplot"

# Stop at the first missing tool, checking them in a fixed order.
for required_path, complaint in (
        (svmscale_exe, "svm-scale executable not found"),
        (svmtrain_exe, "svm-train executable not found"),
        (svmpredict_exe, "svm-predict executable not found"),
        (gnuplot_exe, "gnuplot executable not found"),
        (grid_py, "grid.py not found"),
):
    assert os.path.exists(required_path), complaint

# Output files are named after the training file and written to the
# current directory.
train_pathname = sys.argv[1]
assert os.path.exists(train_pathname),"training file not found"
file_name = os.path.split(train_pathname)[1]
scaled_file = file_name + ".scale"
model_file = file_name + ".model"
range_file = file_name + ".range"

has_test_file = len(sys.argv) > 2
if has_test_file:
    test_pathname = sys.argv[2]
    file_name = os.path.split(test_pathname)[1]
    assert os.path.exists(test_pathname),"testing file not found"
    scaled_test_file = file_name + ".scale"
    predict_test_file = file_name + ".predict"


def _run_with_piped_stdout(command):
    """Run *command* through the shell with stdout piped, and wait for it."""
    Popen(command, shell = True, stdout = PIPE).communicate()


# Step 1: scale the training data and save the scaling ranges.
cmd = '{0} -s "{1}" "{2}" > "{3}"'.format(svmscale_exe, range_file, train_pathname, scaled_file)
print('Scaling training data...')
_run_with_piped_stdout(cmd)

# Step 2: grid search; the last line grid.py prints holds "c g rate".
cmd = '{0} -svmtrain "{1}" -gnuplot "{2}" "{3}"'.format(grid_py, svmtrain_exe, gnuplot_exe, scaled_file)
print('Cross validation...')
f = Popen(cmd, shell = True, stdout = PIPE).stdout

last_line = ''
for line in f:
    last_line = line
c,g,rate = map(float,last_line.split())

print('Best c={0}, g={1} CV rate={2}'.format(c,g,rate))

# Step 3: train on the scaled data with the selected parameters.
cmd = '{0} -c {1} -g {2} "{3}" "{4}"'.format(svmtrain_exe,c,g,scaled_file,model_file)
print('Training...')
_run_with_piped_stdout(cmd)

print('Output model: {0}'.format(model_file))

# Step 4 (optional): scale the test data with the saved ranges and predict.
if has_test_file:
    cmd = '{0} -r "{1}" "{2}" > "{3}"'.format(svmscale_exe, range_file, test_pathname, scaled_test_file)
    print('Scaling testing data...')
    _run_with_piped_stdout(cmd)

    cmd = '{0} "{1}" "{2}" "{3}"'.format(svmpredict_exe, scaled_test_file, model_file, predict_test_file)
    print('Testing...')
    Popen(cmd, shell = True).communicate()

    print('Output prediction: {0}'.format(predict_test_file))
|
|
@ -0,0 +1,500 @@
|
|||
#!/usr/bin/env python
|
||||
__all__ = ['find_parameters']
|
||||
|
||||
import os, sys, traceback, getpass, time, re
|
||||
from threading import Thread
|
||||
from subprocess import *
|
||||
|
||||
if sys.version_info[0] < 3:
|
||||
from Queue import Queue
|
||||
else:
|
||||
from queue import Queue
|
||||
|
||||
telnet_workers = []
|
||||
ssh_workers = []
|
||||
nr_local_worker = 1
|
||||
|
||||
class GridOption:
    """Settings for a grid search over svm-train's C and gamma parameters.

    ``__init__`` fills in platform-dependent defaults and then applies the
    grid.py command-line style ``options`` through ``parse_options``.
    """

    def __init__(self, dataset_pathname, options):
        """Set the defaults, then apply ``options``.

        dataset_pathname -- path of the data set handed to svm-train
        options -- grid/svm options, either a list of tokens or a single
                   whitespace-separated string

        Raises IOError / ValueError as described in ``parse_options``.
        """
        dirname = os.path.dirname(__file__)
        if sys.platform != 'win32':
            self.svmtrain_pathname = os.path.join(dirname, '../svm-train')
            self.gnuplot_pathname = '/usr/bin/gnuplot'
        else:
            # example for windows
            self.svmtrain_pathname = os.path.join(dirname, r'..\windows\svm-train.exe')
            # svmtrain_pathname = r'c:\Program Files\libsvm\windows\svm-train.exe'
            self.gnuplot_pathname = r'c:\tmp\gnuplot\binary\pgnuplot.exe'
        self.fold = 5
        # begin/end/step of the log2(C) and log2(gamma) ranges
        self.c_begin, self.c_end, self.c_step = -5, 15, 2
        self.g_begin, self.g_end, self.g_step = 3, -15, -2
        self.grid_with_c, self.grid_with_g = True, True
        self.dataset_pathname = dataset_pathname
        self.dataset_title = os.path.split(dataset_pathname)[1]
        self.out_pathname = '{0}.out'.format(self.dataset_title)
        self.png_pathname = '{0}.png'.format(self.dataset_title)
        self.pass_through_string = ' '
        self.resume_pathname = None
        self.parse_options(options)

    def parse_options(self, options):
        """Apply grid options and collect everything else for svm-train.

        Recognised flags are -log2c, -log2g, -v, -svmtrain, -gnuplot, -out,
        -png and -resume; any other token is joined into
        ``pass_through_string``.

        Raises ValueError when -c/-g is used, when a flag is missing its
        value, or when both ranges are disabled; raises IOError when the
        svm-train executable, the data set or the resume file does not exist.
        A missing gnuplot executable only disables plotting.
        """
        if isinstance(options, str):
            options = options.split()
        i = 0
        pass_through_options = []

        def value_after(index):
            # A flag given as the very last token has no value; report that
            # as a ValueError (handled by the command-line entry point)
            # instead of letting an IndexError escape.
            if index + 1 >= len(options):
                raise ValueError('option {0} requires a value'.format(options[index]))
            return options[index + 1]

        while i < len(options):
            flag = options[i]
            if flag == '-log2c':
                value = value_after(i)
                i = i + 1
                if value == 'null':
                    self.grid_with_c = False
                else:
                    self.c_begin, self.c_end, self.c_step = map(float, value.split(','))
            elif flag == '-log2g':
                value = value_after(i)
                i = i + 1
                if value == 'null':
                    self.grid_with_g = False
                else:
                    self.g_begin, self.g_end, self.g_step = map(float, value.split(','))
            elif flag == '-v':
                # kept as given (a string); it is only formatted into the command line
                self.fold = value_after(i)
                i = i + 1
            elif flag in ('-c', '-g'):
                raise ValueError('Use -log2c and -log2g.')
            elif flag == '-svmtrain':
                self.svmtrain_pathname = value_after(i)
                i = i + 1
            elif flag == '-gnuplot':
                value = value_after(i)
                i = i + 1
                self.gnuplot_pathname = None if value == 'null' else value
            elif flag == '-out':
                value = value_after(i)
                i = i + 1
                self.out_pathname = None if value == 'null' else value
            elif flag == '-png':
                self.png_pathname = value_after(i)
                i = i + 1
            elif flag == '-resume':
                # the pathname is optional: fall back to <dataset>.out when the
                # flag is last or directly followed by another flag
                if i == (len(options) - 1) or options[i + 1].startswith('-'):
                    self.resume_pathname = self.dataset_title + '.out'
                else:
                    i = i + 1
                    self.resume_pathname = options[i]
            else:
                pass_through_options.append(flag)
            i = i + 1

        self.pass_through_string = ' '.join(pass_through_options)
        if not os.path.exists(self.svmtrain_pathname):
            raise IOError('svm-train executable not found')
        if not os.path.exists(self.dataset_pathname):
            raise IOError('dataset not found')
        if self.resume_pathname and not os.path.exists(self.resume_pathname):
            raise IOError('file for resumption not found')
        if not self.grid_with_c and not self.grid_with_g:
            raise ValueError('-log2c and -log2g should not be null simultaneously')
        if self.gnuplot_pathname and not os.path.exists(self.gnuplot_pathname):
            sys.stderr.write('gnuplot executable not found\n')
            self.gnuplot_pathname = None
|
||||
|
||||
def redraw(db,best_param,gnuplot,options,tofile=False):
    """Send gnuplot the commands and data for a contour plot of the results.

    db -- sequence of (log2c, log2g, rate) entries collected so far
    best_param -- (best_log2c, best_log2g, best_rate), shown in the labels
    gnuplot -- object with ``write(bytes)`` and ``flush()`` (gnuplot's stdin)
    options -- object providing png_pathname, c_begin, c_end, g_begin,
               g_end and dataset_title
    tofile -- when true, plot into options.png_pathname instead of a window

    Nothing is written when ``db`` is empty or when all entries share the
    same log2c, the same log2g or the same rate. ``db`` itself is left
    untouched: the points are sorted into a copy.
    """
    if len(db) == 0: return
    begin_level = round(max(x[2] for x in db)) - 3
    step_size = 0.5

    best_log2c,best_log2g,best_rate = best_param

    # if newly obtained c, g, or cv values are the same,
    # then stop redrawing the contour.
    if all(x[0] == db[0][0] for x in db): return
    if all(x[1] == db[0][1] for x in db): return
    if all(x[2] == db[0][2] for x in db): return

    if tofile:
        gnuplot.write(b"set term png transparent small linewidth 2 medium enhanced\n")
        gnuplot.write("set output \"{0}\"\n".format(options.png_pathname.replace('\\','\\\\')).encode())
    elif sys.platform == 'win32':
        gnuplot.write(b"set term windows\n")
    else:
        gnuplot.write(b"set term x11\n")
    gnuplot.write(b"set xlabel \"log2(C)\"\n")
    gnuplot.write(b"set ylabel \"log2(gamma)\"\n")
    gnuplot.write("set xrange [{0}:{1}]\n".format(options.c_begin,options.c_end).encode())
    gnuplot.write("set yrange [{0}:{1}]\n".format(options.g_begin,options.g_end).encode())
    gnuplot.write(b"set contour\n")
    gnuplot.write("set cntrparam levels incremental {0},{1},100\n".format(begin_level,step_size).encode())
    gnuplot.write(b"unset surface\n")
    gnuplot.write(b"unset ztics\n")
    gnuplot.write(b"set view 0,0\n")
    gnuplot.write("set title \"{0}\"\n".format(options.dataset_title).encode())
    gnuplot.write(b"unset label\n")
    gnuplot.write("set label \"Best log2(C) = {0} log2(gamma) = {1} accuracy = {2}%\" at screen 0.5,0.85 center\n".
                  format(best_log2c, best_log2g, best_rate).encode())
    gnuplot.write("set label \"C = {0} gamma = {1}\""
                  " at screen 0.5,0.8 center\n".format(2**best_log2c, 2**best_log2g).encode())
    gnuplot.write(b"set key at screen 0.9,0.9\n")
    gnuplot.write(b"splot \"-\" with lines\n")

    # Order by log2c ascending, then log2g descending. Work on a copy so the
    # caller's list is not reordered behind its back.
    points = sorted(db, key = lambda x:(x[0], -x[1]))

    prevc = points[0][0]
    for line in points:
        if prevc != line[0]:
            # an empty line separates the rows that share one log2c value
            gnuplot.write(b"\n")
            prevc = line[0]
        gnuplot.write("{0[0]} {0[1]} {0[2]}\n".format(line).encode())
    gnuplot.write(b"e\n")
    gnuplot.write(b"\n") # force gnuplot back to prompt when term set failure
    gnuplot.flush()
|
||||
|
||||
|
||||
def calculate_jobs(options):
    """Build the ordered list of grid jobs and load already computed results.

    options -- object providing c_begin/c_end/c_step, g_begin/g_end/g_step,
               grid_with_c, grid_with_g and resume_pathname

    Returns ``(jobs, resumed_jobs)``. ``jobs`` is a list of lists of
    (log2c, log2g) pairs; a parameter that is not searched appears as None.
    ``resumed_jobs`` maps (log2c, log2g) to the rate read from the resume
    file and is empty when options.resume_pathname is None.

    Raises ValueError when a range step is zero.
    """

    def range_f(begin,end,step):
        # like range, but works on non-integer too
        if step == 0:
            # a zero step would never reach ``end`` and loop forever
            raise ValueError('step of a parameter range must be non-zero')
        seq = []
        while True:
            if step > 0 and begin > end: break
            if step < 0 and begin < end: break
            seq.append(begin)
            begin = begin + step
        return seq

    def permute_sequence(seq):
        # Reorder so the middle element comes first, followed by the
        # recursively permuted left and right halves interleaved.
        n = len(seq)
        if n <= 1: return seq

        mid = int(n/2)
        left = permute_sequence(seq[:mid])
        right = permute_sequence(seq[mid+1:])

        ret = [seq[mid]]
        while left or right:
            if left: ret.append(left.pop(0))
            if right: ret.append(right.pop(0))

        return ret

    c_seq = permute_sequence(range_f(options.c_begin,options.c_end,options.c_step))
    g_seq = permute_sequence(range_f(options.g_begin,options.g_end,options.g_step))

    if not options.grid_with_c:
        c_seq = [None]
    if not options.grid_with_g:
        g_seq = [None]

    # floats so the progress ratios below use true division on Python 2 too
    nr_c = float(len(c_seq))
    nr_g = float(len(g_seq))
    i, j = 0, 0
    jobs = []

    while i < nr_c or j < nr_g:
        if i/nr_c < j/nr_g:
            # increase C resolution
            line = []
            for k in range(0,j):
                line.append((c_seq[i],g_seq[k]))
            i = i + 1
            jobs.append(line)
        else:
            # increase g resolution
            line = []
            for k in range(0,i):
                line.append((c_seq[k],g_seq[j]))
            j = j + 1
            jobs.append(line)

    resumed_jobs = {}

    if options.resume_pathname is None:
        return jobs, resumed_jobs

    # ``with`` closes the resume file, which used to be left open
    with open(options.resume_pathname, 'r') as resume_file:
        for line in resume_file:
            line = line.strip()
            rst = re.findall(r'rate=([0-9.]+)',line)
            if not rst:
                # not a result line
                continue
            rate = float(rst[0])

            c, g = None, None
            rst = re.findall(r'log2c=([0-9.-]+)',line)
            if rst:
                c = float(rst[0])
            rst = re.findall(r'log2g=([0-9.-]+)',line)
            if rst:
                g = float(rst[0])

            resumed_jobs[(c,g)] = rate

    return jobs, resumed_jobs
|
||||
|
||||
|
||||
class WorkerStopToken:
    """Marker used to notify the worker to stop or if a worker is dead."""
|
||||
|
||||
class Worker(Thread):
    """Thread that takes (log2c, log2g) jobs from a queue and reports rates.

    Subclasses provide ``run_one(c, g)``, which must return the
    cross-validation rate for the given parameters (or None on failure).
    """

    def __init__(self,name,job_queue,result_queue,options):
        """Remember the worker name, the two queues and the grid options."""
        Thread.__init__(self)
        self.name = name
        self.job_queue = job_queue
        self.result_queue = result_queue
        self.options = options

    def run(self):
        """Process jobs until the stop token arrives or one job fails.

        Each result is put on the result queue as
        (name, log2c, log2g, rate). On failure the job is put back on the
        job queue and this worker quits.
        """
        while True:
            (cexp,gexp) = self.job_queue.get()
            if cexp is WorkerStopToken:
                # put the token back so the other workers see it as well
                self.job_queue.put((cexp,gexp))
                # print('worker {0} stop.'.format(self.name))
                break
            try:
                c, g = None, None
                if cexp is not None:
                    c = 2.0**cexp
                if gexp is not None:
                    g = 2.0**gexp
                rate = self.run_one(c,g)
                if rate is None: raise RuntimeError('get no rate')
            except Exception:
                # we failed, let others do that and we just quit
                # (Exception rather than a bare except, so SystemExit and
                # KeyboardInterrupt are not swallowed here)
                traceback.print_exception(*sys.exc_info())

                self.job_queue.put((cexp,gexp))
                sys.stderr.write('worker {0} quit.\n'.format(self.name))
                break
            else:
                self.result_queue.put((self.name,cexp,gexp,rate))

    def get_cmd(self,c,g):
        """Return the svm-train cross-validation command line for c and g.

        -c / -g are only included when the corresponding parameter is part
        of the grid search.
        """
        options=self.options
        cmdline = '"' + options.svmtrain_pathname + '"'
        if options.grid_with_c:
            cmdline += ' -c {0} '.format(c)
        if options.grid_with_g:
            cmdline += ' -g {0} '.format(g)
        cmdline += ' -v {0} {1} {2} '.format\
            (options.fold,options.pass_through_string,options.dataset_pathname)
        return cmdline
|
||||
|
||||
class LocalWorker(Worker):
    """Worker that runs svm-train as a local shell command."""

    def run_one(self,c,g):
        """Run one cross validation and return its rate, or None if not found.

        The rate is the last token of the first output line containing
        'Cross', with its trailing character removed.
        """
        cmdline = self.get_cmd(c,g)
        process = Popen(cmdline,shell=True,stdout=PIPE,stderr=PIPE,stdin=PIPE)
        # communicate() drains stdout and stderr, closes the pipes and waits
        # for the child; reading stdout alone could block on a full stderr
        # pipe and never reaped the process.
        output = process.communicate()[0]
        for line in output.splitlines():
            if str(line).find('Cross') != -1:
                return float(line.split()[-1][0:-1])
|
||||
|
||||
class SSHWorker(Worker):
    """Worker that runs svm-train on a remote host through ssh."""

    def __init__(self,name,job_queue,result_queue,host,options):
        """Remember the host and the current working directory."""
        Worker.__init__(self,name,job_queue,result_queue,options)
        self.host = host
        self.cwd = os.getcwd()

    def run_one(self,c,g):
        """Run one cross validation over ssh and return its rate, or None.

        The remote command changes into the directory recorded at
        construction time before running svm-train.
        """
        cmdline = 'ssh -x -t -t {0} "cd {1}; {2}"'.format\
            (self.host,self.cwd,self.get_cmd(c,g))
        process = Popen(cmdline,shell=True,stdout=PIPE,stderr=PIPE,stdin=PIPE)
        # communicate() drains both output pipes and waits for ssh to exit
        # instead of leaving the pipes and the child process dangling.
        output = process.communicate()[0]
        for line in output.splitlines():
            if str(line).find('Cross') != -1:
                return float(line.split()[-1][0:-1])
|
||||
|
||||
class TelnetWorker(Worker):
    """Worker that runs svm-train on a remote host through a telnet session."""

    def __init__(self,name,job_queue,result_queue,host,username,password,options):
        """Remember the host and the login credentials."""
        Worker.__init__(self,name,job_queue,result_queue,options)
        self.host = host
        self.username = username
        self.password = password

    def run(self):
        """Log in, change to the current directory, process jobs, log out."""
        import telnetlib
        self.tn = tn = telnetlib.Telnet(self.host)
        # telnetlib exchanges bytes on Python 3, so every string is encoded
        # before it is sent or matched (a no-op for plain str on Python 2).
        tn.read_until(b'login: ')
        tn.write(self.username.encode() + b'\n')
        tn.read_until(b'Password: ')
        tn.write(self.password.encode() + b'\n')

        # XXX: how to know whether login is successful?
        tn.read_until(self.username.encode())
        #
        print('login ok', self.host)
        tn.write(b'cd ' + os.getcwd().encode() + b'\n')
        Worker.run(self)
        tn.write(b'exit\n')

    def run_one(self,c,g):
        """Send one svm-train command and return the reported rate, or None."""
        cmdline = self.get_cmd(c,g)
        self.tn.write(cmdline.encode() + b'\n')
        (idx,matchm,output) = self.tn.expect([b'Cross.*\n'])
        for line in output.split(b'\n'):
            if str(line).find('Cross') != -1:
                return float(line.split()[-1][0:-1])
|
||||
|
||||
def find_parameters(dataset_pathname, options=''):
    """Grid-search svm-train parameters by cross validation.

    dataset_pathname -- path of the data set
    options -- grid.py options as a list of tokens or one string
               (see GridOption)

    Starts the configured telnet, ssh and local workers, collects their
    results in job order, prints progress, optionally appends each new
    result to the output file and redraws the gnuplot contour.

    Returns ``(best_rate, best_param)`` where ``best_param`` holds 'c'
    and/or 'g' as 2**(best log2 value) for the parameters that were searched.
    """

    def update_param(c,g,rate,best_c,best_g,best_rate,worker,resumed):
        # Fold one finished job into the running best, report it on stdout
        # and, for jobs that were not resumed, append it to the result file.
        if (rate > best_rate) or (rate==best_rate and g==best_g and c<best_c):
            best_rate,best_c,best_g = rate,c,g
        stdout_str = '[{0}] {1} {2} (best '.format\
            (worker,' '.join(str(x) for x in [c,g] if x is not None),rate)
        output_str = ''
        if c is not None:
            stdout_str += 'c={0}, '.format(2.0**best_c)
            output_str += 'log2c={0} '.format(c)
        if g is not None:
            stdout_str += 'g={0}, '.format(2.0**best_g)
            output_str += 'log2g={0} '.format(g)
        stdout_str += 'rate={0})'.format(best_rate)
        print(stdout_str)
        if options.out_pathname and not resumed:
            output_str += 'rate={0}\n'.format(rate)
            result_file.write(output_str)
            result_file.flush()

        return best_c,best_g,best_rate

    options = GridOption(dataset_pathname, options)

    if options.gnuplot_pathname:
        gnuplot = Popen(options.gnuplot_pathname,stdin = PIPE,stdout=PIPE,stderr=PIPE).stdin
    else:
        gnuplot = None

    # put jobs in queue

    jobs,resumed_jobs = calculate_jobs(options)
    job_queue = Queue(0)
    result_queue = Queue(0)

    for (c,g) in resumed_jobs:
        result_queue.put(('resumed',c,g,resumed_jobs[(c,g)]))

    for line in jobs:
        for (c,g) in line:
            if (c,g) not in resumed_jobs:
                job_queue.put((c,g))

    # hack the queue to become a stack --
    # this is important when some thread
    # failed and re-put a job. If we still
    # use FIFO, the job will be put
    # into the end of the queue, and the graph
    # will only be updated in the end

    job_queue._put = job_queue.queue.appendleft

    # fire telnet workers

    if telnet_workers:
        username = getpass.getuser()
        password = getpass.getpass()
        for host in telnet_workers:
            worker = TelnetWorker(host,job_queue,result_queue,
                                  host,username,password,options)
            worker.start()

    # fire ssh workers

    if ssh_workers:
        for host in ssh_workers:
            worker = SSHWorker(host,job_queue,result_queue,host,options)
            worker.start()

    # fire local workers

    for i in range(nr_local_worker):
        worker = LocalWorker('local',job_queue,result_queue,options)
        worker.start()

    # gather results

    done_jobs = {}
    result_file = None
    db = []
    best_rate = -1
    best_c,best_g = None,None

    try:
        if options.out_pathname:
            if options.resume_pathname:
                result_file = open(options.out_pathname, 'a')
            else:
                result_file = open(options.out_pathname, 'w')

        for (c,g) in resumed_jobs:
            rate = resumed_jobs[(c,g)]
            best_c,best_g,best_rate = update_param(c,g,rate,best_c,best_g,best_rate,'resumed',True)

        for line in jobs:
            for (c,g) in line:
                # results may arrive out of order; keep reading until the
                # job we are waiting for has been reported
                while (c,g) not in done_jobs:
                    (worker,c1,g1,rate1) = result_queue.get()
                    done_jobs[(c1,g1)] = rate1
                    if (c1,g1) not in resumed_jobs:
                        best_c,best_g,best_rate = update_param(c1,g1,rate1,best_c,best_g,best_rate,worker,False)
                db.append((c,g,done_jobs[(c,g)]))
            if gnuplot and options.grid_with_c and options.grid_with_g:
                redraw(db,[best_c, best_g, best_rate],gnuplot,options)
                redraw(db,[best_c, best_g, best_rate],gnuplot,options,True)
    finally:
        # Also on errors: close the result file and tell the workers to
        # stop, otherwise their threads keep the process alive.
        if result_file is not None:
            result_file.close()
        job_queue.put((WorkerStopToken,None))

    best_param, best_cg = {}, []
    if best_c is not None:
        best_param['c'] = 2.0**best_c
        best_cg += [2.0**best_c]
    if best_g is not None:
        best_param['g'] = 2.0**best_g
        best_cg += [2.0**best_g]
    print('{0} {1}'.format(' '.join(map(str,best_cg)), best_rate))

    return best_rate, best_param
|
||||
|
||||
|
||||
if __name__ == '__main__':

    def exit_with_help():
        """Print the usage text and terminate with exit status 1."""
        usage = """\
Usage: grid.py [grid_options] [svm_options] dataset

grid_options :
-log2c {begin,end,step | "null"} : set the range of c (default -5,15,2)
begin,end,step -- c_range = 2^{begin,...,begin+k*step,...,end}
"null" -- do not grid with c
-log2g {begin,end,step | "null"} : set the range of g (default 3,-15,-2)
begin,end,step -- g_range = 2^{begin,...,begin+k*step,...,end}
"null" -- do not grid with g
-v n : n-fold cross validation (default 5)
-svmtrain pathname : set svm executable path and name
-gnuplot {pathname | "null"} :
pathname -- set gnuplot executable path and name
"null" -- do not plot
-out {pathname | "null"} : (default dataset.out)
pathname -- set output file path and name
"null" -- do not output file
-png pathname : set graphic output file path and name (default dataset.png)
-resume [pathname] : resume the grid task using an existing output file (default pathname is dataset.out)
This is experimental. Try this option only if some parameters have been checked for the SAME data.

svm_options : additional options for svm-train"""
        print(usage)
        sys.exit(1)

    # Without at least a dataset argument there is nothing to do.
    if len(sys.argv) < 2:
        exit_with_help()

    # The dataset is the last argument; everything in between is an option.
    dataset_pathname = sys.argv[-1]
    options = sys.argv[1:-1]
    try:
        find_parameters(dataset_pathname, options)
    except (IOError,ValueError) as e:
        for message in (str(e) + '\n', 'Try "grid.py" for more information.\n'):
            sys.stderr.write(message)
        sys.exit(1)
|
|
@ -0,0 +1,120 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import os, sys, math, random
|
||||
from collections import defaultdict
|
||||
|
||||
if sys.version_info[0] >= 3:
|
||||
xrange = range
|
||||
|
||||
def exit_with_help(argv):
    """Print the usage text (using argv[0] as program name) and exit with 1."""
    print("""\
Usage: {0} [options] dataset subset_size [output1] [output2]

This script randomly selects a subset of the dataset.

options:
-s method : method of selection (default 0)
0 -- stratified selection (classification only)
1 -- random selection

output1 : the subset (optional)
output2 : rest of the data (optional)
If output1 is omitted, the subset will be printed on the screen.""".format(argv[0]))
    # sys.exit rather than the bare exit(), which is only injected by the
    # site module and is not guaranteed to exist
    sys.exit(1)
|
||||
|
||||
def process_options(argv):
    """Parse the command line of the subset script.

    argv -- full argument list, program name first

    Returns ``(dataset, subset_size, method, subset_file, rest_file)``.
    ``subset_file`` is sys.stdout unless an output1 path is given;
    ``rest_file`` is None unless an output2 path is given. Both paths are
    opened for writing. Prints the usage text and exits when arguments are
    missing or the selection method is unknown.
    """
    argc = len(argv)
    if argc < 3:
        exit_with_help(argv)

    # default method is stratified selection
    method = 0
    subset_file = sys.stdout
    rest_file = None

    i = 1
    while i < argc:
        # startswith also copes with an empty argument, where indexing
        # the first character would raise IndexError
        if not argv[i].startswith("-"):
            break
        if argv[i] == "-s":
            i = i + 1
            if i >= argc:
                # "-s" was the last argument: no method value given
                exit_with_help(argv)
            method = int(argv[i])
            if method not in [0,1]:
                print("Unknown selection method {0}".format(method))
                exit_with_help(argv)
        i = i + 1

    # dataset and subset_size must both follow the options
    if i + 1 >= argc:
        exit_with_help(argv)

    dataset = argv[i]
    subset_size = int(argv[i+1])
    if i+2 < argc:
        subset_file = open(argv[i+2],'w')
    if i+3 < argc:
        rest_file = open(argv[i+3],'w')

    return dataset, subset_size, method, subset_file, rest_file
|
||||
|
||||
def random_selection(dataset, subset_size):
    """Return ``subset_size`` distinct line numbers of ``dataset``, sorted.

    dataset -- path of the data file; its lines are only counted
    subset_size -- number of 0-based line numbers to draw

    random.sample raises ValueError when subset_size exceeds the line count.
    """
    # ``with`` closes the file, which used to be left open
    with open(dataset,'r') as data_file:
        l = sum(1 for line in data_file)
    return sorted(random.sample(xrange(l), subset_size))
|
||||
|
||||
def stratified_selection(dataset, subset_size):
    """Return sorted line numbers of a per-label proportional sample.

    dataset -- path of the data file; the first whitespace-separated token
               of every line is taken as its label
    subset_size -- requested total number of lines

    Every label gets at least one line. If the budget is used up before all
    labels are served, an error is written to stderr and the process exits
    with status -1.
    """
    # ``with`` closes the file, which used to be left open
    with open(dataset) as data_file:
        labels = [line.split(None,1)[0] for line in data_file]
    label_linenums = defaultdict(list)
    for i, label in enumerate(labels):
        label_linenums[label].append(i)

    l = len(labels)
    remaining = subset_size
    ret = []

    # classes with fewer data are sampled first; otherwise
    # some rare classes may not be selected
    for label in sorted(label_linenums, key=lambda x: len(label_linenums[x])):
        linenums = label_linenums[label]
        label_size = len(linenums)
        # at least one instance per class
        s = int(min(remaining, max(1, math.ceil(label_size*(float(subset_size)/l)))))
        if s == 0:
            sys.stderr.write('''\
Error: failed to have at least one instance per class
1. You may have regression data.
2. Your classification data is unbalanced or too small.
Please use -s 1.
''')
            sys.exit(-1)
        remaining -= s
        ret += [linenums[i] for i in random.sample(xrange(label_size), s)]
    return sorted(ret)
|
||||
|
||||
def main(argv=sys.argv):
    """Select a subset of the dataset and write it out.

    argv -- command line as accepted by process_options

    Selected lines go to the subset output (stdout by default); when a rest
    file was requested, all other lines go there. Line order is preserved.
    """
    dataset, subset_size, method, subset_file, rest_file = process_options(argv)
    #uncomment the following line to fix the random seed
    #random.seed(0)
    selected_lines = []

    if method == 0:
        selected_lines = stratified_selection(dataset, subset_size)
    elif method == 1:
        selected_lines = random_selection(dataset, subset_size)

    #select instances based on selected_lines
    wanted = set(selected_lines)
    try:
        with open(dataset,'r') as data_file:
            for linenum, line in enumerate(data_file):
                if linenum in wanted:
                    subset_file.write(line)
                elif rest_file:
                    rest_file.write(line)
    finally:
        # Close what process_options opened, but never sys.stdout itself.
        if subset_file is sys.stdout:
            subset_file.flush()
        else:
            subset_file.close()
        if rest_file:
            rest_file.close()
|
||||
|
||||
# Script entry point: run the selection with the process arguments.
if __name__ == "__main__":
    main(sys.argv)
|
||||
|
|
@ -0,0 +1,717 @@
|
|||
from __future__ import print_function
|
||||
from properties import Properties
|
||||
from changedetection import ChangeDetection
|
||||
from ensemble import Ensemble
|
||||
from stream import Stream
|
||||
from model import Model
|
||||
import time, sys
|
||||
from py4j.java_gateway import JavaGateway, GatewayParameters, CallbackServerParameters
|
||||
import numpy as np
|
||||
|
||||
|
||||
class Manager(object):
|
||||
|
||||
def __init__(self, sourceFile, targetFile):
    """Create the buffers, detector, ensemble, streams and the py4j gateway.

    sourceFile -- passed to Stream for the source stream
    targetFile -- passed to Stream for the target stream
    """
    # Sliding windows fed to the change detectors, plus target predictions
    self.SWindow, self.TWindow, self.TPredictWindow = [], [], []

    # Buffers of the instances seen on each stream (used as queues)
    self.SDataBuffer, self.TDataBuffer = [], []

    # Initial data of each stream; filled in when processing starts
    self.SInitialDataBuffer, self.TInitialDataBuffer = [], []

    self.changeDetector = ChangeDetection(
        Properties.GAMMA, Properties.SENSITIVITY, Properties.MAX_WINDOW_SIZE)
    self.ensemble = Ensemble(Properties.ENSEMBLE_SIZE)

    # Both streams are handed the same class-name list object
    sharedClassNames = []
    self.source = Stream(sourceFile, sharedClassNames, Properties.INITIAL_DATA_SIZE)
    self.target = Stream(targetFile, sharedClassNames, Properties.INITIAL_DATA_SIZE)
    Properties.MAXVAR = self.source.MAXVAR

    # py4j gateway; the callback server uses the port after the gateway port
    gatewayParams = GatewayParameters(port=Properties.PY4JPORT)
    callbackParams = CallbackServerParameters(port=Properties.PY4JPORT+1)
    self.gateway = JavaGateway(
        start_callback_server=True,
        gateway_parameters=gatewayParams,
        callback_server_parameters=callbackParams)
    self.app = self.gateway.entry_point
|
||||
|
||||
|
||||
"""
|
||||
Detect drift on a given data stream.
|
||||
Returns the change point index on the stream array.
|
||||
"""
|
||||
def __detectDrift(self, slidingWindow, flagStream):
|
||||
changePoint = -1
|
||||
if flagStream == 0:
|
||||
changePoint = self.changeDetector.detectSourceChange(slidingWindow)
|
||||
elif flagStream == 1:
|
||||
changePoint = self.changeDetector.detectTargetChange(slidingWindow)
|
||||
else:
|
||||
raise Exception('flagStream var has value ' + str(flagStream) + ' that is not supported.')
|
||||
return changePoint
|
||||
|
||||
|
||||
def __detectDriftJava(self, slidingWindow, flagStream):
|
||||
changePoint = -1
|
||||
|
||||
sw = self.gateway.jvm.java.util.ArrayList()
|
||||
for i in xrange(len(slidingWindow)):
|
||||
sw.append(float(slidingWindow[i]))
|
||||
|
||||
if flagStream == 0:
|
||||
changePoint = self.app.detectSourceChange(sw)
|
||||
elif flagStream == 1:
|
||||
changePoint = self.app.detectTargetChange(sw)
|
||||
else:
|
||||
raise Exception('flagStream var has value ' + str(flagStream) + ' that is not supported.')
|
||||
# print('ChangePoint = ' + str(changePoint))
|
||||
|
||||
return changePoint
|
||||
|
||||
|
||||
|
||||
"""
|
||||
Write value (accuracy or confidence) to a file with DatasetName as an identifier.
|
||||
"""
|
||||
def __saveResult(self, acc, datasetName):
    """Append ``acc`` as one line to '<datasetName>_<Properties.OUTFILENAME>'."""
    resultPath = datasetName + '_' + Properties.OUTFILENAME
    # the with-statement closes the file when the block ends
    with open(resultPath, 'a') as resultFile:
        resultFile.write(str(acc) + '\n')
|
||||
|
||||
|
||||
"""
|
||||
The main method handling MDC logic (using single ensemble).
|
||||
"""
|
||||
def start(self, datasetName):
    """Run the MDC loop with a single ensemble over both streams.

    datasetName -- prefix of the result files written by __saveResult

    Each iteration takes the next source or target instance (chosen at
    random in proportion to the source data still unread), evaluates it with
    the ensemble, then runs drift detection on both sliding windows and
    retrains when a change point is reported.

    Returns ``(source accuracy, target accuracy)``; an accuracy is 0.0 when
    no instance of that stream was processed.
    """
    # Get initial data buffer
    self.SInitialDataBuffer = self.source.initialData
    self.TInitialDataBuffer = self.target.initialData

    Properties.logger.info('Initializing Ensemble ...')
    # source model
    self.ensemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, True)
    # target model
    self.ensemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, False)
    Properties.logger.info(self.ensemble.getEnsembleSummary())

    sourceIndex = 0
    targetIndex = 0
    trueSourceNum = 0
    trueTargetNum = 0
    targetConfSum = 0

    Properties.logger.info('Starting MDC ...')
    while len(self.source.data) + len(self.target.data) > sourceIndex + targetIndex:
        # Share of source instances among everything still unread. Both
        # indices are subtracted; the previous expression added targetIndex
        # back. The loop condition keeps the denominator positive.
        remainingSource = len(self.source.data) - sourceIndex
        remainingTotal = len(self.source.data) + len(self.target.data) - sourceIndex - targetIndex
        ratio = remainingSource / float(remainingTotal)

        if (np.random.rand() <= ratio and sourceIndex < len(self.source.data)) or (targetIndex >= len(self.target.data) and sourceIndex < len(self.source.data)):
            sdata = self.source.data[sourceIndex]
            self.SDataBuffer.append(sdata)
            resSource = self.ensemble.evaluateEnsemble(sdata, True)
            self.SWindow.append(resSource[0])  # prediction of 0 or 1
            print('S', end="")
            # get Source Accuracy
            sourceIndex += 1
            trueSourceNum += resSource[0]
        elif targetIndex < len(self.target.data):
            tdata = self.target.data[targetIndex]
            self.TDataBuffer.append(tdata)
            resTarget = self.ensemble.evaluateEnsemble(tdata, False)
            conf = resTarget[1]  # confidence
            targetIndex += 1
            print('T', end="")

            # If conf is very close to 0.0 or 1.0, beta probability might become zero, which can make problems in change detection. Handling this scenario.
            if conf < 0.1:
                self.TWindow.append(0.1)
            elif conf > 0.995:
                self.TWindow.append(0.995)
            else:
                self.TWindow.append(resTarget[1])
            self.TPredictWindow.append(resTarget[0])

            # get Target Accuracy
            if resTarget[0] == tdata[-1]:
                trueTargetNum += 1
            acc = float(trueTargetNum)/(targetIndex)
            self.__saveResult(acc, datasetName)

            # save confidence
            targetConfSum += conf
            self.__saveResult(float(targetConfSum)/(targetIndex), datasetName+'_confidence')

        # Drift detection
        # srcCP = self.__detectDrift(self.SWindow, 0)
        # trgCP = self.__detectDrift(self.TWindow, 1)
        srcCP = self.__detectDriftJava(self.SWindow, 0)
        trgCP = self.__detectDriftJava(self.TWindow, 1)

        if srcCP != -1:
            self.__saveResult(5555555.0, datasetName+'_confidence')
            Properties.logger.info('-------------------------- S O U R C E D R I F T ------------------------------------')
            Properties.logger.info('\nDrift found on source stream.')
            Properties.logger.info('dataIndex=' + str((targetIndex+sourceIndex)) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP))

            # remove data from buffer till srcCP (one slice delete instead of
            # popping the head repeatedly; guarded so a negative value
            # removes nothing)
            if srcCP > 0:
                del self.SDataBuffer[:srcCP]
                del self.SWindow[:srcCP]

            # Exception with srcCP=0 (windowsize hit max or avg error is less than cutoff).
            # Keep at least cushion number of instances
            if srcCP == 0:
                surplus = len(self.SDataBuffer) - Properties.CUSHION
                if surplus > 0:
                    del self.SDataBuffer[:surplus]
                    del self.SWindow[:surplus]

            Properties.logger.info('Instances left in source sliding window : ' + str(len(self.SDataBuffer)))
            Properties.logger.info('Instances left in target sliding window : ' + str(len(self.TDataBuffer)))

            Properties.logger.info('Updating ensemble weights')
            self.ensemble.updateWeight(self.SDataBuffer, True)

            Properties.logger.info('Training a model for source stream')
            self.ensemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, True)
            Properties.logger.info(self.ensemble.getEnsembleSummary())

        if trgCP != -1:
            self.__saveResult(7777777.0, datasetName+'_confidence')
            Properties.logger.info('-------------------------- T A R G E T D R I F T ------------------------------------')
            Properties.logger.info('Drift found on target stream.')
            Properties.logger.info('dataIndex=' + str((targetIndex+sourceIndex)) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP))

            # remove data from buffer till trgCP
            if trgCP > 0:
                del self.TDataBuffer[:trgCP]
                del self.TWindow[:trgCP]
                del self.TPredictWindow[:trgCP]

            # Exception with trgCP=0 (windowsize hit max or avg error is less than cutoff).
            # Keep at least cushion number of instances
            if trgCP == 0:
                surplus = len(self.TDataBuffer) - Properties.CUSHION
                if surplus > 0:
                    del self.TDataBuffer[:surplus]
                    del self.TWindow[:surplus]
                    del self.TPredictWindow[:surplus]

            Properties.logger.info('Instances left in source sliding window : ' + str(len(self.SDataBuffer)))
            Properties.logger.info('Instances left in target sliding window : ' + str(len(self.TDataBuffer)))

            Properties.logger.info('Updating ensemble weights')
            self.ensemble.updateWeight(self.TDataBuffer, False)

            # a new model is only trained when both buffers hold data
            if (len(self.SDataBuffer) > 0 and len(self.TDataBuffer) > 0):
                Properties.logger.info('Training a model for target stream')
                self.ensemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, False)
                Properties.logger.info(self.ensemble.getEnsembleSummary())

        # line break after every 100 progress characters
        if (targetIndex+sourceIndex)%100 == 0:
            print('')

    Properties.logger.info('Done !!')
    # an empty stream yields accuracy 0.0 instead of a ZeroDivisionError
    sourceAccuracy = float(trueSourceNum)/sourceIndex if sourceIndex else 0.0
    targetAccuracy = float(trueTargetNum)/targetIndex if targetIndex else 0.0
    return sourceAccuracy, targetAccuracy
|
||||
|
||||
|
||||
"""
|
||||
Main module for MDC2 logic (using two separate ensembles)
|
||||
"""
|
||||
def start2(self, datasetName):
    """Run the MDC2 simulation using two separate ensembles.

    NOTE(review): this is a method of the manager class whose header lies
    outside this chunk; the listing lost its indentation, so the nesting
    below is reconstructed from statement order.

    Source and target instances are consumed pairwise.  The source ensemble
    scores each source instance, the target ensemble predicts each target
    instance, and the running target accuracy and mean confidence are
    written through ``__saveResult``.  When the change detector reports a
    change point on a stream, that stream's sliding windows are trimmed and
    the matching ensemble is re-weighted and extended with a new model.

    Processing stops when either stream is exhausted (previously a shorter
    target stream raised IndexError).

    Args:
        datasetName: result name; confidence values are stored under
            ``datasetName + '_confidence'``.
    """
    def dropOldest(count, *windows):
        """Delete the `count` oldest entries of every given list, in place."""
        if count > 0:
            for window in windows:
                del window[:count]

    def trimAfterDrift(changePoint, dataBuffer, *windows):
        """Discard the buffered instances that precede a change point."""
        dropOldest(changePoint, dataBuffer, *windows)
        # Exception with changePoint=0 (window size hit max or avg error is
        # less than cutoff): keep at least cushion number of instances.
        if changePoint == 0:
            dropOldest(len(dataBuffer) - Properties.CUSHION, dataBuffer, *windows)

    def logWindowSizes():
        """Log how many instances remain in both sliding windows."""
        Properties.logger.info('Instances left in source sliding window : ' + str(len(self.SDataBuffer)))
        Properties.logger.info('Instances left in target sliding window : ' + str(len(self.TDataBuffer)))

    # Get initial data buffer
    self.SInitialDataBuffer = self.source.initialData
    self.TInitialDataBuffer = self.target.initialData

    # Initialize ensembles
    srcEnsemble = Ensemble(Properties.ENSEMBLE_SIZE)
    trgEnsemble = Ensemble(Properties.ENSEMBLE_SIZE)

    Properties.logger.info('Initializing Ensemble ...')
    # source model
    srcEnsemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, True)
    Properties.logger.info('Source Ensemble')
    Properties.logger.info(srcEnsemble.getEnsembleSummary())
    # target model
    trgEnsemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, False)
    Properties.logger.info('Target Ensemble')
    Properties.logger.info(trgEnsemble.getEnsembleSummary())

    trueTargetNum = 0
    targetConfSum = 0

    Properties.logger.info('Starting MDC2 ...')
    for dataIndex, (sdata, tdata) in enumerate(zip(self.source.data, self.target.data)):
        print('.', end="")
        processed = dataIndex + 1

        # Source stream
        self.SDataBuffer.append(sdata)
        resSource = srcEnsemble.evaluateEnsemble(sdata, True)
        self.SWindow.append(resSource[0])

        # Target stream
        self.TDataBuffer.append(tdata)
        resTarget = trgEnsemble.evaluateEnsemble(tdata, False)
        conf = resTarget[1]

        # If conf is very close to 0.0 or 1.0, beta probability might become
        # zero, which can make problems in change detection, so clamp it.
        self.TWindow.append(min(max(conf, 0.1), 0.995))
        self.TPredictWindow.append(resTarget[0])

        # Running target accuracy
        if resTarget[0] == tdata[-1]:
            trueTargetNum += 1
        self.__saveResult(float(trueTargetNum) / processed, datasetName)

        # Running mean confidence
        targetConfSum += conf
        self.__saveResult(float(targetConfSum) / processed, datasetName + '_confidence')

        # Drift detection; -1 means no change point was found.
        srcCP = self.__detectDriftJava(self.SWindow, 0)
        trgCP = self.__detectDriftJava(self.TWindow, 1)

        if srcCP != -1:
            # Sentinel value written into the confidence output on source drift.
            self.__saveResult(5555555.0, datasetName + '_confidence')
            Properties.logger.info('-------------------------- S O U R C E D R I F T ------------------------------------')
            Properties.logger.info('\nDrift found on source stream.')
            Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP))

            trimAfterDrift(srcCP, self.SDataBuffer, self.SWindow)
            logWindowSizes()

            Properties.logger.info('Updating source ensemble weights')
            srcEnsemble.updateWeight(self.SDataBuffer, True)

            Properties.logger.info('Training a model for source stream')
            srcEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, True)
            Properties.logger.info('Source Ensemble')
            Properties.logger.info(srcEnsemble.getEnsembleSummary())

        if trgCP != -1:
            # Sentinel value written into the confidence output on target drift.
            self.__saveResult(7777777.0, datasetName + '_confidence')
            Properties.logger.info('-------------------------- T A R G E T D R I F T ------------------------------------')
            Properties.logger.info('Drift found on target stream.')
            Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP))

            trimAfterDrift(trgCP, self.TDataBuffer, self.TWindow, self.TPredictWindow)
            logWindowSizes()

            Properties.logger.info('Updating target ensemble weights')
            trgEnsemble.updateWeight(self.TDataBuffer, False)

            Properties.logger.info('Training a model for target stream')
            trgEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, False)
            Properties.logger.info('Target Ensemble')
            Properties.logger.info(trgEnsemble.getEnsembleSummary())

        # Break the progress-dot line every 100 instances.
        if processed % 100 == 0:
            print('')

    Properties.logger.info('Done !!')
|
||||
|
||||
|
||||
"""
|
||||
Baseline skmm (single target model with initial train only)
|
||||
"""
|
||||
def start_skmm(self, datasetName):
    """Run the skmm baseline: one model trained once on the initial data.

    NOTE(review): this is a method of the manager class whose header lies
    outside this chunk; the nesting is reconstructed from statement order.

    Every target instance is classified by the single initial model and the
    running target accuracy is written through ``__saveResult``.  Source and
    target instances are still appended to the data buffers, but the model
    is never retrained.  Processing stops when either stream is exhausted
    (previously a shorter target stream raised IndexError).

    Args:
        datasetName: result name under which the accuracy is stored.
    """
    # Get initial data buffer
    self.SInitialDataBuffer = self.source.initialData
    self.TInitialDataBuffer = self.target.initialData

    # Initialize model
    model = Model()
    model.train(self.SInitialDataBuffer, self.TInitialDataBuffer, Properties.MAXVAR)

    trueTargetNum = 0

    Properties.logger.info('Starting skmm baseline ...')
    for dataIndex, (sdata, tdata) in enumerate(zip(self.source.data, self.target.data)):
        print('.', end="")
        processed = dataIndex + 1

        self.SDataBuffer.append(sdata)
        self.TDataBuffer.append(tdata)

        # Model.test returns one [label, probability, true label] row per instance.
        resTarget = model.test([tdata], Properties.MAXVAR)

        # Running target accuracy
        if resTarget[0][0] == tdata[-1]:
            trueTargetNum += 1
        self.__saveResult(float(trueTargetNum) / processed, datasetName)

        # Break the progress-dot line every 100 instances.
        if processed % 100 == 0:
            print('')

    Properties.logger.info('Done !!')
|
||||
|
||||
|
||||
"""
|
||||
Baseline mkmm (single target model trained periodically)
|
||||
"""
|
||||
def start_mkmm(self, datasetName):
    """Run the mkmm baseline: one model that is retrained periodically.

    NOTE(review): this is a method of the manager class whose header lies
    outside this chunk; the nesting is reconstructed from statement order.

    Every target instance is classified by the current model and the running
    target accuracy is written through ``__saveResult``.  After every
    ``Properties.MAX_WINDOW_SIZE`` instances a fresh model is trained on the
    buffered source/target data and both buffers are emptied.  Processing
    stops when either stream is exhausted (previously a shorter target
    stream raised IndexError).

    Args:
        datasetName: result name under which the accuracy is stored.
    """
    # Get initial data buffer
    self.SInitialDataBuffer = self.source.initialData
    self.TInitialDataBuffer = self.target.initialData

    # Initialize model
    model = Model()
    model.train(self.SInitialDataBuffer, self.TInitialDataBuffer, Properties.MAXVAR)

    trueTargetNum = 0

    # The original message said "skmm" (copy-paste from start_skmm).
    Properties.logger.info('Starting mkmm baseline ...')
    for dataIndex, (sdata, tdata) in enumerate(zip(self.source.data, self.target.data)):
        print('.', end="")
        processed = dataIndex + 1

        self.SDataBuffer.append(sdata)
        self.TDataBuffer.append(tdata)

        # Model.test returns one [label, probability, true label] row per instance.
        resTarget = model.test([tdata], Properties.MAXVAR)

        # Running target accuracy
        if resTarget[0][0] == tdata[-1]:
            trueTargetNum += 1
        self.__saveResult(float(trueTargetNum) / processed, datasetName)

        # Break the progress-dot line every 100 instances.
        if processed % 100 == 0:
            print('')

        # Periodic retraining on everything buffered since the last retrain.
        if processed % Properties.MAX_WINDOW_SIZE == 0:
            model = Model()
            model.train(self.SDataBuffer, self.TDataBuffer, Properties.MAXVAR)
            self.SDataBuffer = []
            self.TDataBuffer = []

    Properties.logger.info('Done !!')
|
||||
|
||||
|
||||
"""
|
||||
Baseline srconly using an ensemble of only source classifiers.
|
||||
Target labels predicted from this ensemble using its target weights.
|
||||
"""
|
||||
def start_srconly(self, datasetName):
    """Run the srconly baseline: a single ensemble of source classifiers.

    NOTE(review): this is a method of the manager class whose header lies
    outside this chunk; the nesting is reconstructed from statement order.

    The one ensemble is evaluated on both streams (with the source flag for
    source instances and the target flag for target instances).  On drift in
    either stream the matching windows are trimmed, the ensemble weights for
    that stream are updated and a new source model is generated.  Running
    target accuracy and mean confidence are written through
    ``__saveResult``.  Processing stops when either stream is exhausted
    (previously a shorter target stream raised IndexError).

    Args:
        datasetName: result name; confidence values are stored under
            ``datasetName + '_confidence'``.
    """
    def dropOldest(count, *windows):
        """Delete the `count` oldest entries of every given list, in place."""
        if count > 0:
            for window in windows:
                del window[:count]

    def trimAfterDrift(changePoint, dataBuffer, *windows):
        """Discard the buffered instances that precede a change point."""
        dropOldest(changePoint, dataBuffer, *windows)
        # Exception with changePoint=0 (window size hit max or avg error is
        # less than cutoff): keep at least cushion number of instances.
        if changePoint == 0:
            dropOldest(len(dataBuffer) - Properties.CUSHION, dataBuffer, *windows)

    def logWindowSizes():
        """Log how many instances remain in both sliding windows."""
        Properties.logger.info('Instances left in source sliding window : ' + str(len(self.SDataBuffer)))
        Properties.logger.info('Instances left in target sliding window : ' + str(len(self.TDataBuffer)))

    # Get initial data buffer
    self.SInitialDataBuffer = self.source.initialData
    self.TInitialDataBuffer = self.target.initialData

    # Initialize ensemble
    srcEnsemble = Ensemble(Properties.ENSEMBLE_SIZE)

    Properties.logger.info('Initializing Ensemble ...')
    # source model
    srcEnsemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, True)
    Properties.logger.info('Source Ensemble')
    Properties.logger.info(srcEnsemble.getEnsembleSummary())

    trueTargetNum = 0
    targetConfSum = 0

    Properties.logger.info('Starting srconly-MDC ...')
    for dataIndex, (sdata, tdata) in enumerate(zip(self.source.data, self.target.data)):
        print('.', end="")
        processed = dataIndex + 1

        # Source stream
        self.SDataBuffer.append(sdata)
        resSource = srcEnsemble.evaluateEnsemble(sdata, True)
        self.SWindow.append(resSource[0])

        # Target stream, predicted by the same (source) ensemble
        self.TDataBuffer.append(tdata)
        resTarget = srcEnsemble.evaluateEnsemble(tdata, False)
        conf = resTarget[1]

        # If conf is very close to 0.0 or 1.0, beta probability might become
        # zero, which can make problems in change detection, so clamp it.
        self.TWindow.append(min(max(conf, 0.1), 0.995))
        self.TPredictWindow.append(resTarget[0])

        # Running target accuracy
        if resTarget[0] == tdata[-1]:
            trueTargetNum += 1
        self.__saveResult(float(trueTargetNum) / processed, datasetName)

        # Running mean confidence
        targetConfSum += conf
        self.__saveResult(float(targetConfSum) / processed, datasetName + '_confidence')

        # Drift detection; -1 means no change point was found.
        srcCP = self.__detectDriftJava(self.SWindow, 0)
        trgCP = self.__detectDriftJava(self.TWindow, 1)

        if srcCP != -1:
            # Sentinel value written into the confidence output on source drift.
            self.__saveResult(5555555.0, datasetName + '_confidence')
            Properties.logger.info('-------------------------- S O U R C E D R I F T ------------------------------------')
            Properties.logger.info('\nDrift found on source stream.')
            Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP))

            trimAfterDrift(srcCP, self.SDataBuffer, self.SWindow)
            logWindowSizes()

            Properties.logger.info('Updating source ensemble weights')
            srcEnsemble.updateWeight(self.SDataBuffer, True)

            Properties.logger.info('Training a model for source stream')
            srcEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, True)
            Properties.logger.info('Source Ensemble')
            Properties.logger.info(srcEnsemble.getEnsembleSummary())

        if trgCP != -1:
            # Sentinel value written into the confidence output on target drift.
            self.__saveResult(7777777.0, datasetName + '_confidence')
            Properties.logger.info('-------------------------- T A R G E T D R I F T ------------------------------------')
            Properties.logger.info('Drift found on target stream.')
            Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP))

            trimAfterDrift(trgCP, self.TDataBuffer, self.TWindow, self.TPredictWindow)
            logWindowSizes()

            Properties.logger.info('Updating target ensemble weights')
            srcEnsemble.updateWeight(self.TDataBuffer, False)

            # This baseline only ever adds source models (flag stays True),
            # even though the log text below mentions the target stream.
            Properties.logger.info('Training a model for target stream')
            srcEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, True)
            Properties.logger.info('Target Ensemble')
            Properties.logger.info(srcEnsemble.getEnsembleSummary())

        # Break the progress-dot line every 100 instances.
        if processed % 100 == 0:
            print('')

    Properties.logger.info('Done !!')
|
||||
|
||||
|
||||
"""
|
||||
Baseline trgonly using an ensemble of only target classifiers.
|
||||
Target labels predicted from this ensemble using its target weights.
|
||||
Source drift is computed using source-weighted ensemble prediction.
|
||||
"""
|
||||
def start_trgonly(self, datasetName):
    """Run the trgonly baseline: a single ensemble of target classifiers.

    NOTE(review): this is a method of the manager class whose header lies
    outside this chunk; the nesting is reconstructed from statement order.

    The one ensemble is evaluated on both streams (with the source flag for
    source instances and the target flag for target instances).  On drift in
    either stream the matching windows are trimmed, the ensemble weights for
    that stream are updated and a new target model is generated.  Running
    target accuracy and mean confidence are written through
    ``__saveResult``.  Processing stops when either stream is exhausted
    (previously a shorter target stream raised IndexError).

    Args:
        datasetName: result name; confidence values are stored under
            ``datasetName + '_confidence'``.
    """
    def dropOldest(count, *windows):
        """Delete the `count` oldest entries of every given list, in place."""
        if count > 0:
            for window in windows:
                del window[:count]

    def trimAfterDrift(changePoint, dataBuffer, *windows):
        """Discard the buffered instances that precede a change point."""
        dropOldest(changePoint, dataBuffer, *windows)
        # Exception with changePoint=0 (window size hit max or avg error is
        # less than cutoff): keep at least cushion number of instances.
        if changePoint == 0:
            dropOldest(len(dataBuffer) - Properties.CUSHION, dataBuffer, *windows)

    def logWindowSizes():
        """Log how many instances remain in both sliding windows."""
        Properties.logger.info('Instances left in source sliding window : ' + str(len(self.SDataBuffer)))
        Properties.logger.info('Instances left in target sliding window : ' + str(len(self.TDataBuffer)))

    # Get initial data buffer
    self.SInitialDataBuffer = self.source.initialData
    self.TInitialDataBuffer = self.target.initialData

    # Initialize ensemble
    trgEnsemble = Ensemble(Properties.ENSEMBLE_SIZE)

    Properties.logger.info('Initializing Ensemble ...')
    # target model
    trgEnsemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, False)
    Properties.logger.info('Target Ensemble')
    Properties.logger.info(trgEnsemble.getEnsembleSummary())

    trueTargetNum = 0
    targetConfSum = 0

    Properties.logger.info('Starting trgonly-MDC ...')
    for dataIndex, (sdata, tdata) in enumerate(zip(self.source.data, self.target.data)):
        print('.', end="")
        processed = dataIndex + 1

        # Source stream, scored by the same (target) ensemble
        self.SDataBuffer.append(sdata)
        resSource = trgEnsemble.evaluateEnsemble(sdata, True)
        self.SWindow.append(resSource[0])

        # Target stream
        self.TDataBuffer.append(tdata)
        resTarget = trgEnsemble.evaluateEnsemble(tdata, False)
        conf = resTarget[1]

        # If conf is very close to 0.0 or 1.0, beta probability might become
        # zero, which can make problems in change detection, so clamp it.
        self.TWindow.append(min(max(conf, 0.1), 0.995))
        self.TPredictWindow.append(resTarget[0])

        # Running target accuracy
        if resTarget[0] == tdata[-1]:
            trueTargetNum += 1
        self.__saveResult(float(trueTargetNum) / processed, datasetName)

        # Running mean confidence
        targetConfSum += conf
        self.__saveResult(float(targetConfSum) / processed, datasetName + '_confidence')

        # Drift detection; -1 means no change point was found.
        srcCP = self.__detectDriftJava(self.SWindow, 0)
        trgCP = self.__detectDriftJava(self.TWindow, 1)

        if srcCP != -1:
            # Sentinel value written into the confidence output on source drift.
            self.__saveResult(5555555.0, datasetName + '_confidence')
            Properties.logger.info('-------------------------- S O U R C E D R I F T ------------------------------------')
            Properties.logger.info('\nDrift found on source stream.')
            Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP))

            trimAfterDrift(srcCP, self.SDataBuffer, self.SWindow)
            logWindowSizes()

            Properties.logger.info('Updating source ensemble weights')
            trgEnsemble.updateWeight(self.SDataBuffer, True)

            # This baseline only ever adds target models (flag stays False),
            # even though the log text below mentions the source stream.
            Properties.logger.info('Training a model for source stream')
            trgEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, False)
            Properties.logger.info('Source Ensemble')
            Properties.logger.info(trgEnsemble.getEnsembleSummary())

        if trgCP != -1:
            # Sentinel value written into the confidence output on target drift.
            self.__saveResult(7777777.0, datasetName + '_confidence')
            Properties.logger.info('-------------------------- T A R G E T D R I F T ------------------------------------')
            Properties.logger.info('Drift found on target stream.')
            Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP))

            trimAfterDrift(trgCP, self.TDataBuffer, self.TWindow, self.TPredictWindow)
            logWindowSizes()

            Properties.logger.info('Updating target ensemble weights')
            trgEnsemble.updateWeight(self.TDataBuffer, False)

            Properties.logger.info('Training a model for target stream')
            trgEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, False)
            Properties.logger.info('Target Ensemble')
            Properties.logger.info(trgEnsemble.getEnsembleSummary())

        # Break the progress-dot line every 100 instances.
        if processed % 100 == 0:
            print('')

    Properties.logger.info('Done !!')
|
|
@ -0,0 +1,268 @@
|
|||
import math, numpy, sklearn.metrics.pairwise as sk
|
||||
from cvxopt import matrix, solvers
|
||||
from svmutil import *
|
||||
from grid import *
|
||||
import random, sys
|
||||
|
||||
|
||||
class Model(object):
    """Instance-weighted SVM whose training weights come from Kernel Mean Matching.

    Instances are dicts mapping a feature index to its value, with the class
    label stored under key ``-1``.

    Attributes:
        model: the trained libsvm model, or None before ``train`` is called.
        sweight: model weight for the source stream (see computeModelWeight).
        tweight: model weight for the target stream (see computeModelWeight).
    """

    # Largest number of training instances used to fit one model.
    MAX_TRAIN_SIZE = 1000

    def __init__(self):
        self.model = None
        self.sweight = 1.0
        self.tweight = 1.0

        # Labels in order of first appearance in the training data; used to
        # pick the matching column of the probability estimates in test().
        self.__trainLabelOrder = []

    def __kmm(self, Xtrain, Xtest, sigma):
        """Compute instance (importance) weights using Kernel Mean Matching.

        Args:
            Xtrain: dense training feature rows.
            Xtest: dense test feature rows.
            sigma: value forwarded as the third positional argument of
                ``rbf_kernel``.
                NOTE(review): scikit-learn names that parameter ``gamma``
                (K = exp(-gamma * ||x - y||^2)), while the caller passes the
                median pairwise distance; confirm this is intended.

        Returns:
            A list with one weight per training row.
        """
        n_tr = len(Xtrain)
        n_te = len(Xtest)

        # calculate Kernel
        print('Computing kernel for training data ...')
        K_ns = sk.rbf_kernel(Xtrain, Xtrain, sigma)
        # make it symmetric
        K = 0.5 * (K_ns + K_ns.transpose())

        # calculate kappa
        print('Computing kernel for kappa ...')
        kappa_r = sk.rbf_kernel(Xtrain, Xtest, sigma)
        ones = numpy.ones(shape=(n_te, 1))
        kappa = numpy.dot(kappa_r, ones)
        kappa = -(float(n_tr) / float(n_te)) * kappa

        # calculate eps
        eps = (math.sqrt(n_tr) - 1) / math.sqrt(n_tr)

        # constraints: the sum of the weights stays within n_tr*(1 +/- eps)
        # and every weight lies in [0, 1000].
        A0 = numpy.ones(shape=(1, n_tr))
        A1 = -numpy.ones(shape=(1, n_tr))
        A = numpy.vstack([A0, A1, -numpy.eye(n_tr), numpy.eye(n_tr)])
        b = numpy.array([[n_tr * (eps + 1), n_tr * (eps - 1)]])
        b = numpy.vstack([b.T, -numpy.zeros(shape=(n_tr, 1)), numpy.ones(shape=(n_tr, 1)) * 1000])

        print('Solving quadratic program for beta ...')
        P = matrix(K, tc='d')
        q = matrix(kappa, tc='d')
        G = matrix(A, tc='d')
        h = matrix(b, tc='d')
        beta = solvers.qp(P, q, G, h)
        return list(beta['x'])

    def __build(self, trainX, trainY, beta, svmParam):
        """Build an SVM model.

        Args:
            trainX: dense training feature rows.
            trainY: training labels, one per row.
            beta: instance weights, one per row.
            svmParam: dict with the libsvm cost ``'c'`` and kernel ``'g'`` values.

        Returns:
            The model returned by ``svm_train``.
        """
        prob = svm_problem(beta, trainY, trainX)
        param = svm_parameter('-s 0 -t 2 -q -b 1 -c ' + str(svmParam['c']) + ' -g ' + str(svmParam['g']))
        return svm_train(prob, param)

    def __computeKernelWidth(self, data):
        """Return the median Euclidean distance between all pairs of rows.

        The rows are converted to one array up front, and each row is compared
        against all later rows in a single vectorised step.
        """
        points = numpy.asarray(data, dtype=float)
        dist = []
        for i in range(len(points) - 1):
            diff = points[i + 1:] - points[i]
            dist.extend(numpy.sqrt(numpy.sum(diff ** 2, axis=1)))
        return numpy.median(numpy.array(dist))

    def __subsample(self, traindata):
        """Return at most MAX_TRAIN_SIZE instances of `traindata`.

        Small inputs are returned unchanged.  Larger inputs are sampled
        uniformly without replacement from the whole list (the previous code
        only ever drew indices 0..1000 and dropped duplicates, so it ignored
        most of the data and returned fewer instances than intended).
        """
        if len(traindata) <= self.MAX_TRAIN_SIZE:
            return traindata
        return random.sample(traindata, self.MAX_TRAIN_SIZE)

    def __toDense(self, instance, maxvar):
        """Expand a sparse instance into a list of `maxvar` feature values.

        Features missing from the instance become 0.0; the label stored under
        key -1 is not part of the result.
        """
        return [instance[c] if c in instance else 0.0 for c in range(maxvar)]

    def train(self, traindata, testdata, maxvar):
        """Train a new weighted SVM and store it in ``self.model``.

        Args:
            traindata: labelled training instances.
            testdata: instances of the distribution to match, or None to give
                every training instance a weight of 1.0.
            maxvar: number of feature columns in the dense representation.
        """
        # Fixed SVM parameters.
        svmParam = {'c': 131072, 'g': 0.0001}

        newtraindata = self.__subsample(traindata)

        # The label order must describe this training set only; labels left
        # over from an earlier train() call would select the wrong
        # probability column in test().
        self.__trainLabelOrder = []
        trainX = []
        trainY = []
        for d in newtraindata:
            if d[-1] not in self.__trainLabelOrder:
                self.__trainLabelOrder.append(d[-1])
            trainY.append(d[-1])
            trainX.append(self.__toDense(d, maxvar))

        if testdata is None:
            beta = [1.0] * len(trainX)
        else:
            gammab = self.__computeKernelWidth(trainX)
            testX = [self.__toDense(d, maxvar) for d in testdata]
            beta = self.__kmm(trainX, testX, gammab)

        # Model training
        self.model = self.__build(trainX, trainY, beta, svmParam)

    def test(self, testdata, maxvar):
        """Predict labels for `testdata` with the trained SVM.

        Args:
            testdata: instances to classify; each must carry its label under -1.
            maxvar: number of feature columns in the dense representation.

        Returns:
            One ``[label, probability, true label]`` list per instance, where
            probability is the estimate for the predicted label.
        """
        testY = [d[-1] for d in testdata]
        testX = [self.__toDense(d, maxvar) for d in testdata]

        # svm_predict returns <labels, accuracy, values>
        res = svm_predict(testY, testX, self.model, '-q -b 1')
        labelOrder = self.__trainLabelOrder
        return [[label, values[labelOrder.index(label)], truth]
                for label, values, truth in zip(res[0], res[2], testY)]

    def __computeWeight(self, errorRate):
        """Compute the weight of a source model from its error rate.

        Error rates above 0.5 get the floor weight 0.01; an error rate of 0 is
        treated as 0.01 to keep the logarithm finite.
        """
        if errorRate <= 0.5:
            if errorRate == 0:
                errorRate = 0.01
            return 0.5 * math.log((1 - errorRate) / errorRate)
        return 0.01

    def computeModelWeight(self, data, isSource, maxvar):
        """Set a model weight from the prediction results on `data`.

        For the source weight the error rate against the known labels is used;
        for the target weight the mean prediction probability is used.

        Args:
            data: instances to evaluate.
            isSource: True to update ``sweight``, False to update ``tweight``.
            maxvar: number of feature columns in the dense representation.

        Raises:
            ZeroDivisionError: if `data` is empty.
        """
        result = self.test(data, maxvar)
        if isSource:
            err = sum(1 for r, d in zip(result, data) if r[0] != d[-1])
            self.sweight = self.__computeWeight(float(err) / len(data))
        else:
            conf = sum((r[1] for r in result), 0.0)
            self.tweight = conf / len(result)
|
||||
|
||||
|
||||
|
||||
"""
|
||||
FOR TESTING
|
||||
"""
|
||||
if __name__ == '__main__':
    # Smoke test: train on 10 random labelled instances, weighting them
    # against 5 random test instances, then predict the test instances and
    # print the labels seen in training.
    traindata = []
    testdata = []
    labels = []
    maxvar = 5

    for i in range(10):
        y = random.randint(0, 2)
        x = {-1: y}
        for j in range(maxvar):
            x[j] = random.randint(0, 100)

        if y not in labels:
            labels.append(y)
        traindata.append(x)

    for i in range(5):
        y = random.randint(0, 2)
        x = {-1: y}
        for j in range(maxvar):
            x[j] = random.randint(0, 100)

        testdata.append(x)

    model = Model()
    model.train(traindata, testdata, maxvar)
    model.test(testdata, maxvar)
    print(labels)
|
|
@ -0,0 +1,31 @@
|
|||
import sys
|
||||
from manager import Manager
|
||||
from properties import Properties
|
||||
import time
|
||||
|
||||
|
||||
|
||||
def main(datasetName):
    """Run one stream simulation for `datasetName` and report its results.

    Loads ``config.properties``, builds the source and target file names from
    the configured base directory and suffixes, and runs ``Manager.start``.

    Args:
        datasetName: dataset name used for the input files and result names.

    Returns:
        A dict with keys ``'SourceCR'``, ``'TargetCR'`` (the two values
        returned by ``Manager.start``) and ``'TrainingTime'`` (wall-clock
        seconds spent inside ``Manager.start``).
    """
    props = Properties('config.properties', datasetName)
    srcfile = Properties.BASEDIR + datasetName + Properties.SRCAPPEND
    trgfile = Properties.BASEDIR + datasetName + Properties.TRGAPPEND
    mgr = Manager(srcfile, trgfile)

    try:
        Properties.logger.info(props.summary())
        Properties.logger.info('Start Stream Simulation')

        # Other entry points on the manager: start2 (MDC2) and the baselines
        # start_skmm, start_mkmm, start_srconly and start_trgonly.
        start_time = time.time()
        source_cr, target_cr = mgr.start(datasetName)
        training_time = time.time() - start_time
    finally:
        # Shut the gateway down even when the simulation fails.
        mgr.gateway.shutdown()

    return {'SourceCR': source_cr, 'TargetCR': target_cr, 'TrainingTime': training_time}
|
|
@ -0,0 +1,99 @@
|
|||
import logging, subprocess
|
||||
import threading, random
|
||||
|
||||
|
||||
class Properties(object):
    """Process-wide configuration, stored as class attributes.

    Constructing an instance reads a ``key=value`` properties file, fills in
    the class attributes below, (re)configures the shared logger and starts
    the Java change-point detector in a background thread.
    """

    GAMMA = 0.0
    CUSHION = 0
    SENSITIVITY = 0.0
    MAX_WINDOW_SIZE = 0
    ENSEMBLE_SIZE = 0
    CONFTHRESHOLD = 0.0
    CONFCUTOFF = 0.0
    INITIAL_DATA_SIZE = 0
    MAXVAR = 0

    IDENTIFIER = ''
    OUTFILENAME = ''
    TEMPDIR = ''
    LOGFILE = ''

    BASEDIR = ''
    SRCAPPEND = ''
    TRGAPPEND = ''

    PY4JPORT = 25333

    logger = None

    def __init__(self, propfilename, datasetName):
        """Load the settings and start the change-point detector.

        Args:
            propfilename: path of the ``key=value`` properties file.
            datasetName: dataset name; prefixes the output and log file names.

        Raises:
            KeyError: if a required key is missing from the file.
            ValueError: if a numeric setting cannot be converted.
        """
        settings = self.__parseProperties(propfilename)
        cls = self.__class__

        cls.GAMMA = float(settings['gamma'])
        cls.CUSHION = int(settings['cushion'])
        cls.SENSITIVITY = float(settings['sensitivity'])
        cls.MAX_WINDOW_SIZE = int(settings['maxWindowSize'])
        cls.ENSEMBLE_SIZE = int(settings['ensemble_size'])
        cls.CONFTHRESHOLD = float(settings['confthreshold'])
        cls.CONFCUTOFF = float(settings['confcutoff'])
        cls.INITIAL_DATA_SIZE = int(settings['initialDataSize'])

        cls.IDENTIFIER = datasetName + '_' + str(cls.MAX_WINDOW_SIZE)
        cls.OUTFILENAME = cls.IDENTIFIER + '_' + settings['output_file_name']
        cls.TEMPDIR = settings['tempDir']
        cls.LOGFILE = cls.IDENTIFIER + '_' + settings['logfile']

        cls.logger = self.__setupLogger()

        cls.MAXVAR = 0

        cls.BASEDIR = settings['baseDir']
        cls.SRCAPPEND = settings['srcfileAppend']
        cls.TRGAPPEND = settings['trgfileAppend']

        # A random port lets several simulations run side by side.
        cls.PY4JPORT = random.randint(25333, 30000)

        t = threading.Thread(target=self.__startCPDJava)
        t.daemon = True
        t.start()

    @staticmethod
    def __parseProperties(propfilename):
        """Read ``key=value`` lines from `propfilename` into a dict.

        Blank lines, lines starting with ``#`` and lines without ``=`` are
        skipped; only the first ``=`` separates key from value, so values may
        themselves contain ``=``.
        """
        settings = {}
        with open(propfilename) as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith('#') or '=' not in line:
                    continue
                key, val = line.split('=', 1)
                settings[key.strip()] = val.strip()
        return settings

    def __startCPDJava(self):
        """Run the Java change-point detector; blocks until the process exits."""
        cls = self.__class__
        subprocess.call(['java', '-jar', 'change_point.jar', str(cls.GAMMA), str(cls.SENSITIVITY), str(cls.MAX_WINDOW_SIZE), str(cls.CUSHION), str(cls.CONFCUTOFF), str(cls.PY4JPORT)])

    def __setupLogger(self):
        """Return the module logger, writing to the console and to LOGFILE."""
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)

        # getLogger returns the same object on every call, so handlers left by
        # an earlier Properties instance must be removed first; otherwise every
        # message is emitted once per instance and old log files stay open.
        for stale in list(logger.handlers):
            logger.removeHandler(stale)
            stale.close()

        sh = logging.StreamHandler()
        sh.setLevel(logging.INFO)
        logger.addHandler(sh)

        handler = logging.FileHandler(self.__class__.LOGFILE)
        handler.setLevel(logging.INFO)
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)
        return logger

    def summary(self):
        """Return a multi-line, human-readable listing of the current settings."""
        lines = [
            'Parameter values are as follows:',
            'Gamma = ' + str(self.GAMMA),
            'Sensitivity = ' + str(self.SENSITIVITY),
            'Ensemble Size = ' + str(self.ENSEMBLE_SIZE),
            'Confidence Threshold (NOT USED) = ' + str(self.CONFTHRESHOLD),
            'Confidence Cutoff = ' + str(self.CONFCUTOFF),
            'Max Window Size = ' + str(self.MAX_WINDOW_SIZE),
            'Initial Training Size = ' + str(self.INITIAL_DATA_SIZE),
            'Maximum Num Variables = ' + str(self.MAXVAR),
            'Output File = ' + str(self.OUTFILENAME),
        ]
        return '\n'.join(lines)
|
|
@ -0,0 +1,53 @@
|
|||
|
||||
class Stream(object):
    """A data stream loaded from a CSV (``.csv``) or sparse-format file.

    Each instance is a dict mapping a zero-based feature index to its float
    value, with the class label stored under key ``-1``.

    Attributes:
        initialData: the first `initialSize` instances of the file.
        data: all remaining instances, in file order.
        MAXVAR: the number of feature columns found in the file.
    """

    data = None
    initialData = None

    def __init__(self, filename, classList, initialSize):
        """Read `filename` and split it into initial and streaming data.

        Args:
            filename: input path; names ending in ``.csv`` are parsed as CSV,
                anything else as sparse ``label index:value ...`` lines.
            classList: list of class names, extended in place with every new
                CSV class label; a label's position becomes its numeric value.
            initialSize: number of leading instances stored in initialData.
        """
        self.data = []
        self.initialData = []
        self.MAXVAR = self.__readData(filename, classList, initialSize)

    def __readData(self, filename, classList, initialSize):
        """Parse the file into instances and return the number of variables.

        Blank lines are skipped.  For CSV the last column is the class name;
        for the sparse format a token without ``:`` is the label and feature
        indices are one-based in the file.

        Returns:
            The largest feature count (CSV) or feature index (sparse) seen.
        """
        isCsv = filename.endswith('.csv')
        maxvar = 0
        with open(filename) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue

                d = {}
                if isCsv:
                    features = line.split(',')
                    if features[-1] not in classList:
                        classList.append(features[-1])
                    d[-1] = float(classList.index(features[-1]))
                    for j in range(len(features) - 1):
                        d[j] = float(features[j])
                    maxvar = max(maxvar, len(features) - 1)
                else:
                    # split() without an argument tolerates repeated spaces
                    for fea in line.split():
                        val = fea.split(':')
                        if len(val) < 2:
                            d[-1] = float(val[0])
                        else:
                            d[int(val[0]) - 1] = float(val[1])
                            # track the maximum number of features
                            maxvar = max(maxvar, int(val[0]))

                if len(self.initialData) < initialSize:
                    self.initialData.append(d)
                else:
                    self.data.append(d)

        return maxvar
|
|
@ -0,0 +1,337 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
from ctypes import *
|
||||
from ctypes.util import find_library
|
||||
from os import path
|
||||
import sys
|
||||
|
||||
# Names exported by ``from svm import *``.
__all__ = [
    'libsvm',
    'svm_problem',
    'svm_parameter',
    'toPyModel',
    'gen_svm_nodearray',
    'print_null',
    'svm_node',
    'C_SVC',
    'EPSILON_SVR',
    'LINEAR',
    'NU_SVC',
    'NU_SVR',
    'ONE_CLASS',
    'POLY',
    'PRECOMPUTED',
    'PRINT_STRING_FUN',
    'RBF',
    'SIGMOID',
    'c_double',
    'svm_model',
]
|
||||
|
||||
# Load the LIBSVM shared library: first the copy bundled next to this file,
# then whatever the system library search can locate.
try:
    dirname = path.dirname(path.abspath(__file__))
    if sys.platform == 'win32':
        libsvm = CDLL(path.join(dirname, r'libsvm-weights-3.20\libsvm.dll'))
    else:
        libsvm = CDLL(path.join(dirname, 'libsvm-weights-3.20/libsvm.so.2'))
except (OSError, NameError):
    # OSError: the bundled library is missing or cannot be loaded.
    # NameError: __file__ is undefined (e.g. some interactive sessions).
    # For unix the prefix 'lib' is not considered.
    _lib_path = find_library('svm') or find_library('libsvm')
    if not _lib_path:
        raise Exception('LIBSVM library not found.')
    libsvm = CDLL(_lib_path)
|
||||
|
||||
# SVM formulation identifiers (values accepted by the "-s" option).
C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR = 0, 1, 2, 3, 4

# Kernel identifiers (values accepted by the "-t" option).
LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED = 0, 1, 2, 3, 4
|
||||
|
||||
# ctypes callback type for a C function taking a char* and returning nothing.
PRINT_STRING_FUN = CFUNCTYPE(None, c_char_p)


def print_null(s):
    """Ignore *s*; used as the print callback when quiet mode is requested."""
    return None
|
||||
|
||||
def genFields(names, types):
    """Pair each field name with its type, as a list of (name, type) tuples."""
    return [(field_name, field_type) for field_name, field_type in zip(names, types)]
|
||||
|
||||
def fillprototype(f, restype, argtypes):
    """Set the ``restype`` and ``argtypes`` attributes of the function *f*."""
    f.restype, f.argtypes = restype, argtypes
|
||||
|
||||
class svm_node(Structure):
    """One (index, value) entry of a sparse vector, as a ctypes structure."""

    _names = ["index", "value"]
    _types = [c_int, c_double]
    _fields_ = list(zip(_names, _types))

    def __str__(self):
        """Format the node as ``index:value``."""
        return '{0:d}:{1:g}'.format(self.index, self.value)
|
||||
|
||||
def gen_svm_nodearray(xi, feature_max=None, isKernel=None):
    """
    Convert one instance into a ctypes array of svm_node.

    xi          -- dict {index: value}, or a list/tuple of values. For a
                   list/tuple the values are numbered from 1, unless
                   isKernel is true, in which case they are numbered from 0.
    feature_max -- if given (truthy), indices greater than it are dropped.
    isKernel    -- if true, zero-valued entries are kept.

    Returns (nodes, max_idx): the node array, terminated by a node whose
    index is -1, and the largest index stored (0 when nothing is stored).

    Raises TypeError if xi is not a dict, list or tuple.
    """
    if isinstance(xi, dict):
        index_range = xi.keys()
    elif isinstance(xi, (list, tuple)):
        if not isKernel:
            # idx should start from 1; list() so that a tuple can be
            # shifted as well ([0] + tuple raises TypeError).
            xi = [0] + list(xi)
        index_range = range(len(xi))
    else:
        raise TypeError('xi should be a dictionary, list or tuple')

    if feature_max:
        assert(isinstance(feature_max, int))
        index_range = [j for j in index_range if j <= feature_max]
    if not isKernel:
        # Sparse representation: zero entries are not stored.
        index_range = [j for j in index_range if xi[j] != 0]

    index_range = sorted(index_range)
    ret = (svm_node * (len(index_range)+1))()
    ret[-1].index = -1  # terminator node
    for idx, j in enumerate(index_range):
        ret[idx].index = j
        ret[idx].value = xi[j]
    max_idx = index_range[-1] if index_range else 0
    return ret, max_idx
|
||||
|
||||
class svm_problem(Structure):
    """
    Training set with per-instance weights, as a ctypes structure.

    Fields: l (number of instances), y (targets), x (node arrays) and
    W (instance weights). The Python-side attributes x_space (the node
    arrays, kept referenced) and n (largest feature index) are also set.
    """

    _names = ["l", "y", "x", "W"]
    _types = [c_int, POINTER(c_double), POINTER(POINTER(svm_node)), POINTER(c_double)]
    _fields_ = genFields(_names, _types)

    def __init__(self, W, y, x, isKernel=None):
        """
        Build the problem from weights W, targets y and instances x.

        An empty W means a weight of 1 for every instance.
        Raises ValueError when the lengths of W, y and x disagree.
        """
        instance_count = len(x)
        if len(y) != instance_count:
            raise ValueError("len(y) != len(x)")
        if len(W) not in (0, instance_count):
            raise ValueError("len(W) != len(x)")

        size = len(y)
        self.l = size
        weights = W if len(W) != 0 else [1] * size

        # Convert every instance and remember the largest index seen.
        self.x_space = []
        largest_index = 0
        for instance in x:
            nodes, top_index = gen_svm_nodearray(instance, isKernel=isKernel)
            self.x_space.append(nodes)
            if top_index > largest_index:
                largest_index = top_index
        self.n = largest_index

        self.W = (c_double * size)()
        for pos, weight in enumerate(weights):
            self.W[pos] = weight

        self.y = (c_double * size)()
        for pos, target in enumerate(y):
            self.y[pos] = target

        self.x = (POINTER(svm_node) * size)()
        for pos, nodes in enumerate(self.x_space):
            self.x[pos] = nodes
|
||||
|
||||
class svm_parameter(Structure):
    """Training parameters as a ctypes structure, filled from an option string."""

    _names = ["svm_type", "kernel_type", "degree", "gamma", "coef0",
              "cache_size", "eps", "C", "nr_weight", "weight_label", "weight",
              "nu", "p", "shrinking", "probability"]
    _types = [c_int, c_int, c_int, c_double, c_double,
              c_double, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double),
              c_double, c_double, c_int, c_int]
    _fields_ = genFields(_names, _types)

    def __init__(self, options = None):
        """Parse *options* (a str or list of tokens); None means no options."""
        self.parse_options('' if options is None else options)

    def __str__(self):
        """List every structure field and Python-side attribute, one per line."""
        attrs = svm_parameter._names + list(self.__dict__.keys())
        text = ''.join(' %s: %s\n' % (attr, getattr(self, attr)) for attr in attrs)
        return text.strip()

    def set_to_default_values(self):
        """Reset every parameter to its default value."""
        # Structure fields.
        self.svm_type = C_SVC
        self.kernel_type = RBF
        self.degree = 3
        self.gamma = 0
        self.coef0 = 0
        self.nu = 0.5
        self.cache_size = 100
        self.C = 1
        self.eps = 0.001
        self.p = 0.1
        self.shrinking = 1
        self.probability = 0
        self.nr_weight = 0
        self.weight_label = (c_int*0)()
        self.weight = (c_double*0)()
        # Python-side settings (not part of the C structure).
        self.cross_validation = False
        self.nr_fold = 0
        self.print_func = cast(None, PRINT_STRING_FUN)

    def parse_options(self, options):
        """
        Reset to defaults, then apply *options*.

        options -- a list of tokens or a whitespace-separated string.
        Raises TypeError for any other type and ValueError for an unknown
        option or a cross-validation fold count below 2.
        """
        if isinstance(options, list):
            argv = options
        elif isinstance(options, str):
            argv = options.split()
        else:
            raise TypeError("arg 1 should be a list or a str.")
        self.set_to_default_values()
        self.print_func = cast(None, PRINT_STRING_FUN)
        weight_label = []
        weight = []

        # Options that take one value: flag -> (attribute, converter).
        valued_options = {
            "-s": ("svm_type", int),
            "-t": ("kernel_type", int),
            "-d": ("degree", int),
            "-g": ("gamma", float),
            "-r": ("coef0", float),
            "-n": ("nu", float),
            "-m": ("cache_size", float),
            "-c": ("C", float),
            "-e": ("eps", float),
            "-p": ("p", float),
            "-h": ("shrinking", int),
            "-b": ("probability", int),
        }

        i = 0
        while i < len(argv):
            flag = argv[i]
            if flag in valued_options:
                attr, convert = valued_options[flag]
                i += 1
                setattr(self, attr, convert(argv[i]))
            elif flag == "-q":
                self.print_func = PRINT_STRING_FUN(print_null)
            elif flag == "-v":
                i += 1
                self.cross_validation = 1
                self.nr_fold = int(argv[i])
                if self.nr_fold < 2:
                    raise ValueError("n-fold cross validation: n must >= 2")
            elif flag.startswith("-w"):
                # "-w<label> <weight>": the label is glued to the flag.
                i += 1
                self.nr_weight += 1
                weight_label.append(int(flag[2:]))
                weight.append(float(argv[i]))
            else:
                raise ValueError("Wrong options")
            i += 1

        libsvm.svm_set_print_string_function(self.print_func)
        self.weight_label = (c_int*self.nr_weight)()
        self.weight = (c_double*self.nr_weight)()
        for pos in range(self.nr_weight):
            self.weight[pos] = weight[pos]
            self.weight_label[pos] = weight_label[pos]
|
||||
|
||||
class svm_model(Structure):
    """
    A trained model as a ctypes structure.

    Instances are normally obtained through toPyModel(), which marks them as
    created by the C library so that their memory is released on deletion.
    """

    _names = ['param', 'nr_class', 'l', 'SV', 'sv_coef', 'rho',
              'probA', 'probB', 'sv_indices', 'label', 'nSV', 'free_sv']
    _types = [svm_parameter, c_int, c_int, POINTER(POINTER(svm_node)),
              POINTER(POINTER(c_double)), POINTER(c_double),
              POINTER(c_double), POINTER(c_double), POINTER(c_int),
              POINTER(c_int), POINTER(c_int), c_int]
    _fields_ = genFields(_names, _types)

    def __init__(self):
        self.__createfrom__ = 'python'

    def __del__(self):
        # free memory created by C to avoid memory leak
        if hasattr(self, '__createfrom__') and self.__createfrom__ == 'C':
            libsvm.svm_free_and_destroy_model(pointer(self))

    def get_svm_type(self):
        """Return the result of the library's svm_get_svm_type for this model."""
        return libsvm.svm_get_svm_type(self)

    def get_nr_class(self):
        """Return the result of the library's svm_get_nr_class for this model."""
        return libsvm.svm_get_nr_class(self)

    def get_svr_probability(self):
        """Return the result of the library's svm_get_svr_probability for this model."""
        return libsvm.svm_get_svr_probability(self)

    def get_labels(self):
        """Return the class labels as a list with get_nr_class() entries."""
        nr_class = self.get_nr_class()
        labels = (c_int * nr_class)()
        libsvm.svm_get_labels(self, labels)
        return labels[:nr_class]

    def get_sv_indices(self):
        """Return the support-vector indices as a list with get_nr_sv() entries."""
        total_sv = self.get_nr_sv()
        sv_indices = (c_int * total_sv)()
        libsvm.svm_get_sv_indices(self, sv_indices)
        return sv_indices[:total_sv]

    def get_nr_sv(self):
        """Return the result of the library's svm_get_nr_sv for this model."""
        return libsvm.svm_get_nr_sv(self)

    def is_probability_model(self):
        """Return True if svm_check_probability_model reports 1 for this model."""
        return (libsvm.svm_check_probability_model(self) == 1)

    def get_sv_coef(self):
        """Return one tuple of (nr_class - 1) coefficients per support vector."""
        # range (not xrange) so this also runs under Python 3.
        return [tuple(self.sv_coef[j][i] for j in range(self.nr_class - 1))
                for i in range(self.l)]

    def get_SV(self):
        """Return the support vectors as a list of {index: value} dicts."""
        result = []
        for sparse_sv in self.SV[:self.l]:
            row = dict()

            i = 0
            # A node with index -1 terminates the vector; it is an
            # end marker, not a feature, so it is not copied into the row.
            while sparse_sv[i].index != -1:
                row[sparse_sv[i].index] = sparse_sv[i].value
                i += 1

            result.append(row)
        return result
|
||||
|
||||
def toPyModel(model_ptr):
    """
    toPyModel(model_ptr) -> svm_model

    Return the svm_model that the ctypes POINTER(svm_model) refers to,
    tagged as created by the C library. Raises ValueError for a null pointer.
    """
    if not model_ptr:
        raise ValueError("Null pointer")
    model = model_ptr.contents
    model.__createfrom__ = 'C'
    return model
|
||||
|
||||
def _declare_prototypes():
    """Declare return and argument types for every LIBSVM function used here."""
    model_p = POINTER(svm_model)
    problem_p = POINTER(svm_problem)
    param_p = POINTER(svm_parameter)
    node_p = POINTER(svm_node)
    double_p = POINTER(c_double)
    int_p = POINTER(c_int)

    prototypes = (
        ('svm_train', model_p, [problem_p, param_p]),
        ('svm_cross_validation', None, [problem_p, param_p, c_int, double_p]),

        ('svm_save_model', c_int, [c_char_p, model_p]),
        ('svm_load_model', model_p, [c_char_p]),

        ('svm_get_svm_type', c_int, [model_p]),
        ('svm_get_nr_class', c_int, [model_p]),
        ('svm_get_labels', None, [model_p, int_p]),
        ('svm_get_sv_indices', None, [model_p, int_p]),
        ('svm_get_nr_sv', c_int, [model_p]),
        ('svm_get_svr_probability', c_double, [model_p]),

        ('svm_predict_values', c_double, [model_p, node_p, double_p]),
        ('svm_predict', c_double, [model_p, node_p]),
        ('svm_predict_probability', c_double, [model_p, node_p, double_p]),

        ('svm_free_model_content', None, [model_p]),
        ('svm_free_and_destroy_model', None, [POINTER(model_p)]),
        ('svm_destroy_param', None, [param_p]),

        ('svm_check_parameter', c_char_p, [problem_p, param_p]),
        ('svm_check_probability_model', c_int, [model_p]),
        ('svm_set_print_string_function', None, [PRINT_STRING_FUN]),
    )
    for func_name, restype, argtypes in prototypes:
        fillprototype(getattr(libsvm, func_name), restype, argtypes)


_declare_prototypes()
|
|
@ -0,0 +1,263 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import os
|
||||
import sys
|
||||
from svm import *
|
||||
from svm import __all__ as svm_all
|
||||
|
||||
|
||||
# Public API: the helpers defined here plus everything svm.py exports.
__all__ = [
    'evaluations',
    'svm_load_model',
    'svm_predict',
    'svm_read_problem',
    'svm_save_model',
    'svm_train',
] + svm_all

# Put this file's directory at the front of the module search path.
sys.path = [os.path.dirname(os.path.abspath(__file__))] + sys.path
|
||||
|
||||
def svm_read_problem(data_file_name):
    """
    svm_read_problem(data_file_name) -> [y, x]

    Read LIBSVM-format data from data_file_name and return labels y
    and data instances x.

    Each non-blank line is ``label index:value index:value ...``. y is a
    list of float labels and x a list of {int index: float value} dicts.
    Blank lines are skipped.
    """
    prob_y = []
    prob_x = []
    # The context manager closes the file even if a line fails to parse.
    with open(data_file_name) as data_file:
        for line in data_file:
            parts = line.split(None, 1)
            if not parts:
                continue
            label = parts[0]
            # In case an instance with all zero features
            features = parts[1] if len(parts) > 1 else ''
            xi = {}
            for e in features.split():
                ind, val = e.split(":")
                xi[int(ind)] = float(val)
            prob_y.append(float(label))
            prob_x.append(xi)
    return (prob_y, prob_x)
|
||||
|
||||
def svm_load_model(model_file_name):
    """
    svm_load_model(model_file_name) -> model

    Load a LIBSVM model from model_file_name and return it. If the library
    returns a null model, a message is printed and None is returned.
    """
    loaded = libsvm.svm_load_model(model_file_name.encode())
    if loaded:
        return toPyModel(loaded)
    print("can't open model file %s" % model_file_name)
    return None
|
||||
|
||||
def svm_save_model(model_file_name, model):
    """
    svm_save_model(model_file_name, model) -> None

    Write the LIBSVM model to the file model_file_name. The value returned
    by the library call is not inspected.
    """
    encoded_name = model_file_name.encode()
    libsvm.svm_save_model(encoded_name, model)
|
||||
|
||||
def evaluations(ty, pv):
    """
    evaluations(ty, pv) -> (ACC, MSE, SCC)

    Calculate accuracy, mean squared error and squared correlation coefficient
    using the true values (ty) and predicted values (pv).

    ACC is a percentage. SCC is NaN when its denominator is zero (for
    example when all true or all predicted values are equal).

    Raises ValueError if the two sequences differ in length, and
    ZeroDivisionError if they are empty.
    """
    if len(ty) != len(pv):
        raise ValueError("len(ty) must equal to len(pv)")
    total_correct = total_error = 0
    sumv = sumy = sumvv = sumyy = sumvy = 0
    for v, y in zip(pv, ty):
        if y == v:
            total_correct += 1
        total_error += (v-y)*(v-y)
        sumv += v
        sumy += y
        sumvv += v*v
        sumyy += y*y
        sumvy += v*y
    l = len(ty)
    ACC = 100.0*total_correct/l
    # float() keeps true division for integer inputs under Python 2 as well.
    MSE = float(total_error)/l
    covariance = l*sumvy - sumv*sumy
    try:
        SCC = float(covariance*covariance)/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy))
    except ZeroDivisionError:
        SCC = float('nan')
    return (ACC, MSE, SCC)
|
||||
|
||||
def svm_train(arg1, arg2=None, arg3=None, arg4 = None):
    """
    svm_train(W, y, x [, options]) -> model | ACC | MSE
    svm_train(prob [, options]) -> model | ACC | MSE
    svm_train(prob, param) -> model | ACC| MSE

    Train an SVM model from weighted data (W, y, x) or an svm_problem prob using
    'options' or an svm_parameter param.
    If '-v' is specified in 'options' (i.e., cross validation)
    either accuracy (ACC) or mean-squared error (MSE) is returned.
    options:
        -s svm_type : set type of SVM (default 0)
            0 -- C-SVC		(multi-class classification)
            1 -- nu-SVC		(multi-class classification)
            2 -- one-class SVM
            3 -- epsilon-SVR	(regression)
            4 -- nu-SVR		(regression)
        -t kernel_type : set type of kernel function (default 2)
            0 -- linear: u'*v
            1 -- polynomial: (gamma*u'*v + coef0)^degree
            2 -- radial basis function: exp(-gamma*|u-v|^2)
            3 -- sigmoid: tanh(gamma*u'*v + coef0)
            4 -- precomputed kernel (kernel values in training_set_file)
        -d degree : set degree in kernel function (default 3)
        -g gamma : set gamma in kernel function (default 1/num_features)
        -r coef0 : set coef0 in kernel function (default 0)
        -c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)
        -n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)
        -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
        -m cachesize : set cache memory size in MB (default 100)
        -e epsilon : set tolerance of termination criterion (default 0.001)
        -h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)
        -b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)
        -wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)
        -v n: n-fold cross validation mode
        -q : quiet mode (no outputs)

    Raises TypeError when the arguments match none of the call forms and
    ValueError when the precomputed-kernel input or the parameters are
    rejected.
    """
    prob, param = None, None
    if isinstance(arg1, (list, tuple)):
        assert isinstance(arg2, (list, tuple))
        # x may be a tuple as well, like W and y.
        assert isinstance(arg3, (list, tuple))
        W, y, x, options = arg1, arg2, arg3, arg4
        param = svm_parameter(options)
        prob = svm_problem(W, y, x, isKernel=(param.kernel_type == PRECOMPUTED))
    elif isinstance(arg1, svm_problem):
        prob = arg1
        if isinstance(arg2, svm_parameter):
            param = arg2
        else:
            param = svm_parameter(arg2)
    if prob is None or param is None:
        raise TypeError("Wrong types for the arguments")

    if param.kernel_type == PRECOMPUTED:
        for xi in prob.x_space:
            first = xi[0]
            if first.index != 0:
                raise ValueError('Wrong input format: first column must be 0:sample_serial_number')
            if first.value <= 0 or first.value > prob.n:
                raise ValueError('Wrong input format: sample_serial_number out of range')

    # gamma left at 0 means "use the default", i.e. 1/num_features.
    if param.gamma == 0 and prob.n > 0:
        param.gamma = 1.0 / prob.n
    libsvm.svm_set_print_string_function(param.print_func)
    err_msg = libsvm.svm_check_parameter(prob, param)
    if err_msg:
        # Under Python 3 the C string arrives as bytes; decode it so the
        # message does not read "b'...'".
        if not isinstance(err_msg, str):
            err_msg = err_msg.decode()
        raise ValueError('Error: %s' % err_msg)

    if param.cross_validation:
        l, nr_fold = prob.l, param.nr_fold
        target = (c_double * l)()
        libsvm.svm_cross_validation(prob, param, nr_fold, target)
        ACC, MSE, SCC = evaluations(prob.y[:l], target[:l])
        if param.svm_type in [EPSILON_SVR, NU_SVR]:
            print("Cross Validation Mean squared error = %g" % MSE)
            print("Cross Validation Squared correlation coefficient = %g" % SCC)
            return MSE
        else:
            print("Cross Validation Accuracy = %g%%" % ACC)
            return ACC
    else:
        m = libsvm.svm_train(prob, param)
        m = toPyModel(m)

        # If prob is destroyed, data including SVs pointed by m can remain.
        m.x_space = prob.x_space
        return m
|
||||
|
||||
def svm_predict(y, x, m, options=""):
    """
    svm_predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals)

    Predict data (y, x) with the SVM model m.
    options:
        -b probability_estimates: whether to predict probability estimates,
            0 or 1 (default 0); for one-class SVM only 0 is supported.
        -q : quiet mode (no outputs).

    The return tuple contains
    p_labels: a list of predicted labels
    p_acc: a tuple including  accuracy (for classification), mean-squared
           error, and squared correlation coefficient (for regression).
    p_vals: a list of decision values or probability estimates (if '-b 1'
            is specified). If k is the number of classes, for decision values,
            each element includes results of predicting k(k-1)/2 binary-class
            SVMs. For probabilities, each element contains k values indicating
            the probability that the testing instance is in each class.
            Note that the order of classes here is the same as 'model.label'
            field in the model structure.

    Raises ValueError for an unknown option, or when '-b 1' is requested
    for a model without probability information.
    """

    def info(s):
        print(s)

    predict_probability = 0
    argv = options.split()
    i = 0
    while i < len(argv):
        if argv[i] == '-b':
            i += 1
            predict_probability = int(argv[i])
        elif argv[i] == '-q':
            info = print_null
        else:
            raise ValueError("Wrong options")
        i += 1

    svm_type = m.get_svm_type()
    is_prob_model = m.is_probability_model()
    nr_class = m.get_nr_class()
    is_kernel = (m.param.kernel_type == PRECOMPUTED)
    pred_labels = []
    pred_values = []

    if predict_probability:
        if not is_prob_model:
            raise ValueError("Model does not support probabiliy estimates")

        if svm_type in [NU_SVR, EPSILON_SVR]:
            info("Prob. model for test data: target value = predicted value + z,\n"
            "z: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g" % m.get_svr_probability())
            nr_class = 0

        prob_estimates = (c_double * nr_class)()
        for xi in x:
            xi, _ = gen_svm_nodearray(xi, isKernel=is_kernel)
            label = libsvm.svm_predict_probability(m, xi, prob_estimates)
            values = prob_estimates[:nr_class]
            pred_labels += [label]
            pred_values += [values]
    else:
        if is_prob_model:
            info("Model supports probability estimates, but disabled in predicton.")
        # One-class and regression models yield a single decision value;
        # classifiers (C-SVC and nu-SVC) yield one per pair of classes.
        # nu-SVC must therefore NOT be in this tuple, otherwise the buffer
        # below is too small for the values the library writes into it.
        if svm_type in (ONE_CLASS, EPSILON_SVR, NU_SVR):
            nr_classifier = 1
        else:
            nr_classifier = nr_class*(nr_class-1)//2
        dec_values = (c_double * nr_classifier)()
        for xi in x:
            xi, _ = gen_svm_nodearray(xi, isKernel=is_kernel)
            label = libsvm.svm_predict_values(m, xi, dec_values)
            if nr_class == 1:
                values = [1]
            else:
                values = dec_values[:nr_classifier]
            pred_labels += [label]
            pred_values += [values]

    ACC, MSE, SCC = evaluations(y, pred_labels)
    l = len(y)
    if svm_type in [EPSILON_SVR, NU_SVR]:
        info("Mean squared error = %g (regression)" % MSE)
        info("Squared correlation coefficient = %g (regression)" % SCC)
    else:
        info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))

    return pred_labels, (ACC, MSE, SCC), pred_values
|
||||
|
||||
|
105
README.md
105
README.md
|
@ -1,2 +1,103 @@
|
|||
# ACDC
|
||||
ACDC: Online Unsupervised Cross-Domain Adaptation
|
||||
# Welcome to ACDC!
|
||||
|
||||
ACDC is a framework for autonomous cross-domain conversion. It handles fitting/training and concept drifts with a completely self-evolving structure and achieves domain adaptation via a domain-adversarial classifier module, all without the need for hyper-parameter tuning.
|
||||
|
||||
The paper is still under review. You will find the following on this repository:
|
||||
|
||||
- Original ACDC source-code, so you can re-validate the results presented on screen, but without being able to view the code.
|
||||
- A compilation of ACDC numerical results, including both experiments and ablation study.
|
||||
- Source-code of the baselines used throughout the paper.
|
||||
- A compilation of baselines numerical results.
|
||||
|
||||
|
||||
# Setting up your environments
|
||||
|
||||
We have source-codes used mainly in three languages:
|
||||
|
||||
- Python (including Python 2 and Python 3)
|
||||
- Matlab
|
||||
- Java
|
||||
|
||||
You will need Matlab to run the following baselines:
|
||||
|
||||
- ATL
|
||||
|
||||
You will need Python to run the following baselines:
|
||||
|
||||
- ACDC
|
||||
- MSC
|
||||
- FUSION
|
||||
|
||||
You will need Java (>13) to run the following baselines:
|
||||
|
||||
- Melanie
|
||||
|
||||
|
||||
While the Matlab source-codes are probably plug-and-play once you install Matlab on your machine, the Python source-codes use different environments. However, we organized and configured them for you, so you can install each one with a single command. The Java codes are a bit harder to handle, so the best option is to follow the [original Melanie repository](https://github.com/nino2222/Melanie) to configure your environment. You can still use ACDC to prepare the datasets in the Melanie format.
|
||||
|
||||
Make sure that you have [Anaconda](https://www.anaconda.com/) or [Conda](https://docs.conda.io/en/latest/miniconda.html) installed on your machine. The operating system can be Windows, Mac or Linux.
|
||||
|
||||
Open your Anaconda Prompt and navigate to the directory of the source-code you want to execute, for example, the ACDC directory.
|
||||
|
||||
Run the following command:
|
||||
|
||||
```conda env create -f environment.yml```
|
||||
|
||||
This command will create a conda environment called `acdc` if you run it in the ACDC folder. The environment will automatically install the correct Python version that the source-code needs (ACDC uses the most recent one) and its dependencies.
|
||||
If you run the above command at the MSC folder, you will install a conda environment called `msc`. The same behavior extends to the FUSION and DFAMCD folders.
|
||||
|
||||
# Downloading the benchmarks
|
||||
To make the process simpler and automatic, all benchmarks are managed through a Python implementation. Some benchmarks are very large, so make sure you have enough storage space on your machine and that you are connected to the internet.
|
||||
|
||||
ACDC will download and configure every benchmark automatically, applying concept drifts whenever necessary. If you use the `prepare_datasets.py` files found on every baseline folder, it will download and generate datasets according to what is used throughout the paper. You can read and evaluate this file to make sure the benchmarks are configured correctly.
|
||||
|
||||
If you want to test other variations of concept drifts, or even download and set the benchmarks without concept drifts, you can perform the following actions:
|
||||
|
||||
- Set up ACDC environment
|
||||
- Activate ACDC conda environment
|
||||
- Run `python ACDC.py` command
|
||||
|
||||
This command will print a number of instructions of how ACDC works, including how to download, prepare and save different benchmarks. You can also re-run ACDC with configurations similar or different from the paper.
|
||||
|
||||
Make sure that ACDC has already downloaded every benchmark before running `prepare_datasets.py` on the baselines, as the latter will use the `data` folder generated by ACDC. To download the benchmarks, execute the following commands in the ACDC folder:
|
||||
|
||||
```
|
||||
conda activate acdc
|
||||
python -c "import ACDC as acdc; acdc.pre_download_benchmarks()"
|
||||
```
|
||||
|
||||
# Running ACDC
|
||||
After setting up your environment, just run `python ACDC.pyc`. The script will print a list of commands for you.
|
||||
|
||||
## Example: Running ACDC with USPS --> MNIST experiment
|
||||
|
||||
After setting up your environment, just run the following command in the ACDC directory:
|
||||
|
||||
```
|
||||
python -c "import ACDC as acdc; acdc.acdc('usps-16','mnist-16',5,7,False)"
|
||||
```
|
||||
|
||||
or just:
|
||||
|
||||
```
|
||||
python -c "import ACDC as acdc; acdc.acdc('usps-16','mnist-16')"
|
||||
```
|
||||
|
||||
You can also create a Jupyter file into ACDC directory and create a cell with the following command:
|
||||
```
|
||||
import ACDC as acdc
|
||||
acdc.acdc('usps-16','mnist-16')
|
||||
```
|
||||
|
||||
## Example: Running ACDC Ablation Study A
|
||||
|
||||
The ablation studies turn off or disable some functionalities of the ACDC framework. To run ACDC Ablation Study A, just execute the same command as before, but replacing `ACDC.pyc` with `ACDC_Ablation_A.pyc`, like:
|
||||
|
||||
```
|
||||
python -c "import ACDC_Ablation_A as acdc; acdc.acdc('usps-16','mnist-16')"
|
||||
```
|
||||
You can do something similar in a Jupyter file:
|
||||
```
|
||||
import ACDC_Ablation_A as acdc
|
||||
acdc.acdc('usps-16','mnist-16')
|
||||
```
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,168 @@
|
|||
name: acdc
|
||||
channels:
|
||||
- pytorch
|
||||
- anaconda
|
||||
- conda-forge
|
||||
- defaults
|
||||
dependencies:
|
||||
- alembic=1.3.2=py_0
|
||||
- async_generator=1.10=py_0
|
||||
- attrs=19.3.0=py_0
|
||||
- backcall=0.1.0=py37_0
|
||||
- blas=1.0=mkl
|
||||
- bleach=3.1.0=py37_0
|
||||
- blinker=1.4=py_1
|
||||
- blosc=1.16.3=h7bd577a_0
|
||||
- boto=2.49.0=py_0
|
||||
- boto3=1.10.50=py_0
|
||||
- botocore=1.13.50=py_0
|
||||
- bz2file=0.98=py_0
|
||||
- bzip2=1.0.8=he774522_0
|
||||
- ca-certificates=2019.11.28=hecc5488_0
|
||||
- certifi=2019.11.28=py37_0
|
||||
- certipy=0.1.3=py_0
|
||||
- cffi=1.13.2=py37h7a1dbc1_0
|
||||
- chardet=3.0.4=py37_1003
|
||||
- colorama=0.4.3=py_0
|
||||
- configurable-http-proxy=4.2.0=node13_he01fd0c_2
|
||||
- cryptography=2.8=py37hb32ad35_1
|
||||
- cudatoolkit=10.1.243=h74a9793_0
|
||||
- cycler=0.10.0=py37_0
|
||||
- decorator=4.4.1=py_0
|
||||
- defusedxml=0.6.0=py_0
|
||||
- docutils=0.15.2=py37_0
|
||||
- entrypoints=0.3=py37_0
|
||||
- freetype=2.9.1=ha9979f8_1
|
||||
- gensim=3.8.1=py37h6538335_1
|
||||
- hdf5=1.10.4=h7ebc959_0
|
||||
- icc_rt=2019.0.0=h0cc432a_1
|
||||
- icu=58.2=ha66f8fd_1
|
||||
- idna=2.8=py37_1000
|
||||
- importlib_metadata=1.3.0=py37_0
|
||||
- intel-openmp=2019.4=245
|
||||
- ipykernel=5.1.3=py37h39e3cac_0
|
||||
- ipython=7.11.1=py37h39e3cac_0
|
||||
- ipython_genutils=0.2.0=py37_0
|
||||
- ipywidgets=7.5.1=py_0
|
||||
- jedi=0.15.2=py37_0
|
||||
- jinja2=2.10.3=py_0
|
||||
- jmespath=0.9.4=py_0
|
||||
- jpeg=9b=hb83a4c4_2
|
||||
- jsonschema=3.2.0=py37_0
|
||||
- jupyter=1.0.0=py37_7
|
||||
- jupyter_client=5.3.4=py37_0
|
||||
- jupyter_console=6.0.0=py37_0
|
||||
- jupyter_core=4.6.1=py37_0
|
||||
- jupyterhub=1.0.0=py37_0
|
||||
- kiwisolver=1.1.0=py37ha925a31_0
|
||||
- krb5=1.16.4=hc04afaa_0
|
||||
- libcurl=7.65.3=h4496350_0
|
||||
- libiconv=1.15=h1df5818_7
|
||||
- libpng=1.6.37=h2a8f88b_0
|
||||
- libsodium=1.0.16=h9d3ae62_0
|
||||
- libssh2=1.8.2=h642c060_2
|
||||
- libtiff=4.1.0=h56a325e_0
|
||||
- libxml2=2.9.9=h464c3ec_0
|
||||
- libxslt=1.1.33=h579f668_0
|
||||
- lxml=4.4.2=py37h1350720_0
|
||||
- lz4-c=1.8.1.2=h2fa13f4_0
|
||||
- lzo=2.10=vc14h0a64fa6_1
|
||||
- m2w64-gcc-libgfortran=5.3.0=6
|
||||
- m2w64-gcc-libs=5.3.0=7
|
||||
- m2w64-gcc-libs-core=5.3.0=7
|
||||
- m2w64-gmp=6.1.0=2
|
||||
- m2w64-libwinpthread-git=5.0.0.4634.697f757=2
|
||||
- mako=1.1.0=py_0
|
||||
- markupsafe=1.1.1=py37he774522_0
|
||||
- matplotlib=3.1.1=py37hc8f65d3_0
|
||||
- mistune=0.8.4=py37he774522_0
|
||||
- mkl=2019.4=245
|
||||
- mkl-service=2.3.0=py37hb782905_0
|
||||
- mkl_fft=1.0.15=py37h14836fe_0
|
||||
- mkl_random=1.1.0=py37h675688f_0
|
||||
- mock=3.0.5=py37_0
|
||||
- more-itertools=8.0.2=py_0
|
||||
- msys2-conda-epoch=20160418=1
|
||||
- nbconvert=5.6.1=py37_0
|
||||
- nbformat=4.4.0=py37_0
|
||||
- ninja=1.9.0=py37h74a9793_0
|
||||
- nltk=3.4.5=py37_0
|
||||
- nodejs=13.6.0=0
|
||||
- notebook=6.0.2=py37_0
|
||||
- numexpr=2.7.0=py37hdce8814_0
|
||||
- numpy=1.17.4=py37h4320e6b_0
|
||||
- numpy-base=1.17.4=py37hc3f5095_0
|
||||
- oauthlib=3.0.1=py_0
|
||||
- olefile=0.46=py37_0
|
||||
- openssl=1.1.1d=hfa6e2cd_0
|
||||
- pandas=0.25.3=py37ha925a31_0
|
||||
- pandoc=2.2.3.2=0
|
||||
- pandocfilters=1.4.2=py37_1
|
||||
- parso=0.5.2=py_0
|
||||
- patsy=0.5.1=py37_0
|
||||
- pickleshare=0.7.5=py37_0
|
||||
- pillow=6.2.0=py37hdc69c19_0
|
||||
- pip=19.3.1=py37_0
|
||||
- prometheus_client=0.7.1=py_0
|
||||
- prompt_toolkit=2.0.9=py37_0
|
||||
- psutil=5.6.7=py37hfa6e2cd_0
|
||||
- pycparser=2.19=py37_0
|
||||
- pycurl=7.43.0.3=py37h636d3bd_1
|
||||
- pygments=2.5.2=py_0
|
||||
- pyjwt=1.7.1=py_0
|
||||
- pyopenssl=19.1.0=py37_0
|
||||
- pyparsing=2.4.6=py_0
|
||||
- pyqt=5.9.2=py37h6538335_2
|
||||
- pyrsistent=0.15.6=py37he774522_0
|
||||
- pysocks=1.7.1=py37_0
|
||||
- pytables=3.6.1=py37h1da0976_0
|
||||
- python=3.7.6=h60c2a47_2
|
||||
- python-dateutil=2.8.1=py_0
|
||||
- python-editor=1.0.4=py_0
|
||||
- pytorch=1.3.1=py3.7_cuda101_cudnn7_0
|
||||
- pytz=2019.3=py_0
|
||||
- pywin32=227=py37he774522_0
|
||||
- pywinpty=0.5.7=py37_0
|
||||
- pyzmq=18.1.0=py37ha925a31_0
|
||||
- qt=5.9.7=vc14h73c81de_0
|
||||
- qtconsole=4.6.0=py_1
|
||||
- requests=2.22.0=py37_1
|
||||
- s3transfer=0.2.1=py37_0
|
||||
- scipy=1.3.2=py37h29ff71c_0
|
||||
- seaborn=0.9.0=pyh91ea838_1
|
||||
- send2trash=1.5.0=py37_0
|
||||
- setuptools=44.0.0=py37_0
|
||||
- sip=4.19.8=py37h6538335_0
|
||||
- six=1.13.0=py37_0
|
||||
- smart_open=1.9.0=py_0
|
||||
- snappy=1.1.7=vc14h2dea872_1
|
||||
- sqlalchemy=1.3.12=py37hfa6e2cd_0
|
||||
- sqlite=3.30.1=he774522_0
|
||||
- statsmodels=0.10.1=py37h8c2d366_0
|
||||
- terminado=0.8.3=py37_0
|
||||
- testpath=0.4.4=py_0
|
||||
- tk=8.6.8=hfa6e2cd_0
|
||||
- torchvision=0.4.2=py37_cu101
|
||||
- tqdm=4.40.2=py_0
|
||||
- traitlets=4.3.3=py37_0
|
||||
- urllib3=1.25.7=py37_0
|
||||
- vc=14.1=h0510ff6_4
|
||||
- vs2015_runtime=14.16.27012=hf0eaf9b_1
|
||||
- wcwidth=0.1.7=py37_0
|
||||
- webencodings=0.5.1=py37_1
|
||||
- wheel=0.33.6=py37_0
|
||||
- widgetsnbextension=3.5.1=py37_0
|
||||
- win_inet_pton=1.1.0=py37_0
|
||||
- wincertstore=0.2=py37_0
|
||||
- winpty=0.4.3=4
|
||||
- xz=5.2.4=h2fa13f4_4
|
||||
- zeromq=4.3.1=h33f27b4_3
|
||||
- zipp=0.6.0=py_0
|
||||
- zlib=1.2.11=h62dcd97_3
|
||||
- zstd=1.3.7=h508b16e_0
|
||||
- pip:
|
||||
- docopt==0.6.2
|
||||
- ipyparallel==6.2.4
|
||||
- tornado==5.1.1
|
||||
prefix: C:\Users\marcus.decarvalho\AppData\Local\Continuum\miniconda3\envs\acdc
|
||||
|
Loading…
Reference in New Issue