Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os, os.path, sys
- import glob
- import gridt4
- import numpy as np
- from shogun.Features import CombinedFeatures, RealFeatures, BinaryLabels
- from shogun.Kernel import CombinedKernel, PolyKernel, CustomKernel
- from shogun.Classifier import MKLClassification, MKL
- from shogun.Evaluation import PRCEvaluation
- from shogun.Evaluation import AccuracyMeasure
- import time
- from modshogun import *
# Paths to the libSVM command-line tools used for training/prediction.
# The original file first assigned /home/jvgemert/... paths and then
# immediately overwrote them with the /var/scratch/... paths below, so the
# first pair was dead code; only the effective values are kept.
libSVMpathTRAIN = '/var/scratch/jvgemert/photoCompCode/libsvm-2.83/svm-train'
libSVMpathPREDICT = '/var/scratch/jvgemert/photoCompCode/libsvm-2.83/svm-predict'
def readMatrix(fName):
    """Read a whitespace-separated numeric matrix from the file *fName*.

    Each line of the file is one matrix row; fields are split on
    whitespace and parsed as float64.

    Parameters:
      fName -- path to a plain-text matrix file (one row per line).

    Returns:
      numpy.matrix of dtype float64.  np.matrix (rather than ndarray) is
      kept for backward compatibility: callers rely on matrix semantics
      such as the .T transpose applied to the test kernel.

    Raises:
      IOError/OSError if the file cannot be opened; ValueError if a field
      is not numeric or the rows are ragged.
    """
    print('readMatrix', fName)
    # Context manager guarantees the handle is closed even if parsing fails
    # (the original left the file open on error).
    with open(fName, 'r') as fileIn:
        rows = [line.split() for line in fileIn.read().splitlines()]
    return np.matrix(rows, dtype=np.float64)
def writeSVMprecomputed(outFileName, indexFilelines, kernelLines):
    """Write a kernel matrix in LIBSVM precomputed-kernel format.

    One output line per entry of *indexFilelines*:
        <label> 0:<i+1> 1:K(xi,x1) ... L:K(xi,xL)
    where the label is the second whitespace-separated field of the index
    line and the kernel values are the fields of kernelLines[i].

    Parameters:
      outFileName    -- path of the file to (over)write.
      indexFilelines -- sequence of strings '<id> <label>' (label '0'
                        means unlabeled: the label field is omitted,
                        preserving the original behavior).
      kernelLines    -- sequence of strings, one row of kernel values per
                        index line, whitespace-separated.

    Raises:
      IOError/OSError if the output file cannot be written; IndexError if
      an index line has fewer than two fields.
    """
    print('write to', outFileName)
    # Context manager closes the file even if an index line is malformed
    # (the original left the handle open on error).
    with open(outFileName, 'w') as fileOut:
        for i, indexLine in enumerate(indexFilelines):
            # '0' marks an unlabeled example: emit no leading label field.
            label = indexLine.split()[1]
            if label != '0':
                fileOut.write(label + ' ')
            # LIBSVM precomputed format requires a 1-based serial number
            # in the 0: slot.
            fileOut.write('0:%d ' % (i + 1))
            for j, val in enumerate(kernelLines[i].split()):
                fileOut.write('%d:%s ' % (j + 1, val))
            fileOut.write('\n')
def main(setName, featSet, subSet, vocSize, featFileTR, featFileTE):
    """Train and evaluate one LibSVM classifier per class on precomputed kernels.

    Locates the precomputed histogram-intersection kernel files for the
    train and test sets under <setName>/features/..., wraps them in shogun
    CombinedKernel/CustomKernel objects, and for every per-class index file
    under <setName>/data/ImageSets/<subSet> trains a LibSVM classifier and
    prints average precision (auPRC) and accuracy on the test labels.

    NOTE(review): Python 2 code depending on the shogun toolbox.  The MKL
    training path is entirely commented out -- only the plain LibSVM path
    runs.  The bare `raise` at the end aborts after the first index file;
    it looks like a leftover debugging stop -- confirm before reuse.
    Indentation was reconstructed from a whitespace-mangled paste; verify
    the loop nesting against the original file.

    Parameters (all strings):
      setName    -- dataset root directory, e.g. 'VOC2007'
      featSet    -- feature-set tag embedded in kernel file names
      subSet     -- image-set subdirectory, e.g. 'Main'
      vocSize    -- vocabulary size, used as a directory name
      featFileTR -- training index-file name, e.g. 'trainval.txt'
      featFileTE -- test index-file name, e.g. 'test.txt'

    Raises:
      AssertionError -- when the kernel-file glob does not match exactly one file.
    """
    indexFilePath = os.path.join(setName, 'data', 'ImageSets', subSet)
    kernelPath = os.path.join(setName, 'features', subSet, vocSize, 'kernel')
    # Build a glob pattern that should match exactly one train-kernel file.
    #fName = 'train.jpg.clrMerged.sift.all.histInt'
    #fName = featFile.replace('.txt', '*' + featSet) + '.*' + featFile.replace('.txt', '*' + featSet) + '.*histInt'
    fName = '*.' + featSet + '.*' + featFileTR.replace('.txt', '*' + featSet)
    if featSet.find('total') > 0:
        fName += '.merged.'
    fName += '*.*histInt'
    print os.path.join( kernelPath, fName)
    files = glob.glob( os.path.join( kernelPath, fName) )
    if len(files) != 1:
        print 'too many (or 0) files:', files
        raise AssertionError('more than 1, or 0, base kernel files found')
    # Derive the test-kernel file name from the train-kernel file name by
    # swapping the train basename prefix (e.g. 'trainval.') for the test
    # basename prefix (e.g. 'test.') everywhere after the first occurrence.
    featBaseTR = featFileTR.split('.')[0] + '.'
    featBaseTE = featFileTE.split('.')[0] + '.'
    print featBaseTR , featBaseTE
    baseKernelNameTR = files[0]
    baseKernelNameTRList = baseKernelNameTR.split(featBaseTR)
    baseKernelNameTE = os.path.join(kernelPath, featBaseTR + featBaseTE.join(baseKernelNameTRList[1:]) )
    #files[0].replace(featBaseTR, featBaseTE)
    print 'baseTR:', baseKernelNameTR
    print 'baseTE:', baseKernelNameTE
    # read the inital kernel matrix
    kernelLinesTR = readMatrix( baseKernelNameTR )
    #kernelLinesTR = np.loadtxt( baseKernelNameTR )
    print kernelLinesTR.shape
    #kernelLinesTR0 = readMatrix( os.path.join(kernelPath, 'trainval.jpg.2SsalTotal0.sift.cb.all.trainval.jpg.2SsalTotal0.sift.cb.all.histInt') )
    #print kernelLinesTR0.shape
    #kernelLinesTR1 = readMatrix(os.path.join(kernelPath, 'trainval.jpg.2SsalTotal1.sift.cb.all.trainval.jpg.2SsalTotal1.sift.cb.all.histInt') )
    #print kernelLinesTR1.shape
    # Test kernel is transposed -- presumably so rows index train items and
    # columns index test items as shogun's CustomKernel expects; confirm.
    kernelLinesTE= readMatrix(baseKernelNameTE).T
    print kernelLinesTE.shape
    #kernelLinesTE0 = readMatrix(os.path.join(kernelPath, 'trainval.jpg.2SsalTotal0.sift.cb.all.test.jpg.2SsalTotal0.sift.cb.all.histInt') )
    #print kernelLinesTE0.shape
    #kernelLinesTE1 = readMatrix(os.path.join(kernelPath, 'trainval.jpg.2SsalTotal1.sift.cb.all.test.jpg.2SsalTotal1.sift.cb.all.histInt') )
    #print kernelLinesTE1.shape
    # Wrap the precomputed train kernel; single-threaded to keep runs
    # deterministic (confidence stability is checked below).
    kernelTR = CombinedKernel()
    kernelTR.parallel.set_num_threads(1)
    kernelTR.append_kernel(CustomKernel(kernelLinesTR))
    #kernelTR.append_kernel(CustomKernel(kernelLinesTR0))
    #kernelTR.append_kernel(CustomKernel(kernelLinesTR1))
    # create combined test features
    kernelTE = CombinedKernel()
    kernelTE.append_kernel(CustomKernel(kernelLinesTE))
    #kernelTE.append_kernel(CustomKernel(kernelLinesTE0))
    #kernelTE.append_kernel(CustomKernel(kernelLinesTE1))
    prevConfidences = []
    # get the classes
    indexFileExt = '*_' + featFileTR
    indexFiles = glob.glob( os.path.join(indexFilePath, indexFileExt) )
    print indexFilePath, indexFileExt
    print indexFiles
    for f in indexFiles:
        print 'indexFileTR:', f
        # Train 5 times on the same data: the confidence vector of each run
        # is compared to the previous run's below, apparently to check that
        # training is deterministic.
        for i in range(5):
            print i
            fTE = f.replace(featFileTR, featFileTE)
            #print 'indexFileTE:', fTE
            # train file
            indexFile = open(f, 'r')
            indexFilelinesTR = indexFile.read().splitlines()
            indexFile.close()
            # create labels, mapping label 0 to -1 for binary classification
            labelsTR = np.array([ float(d.split()[1]) for d in indexFilelinesTR])
            labelsTR[labelsTR==0] = -1
            labelsTR = BinaryLabels(labelsTR)
            # test file
            indexFile = open(fTE, 'r')
            indexFilelinesTE = indexFile.read().splitlines()
            indexFile.close()
            # create labels, same 0 -> -1 mapping as for the train set
            labelsTE_arr = np.array([ float(d.split()[1]) for d in indexFilelinesTE])
            labelsTE_arr[labelsTE_arr==0] = -1
            labelsTE = BinaryLabels(labelsTE_arr)
            # train mkl -- the whole multiple-kernel-learning path below is
            # disabled; kept for reference.
            #mkl = MKLClassification()
            #mkl.io.set_loglevel(MSG_DEBUG)
            # which norm to use for MKL
            #mkl.set_mkl_norm(1) #1, 2,3
            # set cost (neg, pos)
            #mkl.set_C(1, 1)
            #mkl.set_epsilon(1e-5)
            # set kernel and labels
            #mkl.set_kernel(kernelTR)
            #mkl.set_labels(labelsTR)
            # train
            #mkl.train()
            #w=kernelTR.get_subkernel_weights()
            #print 'kernel Weights', w
            #print np.sum(mkl.apply().get_labels()==1)
            #print mkl.get_alphas()
            # test
            # and classify
            #mkl.set_kernel(kernelTE)
            #print 'apply'
            #mkl.apply()
            #print 'getLabels'
            #predLabels = mkl.apply().get_labels()
            #print np.sum(predLabels==1)
            #print labelsTE_arr[0:100]
            evaluatorAP = PRCEvaluation()
            #print 'eval'
            #evaluatorAP.evaluate( BinaryLabels(predLabels), labelsTE)
            #print 'AVP MKL', evaluatorAP.get_auPRC()
            evaluatorAC = AccuracyMeasure()
            #print 'ACC MKL', evaluatorAC.evaluate( BinaryLabels(predLabels), labelsTE)
            from shogun.Classifier import LibSVM
            # LibSVM with C=10; kernel and labels bound at construction.
            svm=LibSVM( 10, kernelTR, labelsTR)
            svm.set_epsilon(1e-5)
            svm.train()
            # Swap in the (train x test) kernel so apply() scores the test set.
            svm.set_kernel(kernelTE)
            #print 'apply'
            svmApplied = svm.apply()
            predSVM = svm.apply().get_labels()
            #print predSVM
            #print 'eval'
            print 'nr +1= ', np.sum(predSVM==1)
            print 'confidence:', svmApplied.get_confidences()[0:100]
            if prevConfidences != []:
                print 'max conf diff with prev run:', np.max(abs(svmApplied.get_confidences()-prevConfidences))
            prevConfidences = svmApplied.get_confidences()
            evaluatorAP.evaluate( BinaryLabels(predSVM), labelsTE)
            print 'AVP SVM', evaluatorAP.get_auPRC()
            print 'ACC SVM', evaluatorAC.evaluate( BinaryLabels(predSVM), labelsTE)
        # NOTE(review): bare `raise` with no active exception -- aborts the
        # run after the first index file; looks like a debugging stop.
        raise
- # do the cross-validation
- #xvalargs = 'gridt4.py -log2c -14,-6,1 -log2g 0,0,1 -v 10 ' + preCompKernelFileNameTR
- #xvalargs = 'gridt4.py -log2c -4,3,1 -log2g 0,0,1 -v 10 ' + preCompKernelFileNameTR
- #raise ''
- #TODO
# Script entry point: expects 7 positional arguments
#   setName featSet subSet vocSize featFileTR featFileTE siftType
# and falls back to a default VOC2007 configuration otherwise.
if len(sys.argv) != 8:
    print('ignore input, run with default values')
    setName = 'VOC2007'
    featSet = 'total'
    subSet = 'Main'
    vocSize = '4000'
    featFileTR = 'trainval.txt'
    featFileTE = 'test.txt'
    siftType = 'sift'
else:
    # Unpack the seven arguments in one step instead of the original
    # seven repetitive sys.argv[...] assignments.
    (setName, featSet, subSet, vocSize,
     featFileTR, featFileTE, siftType) = sys.argv[1:8]
print(sys.argv)
# Non-default sift variants are encoded into the feature-set name, which
# is part of the kernel-file glob pattern built in main().
if siftType != 'sift':
    featSet += '-' + siftType
main(setName, featSet, subSet, vocSize, featFileTR, featFileTE)
- [jvgemert@node415 photoCompCode]$
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement