Advertisement
Guest User

Untitled

a guest
Jul 5th, 2012
33
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.53 KB | None | 0 0
  1. import os, os.path, sys
  2. import glob
  3. import gridt4
  4.  
  5. import numpy as np
  6. from shogun.Features import CombinedFeatures, RealFeatures, BinaryLabels
  7. from shogun.Kernel import CombinedKernel, PolyKernel, CustomKernel
  8. from shogun.Classifier import MKLClassification, MKL
  9.  
  10. from shogun.Evaluation import PRCEvaluation
  11. from shogun.Evaluation import AccuracyMeasure
  12.  
  13. import time
  14.  
  15. from modshogun import *
  16.  
  17.  
  18.  
  19. libSVMpathTRAIN = '/home/jvgemert/photoCompCode/libsvm-2.83/svm-train'
  20. libSVMpathPREDICT= '/home/jvgemert/photoCompCode/libsvm-2.83/svm-predict'
  21.  
  22. libSVMpathTRAIN = '/var/scratch/jvgemert/photoCompCode/libsvm-2.83/svm-train'
  23. libSVMpathPREDICT= '/var/scratch/jvgemert/photoCompCode/libsvm-2.83/svm-predict'
  24.  
  25. def readMatrix(fName):
  26.     print 'readMatrix', fName
  27.     fileIn = open(fName, 'r')
  28.     lines = fileIn.read().splitlines()
  29.     fileIn.close()
  30.     feat = []
  31.     #i = 0
  32.     for line in lines:
  33.         lst = line.strip().split()
  34.         #feat.append([ float(f) for f in lst])
  35.         feat.append(lst)
  36.         #i = i + 1
  37.         #print i,
  38.     #print feat
  39.     return np.matrix(feat, dtype=np.float64)
  40.  
  41.  
  42. def writeSVMprecomputed(outFileName, indexFilelines, kernelLines):
  43.     """ write to LIBSVM precomputed format """
  44.     print 'write to', outFileName
  45.     fileOut = open(outFileName, 'w')
  46.  
  47.     # write kernel matrix for this class
  48.     # for each line in the class (train/test) set
  49.     # <label> 0:i 1:K(xi,x1) ... L:K(xi,xL)
  50.     for i in range(len(indexFilelines)):
  51.         #print lines[i], kernelLines[i][0:50]
  52.         # write label
  53.         label = indexFilelines[i].split()[1]
  54.         if label != '0':
  55.             fileOut.write( label + ' ')
  56.             # write id
  57.             fileOut.write('0:%d ' % (i+1))
  58.             # write kernel vals
  59.             vals = kernelLines[i].split()
  60.             for j in range(len(vals)):
  61.                 fileOut.write('%d:%s ' % (j+1, vals[j]) )
  62.             fileOut.write('\n')
  63.     fileOut.close()
  64.  
  65.  
  66. def main(setName, featSet, subSet, vocSize, featFileTR, featFileTE):
  67.  
  68.     indexFilePath = os.path.join(setName, 'data', 'ImageSets', subSet)
  69.     kernelPath = os.path.join(setName, 'features', subSet, vocSize, 'kernel')
  70.  
  71.     #fName = 'train.jpg.clrMerged.sift.all.histInt'
  72.     #fName = featFile.replace('.txt', '*' + featSet) + '.*' + featFile.replace('.txt', '*' + featSet) + '.*histInt'
  73.  
  74.     fName =  '*.' + featSet + '.*' + featFileTR.replace('.txt', '*' + featSet)
  75.     if featSet.find('total') > 0:
  76.         fName +=  '.merged.'
  77.     fName += '*.*histInt'
  78.     print os.path.join( kernelPath, fName)
  79.     files = glob.glob( os.path.join( kernelPath, fName) )
  80.     if len(files) != 1:
  81.         print 'too many (or 0) files:', files
  82.         raise AssertionError('more than 1, or 0, base kernel files found')
  83.  
  84.     featBaseTR = featFileTR.split('.')[0] + '.'
  85.     featBaseTE = featFileTE.split('.')[0] + '.'
  86.     print featBaseTR , featBaseTE
  87.     baseKernelNameTR = files[0]
  88.     baseKernelNameTRList = baseKernelNameTR.split(featBaseTR)
  89.     baseKernelNameTE = os.path.join(kernelPath, featBaseTR + featBaseTE.join(baseKernelNameTRList[1:]) )
  90.     #files[0].replace(featBaseTR, featBaseTE)
  91.     print 'baseTR:', baseKernelNameTR
  92.     print 'baseTE:', baseKernelNameTE
  93.     # read the inital kernel matrix
  94.  
  95.     kernelLinesTR = readMatrix( baseKernelNameTR )
  96.     #kernelLinesTR = np.loadtxt( baseKernelNameTR )
  97.     print kernelLinesTR.shape
  98.  
  99.     #kernelLinesTR0 = readMatrix( os.path.join(kernelPath, 'trainval.jpg.2SsalTotal0.sift.cb.all.trainval.jpg.2SsalTotal0.sift.cb.all.histInt') )
  100.     #print kernelLinesTR0.shape
  101.  
  102.     #kernelLinesTR1 = readMatrix(os.path.join(kernelPath, 'trainval.jpg.2SsalTotal1.sift.cb.all.trainval.jpg.2SsalTotal1.sift.cb.all.histInt') )
  103.     #print kernelLinesTR1.shape
  104.  
  105.     kernelLinesTE= readMatrix(baseKernelNameTE).T
  106.     print kernelLinesTE.shape
  107.  
  108.     #kernelLinesTE0 = readMatrix(os.path.join(kernelPath, 'trainval.jpg.2SsalTotal0.sift.cb.all.test.jpg.2SsalTotal0.sift.cb.all.histInt') )
  109.     #print kernelLinesTE0.shape
  110.  
  111.     #kernelLinesTE1 = readMatrix(os.path.join(kernelPath, 'trainval.jpg.2SsalTotal1.sift.cb.all.test.jpg.2SsalTotal1.sift.cb.all.histInt') )
  112.     #print kernelLinesTE1.shape
  113.  
  114.     kernelTR = CombinedKernel()
  115.     kernelTR.parallel.set_num_threads(1)
  116.     kernelTR.append_kernel(CustomKernel(kernelLinesTR))
  117.     #kernelTR.append_kernel(CustomKernel(kernelLinesTR0))
  118.     #kernelTR.append_kernel(CustomKernel(kernelLinesTR1))
  119.  
  120.     # create combined test features
  121.     kernelTE = CombinedKernel()
  122.     kernelTE.append_kernel(CustomKernel(kernelLinesTE))
  123.     #kernelTE.append_kernel(CustomKernel(kernelLinesTE0))
  124.     #kernelTE.append_kernel(CustomKernel(kernelLinesTE1))
  125.  
  126.     prevConfidences = []
  127.     # get the classes
  128.     indexFileExt = '*_' + featFileTR
  129.     indexFiles = glob.glob( os.path.join(indexFilePath, indexFileExt) )
  130.     print indexFilePath, indexFileExt
  131.     print indexFiles
  132.     for f in indexFiles:
  133.       print 'indexFileTR:', f
  134.       for i in range(5):
  135.         print i
  136.         fTE = f.replace(featFileTR, featFileTE)
  137.         #print 'indexFileTE:', fTE
  138.  
  139.         # train file
  140.         indexFile = open(f, 'r')
  141.         indexFilelinesTR = indexFile.read().splitlines()
  142.         indexFile.close()
  143.         # create labels,
  144.         labelsTR = np.array([ float(d.split()[1]) for d in indexFilelinesTR])
  145.         labelsTR[labelsTR==0] = -1
  146.         labelsTR = BinaryLabels(labelsTR)
  147.  
  148.         # test file
  149.         indexFile = open(fTE, 'r')
  150.         indexFilelinesTE = indexFile.read().splitlines()
  151.         indexFile.close()
  152.         # create labels,
  153.         labelsTE_arr = np.array([ float(d.split()[1]) for d in indexFilelinesTE])
  154.         labelsTE_arr[labelsTE_arr==0] = -1
  155.         labelsTE = BinaryLabels(labelsTE_arr)
  156.  
  157.         # train mkl
  158.         #mkl = MKLClassification()
  159.  
  160.         #mkl.io.set_loglevel(MSG_DEBUG)
  161.         # which norm to use for MKL
  162.         #mkl.set_mkl_norm(1) #1, 2,3
  163.  
  164.         # set cost (neg, pos)
  165.         #mkl.set_C(1, 1)
  166.         #mkl.set_epsilon(1e-5)
  167.  
  168.         # set kernel and labels
  169.         #mkl.set_kernel(kernelTR)
  170.         #mkl.set_labels(labelsTR)
  171.  
  172.         # train
  173.         #mkl.train()
  174.         #w=kernelTR.get_subkernel_weights()
  175.         #print 'kernel Weights',  w
  176.  
  177.         #print np.sum(mkl.apply().get_labels()==1)
  178.         #print mkl.get_alphas()
  179.         # test
  180.         # and classify
  181.         #mkl.set_kernel(kernelTE)
  182.         #print 'apply'
  183.         #mkl.apply()
  184.         #print 'getLabels'
  185.         #predLabels = mkl.apply().get_labels()
  186.         #print np.sum(predLabels==1)
  187.         #print labelsTE_arr[0:100]
  188.  
  189.         evaluatorAP = PRCEvaluation()
  190.         #print 'eval'
  191.         #evaluatorAP.evaluate( BinaryLabels(predLabels), labelsTE)
  192.         #print 'AVP MKL', evaluatorAP.get_auPRC()
  193.         evaluatorAC = AccuracyMeasure()
  194.         #print 'ACC MKL', evaluatorAC.evaluate( BinaryLabels(predLabels), labelsTE)
  195.  
  196.         from shogun.Classifier import LibSVM
  197.         svm=LibSVM( 10, kernelTR, labelsTR)
  198.         svm.set_epsilon(1e-5)
  199.         svm.train()
  200.         svm.set_kernel(kernelTE)
  201.         #print 'apply'
  202.         svmApplied = svm.apply()
  203.         predSVM = svm.apply().get_labels()
  204.         #print predSVM
  205.         #print 'eval'
  206.         print 'nr +1= ', np.sum(predSVM==1)
  207.         print 'confidence:', svmApplied.get_confidences()[0:100]
  208.         if prevConfidences != []:
  209.             print 'max conf diff with prev run:', np.max(abs(svmApplied.get_confidences()-prevConfidences))
  210.         prevConfidences = svmApplied.get_confidences()
  211.         evaluatorAP.evaluate( BinaryLabels(predSVM), labelsTE)
  212.         print 'AVP SVM', evaluatorAP.get_auPRC()
  213.         print 'ACC SVM', evaluatorAC.evaluate( BinaryLabels(predSVM), labelsTE)
  214.       raise
  215.  
  216.  
  217.  
  218.         # do the cross-validation
  219.         #xvalargs = 'gridt4.py -log2c -14,-6,1 -log2g 0,0,1 -v 10 ' + preCompKernelFileNameTR
  220.         #xvalargs = 'gridt4.py -log2c -4,3,1 -log2g 0,0,1 -v 10 ' + preCompKernelFileNameTR
  221.  
  222.         #raise ''
  223.  
  224. #TODO
  225.  
  226. if len(sys.argv) != 8:
  227.     print 'ignore input, run with default values'
  228.     setName = 'VOC2007'
  229.     featSet = 'total'
  230.     subSet = 'Main'
  231.     vocSize = '4000'
  232.     featFileTR = 'trainval.txt'
  233.     featFileTE = 'test.txt'
  234.     siftType = 'sift'
  235. else:
  236.     setName = sys.argv[1]
  237.     featSet = sys.argv[2]
  238.     subSet = sys.argv[3]
  239.     vocSize = sys.argv[4]
  240.     featFileTR = sys.argv[5]
  241.     featFileTE = sys.argv[6]
  242.     siftType = sys.argv[7]
  243.  
  244.  
  245. print sys.argv
  246.  
  247. if siftType != 'sift':
  248.     featSet += '-' + siftType
  249.  
  250. main(setName, featSet, subSet, vocSize, featFileTR, featFileTE)
  251.  
  252. [jvgemert@node415 photoCompCode]$
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement