Advertisement
Guest User

Untitled

a guest
Dec 8th, 2016
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.40 KB | None | 0 0
  1. #===================================================
  2. # Learning to rank - Test
  3. #===================================================
  4.  
  5. # join all the document's features into one
  6. super_arrays = np.vstack(arrays)
  7. super_ys = np.concatenate(ys)
  8.  
  9. super_arrays_norm = normalize(super_arrays)
  10.  
  11. # run the Perceptron
  12. percep = Perceptron(n_iter=1000, shuffle=False, fit_intercept = False)
  13. percep.fit(super_arrays_norm, super_ys)
  14.  
  15. #debug
  16. #print "param0" + str(percep.coef_[0,0])
  17. #print "param1" + str(percep.coef_[0,1])
  18. #print "param2" + str(percep.coef_[0,2])
  19. #print "param3" + str(percep.coef_[0,3])
  20. #print "param4" + str(percep.coef_[0,4])
  21. #print "param5" + str(percep.coef_[0,5])
  22. #print "Bias " + str(percep.intercept_)
  23.  
  24. print "\nStarting Test Phase:\n"
  25.  
  26. avg_precs = []
  27. for subdir, dirs, files in os.walk("test"):
  28. for i, file in enumerate(files):
  29. if file.lower().endswith('.txt'):
  30. # get the file names iteratively
  31. (prefix, sep, suffix) = file.rpartition('.')
  32. the_file = os.path.join(os.getcwd(), subdir + os.path.sep + prefix + '.txt')
  33. the_key_file = os.path.join(os.getcwd(), subdir + os.path.sep + prefix + '.key')
  34. # Prevent decode errors
  35. print the_file
  36. print the_key_file
  37. try:
  38. # get the features from the raw text
  39. feature_array, y, names = get_features_from_file_name(the_file, the_key_file)
  40.  
  41. feature_array = normalize(feature_array)
  42. #feature_array[:,0] = normalize(feature_array[:,0])
  43. #feature_array[:,1] = normalize(feature_array[:,1])
  44. #feature_array[:,2] = normalize(feature_array[:,2])
  45. #feature_array[:,3] = normalize(feature_array[:,3])
  46. #feature_array[:,4] = normalize(feature_array[:,4])
  47. #feature_array[:,5] = normalize(feature_array[:,5])
  48. # get the confidence for the set of examples
  49. predictions = percep.decision_function(feature_array)
  50.  
  51. # sort the confidence
  52. sorted_indexes = predictions.argsort()
  53.  
  54. avg_precs.append(average_precision_score(y, predictions))
  55.  
  56.  
  57. # debug
  58. #print "avg prec: ", average_precision_score(y, predictions)
  59. #
  60. #for k in range(1,21):
  61. # if (y[sorted_indexes[len(sorted_indexes)-k]] == 1):
  62. # print "===============hit\n\n\n\n\n\n================"
  63. # print names[sorted_indexes[len(sorted_indexes)-k]]
  64. #
  65. #for j, element in enumerate(y):
  66. # if element == 1:
  67. # print "previsao:", predictions[j], "position: ", feature_array[j, 0], "graph: ", feature_array[j, 1], "phraseness: ", feature_array[j, 2], "informativeness: ", feature_array[j, 3], "tf-position ", feature_array[j, 4], "tf-idf ", feature_array[j, 5]
  68. # /debug
  69.  
  70. except UnicodeDecodeError:
  71. "Unicode decode error, skipping"
  72.  
  73.  
  74.  
  75. sum_avg = 0
  76. for element in avg_precs:
  77. if not(math.isnan(element)):
  78. sum_avg += element
  79.  
  80. mean_avg = sum_avg/len(avg_precs)
  81.  
  82. print "Mean Average Precision:", mean_avg
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement