Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#===================================================
# Learning to rank - Test
#===================================================
# Trains a Perceptron on the stacked training features, then scores every
# ".txt" document found under the "test" directory and reports the mean of
# the per-document average precision.
#
# Relies on names defined earlier in the file: `arrays` / `ys` (per-document
# feature matrices and labels), `normalize`, `Perceptron`,
# `average_precision_score` and `get_features_from_file_name`.
# NOTE(review): `normalize`, `Perceptron` and `average_precision_score` are
# presumably the scikit-learn ones -- confirm against the file's imports.

# join all the documents' features into one training set
super_arrays = np.vstack(arrays)
super_ys = np.concatenate(ys)
super_arrays_norm = normalize(super_arrays)

# run the Perceptron (no intercept term, examples kept in their given order)
percep = Perceptron(n_iter=1000, shuffle=False, fit_intercept=False)
percep.fit(super_arrays_norm, super_ys)
# For debugging, inspect percep.coef_ (learned weights) and percep.intercept_.

print("\nStarting Test Phase:\n")

avg_precs = []
for subdir, _dirs, files in os.walk("test"):
    for file_name in files:
        if not file_name.lower().endswith('.txt'):
            continue

        # Each document "<prefix>.txt" is paired with a "<prefix>.key" file
        # in the same directory.
        prefix = file_name.rpartition('.')[0]
        # Use the name exactly as found on disk: the extension check above is
        # case-insensitive, so rebuilding it as ".txt" could miss "X.TXT".
        the_file = os.path.join(os.getcwd(), subdir, file_name)
        the_key_file = os.path.join(os.getcwd(), subdir, prefix + '.key')

        # Show which pair is being processed.
        print(the_file)
        print(the_key_file)

        try:
            # get the features from the raw text
            feature_array, y, _names = get_features_from_file_name(
                the_file, the_key_file)
        except UnicodeDecodeError:
            # Undecodable documents are reported and left out of the evaluation.
            print("Unicode decode error, skipping")
            continue

        feature_array = normalize(feature_array)
        # get the confidence for the set of examples
        predictions = percep.decision_function(feature_array)
        avg_precs.append(average_precision_score(y, predictions))

# NaN scores add nothing to the sum but still count in the denominator, as in
# the original computation.
# NOTE(review): confirm this is intended rather than averaging over the valid
# scores only.
sum_avg = sum(score for score in avg_precs if not math.isnan(score))
if avg_precs:
    mean_avg = float(sum_avg) / len(avg_precs)
else:
    # No document was scored; avoid a ZeroDivisionError and report NaN.
    mean_avg = float('nan')
print("Mean Average Precision: %s" % mean_avg)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement