Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#===================================================
# Learning to rank - Train
#===================================================
# Walks the "train" directory tree and, for every file whose name ends in
# ".txt" (case-insensitive), calls get_features_from_file_name with that file
# and its sibling "<stem>.key" file. Results accumulate in three module-level
# lists:
#   arrays - one feature array per successfully processed file
#   ys     - one "y" value per successfully processed file
#   namess - flat concatenation of the "names" returned for each file
#
# NOTE(review): get_features_from_file_name is defined outside this section;
# the only thing established here is that its result is unpacked into three
# values and that "names" can be appended with += to a list -- confirm the
# exact types against its definition.
#
# Every print(...) call takes exactly one argument so the output is identical
# under the Python 2 print statement and the Python 3 print function.
print("\nStarting Training Phase:\n")

arrays = []
ys = []
namess = []

for subdir, dirs, files in os.walk("train"):
    # Absolute directory of the files yielded for this step of the walk.
    base_dir = os.path.join(os.getcwd(), subdir)
    for i, filename in enumerate(files):
        if not filename.lower().endswith('.txt'):
            continue

        # Use the on-disk file name as-is (the extension check above is
        # case-insensitive, so rebuilding it with a literal ".txt" could
        # point at a file that does not exist). The key file shares the
        # stem and always uses the ".key" extension.
        stem = filename.rpartition('.')[0]
        the_file = os.path.join(base_dir, filename)
        the_key_file = os.path.join(base_dir, stem + '.key')
        print(the_file)
        print(the_key_file)

        # Only feature extraction is guarded: a file that cannot be decoded
        # is reported and skipped, leaving the accumulators untouched.
        try:
            feature_array, y, names = get_features_from_file_name(the_file, the_key_file)
        except UnicodeDecodeError:
            print("Unicode decode error, skipping")
            continue

        # Save the features into the per-file lists.
        arrays.append(feature_array)
        ys.append(y)
        namess += names
        print("done %d" % i)

print("Finished")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement