SHARE
TWEET

p3v label_to_files

a guest Apr 19th, 2019 207 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import io
  2. import re
  3. import os
  4. import json
  5.  
  6. # Name of audacity file
  7. labels = 'labels.txt'
  8.  
  9. # Characters to replace with '_', re characters ([\^$.|?*+(){}) must be prefixed with '\'
  10. replace = ['\?', ':', '\*']
  11.  
  12. # Create a txt file for each clip
  13. create_txt = False
  14.  
  15. # Create a json file for each clip
  16. create_json = True
  17.  
  18. def find_nth(haystack, needle, n):
  19.   start = haystack.find(needle)
  20.   while start >= 0 and n > 1:
  21.       start = haystack.find(needle, start+len(needle))
  22.       n -= 1
  23.   return start
  24.  
  25. lines = [line.rstrip('\n').split('\t')[2] for line in open(labels, "r") if not line.isspace() ]
  26. files = [f for f in os.listdir('.') if f.endswith(".flac")]
  27.  
  28. for line in lines:
  29.   filename = re.sub("|".join(replace), "_", line)
  30.   matches = list(filter(lambda x: filename.startswith(re.sub('\(noisy\)', '', x, flags=re.IGNORECASE).replace('.flac', '')), files))
  31.  
  32.   if create_txt:
  33.     if len(matches) == 1:
  34.       with open(matches[0].replace('.flac', '.txt'), 'w') as f:
  35.         f.write(line)
  36.     else:
  37.       with open(filename + '.txt', 'w') as f:
  38.         f.write(line)
  39.  
  40.   if create_json:
  41.     dic = {
  42.       'timestamp': line.split('-')[0].strip(' '),
  43.       'character': line.split('-')[1].strip(' '),
  44.       'emotion': line.split('-')[2].strip(' '),
  45.       'text': re.sub('\(noisy\)', '', line[find_nth(line, '-', 3) + 1:], flags=re.IGNORECASE).strip(' '),
  46.       'noisy': line.strip(' ').lower().endswith('(noisy)'),
  47.     }
  48.  
  49.     if len(matches) == 1:
  50.       with open(matches[0].replace('.flac', '.json'), 'w') as f:
  51.         json.dump(dic, f, indent=4)
  52.     else:
  53.       with open(filename + '.json', 'w') as f:
  54.         json.dump(dic, f, indent=4)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top