Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import io
- import re
- import os
- import json
# Name of the Audacity label file to read
labels = 'labels.txt'
# Regex fragments for the characters to replace with '_' in output file names.
# Regex metacharacters ([\^$.|?*+(){}) must be prefixed with '\'; raw strings
# keep that backslash from being read as a Python string escape.
replace = [r'\?', ':', r'\*']
# Create a txt file for each clip
create_txt = False
# Create a json file for each clip
create_json = True
def find_nth(haystack, needle, n):
    """Return the index of the n-th occurrence of *needle* in *haystack*.

    Occurrences are counted left to right and do not overlap: each search
    resumes just past the end of the previous match. Returns -1 when
    *haystack* contains fewer than *n* occurrences. Any *n* below 1 behaves
    like ``n == 1`` and yields the first occurrence.
    """
    start = haystack.find(needle)
    # Each pass advances to the next occurrence until n-1 hops have been made
    # or str.find reports -1 (no further match).
    while start >= 0 and n > 1:
        start = haystack.find(needle, start + len(needle))
        n -= 1
    return start
# Matches the "(noisy)" marker in any letter case.
NOISY_RE = re.compile(r'\(noisy\)', flags=re.IGNORECASE)


def parse_label(line):
    """Split a 'timestamp - character - emotion - text' label into a dict.

    Only the first three dashes act as separators, so dashes inside the text
    are preserved. Any "(noisy)" marker is removed from the text, and the
    'noisy' flag records whether the label ends with that marker. A label
    that has no text field yields an empty 'text'.

    Raises:
        IndexError: if the label has fewer than three dash-separated fields.
    """
    fields = line.split('-', 3)
    # Without a third dash there is no text field at all; use '' rather than
    # letting the whole label leak into 'text'.
    remainder = fields[3] if len(fields) > 3 else ''
    return {
        'timestamp': fields[0].strip(' '),
        'character': fields[1].strip(' '),
        'emotion': fields[2].strip(' '),
        'text': NOISY_RE.sub('', remainder).strip(' '),
        'noisy': line.strip(' ').lower().endswith('(noisy)'),
    }


def output_stem(filename, files):
    """Return the extension-less name to use for a label's output files.

    A file in *files* matches when *filename* starts with that file's name
    minus its extension and minus any "(noisy)" marker. If exactly one file
    matches, its name without the extension is returned so the outputs sit
    next to it; otherwise *filename* itself is returned.
    """
    matches = [
        name for name in files
        # splitext removes only the trailing extension, never a ".flac" that
        # happens to appear in the middle of the name.
        if filename.startswith(NOISY_RE.sub('', os.path.splitext(name)[0]))
    ]
    if len(matches) == 1:
        return os.path.splitext(matches[0])[0]
    return filename


def main():
    """Write a .txt and/or .json file for every label in the label file.

    Reads the third tab-separated column of each non-blank line of `labels`,
    derives a file name by replacing the `replace` patterns with '_', and
    writes the outputs selected by `create_txt` / `create_json` into the
    current directory.
    """
    # The context manager closes the label file once it has been read.
    with open(labels, "r") as label_file:
        lines = [
            row.rstrip('\n').split('\t')[2]
            for row in label_file
            if not row.isspace()
        ]
    files = [f for f in os.listdir('.') if f.endswith(".flac")]
    # Build the substitution pattern once instead of once per label.
    unsafe_chars = re.compile("|".join(replace))

    for line in lines:
        filename = unsafe_chars.sub("_", line)
        stem = output_stem(filename, files)
        if create_txt:
            with open(stem + '.txt', 'w') as f:
                f.write(line)
        if create_json:
            # Parse before opening so a malformed label leaves no empty file.
            dic = parse_label(line)
            with open(stem + '.json', 'w') as f:
                json.dump(dic, f, indent=4)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement