p3v label_to_files

import io
import re
import os
import json

# Name of the Audacity label file to read
labels = 'labels.txt'

# Regex fragments for the characters to replace with '_' in generated file
# names. The fragments are joined with '|' into a single pattern, so regex
# metacharacters ([\^$.|?*+(){}) must be prefixed with '\'. Raw strings are
# used so the backslash reaches the regex engine instead of being parsed as
# an (invalid) string escape sequence.
replace = [r'\?', ':', r'\*']

# Create a txt file for each clip
create_txt = False

# Create a json file for each clip
create_json = True

def find_nth(haystack, needle, n):
  """Return the index of the n-th occurrence of `needle` in `haystack`.

  Occurrences are counted without overlap: each search resumes just past the
  end of the previous hit. Returns -1 when `haystack` holds fewer than `n`
  occurrences. Any `n` of 1 or less yields the first occurrence.
  """
  stride = len(needle)
  position = haystack.find(needle)
  while position >= 0 and n > 1:
    # Resume the search immediately after the hit just found.
    position = haystack.find(needle, position + stride)
    n = n - 1
  return position

# Extension of the audio clips looked up in the current directory.
flac_ext = '.flac'

# Patterns are compiled once, outside the loop. Raw strings keep the regex
# escapes from being parsed as (invalid) string escape sequences.
noisy_re = re.compile(r'\(noisy\)', flags=re.IGNORECASE)
# An empty `replace` list would produce the empty pattern, which matches
# between every character; treat that case as "replace nothing".
replace_re = re.compile("|".join(replace)) if replace else None

# Keep the third tab-separated column (the label text) of every non-blank
# line. The `with` block closes the file handle deterministically.
# NOTE(review): the label file is assumed to be UTF-8 (Audacity's export
# encoding, to be confirmed); decoding explicitly avoids depending on the
# platform's default encoding.
with open(labels, "r", encoding="utf-8") as label_file:
  lines = [line.rstrip('\n').split('\t')[2] for line in label_file if not line.isspace()]

files = [f for f in os.listdir('.') if f.endswith(flac_ext)]

# For each clip: (file name, name prefix to compare labels against). The
# prefix is the file name minus its extension and any "(noisy)" marker. Only
# the trailing extension is removed, so a '.flac' elsewhere in the name is
# left intact.
clips = [(f, noisy_re.sub('', f[:-len(flac_ext)])) for f in files]

for line in lines:
  # File-system-safe version of the label, used as the fallback output name.
  filename = replace_re.sub("_", line) if replace_re else line
  matches = [f for f, prefix in clips if filename.startswith(prefix)]

  # Name the output after the clip when exactly one clip matches; with no
  # match or an ambiguous one, fall back to the sanitised label.
  if len(matches) == 1:
    out_stem = matches[0][:-len(flac_ext)]
  else:
    out_stem = filename

  if create_txt:
    with open(out_stem + '.txt', 'w', encoding='utf-8') as f:
      f.write(line)

  if create_json:
    # Label layout: timestamp-character-emotion-text. Split on the first
    # three dashes only, so dashes inside the text are preserved.
    fields = line.split('-', 3)
    # A label with no third dash has no text part: use '' rather than
    # echoing the whole label back as the text.
    text = fields[3] if len(fields) > 3 else ''
    dic = {
      # Indexing raises IndexError for labels with fewer than three fields.
      'timestamp': fields[0].strip(' '),
      'character': fields[1].strip(' '),
      'emotion': fields[2].strip(' '),
      'text': noisy_re.sub('', text).strip(' '),
      'noisy': line.strip(' ').lower().endswith('(noisy)'),
    }

    with open(out_stem + '.json', 'w', encoding='utf-8') as f:
      json.dump(dic, f, indent=4)