Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- '''
- Count insertion (INS), deletion (DEL) and substitution (SUB) errors in token alignments and write a summary.
- '''
- import re
- import sys
- import pandas as pd
_NULL = 'NULL'


def _split_alignment(item):
    """Split a ``ref:hyp`` alignment token into its ``(ref, hyp)`` halves.

    A token may contain more than one colon (e.g. ``ten:10:30``).  The
    ``NULL`` marker is honoured first so insertions and deletions are still
    recognised; otherwise the token is split on its first colon.
    """
    if item.startswith(_NULL + ':'):
        return _NULL, item[len(_NULL) + 1:]
    if item.endswith(':' + _NULL):
        return item[:-(len(_NULL) + 1)], _NULL
    ref, _, hyp = item.partition(':')
    return ref, hyp


def _parse_alignments(rec):
    """Return ``(words, counts)`` for one space-separated alignment string.

    ``words`` is the list of recognised words with alignment markup removed;
    ``counts`` maps ``'INS'``, ``'DEL'`` and ``'SUB'`` to the number of such
    tokens found in ``rec``.
    """
    words = []
    counts = {'INS': 0, 'DEL': 0, 'SUB': 0}
    for word in rec.split(' '):
        # Drop '*' markers and the parentheses wrapped around a token.
        item = word.replace('*', '').lstrip('(').rstrip(')')
        if ':' not in item:
            words.append(item)
            continue
        ref, hyp = _split_alignment(item)
        # Classify on the cleaned halves so the counters always agree with
        # the decision below about whether the word is kept in the output.
        if ref == _NULL:
            counts['INS'] += 1
        elif hyp == _NULL:
            counts['DEL'] += 1
        else:
            counts['SUB'] += 1
        if hyp != _NULL:
            words.append(hyp)
    return words, counts


def run(src, des, summ):
    """Count alignment errors in ``src`` and write cleaned text and a summary.

    Reads the columns ``fn``, ``normtrans``, ``tokenalignments``,
    ``numtokens`` and ``numedits`` from the comma-separated file ``src``.
    For every row the alignment string is scanned for ``ref:hyp`` tokens,
    which are counted as insertions (``NULL:hyp``), deletions (``ref:NULL``)
    or substitutions, and is replaced by the plain recognised words.

    Args:
        src: Path of the comma-separated input file.
        des: Path of the tab-separated output file holding the columns
            ``FilePath``, ``Transcription`` and ``Recognition``.
        summ: Path of the tab-separated summary file (one header line and
            one data line: Utts, Tokens, WER, SUB, INS, DEL, WLD).

    The summary values are also printed to standard output, one
    ``key:value`` pair per line.
    """
    data = pd.read_csv(
        src,
        sep=',',
        usecols=['fn', 'normtrans', 'tokenalignments', 'numtokens', 'numedits'],
        # Keep text columns as text even when a field looks numeric.
        dtype={'fn': str, 'normtrans': str, 'tokenalignments': str},
    )
    data.rename(
        columns={
            'fn': 'FilePath',
            'normtrans': 'Transcription',
            'tokenalignments': 'Recognition',
            'numtokens': 'Tokens',
            'numedits': 'Errs',
        },
        inplace=True,
    )
    # pandas parses empty fields as NaN; turn them back into '' so the
    # empty-string comparison and str.split below behave as intended.
    text_cols = ['Transcription', 'Recognition']
    data[text_cols] = data[text_cols].fillna('')

    summary = {'INS': 0, 'DEL': 0, 'SUB': 0, 'Tokens': 0, 'Errs': 0, 'WLD': 0}
    summary['Tokens'] = data['Tokens'].sum()
    summary['Errs'] = data['Errs'].sum()

    cleaned = []
    for trans, rec in zip(data['Transcription'], data['Recognition']):
        # WLD: rows with a transcription but an empty alignment string.
        if trans != '' and rec == '':
            summary['WLD'] += 1
        words, counts = _parse_alignments(rec)
        for key, value in counts.items():
            summary[key] += value
        cleaned.append(' '.join(words))
    data['Recognition'] = cleaned

    data.to_csv(
        des,
        sep='\t',
        columns=['FilePath', 'Transcription', 'Recognition'],
        index=False,
    )

    # The error rate is undefined without tokens; report 'nan' instead of
    # raising ZeroDivisionError.
    tokens = summary['Tokens']
    rate = summary['Errs'] / tokens * 100 if tokens else float('nan')
    summary['WER'] = '{0:.2f}'.format(rate)
    summary['Utts'] = data.shape[0]

    for key in summary:
        print(key + ':' + str(summary[key]))

    header = ('Utts', 'Tokens', 'WER', 'SUB', 'INS', 'DEL', 'WLD')
    with open(summ, 'w', encoding='utf-8') as fd:
        fd.write('{0}\n'.format('\t'.join(header)))
        fd.write('{0}\n'.format('\t'.join(str(summary[key]) for key in header)))
if __name__ == '__main__':
    # Three positional paths are required: input CSV, output TSV, summary file.
    # Exit with a usage message instead of an IndexError when any is missing;
    # additional arguments are ignored.
    if len(sys.argv) < 4:
        sys.exit('usage: {0} SRC_CSV DES_TSV SUMMARY_TSV'.format(sys.argv[0]))
    run(sys.argv[1], sys.argv[2], sys.argv[3])
Add Comment
Please, Sign In to add comment