Guest User

Untitled

a guest
Jan 16th, 2018
94
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.52 KB | None | 0 0
  1. '''
  2. Calculate INS, DEL, SUB
  3. '''
  4.  
  5. import re
  6. import sys
  7. import pandas as pd
  8.  
  9.  
  10. def run(src, des, summ):
  11.  
  12. data = pd.read_csv(src, sep=',', usecols=['fn', 'normtrans', 'tokenalignments', 'numtokens', 'numedits'])
  13.  
  14. data.rename(columns={'fn': 'FilePath', 'normtrans': 'Transcription', 'tokenalignments': 'Recognition', 'numtokens': 'Tokens', 'numedits': 'Errs'}, inplace=True)
  15.  
  16. summary = {'INS': 0, 'DEL': 0, 'SUB': 0, 'Tokens': 0, 'Errs': 0, 'WLD': 0}
  17.  
  18. re_has = re.compile('\:')
  19. re_ins = re.compile('NULL\:(.*)')
  20. re_del = re.compile('.*\:NULL')
  21.  
  22. INS = 0
  23. DEL = 0
  24. SUB = 0
  25.  
  26. for (index, row) in data.iterrows():
  27. summary['Tokens'] += row['Tokens']
  28. summary['Errs'] += row['Errs']
  29.  
  30. if row['Transcription'] != '' and row['Recognition'] == '':
  31. summary['WLD'] += 1
  32.  
  33. rec = row['Recognition']
  34. words = rec.split(' ')
  35. lst = []
  36. for word in words:
  37. if re_has.search(word):
  38. if re_ins.search(word):
  39. summary['INS'] +=1
  40. elif re_del.search(word):
  41. summary['DEL'] += 1
  42. else:
  43. summary['SUB'] += 1
  44.  
  45. item = word.replace('*', '').lstrip('(').rstrip(')')
  46. if ':' in item:
  47. left, right = item.split(':')
  48. if right != 'NULL':
  49. lst.append(right)
  50. # if left == 'NULL' and right != 'NULL':
  51. # INS += 1
  52. # elif right == 'NULL':
  53. # DEL += 1
  54. # else:
  55. # SUB += 1
  56. else:
  57. lst.append(item)
  58.  
  59. data.loc[index, 'Recognition'] = ' '.join(lst)
  60.  
  61. data.to_csv(des, sep='\t', columns=('FilePath', 'Transcription', 'Recognition'), index=False)
  62.  
  63. summary['WER'] = '{0:.2f}'.format(summary['Errs'] / summary['Tokens'] * 100)
  64. summary['Utts'] = data.shape[0]
  65.  
  66. HEADER = ('Utts', 'Tokens', 'WER', 'SUB', 'INS', 'DEL', 'WLD')
  67.  
  68. for key in summary:
  69. print(key + ':' + str(summary[key]))
  70.  
  71.  
  72. # print('INS: {0}, SUB: {1}, DEL: {2}'.format(INS, SUB, DEL))
  73.  
  74. with open(summ, 'w', encoding='utf-8') as fd:
  75. fd.write('{0}\n'.format('\t'.join(HEADER)))
  76. fd.write('{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n'.format(
  77. summary['Utts'], summary['Tokens'], summary['WER'],
  78. summary['SUB'], summary['INS'], summary['DEL'], summary['WLD']
  79. ))
  80.  
  81. if __name__ == '__main__':
  82. run(sys.argv[1], sys.argv[2], sys.argv[3])
Add Comment
Please, Sign In to add comment