Advertisement
Guest User

Untitled

a guest
Feb 20th, 2019
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.01 KB | None | 0 0
  1. import csv
  2. from pydub import AudioSegment
  3. from os import walk
  4. from os.path import join, basename
  5. import numpy
  6.  
  7.  
  8. def count_silent_chunks(chunks, threshold, rev=False):
  9. """
  10. count the number of chunks that are silent from start or end to first noise and return it as an integer"
  11.  
  12. Keyword arguments:
  13. chunks -- a list of chunks of a sound file
  14. threshold -- dbFS value to compare against for detection of silence
  15. rev -- which edge to start from (True = right edge)
  16. """
  17. silent_blocks = 0
  18. rng = reversed(xrange(len(chunks))) if rev else xrange(len(chunks))
  19.  
  20. for i in rng:
  21. if chunks[i].dBFS == float('-inf') or chunks[i].dBFS < threshold:
  22. silent_blocks += 1
  23. else:
  24. break
  25. return silent_blocks
  26.  
  27.  
  28. def get_silence(audio, interval, threshold, step):
  29. """get length of silence at edges in ms from a wav file and return
  30. {start: [ms, blocks], end: [ms, blocks], duration: ms, threshold: dbFS}
  31.  
  32. Keyword arguments:
  33. audio -- filename
  34. interval -- size of chunks in ms
  35. threshold -- start dbFS threshold for silence comparison
  36. step -- amount to increment by when searching for silence threshold
  37. """
  38.  
  39. # chop off this number of ms from end of file (mouse click)
  40. end_omit = 250
  41. wav = AudioSegment.from_wav(audio)[:-1 * end_omit]
  42.  
  43. # break into chunks of interval ms
  44. chunks = [wav[i:i+interval]
  45. for i in range(0, len(wav), interval)]
  46.  
  47. # min/max chunks of silence to guard against implausible results
  48. min_silence = 250 / interval
  49. max_silence = len(chunks) - min_silence - 1
  50.  
  51. # find number of chunks with dBFS below threshold at start
  52. silent_blocks_start = 0
  53. selected_threshold = 0 # selected threshold
  54. for i in numpy.arange(threshold, 0, step):
  55. if silent_blocks_start > max_silence:
  56. silent_blocks_start = -1
  57. selected_threshold = 1
  58. break
  59. silent_blocks_start = count_silent_chunks(chunks, i)
  60. if silent_blocks_start > min_silence:
  61. selected_threshold = i
  62. break
  63.  
  64. # find number of chunks with dBFS below threshold at end
  65. if selected_threshold < 0:
  66. silent_blocks_end = count_silent_chunks(
  67. chunks, selected_threshold, True)
  68. else:
  69. silent_blocks_end = -1
  70.  
  71. if silent_blocks_end < min_silence + 1:
  72. silent_blocks_end = -1
  73.  
  74. end_ms_silence = -1
  75. start_ms_silence = -1
  76. if silent_blocks_start > 0 and silent_blocks_start < max_silence:
  77. start_ms_silence = silent_blocks_start * interval - interval/2
  78. if silent_blocks_end > 0 and silent_blocks_end < max_silence:
  79. end_ms_silence = silent_blocks_end * interval - \
  80. interval/2 + end_omit
  81.  
  82. return {"start": [start_ms_silence, silent_blocks_start], "end": [end_ms_silence, silent_blocks_end], "duration": len(chunks) * interval, "threshold": selected_threshold}
  83.  
  84.  
  85. def print_item(name, vals):
  86. """ print vales for a file """
  87. print '{:>16} \t {:>8} {:>4} \t {:>8} {:>4} \t {:>8} \t {:-03.3f}'.format(
  88. basename(name), vals["start"][0], vals["start"][1], vals["end"][0], vals["end"][1], vals["duration"], vals["threshold"])
  89.  
  90.  
  91. def write_csv(data, filename):
  92. """" write data to a csv """
  93. with open(str(filename), 'wb') as csvfile:
  94. writer = csv.writer(csvfile, delimiter=',')
  95. writer.writerow(["file", "leading_silence_ms", "leading_silence_blocks",
  96. "trailing_silence_ms", "trailing_silence_blocks", "duration", "dbFS_threshold"])
  97. for (name, vals) in data:
  98. writer.writerow(
  99. [basename(name), vals["start"][0], vals["start"][1], vals["end"][0], vals["end"][1], vals["duration"], vals["threshold"]])
  100.  
  101.  
  102. # get files in all directories in a directory
  103. audio_path = "/Users/tyler/Downloads/recordings"
  104. audio_files = []
  105. for root, dirs, files in walk(audio_path):
  106. for name in files:
  107. if("wav" in name and "E" in name):
  108. # only sound files of Experimental items (E for EXP)
  109. audio_files.append(join(root, name))
  110. if "prac" in dirs:
  111. # eliminat practice items
  112. dirs.remove("prac")
  113. if "108" in dirs:
  114. # this is a glitch
  115. dirs.remove("108")
  116. if "0" in dirs:
  117. # this is test data
  118. dirs.remove("0")
  119. if "1000" in dirs:
  120. # this is test data
  121. dirs.remove("1000")
  122.  
  123. threshold = -80 # starting dbFS value for detecting silence
  124. step = 0.01 # dbFS increment during search
  125. interval = 50 # ms, increase to speed up
  126.  
  127. # iterate over all files and find sileces
  128. edge_silences = {a: get_silence(
  129. join(audio_path, a), interval, threshold, step) for a in audio_files}
  130.  
  131. # output result and count files with bad values
  132. fails = 0
  133. for name, vals in sorted(edge_silences.items()):
  134. if vals["start"][0] < 0 or vals["end"][0] < 0:
  135. # no value found for start and/or end of file
  136. fails += 1
  137. print_item(name, vals)
  138. print '\n', str(fails), "failures out of", str(len(edge_silences))
  139.  
  140. # write results to file
  141. write_csv(edge_silences.items(), "output.csv")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement