Advertisement
opexxx

captcha_split.py

Apr 23rd, 2014
192
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.28 KB | None | 0 0
  1. '''
  2. Quick and dirty way to generate separate wav files based on the loud voices detected in an audio captcha challenge.
  3. Lots of room for improvement.
  4.  
  5. What it does:
  6.  
  7. 1. Minor noise removal.
  8. 2. Detect louder voices in input wav file
  9. 3. Splits the main wav file into one file per loud voice detected.
  10.  
  11. '''
  12.  
  13. import wave
  14. import sys
  15. import struct
  16. import os
  17. import time
  18. import httplib
  19. from random import randint
  20.  
  21.  
  22. ip = wave.open(sys.argv[1], 'r')
  23. info = ip.getparams()
  24. frame_list = []
  25. for i in range(ip.getnframes()):
  26.     sframe = ip.readframes(1)
  27.     amplitude = struct.unpack('<h', sframe)[0]
  28.     frame_list.append(amplitude)
  29. ip.close()
  30. for i in range(0,len(frame_list)):
  31.     if abs(frame_list[i]) < 25:
  32.         frame_list[i] = 0
  33. ################################  Find Out most louder portions of the audio file ###########################
  34. thresh = 30
  35. output = []
  36. nonzerotemp = []
  37. length = len(frame_list)
  38. i = 0
  39. while i < length:
  40.     zeros = []
  41.     while i < length and frame_list[i] == 0:
  42.         i += 1
  43.         zeros.append(0)
  44.     if len(zeros) != 0 and len(zeros) < thresh:
  45.         nonzerotemp += zeros
  46.     elif len(zeros) > thresh:
  47.         if len(nonzerotemp) > 0 and i < length:
  48.             output.append(nonzerotemp)
  49.             nonzerotemp = []
  50.     else:
  51.         nonzerotemp.append(frame_list[i])
  52.         i += 1
  53. if len(nonzerotemp) > 0:
  54.     output.append(nonzerotemp)
  55.  
  56. chunks = []
  57. for j in range(0,len(output)):
  58.     if len(output[j]) > 3000:
  59.         chunks.append(output[j])
  60. #########################################################################################################
  61.  
  62. for l in chunks:
  63.     for m in range(0,len(l)):
  64.         if l[m] == 0:
  65.              l[m] = randint(-0,+0)
  66.  
  67. inc_percent = 1 #10 percent
  68.  
  69. for l in chunks:
  70.     for m in range(0,len(l)):
  71.         if l[m] <= 0:
  72.             # negative value
  73.             l[m] = 0 - abs(l[m]) + abs(l[m])*inc_percent/100
  74.         else:
  75.             #positive vaule
  76.             l[m] =     abs(l[m]) + abs(l[m])*inc_percent/100
  77.  
  78. ########################################################
  79.  
  80. # Below code generates separate wav files depending on the number of loud voice detected.
  81.  
  82. NEW_RATE = 1 #Change it to > 1 if any amplification is required
  83.  
  84. print '[+] Possibly ',len(chunks),'number of loud voice detected...'
  85. for i in range(0, len(chunks)):
  86.     new_frame_rate = info[0]*NEW_RATE
  87.     print '[+] Creating No. ',str(i),'file..'
  88.     split = wave.open('cut_'+str(i)+'.wav', 'w')
  89.     split.setparams((info[0],info[1],info[2],0,info[4],info[5]))
  90. #   split.setparams((info[0],info[1],new_frame_rate,0,info[4],info[5]))
  91.  
  92.     #Add some silence at start selecting +15 to -15
  93.     for k in range(0,10000):
  94.         single_frame = struct.pack('<h', randint(-25,+25))
  95.         split.writeframes(single_frame)
  96.     # Add the voice for the first time
  97.     for frames in chunks[i]:
  98.         single_frame = struct.pack('<h', frames)
  99.         split.writeframes(single_frame)
  100.  
  101.     #Add some silence in between two digits
  102.     for k in range(0,10000):
  103.         single_frame = struct.pack('<h', randint(-25,+25))
  104.         split.writeframes(single_frame)
  105.  
  106.     # Repeat effect :  Add the voice second time
  107.     for frames in chunks[i]:
  108.         single_frame = struct.pack('<h', frames)
  109.         split.writeframes(single_frame)
  110.  
  111.     #Add silence at end
  112.     for k in range(0,10000):
  113.         single_frame = struct.pack('<h', randint(-25,+25))
  114.         split.writeframes(single_frame)
  115.  
  116.     split.close()#Close each files
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement