Advertisement
MegaLoler

Language Subtitle Studier

Jan 21st, 2015
240
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.58 KB | None | 0 0
  1. #! /usr/bin/python3
  2.  
  3. from subprocess import Popen
  4. import os
  5. import sys
  6. import time
  7. import random
  8. import requests
  9. import urllib.parse
  10. import re
  11. import alsaaudio
  12. import audioop
  13. import speech_recognition as sr
  14. import _thread
  15. import wave
  16.  
  17. READ_WPM = 300
  18. languageCode = "nl"
  19. tmpAudioFile = "/tmp/textToSpeech.mp3"
  20. tmpMicrophoneRecording = "/tmp/microphoneRecording.wav"
  21.  
  22. class color:
  23.    PURPLE = '\033[95m'
  24.    CYAN = '\033[96m'
  25.    DARKCYAN = '\033[36m'
  26.    BLUE = '\033[94m'
  27.    GREEN = '\033[92m'
  28.    YELLOW = '\033[93m'
  29.    RED = '\033[91m'
  30.    BOLD = '\033[1m'
  31.    UNDERLINE = '\033[4m'
  32.    END = '\033[0m'
  33.  
  34. if len(sys.argv) < 3:
  35.     print("Provide both the foreign subtitle (.srt) file and the native one.")
  36.     print("Example: substudy foreign.srt native.srt")
  37.     exit()
  38.  
  39. cards = []
  40.  
  41. foreignFile = sys.argv[1]
  42. nativeFile = sys.argv[2]
  43.  
  44. def clearScreen():
  45.     os.system('clear')
  46.  
  47. class Card:
  48.     def __init__(self, native=None, foreign=None):
  49.         self.native = native
  50.         self.foreign = foreign
  51.    
  52.     def __str__(self):
  53.         rep = color.BOLD + "NATIVE: " + color.END + str(self.native) + "\n"
  54.         rep += color.BOLD + "FOREIGN: " + color.END + str(self.foreign) + "\n"
  55.         return rep
  56.    
  57.     def say(self):
  58.         textToSpeech(self.foreign, languageCode)
  59.    
  60.     def read(self):
  61.         speedRead(self.foreign)
  62.  
  63. def getCard(index):
  64.     while index >= len(cards):
  65.         cards.append(Card())
  66.     return cards[index]
  67.  
  68. def setNative(index, value):
  69.     getCard(index).native = value
  70.  
  71. def setForeign(index, value):
  72.     getCard(index).foreign = value
  73.  
  74. def loadSubtitles(filename, callback):
  75.     f = open(filename, "r", encoding="iso-8859-15")
  76.     data = f.read()
  77.     f.close()
  78.    
  79.     chunks = data.strip().encode('unicode_escape').decode('unicode_escape').split("\n\n")
  80.    
  81.     i = -1
  82.     for chunk in chunks:
  83.         chunk = chunk.strip()
  84.         if not chunk: continue
  85.        
  86.         i += 1
  87.        
  88.         lines = chunk.split("\n", 2)
  89.         text = lines[2]
  90.         text = text.replace("\n", " ")
  91.         text = text.replace("  ", " ")
  92.         callback(i, text)
  93.  
  94. def loadNative():
  95.     loadSubtitles(nativeFile, setNative)
  96.  
  97. def loadForeign():
  98.     loadSubtitles(foreignFile, setForeign)
  99.  
  100. def load():
  101.     loadNative()
  102.     loadForeign()
  103.  
  104. def inputNumber():
  105.     inp = input()
  106.     try:
  107.         return int(inp)
  108.     except:
  109.         return None
  110.  
  111. def inputBoolean(prompt, default=None):
  112.     if default == True:
  113.         c = "(Y/n)"
  114.     elif default == False:
  115.         c = "(y/N)"
  116.     else:
  117.         c = "(y/n)"
  118.     inp = None
  119.     while inp != "y" and inp != "n" and (inp != "" or default == None):
  120.         inp = input(prompt + " " + c + " ").strip().lower()
  121.     if inp == "":
  122.         inp = default
  123.     else:
  124.         inp = inp == "y"
  125.     return inp
  126.  
  127. def inputChoice(prompt, choices):
  128.     keys = list(choices.keys())
  129.     sorted(keys)
  130.     inp = None
  131.     while inp == None:
  132.         print(color.BOLD + prompt + color.END)
  133.         i = 0
  134.         for choice in keys:
  135.             i += 1
  136.             print(str(i) + ") " + choice)
  137.         inp = inputNumber()
  138.         if inp == None:
  139.             print("That is not a number!")
  140.         elif inp < 1 or inp > len(keys):
  141.             print("That was not one of the choices!")
  142.             inp = None
  143.     return choices[keys[inp - 1]]
  144.  
  145. MODE_ORDERED = 0
  146. MODE_SCRAMBLED = 1
  147.  
  148. def inputMode():
  149.     return inputChoice("Select a mode:", {"Ordered": MODE_ORDERED, "Scrambled": MODE_SCRAMBLED})
  150.  
  151. MENU_OPTION_CARDS = 0
  152. MENU_OPTION_STUDY = 1
  153. MENU_OPTION_QUIT = 2
  154.  
  155. def inputMenuOption():
  156.     return inputChoice("Select a mode:", {"Study": MENU_OPTION_STUDY, "Print all cards": MENU_OPTION_CARDS, "Quit": MENU_OPTION_QUIT})
  157.  
  158. def getNewDeck():
  159.     deck = list(cards)
  160.     if inputMode() == MODE_SCRAMBLED: random.shuffle(deck)
  161.     return deck
  162.  
  163. def welcome():
  164.     print("Welcome to Subtitle Study!")
  165.  
  166. def bye():
  167.     print("Goodbye!")
  168.  
  169. def printCards():
  170.     for card in cards:
  171.         print(card)
  172.  
  173. def superStrip(s):
  174.     s = s.lower()
  175.     s = re.sub(r'([^\s\w]|_)+', '', s)
  176.     s = re.sub(' +',' ', s)
  177.     return s
  178.  
  179. def compare(a, b):
  180.     return superStrip(a) == superStrip(b)
  181.  
  182. def transcribe(card):
  183.     while True:
  184.         card.say()
  185.         inp = input(color.BOLD + "Transcribe:" + color.END + " ")
  186.         if inp: return compare(inp, card.foreign)
  187.  
  188. def read(card):
  189.     while True:
  190.         input("Press enter when ready to read.")
  191.         card.read()
  192.         inp = input(color.BOLD + "Copy:" + color.END + " ")
  193.         if inp: return compare(inp, card.foreign)
  194.  
  195. def speak(card):
  196.     print(color.BOLD + "Say:" + color.END + " " + card.foreign)
  197.     input("Press enter when ready.")
  198.     transcriptions = speechRecognition()
  199.     if transcriptions == None or len(transcriptions) == 0: return False
  200.     for t in transcriptions:
  201.         if compare(t["text"], card.foreign):
  202.             return True
  203.     return transcriptions[0]["text"]
  204.  
  205. def translate(card):
  206.     print(color.BOLD + "Translate:" + color.END + " " + card.foreign)
  207.     if compare(input("> "), card.native):
  208.         return True
  209.     else:
  210.         return False
  211.  
  212. def reverseTranslate(card):
  213.     print(color.BOLD + "Translate:" + color.END + " " + card.native)
  214.     if compare(input("> "), card.foreign):
  215.         return True
  216.     else:
  217.         return False
  218.  
  219. def studyCard(card):
  220.     while True:
  221.         clearScreen()
  222.         if reverseTranslate(card):
  223.             print(color.GREEN + color.BOLD + "Awesome!" + color.END)
  224.         else:
  225.             print(color.RED + color.BOLD + "Incorrect: " + color.END + card.native)
  226.        
  227.         input()
  228.         clearScreen()
  229.         if read(card):
  230.             print(color.GREEN + color.BOLD + "Exactly!" + color.END)
  231.         else:
  232.             print(color.RED + color.BOLD + "Sorry: " + color.END + card.foreign)
  233.        
  234.         input()
  235.         clearScreen()
  236.         if transcribe(card):
  237.             print(color.GREEN + color.BOLD + "Correct!" + color.END)
  238.         else:
  239.             print(color.RED + color.BOLD + "Wrong: " + color.END + card.foreign)
  240.            
  241.         input()
  242.         clearScreen()
  243.         result = speak(card)
  244.         if result == False:
  245.             print(color.RED + color.BOLD + "Couldn't recognize you!" + color.END)
  246.         elif result == True:
  247.             print(color.GREEN + color.BOLD + "Good!" + color.END)
  248.         else:
  249.             print(color.RED + color.BOLD + "Not quite!" + color.END + "  It sounded like you said \"" + result + "\"")
  250.        
  251.         input()
  252.         clearScreen()
  253.         if translate(card):
  254.             print(color.GREEN + color.BOLD + "Nice!" + color.END)
  255.         else:
  256.             print(color.RED + color.BOLD + "Nope: " + color.END + card.native)
  257.        
  258.         print()
  259.         if not inputBoolean("Do you want to repeat this card?", True): break
  260.  
  261. def study():
  262.     clearScreen()
  263.     deck = getNewDeck()
  264.     for card in deck: studyCard(card)
  265.     clearScreen()
  266.  
  267. def setup():
  268.     global microphone
  269.     global audioInput
  270.    
  271.     clearScreen()
  272.    
  273.     cards = alsaaudio.cards()
  274.     choices = {}
  275.     for card in cards:
  276.         choices[card] = card
  277.     microphone = inputChoice("Select a microphone source:", choices)
  278.    
  279.     audioInput = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NONBLOCK, microphone)
  280.     audioInput.setchannels(1)
  281.     audioInput.setrate(44100)
  282.     audioInput.setformat(alsaaudio.PCM_FORMAT_S16_LE)
  283.     audioInput.setperiodsize(160)
  284.  
  285. def main():
  286.     clearScreen()
  287.     welcome()
  288.     while True:
  289.         choice = inputMenuOption()
  290.         if choice == MENU_OPTION_CARDS:
  291.             printCards()
  292.         elif choice == MENU_OPTION_STUDY:
  293.             study()
  294.         else:
  295.             break
  296.     bye()
  297.  
  298. def getFile(url):
  299.     return requests.Session().get(url).content
  300.  
  301. audioProcess = None
  302.  
  303. def playAudioFile(path):
  304.     global audioProcess
  305.     fnull = open(os.devnull, "w+")
  306.     if audioProcess != None: audioProcess.terminate()
  307.     audioProcess = Popen(["cvlc", path, "vlc://quit"], stdin=fnull, stdout=fnull, stderr=fnull)
  308.  
  309. def exportFile(path, data):
  310.     f = open(path, "wb")
  311.     f.write(data)
  312.     f.close()
  313.  
  314. def exportWav(path, data):
  315.     f = wave.open(path, "w")
  316.     f.setparams((1, 2, 44100, 0, 'NONE', 'not compressed'))
  317.     f.writeframes(data)
  318.     f.close()
  319.  
  320. def textToSpeech(text, language="en"):
  321.     data = getFile("http://translate.google.com/translate_tts?tl=" + language + "&q=" + text.replace(" ", "%20"))
  322.     exportFile(tmpAudioFile, data)
  323.     playAudioFile(tmpAudioFile)
  324.  
  325. def getMicrophoneData():
  326.     global micData
  327.     micData = b""
  328.     audioInput.read()
  329.     while recording:
  330.         length, data = audioInput.read()
  331.         micData += data
  332.         time.sleep(0.001)
  333.  
  334. def recordAudio():
  335.     global recording
  336.     recording = True
  337.     _thread.start_new_thread(getMicrophoneData,())
  338.     input(color.BOLD + color.RED + "[RECORDING] " + color.END + "Press enter to stop...")
  339.     print(color.BOLD + "Finished recording." + color.END)
  340.     recording = False
  341.     exportWav(tmpMicrophoneRecording, micData)
  342.     return tmpMicrophoneRecording
  343.  
  344. def speechRecognition():
  345.     rec = sr.Recognizer(language=languageCode)
  346.     audio = recordAudio()
  347.     playAudioFile(audio)
  348.     with sr.WavFile(audio) as source:
  349.         print("Analyzing audio...")
  350.         audio = rec.listen(source)
  351.     try:
  352.         return rec.recognize(audio, True)
  353.     except LookupError:
  354.         return None
  355.  
  356. def speedRead(string, wpm=READ_WPM):
  357.     string = re.sub(' +',' ', string)
  358.     words = string.split(" ")
  359.     clearScreen()
  360.     for word in words:
  361.         print(word)
  362.         time.sleep(1 / (wpm / 60.0))
  363.         clearScreen()
  364.  
  365. setup()
  366. load()
  367. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement