Language Subtitle Studier

#! /usr/bin/python3

from subprocess import Popen
import os
import sys
import time
import random
import requests
import urllib.parse
import re
import alsaaudio
import audioop
import speech_recognition as sr
import _thread
import wave

# Default reading speed, in words per minute, used by speedRead().
READ_WPM = 300
# Language code passed to textToSpeech() and to the speech recognizer.
languageCode = "nl"
# Scratch file textToSpeech() writes the downloaded audio to before playing it.
tmpAudioFile = "/tmp/textToSpeech.mp3"
# Scratch file recordAudio() writes the captured microphone audio to.
tmpMicrophoneRecording = "/tmp/microphoneRecording.wav"

class color:
    """Terminal escape sequences used to style the program's output.

    Concatenate one or more of these in front of a string and finish with
    ``END`` to switch the styling off again.
    """

    # Foreground colours.
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'

    # Text attributes.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Resets every colour/attribute set above.
    END = '\033[0m'

# Both subtitle files are required; without them there is nothing to study.
if len(sys.argv) < 3:
    print("Provide both the foreign subtitle (.srt) file and the native one.")
    print("Example: substudy foreign.srt native.srt")
    # sys.exit() is the supported way to leave a program; the bare exit()
    # helper is injected by the site module for interactive use and is not
    # guaranteed to exist.  A non-zero status tells the calling shell that
    # the invocation was wrong.
    sys.exit(1)

# Every flashcard, indexed by subtitle position; filled in by load().
cards = []

# Paths of the two subtitle files given on the command line.
foreignFile = sys.argv[1]
nativeFile = sys.argv[2]

def clearScreen():
    """Clear the terminal by running the ``clear`` command through the shell."""
    os.system('clear')

class Card:
    """One flashcard: a subtitle line in the native and the foreign language."""

    def __init__(self, native=None, foreign=None):
        """Create a card; either side may be left as None and filled in later."""
        self.foreign = foreign
        self.native = native

    def __str__(self):
        """Return both sides, one per line, each with a bold label."""
        rows = [
            color.BOLD + "NATIVE: " + color.END + str(self.native),
            color.BOLD + "FOREIGN: " + color.END + str(self.foreign),
        ]
        # Every row, including the last one, is newline-terminated.
        return "\n".join(rows) + "\n"

    def say(self):
        """Speak the foreign text aloud via textToSpeech()."""
        textToSpeech(self.foreign, languageCode)

    def read(self):
        """Flash the foreign text word by word via speedRead()."""
        speedRead(self.foreign)

def getCard(index):
    """Return the card at ``index``, growing ``cards`` with blank cards if needed.

    index -- position of the wanted card in the module-level ``cards`` list.
    """
    missing = index + 1 - len(cards)
    if missing > 0:
        # Pad the deck so that ``index`` becomes a valid position.
        cards.extend(Card() for _ in range(missing))
    return cards[index]

def setNative(index, value):
    """Store ``value`` as the native-language text of the card at ``index``."""
    target = getCard(index)
    target.native = value

def setForeign(index, value):
    """Store ``value`` as the foreign-language text of the card at ``index``."""
    target = getCard(index)
    target.foreign = value

def loadSubtitles(filename, callback):
    """Read a subtitle file and report the text of every cue.

    The file is decoded as ISO-8859-15 and split into cues at blank lines.
    The first two lines of a cue (its counter and its timestamp line) are
    dropped; the remaining lines are joined into one line of text with all
    whitespace runs collapsed to single spaces.

    filename -- path of the subtitle file to read.
    callback -- called as ``callback(index, text)`` once per cue, where
                ``index`` counts cues from 0 in file order.  A cue that has
                no text lines is reported with ``text == ""`` so that the
                numbering of the cues after it does not shift.
    """
    # ``with`` closes the file even when reading or decoding fails.
    with open(filename, "r", encoding="iso-8859-15") as subtitleFile:
        data = subtitleFile.read()

    # Cues are separated by blank lines; also accept separator lines that
    # contain stray spaces or tabs, which a plain "\n\n" split would miss.
    chunks = re.split(r"\n\s*\n", data.strip())

    index = -1
    for chunk in chunks:
        chunk = chunk.strip()
        if not chunk:
            continue

        index += 1

        # parts[0] is the cue counter, parts[1] the timestamps, parts[2]
        # (when present) all of the text lines.
        parts = chunk.split("\n", 2)
        text = parts[2] if len(parts) > 2 else ""
        callback(index, " ".join(text.split()))

def loadNative():
    """Fill the native side of the cards from the native subtitle file."""
    loadSubtitles(filename=nativeFile, callback=setNative)

def loadForeign():
    """Fill the foreign side of the cards from the foreign subtitle file."""
    loadSubtitles(filename=foreignFile, callback=setForeign)

def load():
    """Load both subtitle files: the native one first, then the foreign one."""
    for loader in (loadNative, loadForeign):
        loader()

def inputNumber():
    """Read one line from standard input and return it as an int.

    Returns None when the line is not a valid integer.
    """
    inp = input()
    try:
        return int(inp)
    except ValueError:
        # int() signals unparsable text with ValueError; catching only that
        # avoids swallowing unrelated errors such as KeyboardInterrupt.
        return None

def inputBoolean(prompt, default=None):
    """Ask a yes/no question until the user gives a usable answer.

    prompt  -- question text; a "(y/n)" style hint is appended to it.
    default -- value returned when the user just presses enter.  With the
               default of None an empty answer is rejected and the question
               is asked again.

    Returns True for "y"/"yes", False for "n"/"no" (case-insensitive,
    surrounding whitespace ignored), or ``default`` for an empty answer.
    """
    # The capital letter in the hint shows which answer enter selects.
    if default is None:
        hint = "(y/n)"
    elif default:
        hint = "(Y/n)"
    else:
        hint = "(y/N)"

    answers = {"y": True, "yes": True, "n": False, "no": False}
    while True:
        inp = input(prompt + " " + hint + " ").strip().lower()
        if inp in answers:
            return answers[inp]
        if inp == "" and default is not None:
            return default

def inputChoice(prompt, choices):
    """Show a numbered menu and return the value of the entry the user picks.

    prompt  -- heading printed in bold above the menu.
    choices -- dict mapping the label shown to the user to the value to
               return; must not be empty.

    The labels are listed in sorted order and numbered from 1.  The menu is
    shown again until the user enters the number of one of the entries.

    Raises ValueError when ``choices`` is empty, because no input could ever
    be accepted in that case.
    """
    if not choices:
        raise ValueError("inputChoice() needs at least one choice")

    # sorted() returns a new list; the result has to be kept for the menu
    # order to actually be sorted.
    keys = sorted(choices)

    while True:
        print(color.BOLD + prompt + color.END)
        for number, label in enumerate(keys, start=1):
            print(str(number) + ") " + label)

        picked = inputNumber()
        if picked is None:
            print("That is not a number!")
        elif picked < 1 or picked > len(keys):
            print("That was not one of the choices!")
        else:
            return choices[keys[picked - 1]]

# Deck orders offered by inputMode(); getNewDeck() shuffles the deck only for
# MODE_SCRAMBLED.
MODE_ORDERED = 0
MODE_SCRAMBLED = 1

def inputMode():
    """Ask the user for a deck order and return the matching MODE_* constant."""
    modes = {
        "Ordered": MODE_ORDERED,
        "Scrambled": MODE_SCRAMBLED,
    }
    return inputChoice("Select a mode:", modes)

# Values returned by inputMenuOption() and dispatched on in main().
MENU_OPTION_CARDS = 0
MENU_OPTION_STUDY = 1
MENU_OPTION_QUIT = 2

def inputMenuOption():
    """Show the main menu and return the chosen MENU_OPTION_* constant."""
    options = {
        "Study": MENU_OPTION_STUDY,
        "Print all cards": MENU_OPTION_CARDS,
        "Quit": MENU_OPTION_QUIT,
    }
    return inputChoice("Select a mode:", options)

def getNewDeck():
    """Return a copy of ``cards`` in the order the user asks for.

    The copy is shuffled when the user picks the scrambled mode; the
    module-level ``cards`` list itself is never reordered.
    """
    deck = cards[:]
    scrambled = inputMode() == MODE_SCRAMBLED
    if scrambled:
        random.shuffle(deck)
    return deck

def welcome():
    """Print the greeting shown when the program starts."""
    print("Welcome to Subtitle Study!")

def bye():
    """Print the farewell shown when the user quits."""
    print("Goodbye!")

def printCards():
    """Print every card in ``cards`` using its string representation."""
    for card in cards:
        print(card)

def superStrip(s):
    """Reduce a sentence to a canonical form for lenient comparison.

    The text is lower-cased, every character that is neither whitespace nor
    a letter/digit is removed (underscores are removed too), whitespace runs
    are collapsed to one space and leading/trailing whitespace is dropped.

    s -- the text to normalise.

    Returns the normalised string.
    """
    s = s.lower()
    s = re.sub(r'([^\s\w]|_)+', '', s)
    # Removing punctuation can leave whitespace at the edges (for example
    # "- Hello" becomes " hello"); split/join trims that and also collapses
    # tabs and newlines, not only runs of spaces.
    return " ".join(s.split())

def compare(a, b):
    """Return True when ``a`` and ``b`` are equal after superStrip() normalisation."""
    left, right = superStrip(a), superStrip(b)
    return left == right

def transcribe(card):
    """Listening exercise: play the card and check what the user types.

    The card is spoken again before every prompt until the user enters a
    non-empty line.  Returns True when that line matches the foreign text.
    """
    prompt = color.BOLD + "Transcribe:" + color.END + " "
    typed = ""
    while not typed:
        card.say()
        typed = input(prompt)
    return compare(typed, card.foreign)

def read(card):
    """Reading exercise: flash the card, then check what the user copies.

    The card is shown again (after the user presses enter) for as long as
    the answer is empty.  Returns True when the answer matches the foreign
    text.
    """
    copyPrompt = color.BOLD + "Copy:" + color.END + " "
    while True:
        input("Press enter when ready to read.")
        card.read()
        typed = input(copyPrompt)
        if not typed:
            # Empty answer: show the text once more.
            continue
        return compare(typed, card.foreign)

def speak(card):
    """Speaking exercise: have the user say the foreign text aloud.

    Returns False when speechRecognition() produced nothing, True when any
    of its transcriptions matches the foreign text, and otherwise the text
    of the first transcription.
    """
    print(color.BOLD + "Say:" + color.END + " " + card.foreign)
    input("Press enter when ready.")

    heard = speechRecognition()
    if not heard:
        return False
    if any(compare(guess["text"], card.foreign) for guess in heard):
        return True
    return heard[0]["text"]

def translate(card):
    """Show the foreign text and return True if the user types the native text."""
    print(color.BOLD + "Translate:" + color.END + " " + card.foreign)
    answer = input("> ")
    return compare(answer, card.native)

def reverseTranslate(card):
    """Show the native text and return True if the user types the foreign text."""
    print(color.BOLD + "Translate:" + color.END + " " + card.native)
    answer = input("> ")
    return compare(answer, card.foreign)

def studyCard(card):
    """Take the user through all five exercises for one card.

    The exercises run in a fixed order: translate native to foreign,
    speed-read and copy the foreign text, transcribe the spoken foreign
    text, say the foreign text aloud, and translate foreign to native.
    After each one the verdict is printed (with the expected answer on a
    miss) and the user presses enter to continue.  At the end the user may
    repeat the whole sequence; repeating is the default answer.

    card -- the Card to practise.
    """
    while True:
        clearScreen()
        if reverseTranslate(card):
            print(color.GREEN + color.BOLD + "Awesome!" + color.END)
        else:
            # The expected answer of this exercise is the foreign text; the
            # native text is the prompt the user has already been shown.
            print(color.RED + color.BOLD + "Incorrect: " + color.END + card.foreign)

        input()
        clearScreen()
        if read(card):
            print(color.GREEN + color.BOLD + "Exactly!" + color.END)
        else:
            print(color.RED + color.BOLD + "Sorry: " + color.END + card.foreign)

        input()
        clearScreen()
        if transcribe(card):
            print(color.GREEN + color.BOLD + "Correct!" + color.END)
        else:
            print(color.RED + color.BOLD + "Wrong: " + color.END + card.foreign)

        input()
        clearScreen()
        # speak() returns False (nothing recognised), True (match) or the
        # text that was recognised instead.
        result = speak(card)
        if result is False:
            print(color.RED + color.BOLD + "Couldn't recognize you!" + color.END)
        elif result is True:
            print(color.GREEN + color.BOLD + "Good!" + color.END)
        else:
            print(color.RED + color.BOLD + "Not quite!" + color.END + "  It sounded like you said \"" + result + "\"")

        input()
        clearScreen()
        if translate(card):
            print(color.GREEN + color.BOLD + "Nice!" + color.END)
        else:
            print(color.RED + color.BOLD + "Nope: " + color.END + card.native)

        print()
        if not inputBoolean("Do you want to repeat this card?", True):
            break

def study():
    """Run a study session over a freshly ordered deck of all cards."""
    clearScreen()
    for card in getNewDeck():
        studyCard(card)
    clearScreen()

def setup():
    """Let the user pick a microphone and open it for capture.

    Sets the module-level ``microphone`` (the chosen card name) and
    ``audioInput`` (the opened capture device).
    """
    global microphone
    global audioInput

    clearScreen()

    # Each sound card name serves as both the menu label and the result.
    cardNames = alsaaudio.cards()
    microphone = inputChoice(
        "Select a microphone source:",
        {name: name for name in cardNames},
    )

    # Non-blocking capture: one channel at 44100 Hz, matching the channel
    # count and rate that exportWav() writes into the WAV header.
    audioInput = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NONBLOCK, microphone)
    audioInput.setchannels(1)
    audioInput.setrate(44100)
    audioInput.setformat(alsaaudio.PCM_FORMAT_S16_LE)
    audioInput.setperiodsize(160)

def main():
    """Show the main menu repeatedly until the user chooses to quit."""
    clearScreen()
    welcome()

    actions = {
        MENU_OPTION_CARDS: printCards,
        MENU_OPTION_STUDY: study,
    }
    while True:
        action = actions.get(inputMenuOption())
        if action is None:
            # Anything that is not a known action (i.e. "Quit") ends the loop.
            break
        action()

    bye()

def getFile(url, timeout=None):
    """Download ``url`` with an HTTP GET and return the response body as bytes.

    url     -- address to fetch.
    timeout -- optional number of seconds to wait for the server before
               giving up; the default of None waits indefinitely.
    """
    # Using the session as a context manager releases its pooled
    # connections as soon as the download is done.
    with requests.Session() as session:
        return session.get(url, timeout=timeout).content

# Handle of the most recently started player process, or None before the
# first playback.
audioProcess = None

def playAudioFile(path):
    """Start playing the audio file at ``path`` in the background with cvlc.

    A player that was started by an earlier call is terminated first, so at
    most one clip plays at a time.  The call returns as soon as the new
    player has been launched.

    path -- path of the audio file to play.
    """
    global audioProcess
    # Function-scope import: the module itself imports only Popen.
    from subprocess import DEVNULL

    if audioProcess is not None:
        audioProcess.terminate()
    # DEVNULL silences the player without opening (and leaking) a new
    # os.devnull file object on every call.
    audioProcess = Popen(["cvlc", path, "vlc://quit"], stdin=DEVNULL, stdout=DEVNULL, stderr=DEVNULL)

def exportFile(path, data):
    """Write ``data`` to ``path``, replacing any existing file.

    path -- destination file path.
    data -- bytes to write.
    """
    # ``with`` closes the file even when the write itself fails.
    with open(path, "wb") as f:
        f.write(data)

def exportWav(path, data):
    """Write raw audio frames to ``path`` as an uncompressed WAV file.

    The header declares 1 channel, 2-byte samples and a 44100 Hz frame
    rate, so ``data`` has to be in that layout.

    path -- destination file path.
    data -- raw frame bytes.
    """
    f = wave.open(path, "w")
    try:
        f.setparams((1, 2, 44100, 0, 'NONE', 'not compressed'))
        f.writeframes(data)
    finally:
        # Close in every case so the file handle is never left open.
        f.close()

def textToSpeech(text, language="en"):
    """Download a spoken rendering of ``text`` and start playing it.

    The audio is requested from the translate_tts endpoint, saved to
    ``tmpAudioFile`` and handed to playAudioFile().

    text     -- the sentence to speak.
    language -- language code sent as the ``tl`` query parameter.
    """
    # Percent-encode both values: replacing only spaces would let characters
    # such as "&", "#", "+" or accented letters corrupt the query string.
    query = "tl=" + urllib.parse.quote(language, safe="") + "&q=" + urllib.parse.quote(text, safe="")
    data = getFile("http://translate.google.com/translate_tts?" + query)
    exportFile(tmpAudioFile, data)
    playAudioFile(tmpAudioFile)

def getMicrophoneData():
    """Collect microphone audio into the global ``micData`` while recording.

    Started on its own thread by recordAudio().  Keeps reading from the
    module-level ``audioInput`` until the global ``recording`` flag becomes
    false.
    """
    global micData
    # Start every recording from an empty buffer.
    micData = b""
    # The result of this first read is discarded.
    # NOTE(review): presumably this drops audio captured before the recording
    # started -- confirm.
    audioInput.read()
    while recording:
        # ``length`` is not inspected; whatever ``data`` holds is appended.
        length, data = audioInput.read()
        micData += data
        # Short pause between reads so the loop does not spin at full speed.
        time.sleep(0.001)

def recordAudio():
    """Record from the microphone until the user presses enter.

    Runs getMicrophoneData() on a background thread, waits for the user to
    press enter, stops the capture and writes the collected audio to
    ``tmpMicrophoneRecording``.

    Returns the path of the WAV file that was written.
    """
    global recording
    # Function-scope import: the module itself imports only _thread.
    import threading

    recording = True
    worker = threading.Thread(target=getMicrophoneData)
    # Daemon thread: an abandoned capture must never keep the program alive.
    worker.daemon = True
    worker.start()
    try:
        input(color.BOLD + color.RED + "[RECORDING] " + color.END + "Press enter to stop...")
    finally:
        # Always stop the capture loop, even if the prompt is interrupted,
        # and wait for it to finish so ``micData`` is complete before use.
        recording = False
        worker.join()
    print(color.BOLD + "Finished recording." + color.END)
    exportWav(tmpMicrophoneRecording, micData)
    return tmpMicrophoneRecording

def speechRecognition():
    """Record the user, play the recording back and transcribe it.

    Returns whatever the recognizer's ``recognize(audio, True)`` call
    yields, or None when that call raises LookupError.
    """
    recognizer = sr.Recognizer(language=languageCode)
    recordingPath = recordAudio()
    # Let the user hear their own attempt while it is being analysed.
    playAudioFile(recordingPath)
    with sr.WavFile(recordingPath) as source:
        print("Analyzing audio...")
        captured = recognizer.listen(source)
    try:
        transcriptions = recognizer.recognize(captured, True)
    except LookupError:
        return None
    return transcriptions

def speedRead(string, wpm=READ_WPM):
    """Flash ``string`` on the terminal one word at a time.

    Each word is printed on a cleared screen, shown for ``60 / wpm``
    seconds and then cleared again.

    string -- the text to show.
    wpm    -- reading speed in words per minute; must be positive.

    Raises ValueError when ``wpm`` is not positive.
    """
    if wpm <= 0:
        raise ValueError("wpm must be positive")

    # The display time per word is constant, so work it out once.
    delay = 60.0 / wpm
    clearScreen()
    # split() without arguments ignores leading/trailing whitespace and
    # never yields empty "words" that would be flashed as blank screens.
    for word in string.split():
        print(word)
        time.sleep(delay)
        clearScreen()

# Start the interactive session only when the file is executed as a script,
# so that merely importing the module does not launch it.
if __name__ == "__main__":
    setup()
    load()
    main()