# NGrams: generate text from an n-gram model trained on a text file.

from random import choice, random
from nltk import word_tokenize
import argparse
import sys


def get_counts(context_length, training_text):
    """Tally which token follows each context window in the training text.

    The text is split with ``word_tokenize``. Every run of
    ``context_length`` consecutive tokens is treated as a context, and the
    token immediately after it is recorded as a follower of that context.

    Args:
        context_length: Number of tokens in each context window.
        training_text: Raw text to tokenize and count.

    Returns:
        A dict mapping each context (a tuple of tokens) to a dict of
        ``{following_token: occurrence_count}``.
    """
    table = {}
    words = word_tokenize(training_text)

    for start in range(len(words) - context_length):
        follower = words[start + context_length]
        window = tuple(
            words[start + offset] for offset in range(context_length)
        )

        # Fetch (or create) the follower tally for this window, then bump it.
        tally = table.setdefault(window, {})
        tally[follower] = tally.get(follower, 0) + 1

    return table


def generate_from_file(context_length, training_file, my_starter, output_length=60):
    """Print text generated greedily from an n-gram model of a training file.

    The file is read, counted with ``get_counts``, and then extended one
    token at a time by always picking the most frequent follower of the
    current context (ties go to the follower seen first in the text).

    Args:
        context_length: Number of tokens in each context window.
        training_file: Path of the text file used to build the counts.
        my_starter: Sequence of tokens to start from, or ``None`` to start
            from a randomly chosen context. If the starter does not occur
            as a context in the training text, it is printed as a prefix
            (followed by ". ") and generation starts from a random context.
        output_length: Maximum number of tokens to generate after the
            starting context.

    Returns:
        None. The generated text is written to standard output.

    Raises:
        IndexError: If a random context is needed but the training text
            yields no contexts at all (``random.choice`` on an empty list).
    """
    with open(training_file, 'r') as f:
        training_data = f.read()

    counts = get_counts(context_length, training_data)
    contexts = list(counts)

    # The prefix stays empty unless the requested starter is unknown;
    # previously it was only defined in that case, so printing failed with
    # a NameError on every other path.
    prefix = ""
    if my_starter is None:
        first_tokens = choice(contexts)  # Choose a random first context
    else:
        first_tokens = tuple(my_starter)
        if first_tokens not in counts:
            prefix = " ".join(first_tokens) + ". "
            first_tokens = choice(contexts)

    output_list = list(first_tokens)
    current_context = first_tokens

    for _ in range(output_length):
        followers = counts.get(current_context)
        if followers is None:
            # Dead end: this context was never followed by anything in the
            # training text (e.g. it is the last window), so stop early
            # instead of raising KeyError.
            break

        next_token = max(followers, key=followers.get)
        output_list.append(next_token)

        # Slide the window: append the new token and drop the oldest one.
        # Slicing (rather than pop(0)) also works for an empty context.
        current_context = (current_context + (next_token,))[1:]

    print(prefix + " ".join(output_list))


# Command-line entry point. Guarded so that importing this module does not
# parse sys.argv or start generating text.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("file")
    # Let argparse do the int conversion so a bad value gives a usage error
    # instead of a traceback.
    parser.add_argument("-n", action="store", type=int, default=2)
    parser.add_argument("--starter", action="store")

    options = parser.parse_args()
    number = options.n
    starter = options.starter

    # --starter is optional: when omitted, None is passed through and
    # generate_from_file picks a random starting context.
    if starter is not None:
        starter = word_tokenize(starter)
        if len(starter) != number:
            # The starter may be too short as well as too long; report the
            # actual requirement and exit with a non-zero status.
            parser.error(
                "--starter must contain exactly {} token(s), got {}.".format(
                    number, len(starter)
                )
            )

    generate_from_file(number, options.file, starter)