# Untitled

# Positions inside the (word, data) tuples consumed by print_lines:
# index 0 holds the word, index 1 holds its paragraph numbers or its count.
KEY = 0
VALUE = 1
import collections
import string


def get_file_object(filename):
    '''Open filename for reading in text mode.

    Returns the open file object, or None when the file does not exist.
    The file is not closed here; that is left to the caller.
    '''
    try:
        handle = open(filename, 'r')
    except FileNotFoundError:
        return None
    return handle

def process_lines(file_object):
    '''Split the text into paragraphs, each one a list of cleaned words.

    file_object is any iterable of text lines (for example an open file).
    Paragraphs are separated by one or more blank (empty or whitespace-only)
    lines. Every word is lower-cased and stripped of leading and trailing
    punctuation; tokens that consist of punctuation only are dropped.

    Returns a list with one list of words per paragraph, in file order.
    Empty input gives an empty list.
    '''
    paragraph_list = []
    current_words = []
    in_paragraph = False        # True once the current paragraph has a non-blank line
    for line in file_object:
        tokens = line.split()
        if not tokens:
            # A blank line closes the open paragraph. Runs of blank lines must
            # not create empty paragraphs, or the paragraph numbers would shift.
            if in_paragraph:
                paragraph_list.append(current_words)
                current_words = []
                in_paragraph = False
            continue
        in_paragraph = True
        for token in tokens:
            word = token.lower().strip(string.punctuation)
            if word:            # skip tokens such as "--" that strip down to ""
                current_words.append(word)
    # The last paragraph has no blank line after it. Keep it even when its
    # words are identical to those of an earlier paragraph.
    if in_paragraph:
        paragraph_list.append(current_words)
    return paragraph_list

def find_words(paragraph_list):
    '''Map every word to the paragraphs it occurs in.

    paragraph_list is a list of paragraphs, each a list of words.
    Paragraphs are numbered from 1.

    Returns a list of (word, paragraph_numbers) tuples, where
    paragraph_numbers is a sorted list of distinct ints. The tuples are in
    order of each word's first occurrence.
    '''
    locations = {}
    for index, paragraph in enumerate(paragraph_list, start=1):
        for word in paragraph:
            # Keep the numbers as ints in a set: duplicates vanish, and a
            # multi-digit paragraph number is never split into its digits.
            locations.setdefault(word, set()).add(index)
    return [(word, sorted(indexes)) for word, indexes in locations.items()]

def get_second(x):
    '''Sort key: return the item at index 1 of x (the value of a pair).'''
    second = x[1]
    return second

def count_words(paragraph_list):
    '''Count the words over all paragraphs and rank them.

    paragraph_list is a list of paragraphs, each a list of words.

    Returns a tuple (top_10, top_20): the 10 and the 20 most frequent words
    as lists of (word, count) tuples, highest count first. Words with the
    same count are ordered alphabetically.
    '''
    counts = collections.Counter(
        word for paragraph in paragraph_list for word in paragraph
    )
    # One sort on (-count, word) gives descending counts with alphabetical
    # tie-breaks, the same order as sorting by word and then stably by count.
    ranked = sorted(counts.items(), key=lambda item: (-item[1], item[0]))
    return ranked[:10], ranked[:20]

def print_lines(word_placement, top_10, top_20):
    '''Print the paragraph index followed by the two word-count rankings.

    word_placement holds (word, paragraph_numbers) tuples and is printed in
    sorted order, one word per line with its numbers joined by ", ".
    top_10 and top_20 hold (word, count) tuples, printed in the given order.
    '''
    print()
    print("The paragraph index:")
    for entry in sorted(word_placement):
        numbers = [str(number) for number in entry[VALUE]]
        # An empty list of numbers is printed as it is, without joining
        shown = ", ".join(numbers) if len(numbers) >= 1 else numbers
        print("{} {}".format(entry[KEY], shown))

    rankings = (
        ("The highest 10 counts: ", top_10),
        ("The highest 20 counts: ", top_20),
    )
    for heading, ranking in rankings:
        print()
        print(heading)
        for entry in ranking:
            print("{}: {}".format(entry[KEY], entry[VALUE]))

def main():
    '''Ask for a filename and print the word report for that file.

    Prints an error message instead when the file does not exist.
    '''
    filename = input("Enter filename: ")
    file_object = get_file_object(filename)
    if file_object is None:
        print("Filename {} not found!".format(filename))
        return
    # The file is only needed while reading the lines; close it right after
    # so the handle is not leaked.
    with file_object:
        paragraph_list = process_lines(file_object)
    word_placement = find_words(paragraph_list)
    top_10, top_20 = count_words(paragraph_list)
    print_lines(word_placement, top_10, top_20)

# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()