# Untitled

# Characters that remove_punctuation() strips from the end of a token.
punctuation = '!.,?";:'
# Lowercase ASCII letters: the only characters the helpers below treat as
# letters. letters_used_once() reports its result in this order.
letters = 'etaoinshrdlcumwfgypbvkjxqz'
# Decimal digits; number_of_symbols() does not count these as symbols.
numbers = '0123456789'

def number_of_words(text):
    """Return the number of words in ``text``.

    ``text`` is split on runs of whitespace. Each token is cleaned with
    ``remove_punctuation`` and counted only if ``is_word`` accepts the
    result. Empty or whitespace-only text yields 0.
    """
    # str.split() without a separator never yields empty tokens, so the
    # helpers are not handed '' when the text is blank (joining and then
    # splitting on ' ' produced [''] for such input).
    return sum(
        1 for token in text.split() if is_word(remove_punctuation(token))
    )

def remove_punctuation(word):
    """Return ``word`` without trailing punctuation and without a final 's.

    Trailing characters found in the module-level ``punctuation`` string are
    stripped first, then one possessive suffix ``'s`` is dropped, so
    ``cat's.`` becomes ``cat``. An empty or all-punctuation ``word`` returns
    ``''`` instead of raising IndexError.
    """
    # rstrip() stops cleanly when the string runs out, whereas testing
    # word[-1] in a loop fails once every character has been removed.
    word = word.rstrip(punctuation)
    if word.endswith("'s"):
        word = word[:-2]
    # TODO: tokens like "cat?.;!" are reduced to "cat" and therefore still
    # count as a word, even though they are not really one.
    return word

def is_word(word):
    """Tell whether ``word`` is non-empty and consists solely of letters.

    A character counts as a letter only if it occurs in the module-level
    ``letters`` string (lowercase a-z); the empty string is never a word.
    """
    return word != '' and all(char in letters for char in word)

def number_of_letters(text):
    """Return how many characters of ``text`` occur in ``letters``."""
    return sum(1 for char in text if char in letters)

def number_of_symbols(text):
    """Return the number of symbols in ``text``, excluding whitespace.

    A symbol is any non-whitespace character that is in neither ``letters``
    nor ``numbers``. Because ``letters`` holds lowercase characters only,
    uppercase letters count as symbols unless the caller lowercases the
    text first.
    """
    count = 0
    # Walk the whitespace-separated tokens so that no whitespace character
    # (including the single spaces between words) is counted as a symbol.
    for token in text.split():
        for char in token:
            if char not in letters and char not in numbers:
                count += 1
    return count

def most_common_words(text):
    """Return the most frequent words in ``text``, most frequent first.

    Tokens are split on whitespace and cleaned with ``remove_punctuation``;
    tokens that end up empty are ignored. Words with equal counts are
    ordered so that the one comparing greater as a string comes first.

    Returns:
        A list of at most three words. It is shorter than three when the
        text contains fewer than three distinct words, and empty for blank
        text.
    """
    counts = {}
    # str.split() never yields empty tokens, so blank text produces no work.
    for token in text.split():
        word = remove_punctuation(token)
        if word:
            counts[word] = counts.get(word, 0) + 1

    # Sorting (count, word) pairs in descending order gives the same ranking
    # as repeatedly taking max(), but copes with fewer than three entries.
    ranked = sorted(((n, word) for word, n in counts.items()), reverse=True)
    return [word for _, word in ranked[:3]]

def most_common_letters(text):
    """Return a list of the three most frequent letters in ``text``.

    Only characters present in the module-level ``letters`` string are
    tallied. Every letter starts with a count of zero, so the result always
    has three entries, even when fewer than three letters occur in ``text``.
    Equal counts are ordered so that the letter comparing greater comes
    first.
    """
    tally = dict.fromkeys(letters, 0)

    # Whitespace is never a letter, so tokenising first changes nothing
    # about the counts.
    for token in text.split():
        for char in token:
            if char in tally:
                tally[char] += 1

    # Rank (count, letter) pairs from highest to lowest and keep the top 3.
    ranked = sorted(((n, char) for char, n in tally.items()), reverse=True)
    return [char for _, char in ranked[:3]]

def common_first_word(text):
    """Return the most common first word over the lines of ``text``.

    ``text`` is split on newlines. In each line, the first space-separated
    token that ``is_word`` accepts after ``remove_punctuation`` is taken as
    that line's first word. Among equally common words, the one comparing
    greater as a string wins.

    Raises:
        ValueError: if no line contains a word.
    """
    tally = {}
    for line in text.split('\n'):
        # A line holding only a single space carries no word.
        if line == ' ':
            continue
        for token in line.split(' '):
            # Consecutive or leading spaces produce empty tokens.
            if token == '':
                continue
            candidate = remove_punctuation(token)
            if is_word(candidate):
                tally[candidate] = tally.get(candidate, 0) + 1
                # Only the first word of each line is of interest.
                break

    # Compare on (count, word) so the count decides and the word breaks ties.
    return max([(n, candidate) for candidate, n in tally.items()])[1]

def words_used_once(text):
    """Return a list of the words that occur exactly once in ``text``.

    Tokens are split on whitespace and cleaned with ``remove_punctuation``;
    tokens that end up empty are ignored. The result follows dictionary
    iteration order, i.e. the order in which the words first appear on
    Python 3.7+. Blank text yields an empty list.
    """
    counts = {}
    # str.split() never yields empty tokens, so remove_punctuation() is not
    # called with '' when the text is empty or only whitespace.
    for token in text.split():
        word = remove_punctuation(token)
        if word:
            counts[word] = counts.get(word, 0) + 1

    return [word for word, n in counts.items() if n == 1]

def letters_used_once(text):
    """Return the letters that do not occur anywhere in ``text``.

    NOTE: despite its name, this function reports unused letters, not
    letters used exactly once. The result is ordered as in the module-level
    ``letters`` string.
    """
    return [char for char in letters if char not in text]

# Driver: ask for a text file, read it, and print statistics about it.
# TODO: check that the input is a valid, readable file path.
text_location = input('Please type your text file location')
# The context manager closes the file even if reading it raises.
with open(text_location, 'r') as file:
    # Lines keep their trailing newline and are joined with a space.
    text_file = ' '.join(file.readlines())
# The helpers only recognise lowercase letters, so normalise the case once.
text_file = text_file.lower()
print('{0} words'.format(number_of_words(text_file)))
print('{0} letters'.format(number_of_letters(text_file)))
print('{0} symbols'.format(number_of_symbols(text_file)))
# Joining with ',' prints the same text as before for three entries but does
# not assume the list has exactly three.
top_3 = most_common_words(text_file)
print('Top three most common words: {0}'.format(','.join(top_3)))
top_3 = most_common_letters(text_file)
print('Top three most common letters: {0}'.format(','.join(top_3)))
print('{0} is the most common first word of all paragraphs'.format(common_first_word(text_file)))
print('Words only used once: {0}'.format(words_used_once(text_file)))
print('Letters not used in the document: {0}'.format(letters_used_once(text_file)))