# ipp7_0_voldemort_british

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Filename: ipp7_0_voldemort_british.py
# Version: 1.0.0
# Author: Jeoi Reqi

"""
Description:
    - This script demonstrates "Chapter 3: Practice Project #7: Finding Voldemort: The British Brute-Force" from the book "Impractical Python Projects" by Lee Vaughan.
    - It aims to find possible names that can be formed from the letters in "tmvoordle" using various filters such as consonant-vowel patterns, trigrams, and letter pairs.
    - The script downloads a dictionary file containing English words from a URL and saves it alphabetically.
    - It also downloads a trigrams file and saves it for later use.

Requirements:
    - Python 3.x
    - The following modules
        - os
        - sys
        - requests
        - itertools (specifically the permutations function)
        - collections (specifically the Counter class)

Functions:
    - main():
        - Executes the main functionality of the script.
    - download_dictionary(url, file_name):
        - Download a dictionary file from a URL and save it alphabetically.
    - load_dictionary(file):
        - Open a text file & turn contents into a set of lowercase strings.
    - download_trigrams_file(url, filename):
        - Download a trigrams file from a URL and save it.
    - prep_words(name, word_list_ini):
        - Filter words from the initial word list based on the length.
    - cv_map_words(word_list):
        - Map words from the word list to their consonant-vowel patterns.
    - cv_map_filter(name, filtered_cv_map):
        - Filter words based on consonant-vowel patterns.
    - trigram_filter(filter_1, trigrams_filtered):
        - Filter words based on trigrams.
    - letter_pair_filter(filter_2):
        - Filter words based on letter pairs.
    - view_by_letter(name, filter_3):
        - Display filtered words based on the starting letter provided by the user.

Usage:
    - Run the script directly in a Python 3.x environment:

            $ python ipp7_0_voldemort_british.py

Additional Notes:
    - The script uses various filtering techniques to narrow down the list of possible names that can be formed from the given set of letters.
    - It prompts the user to input a starting letter or press Enter to see all possible names.
"""

import os
import sys
import requests

from itertools import permutations
from collections import Counter

def download_dictionary(url, file_name):
    """
    Download a dictionary file from a URL and save it alphabetically.

    The response body is split into lines, every line is stripped of
    surrounding whitespace, and the lines are written to disk in sorted order.

    Parameters:
        url (str): The URL from which to download the dictionary file.
        file_name (str): The name to save the downloaded file as.

    Raises:
        SystemExit: If the HTTP request fails; the error is printed first
            and the program exits with status 1.
    """
    print("\nDownloading dictionary file from:\n" + "{}".format(url))
    try:
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Turn HTTP error statuses into exceptions
    except requests.RequestException as e:
        print("\nError downloading dictionary from {}: {}".format(url, e))
        sys.exit(1)

    # Split the content by lines, strip whitespace, and sort alphabetically
    sorted_content = sorted(line.strip() for line in response.text.strip().split('\n'))

    # Save the sorted content to the file
    with open(file_name, 'w', encoding='UTF-8') as f:
        f.write('\n'.join(sorted_content))

    print("\nDictionary file downloaded and saved alphabetically as: '{}'.".format(file_name))

def load_dictionary(file):
    """
    Open a text file & turn contents into a set of lowercase strings.

    Each line is stripped of surrounding whitespace individually and blank
    lines are skipped, so an empty file yields an empty set rather than a
    set containing the empty string.

    Parameters:
        file (str): The name of the file to open.

    Returns:
        set: A set of lowercase strings containing the words from the file.

    Raises:
        SystemExit: If the file cannot be opened or read; the error is
            printed first and the program exits with status 1.
    """
    try:
        with open(file, encoding='utf-8') as in_file:
            loaded_txt_set = set()
            for line in in_file:
                entry = line.strip().lower()
                # An empty entry is never a real word (or trigram) and would
                # match every string in a substring test, so leave it out.
                if entry:
                    loaded_txt_set.add(entry)
            return loaded_txt_set
    except IOError as e:
        print("\n{}\nError opening {}. Terminating program.\n".format(e, file))
        sys.exit(1)

def download_trigrams_file(url, filename):
    """
    Download a trigrams file from a URL and save it.

    The file is written only after the request has succeeded, so an HTTP
    error page is never stored in place of the trigram list.

    Parameters:
        url (str): The URL from which to download the trigrams file.
        filename (str): The name to save the downloaded file as.

    Raises:
        SystemExit: If the HTTP request fails; the error is printed first
            and the program exits with status 1.
    """
    print("\nDownloading least-likely_trigrams.txt...\n")
    try:
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Turn HTTP error statuses into exceptions
    except requests.RequestException as e:
        print("\nError downloading trigrams file from {}: {}".format(url, e))
        sys.exit(1)

    # Raw bytes are written unchanged; decoding happens when the file is loaded.
    with open(filename, 'wb') as f:
        f.write(response.content)
    print("\nDownload completed!\n")

def main():
    """
    Run the whole demonstration, from banner to interactive viewer.

    Prints an introduction, makes sure the dictionary and trigram files are
    present in the current working directory (downloading whichever one is
    missing), loads both, pushes the letters of 'tmvoordle' through the three
    filters in order, and hands the surviving candidates to view_by_letter().
    """
    # Introductory banner and description
    rule = "_" * 100
    print(rule)
    print("\n\t  :: Chapter 3: Practice Project #7: Finding Voldemort: The British Brute-Force ::")
    print(rule)
    print("\n- This script aims to find possible names that can be formed from the letters in 'tmvoordle'.\n"
          "- It uses various filters such as consonant-vowel patterns, trigrams, and letter pairs.\n"
          "- It downloads a dictionary file containing English words from a URL and saves it alphabetically.\n"
          "- It also downloads a trigrams file and saves it for later use.\n")
    print("\nLoading dictionary file...\n")
    name = 'tmvoordle'.lower()

    # Dictionary: fetch it only when it is not already in the cwd
    dictionary_file = "dictionary.txt"
    dictionary_url = "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt"
    if dictionary_file not in os.listdir():
        print("\nLoading dictionary file...\n")
        download_dictionary(dictionary_url, dictionary_file)
    word_list_ini = load_dictionary(dictionary_file)

    # Trigrams: same "download if missing" rule
    trigrams_filename = 'least_likely_trigrams.txt'
    if trigrams_filename not in os.listdir():
        download_trigrams_file(
            'https://github.com/rlvaugh/Impractical_Python_Projects/raw/master/Chapter_3/least-likely_trigrams.txt',
            trigrams_filename,
        )
    trigrams_filtered = load_dictionary(trigrams_filename)

    # Filter pipeline: c-v patterns, then trigrams, then letter pairs
    survivors = cv_map_filter(name, cv_map_words(prep_words(name, word_list_ini)))
    survivors = trigram_filter(survivors, trigrams_filtered)
    survivors = letter_pair_filter(survivors)
    view_by_letter(name, survivors)

def prep_words(name, word_list_ini):
    """
    Keep only the words that are exactly as long as the name.

    Prints the size of the collection before and after the length filter.

    Parameters:
        name (str): The name whose length the kept words must match.
        word_list_ini (iterable of str): The initial collection of words.

    Returns:
        list: The lowercased words whose length equals len(name), in the
        iteration order of the input.
    """
    print("- length initial word_list = {}".format(len(word_list_ini)))
    wanted_length = len(name)
    same_length_words = []
    for entry in word_list_ini:
        if len(entry) == wanted_length:
            same_length_words.append(entry.lower())
    print("- length of new word_list = {}".format(len(same_length_words)))
    return same_length_words

def cv_map_words(word_list):
    """
    Collect the most common consonant-vowel patterns found in the words.

    Every word is rewritten as a string of 'c' and 'v' (the letters in
    'aeiouy' count as vowels, everything else as a consonant). The patterns
    are then ranked by frequency, and the least common ones, int(5 % of the
    number of distinct patterns) of them, are left out.

    Parameters:
        word_list (list): The list of words to map.

    Returns:
        set: A set containing the retained consonant-vowel patterns.
    """
    vowels = 'aeiouy'
    patterns = [
        ''.join('v' if char in vowels else 'c' for char in word)
        for word in word_list
    ]

    tally = Counter(patterns)
    distinct = len(tally)          # number of unique patterns
    drop_fraction = 0.05           # share of unique patterns to discard
    drop_count = int(distinct * drop_fraction)
    kept = tally.most_common(distinct - drop_count)

    filtered_cv_map = {pattern for pattern, _ in kept}
    print("- length filtered_cv_map = {}".format(len(filtered_cv_map)))
    return filtered_cv_map

def cv_map_filter(name, filtered_cv_map):
    """
    Keep the permutations of name whose consonant-vowel pattern is allowed.

    All distinct orderings of the letters in name are generated, each one is
    rewritten as a 'c'/'v' pattern (letters in 'aeiouy' are vowels), and only
    those whose pattern appears in filtered_cv_map are retained.

    Parameters:
        name (str): The letters to permute.
        filtered_cv_map (set): The set of accepted consonant-vowel patterns.

    Returns:
        set: A set containing the permutations that passed the filter.
    """
    vowels = 'aeiouy'

    def to_pattern(text):
        # Translate a string into its consonant/vowel signature.
        return ''.join('v' if char in vowels else 'c' for char in text)

    perms = set(map(''.join, permutations(name)))
    print("- length of initial permutations set = {}".format(len(perms)))

    filter_1 = {perm for perm in perms if to_pattern(perm) in filtered_cv_map}
    print("\n\t# choices after filter_1 = {}".format(len(filter_1)))
    return filter_1

# Defining filters 2 and 3 of the 3 filters (filter 1, cv_map_filter, is defined above)

def trigram_filter(filter_1, trigrams_filtered):
    """
    Remove candidates that contain any of the given trigrams.

    Trigrams are compared case-insensitively against the candidates as
    written. Empty trigram entries are ignored: an empty string is a
    substring of every candidate and would otherwise reject all of them.

    Parameters:
        filter_1 (set): The set of words after the first filter.
        trigrams_filtered (set): The set of trigrams that disqualify a word.

    Returns:
        set: The candidates that contain none of the trigrams.
    """
    # Normalize once up front instead of lowercasing inside the nested loop.
    triplets = {triplet.lower() for triplet in trigrams_filtered if triplet}

    # any() stops at the first matching trigram for each candidate.
    filter_2 = {
        candidate for candidate in filter_1
        if not any(triplet in candidate for triplet in triplets)
    }
    print("\t# of choices after filter_2 = {}".format(len(filter_2)))
    return filter_2

def letter_pair_filter(filter_2):
    """
    Remove candidates containing rejected letter pairs.

    A candidate is discarded when one of the pairs in `rejects` occurs
    anywhere in it, or when it begins with one of the pairs in
    `first_pair_rejects`. A notice is written to stderr if 'voldemort'
    is among the survivors.

    Parameters:
        filter_2 (set): The set of words after the second filter.

    Returns:
        set: A set containing the words that passed the filter.
    """
    rejects = ('dt', 'lr', 'md', 'ml', 'mr', 'mt', 'mv', 'td', 'tv', 'vd', 'vl', 'vm', 'vr', 'vt')
    first_pair_rejects = ('ld', 'lm', 'lt', 'lv', 'rd', 'rl', 'rm', 'rt', 'rv', 'tl', 'tm')

    def is_rejected(word):
        # Rejected pair anywhere in the word, or a rejected pair at its start.
        if any(pair in word for pair in rejects):
            return True
        return word.startswith(first_pair_rejects)

    filter_3 = {word for word in filter_2 if not is_rejected(word)}
    print("\t# of choices after filter_3 = {}".format(len(filter_3)))
    if 'voldemort' in filter_3:
        print("\nVoldemort found!", file=sys.stderr)
    return filter_3

def view_by_letter(name, filter_3):
    """
    Interactively list the remaining candidates by starting letter.

    The user is asked for a prefix (an empty answer selects every candidate),
    the matching candidates are printed in sorted order together with their
    count, and the user may repeat the query by pressing Enter. Any other
    answer prints the closing banner and ends the function.

    Parameters:
        name (str): The letters shown in the "Remaining letters" line.
        filter_3 (set): The set of filtered words.
    """
    while True:
        print("\nRemaining letters = {}".format(name))
        first = input("\nselect a starting letter or press Enter to see all: ")
        print()

        matches = sorted(word for word in filter_3 if word.startswith(first))
        print(*matches, sep='\n')

        if not first:
            print("\nNumber of choices starting with [ALL] = {}".format(len(matches)))
        else:
            print("\nNumber of choices starting with [{}] = {}".format(first, len(matches)))

        try_again = input("\nPress [ENTER] to try again. (or... Press any other key to Exit):")
        if try_again.lower() != '':
            break  # anything but a bare Enter ends the session

    print("_" * 100)
    print("\nThis concludes the demonstration of Chapter 3: Practice Project #7: Finding Voldemort: The British Brute-Force\n\n\t\t\t   Thank you for your attention...   Goodbye!")
    print("_" * 100)

# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()