FaceDeer

Having an AI read books so I don't have to

Oct 18th, 2024 (edited)
Python | 7.40 KB | Source Code
import os
import requests
import json
import re
import signal
import time

import ebooklib
from ebooklib import epub
import markdownify

# Maximum number of words from each book to send to the model
word_count_limit = 20000

# Define the API endpoint URLs (KoboldCpp's default local server)
generateUrl = "http://localhost:5001/api/v1/generate"
tokenCountUrl = "http://localhost:5001/api/extra/tokencount"

# Define the headers for the request
headers = {
    'Content-Type': 'application/json'
}

# Handle Ctrl+C gracefully: flip the running flag so the main loop can save
# its progress and exit cleanly instead of dying mid-review.
def signal_handler(sig, frame):
    global running
    if running:
        running = False
signal.signal(signal.SIGINT, signal_handler)

# Using a Command-R model; the prompt is wrapped in its chat-turn special tokens.
def getPromptJson(prompt, memory=None):
    data = {
        "n": 1,
        "max_context_length": 65536,
        "rep_pen": 1.1,
        "temperature": 0.7,
        "top_p": 0.92,
        "top_k": 100,
        "top_a": 0,
        "typical": 1,
        "tfs": 1,
        "rep_pen_range": 320,
        "rep_pen_slope": 0.7,
        "sampler_order": [6, 0, 1, 3, 4, 2, 5],
        #"memory": "[Summary: The song celebrates the remarkable attributes and hardworking nature of ants, etc.]\n",
        "trim_stop": True,
        "min_p": 0,
        "dynatemp_range": 0,
        "dynatemp_exponent": 1,
        "smoothing_factor": 0,
        "banned_tokens": [],
        "render_special": False,
        #"xtc_threshold": 0.15,
        #"xtc_probability": 0.5,
        "presence_penalty": 0,
        "logit_bias": {},
        "quiet": True,
        "use_default_badwordsids": False,
        "bypass_eos": False,
        "stop_sequence": ["<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>", "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"],
        "prompt": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>" + prompt + "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
        "max_length": 1024,
    }
    if memory:
        data["memory"] = memory
#    if grammar:
#        data["grammar"] = grammar
    return json.dumps(data)

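# Illustrative example (not part of the script's flow): calling
#   getPromptJson("Summarize this story.")
# yields JSON whose "prompt" field is the text wrapped in Command-R turn tokens:
#   <|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Summarize this story.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
# The stop_sequence entries halt generation as soon as the model tries to
# start a new conversational turn.
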
def getResult(prompt):
    # Send the request and get the response
    try:
        response = requests.post(generateUrl, headers=headers, data=getPromptJson(prompt))
    except Exception as e:
        print(f"An error occurred: {e}")
        return False
    else:    # Check if the request was successful
        if response.status_code == 200:
            # Parse the response JSON into a Python dictionary
            response_data = json.loads(response.text)
            #print(response_data)
            return response_data["results"][0]["text"]
        else:
            print(f"Request failed with status code {response.status_code}")
            return False

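# For reference: a successful KoboldCpp response body has the shape
#   {"results": [{"text": "...generated text..."}]}
# which is why getResult() reads results[0]["text"] above.
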
# Not used in this script; handy for checking how many tokens a prompt occupies.
def countTokens(prompt):
    response = requests.post(tokenCountUrl, headers=headers, data=json.dumps({"prompt": prompt}))
    if response.status_code == 200:
        # Parse the response JSON into a Python dictionary
        response_data = json.loads(response.text)
        print(response_data)

def truncate_by_word_count(text, word_count_limit):
    """Truncates a text string by word count, preserving formatting.

    Args:
        text: The input text string.
        word_count_limit: The maximum number of words to include.

    Returns:
        A tuple of (truncated text, whether truncation occurred).
    """

    truncated = False
    words_and_separators = re.split(r"(\s+)", text)
    truncated_words_and_separators = []
    word_count = 0
    for word_or_separator in words_and_separators:
        if word_or_separator.strip():  # Check if it's a word
            word_count += 1
        truncated_words_and_separators.append(word_or_separator)
        if word_count >= word_count_limit:
            truncated = True
            break

    truncated_text = ''.join(truncated_words_and_separators)
    return truncated_text, truncated

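# Worked example (illustrative):
#   truncate_by_word_count("one two  three four", 3)
# returns ("one two  three", True). Splitting on r"(\s+)" keeps the separator
# runs, so spacing and line breaks in the kept portion survive unchanged.
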
def extract_epub_data(epub_file_path, word_count_limit):
    """Extracts text, title, and author from an EPUB file.

    Args:
        epub_file_path: The path to the EPUB file.
        word_count_limit: The maximum number of words to include in the extracted text.

    Returns:
        A dict with "text", "title", "author", and "truncated" keys.
    """

    book = epub.read_epub(epub_file_path)

    # Extract title and author from the Dublin Core metadata;
    # get_metadata returns a list of (value, attributes) tuples
    title = book.get_metadata("DC", 'title')
    author = book.get_metadata("DC", 'creator')

    # Extract text content, converting each chapter's HTML to Markdown
    text = ''
    items = list(book.get_items_of_type(ebooklib.ITEM_DOCUMENT))
    for item in items[1:]:  # Skip the first item (often a cover or title page)
        text += markdownify.markdownify(item.get_body_content().decode("utf-8"), escape_misc=False) + '\n\n'
    extra_breaks = r"\n{3,}"  # Matches three or more consecutive newlines
    two_breaks = "\n\n"  # Replace with two newlines
    text = re.sub(extra_breaks, two_breaks, text)
    truncate_result = truncate_by_word_count(text, word_count_limit)

    return {"text": truncate_result[0], "title": title[0][0], "author": author[0][0], "truncated": truncate_result[1]}

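# Illustrative return value for a hypothetical book:
#   {"text": "CHAPTER 1\n\nIt was a dark...", "title": "Some Title",
#    "author": "Some Author", "truncated": True}
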
def review_files(directory):
    # Load any existing reviews so an interrupted run can resume where it left off.
    try:
        with open("reviews.json", 'r') as f:
            reviews = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        reviews = {}

    for root, dirs, files in os.walk(directory):
        for file in files:
            if not running:
                print("Interrupted.")
                with open("reviews.json", "w") as writefile:
                    json.dump(reviews, writefile, indent=4, sort_keys=True)
                return
            if file.endswith(".epub") and file not in reviews:
                start_time = time.time()
                epub_file_path = os.path.join(root, file)
                result = extract_epub_data(epub_file_path, word_count_limit)
                if result["truncated"]:
                    prompt = f'The following is an excerpt from a work of fiction titled {result["title"]}.\n\n{result["text"]}\n\n### End of story excerpt.'
                else:
                    prompt = f'The following is a work of fiction titled {result["title"]}.\n\n{result["text"]}\n\n### End of story.'

                prompt = prompt + (
                    "\n\nI am trying to identify stories that feature"
                    # TODO: Insert a description of the kinds of stories you're interested in finding.
                    # Feel free to be wordy here; we're already feeding thousands of words of story
                    # into the AI, so a few extra lines shouldn't burden it much more than that.
                    " Please review this story and tell me whether it has any content that matches those criteria,"
                    " giving a brief summary of it if there is any.")

                print(f'Reviewing {result["title"]} by {result["author"]}\n')
                review = getResult(prompt)
                reviews[file] = {"title": result["title"], "author": result["author"], "review": review}
                print(f'{review}\n')
                # Save after every book so progress isn't lost if the run is interrupted.
                with open("reviews.json", "w") as writefile:
                    json.dump(reviews, writefile, indent=4, sort_keys=True)

                end_time = time.time()
                elapsed_time = end_time - start_time
                minutes = int(elapsed_time // 60)
                seconds = int(elapsed_time % 60)
                print(f"Review took {minutes} minutes and {seconds} seconds.\n----\n")

running = True
review_files('.')
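
# To run, assuming a KoboldCpp server on localhost:5001 with a Command-R model
# loaded and your .epub files somewhere under the current directory:
#   pip install requests ebooklib markdownify
#   python review_books.py   # "review_books.py" is a hypothetical filename for this script
# Reviews accumulate in reviews.json; Ctrl+C saves progress and exits, and a
# re-run skips any book already recorded there.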
Tags: ai Ebooks