Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import PyPDF2
- import re
- import openai
- # Step 1: Convert the PDF file into a text file using a Python script
def pdf_to_text(pdf_file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf_file: Path of the PDF file to read.

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages.
    """
    with open(pdf_file, 'rb') as handle:
        # strict=False is passed through to PyPDF2.PdfReader unchanged.
        document = PyPDF2.PdfReader(handle, strict=False)
        # Pages are consumed while the file is still open.
        return "".join(page.extract_text() for page in document.pages)
- # Step 2: Slice the 70,000 + words into chunks
def slice_text(text, chunk_size):
    """Split text into consecutive chunks of at most ``chunk_size`` words.

    Words are maximal runs of word characters (regex ``\\w+``); punctuation
    and whitespace are dropped. Unlike splitting on ``\\W+``, this never
    produces empty-string "words" when the text is empty or starts/ends with
    punctuation, so empty input yields no chunks at all.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be positive.

    Returns:
        A list of chunks, each a list of words. The last chunk may be
        shorter than ``chunk_size``.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    # Raw string: '\w' in a normal string literal is an invalid escape.
    words = re.findall(r"\w+", text)
    return [words[i:i + chunk_size] for i in range(0, len(words), chunk_size)]
- # Step 3: Summarize each of the chunks
def summarize_chunks(chunks):
    """Summarize each chunk with the OpenAI Completion API.

    Args:
        chunks: Iterable of chunks. Each chunk is either a string or a
            sequence of words (the shape ``slice_text`` returns).

    Returns:
        A list of summary strings, one per chunk, in input order.
    """
    # NOTE(review): placeholder value; a real key must be supplied before
    # this can run. Avoid committing a real key to source control.
    openai.api_key = "API KEY"
    summaries = []
    for chunk in chunks:
        # Re-join word lists so the prompt contains readable text instead of
        # the Python repr of a list (brackets, quotes and commas).
        chunk_text = chunk if isinstance(chunk, str) else " ".join(chunk)
        response = openai.Completion.create(
            engine="text-davinci-002",
            prompt=f"Summarize this text:\n{chunk_text}",
            max_tokens=1024,
            temperature=0.5,
            top_p=1,
            frequency_penalty=1,
            presence_penalty=1
        )
        # Keep only the generated text. Appending the whole response object
        # is what made the later string concatenation fail with a TypeError.
        summaries.append(response["choices"][0]["text"])
    return summaries
- # Step 4: Merge all of the chunks into one text file
def merge_summaries(summaries):
    """Concatenate summaries into a single string.

    Each item may be a plain string or a Completion-style response (a mapping
    with the generated text at ``["choices"][0]["text"]``). Accepting both
    avoids the TypeError raised when a response object is added to a str.

    Args:
        summaries: Iterable of summary strings and/or response mappings.

    Returns:
        The summary texts joined in order with no separator; an empty string
        when ``summaries`` is empty.
    """
    parts = []
    for summary in summaries:
        if isinstance(summary, str):
            parts.append(summary)
        else:
            # Response object: pull out the text of the first completion.
            parts.append(summary["choices"][0]["text"])
    return "".join(parts)
- # Step 5: Write a new summary from the merged chunks of text
def write_summary(summary_text, output_file):
    """Ask the OpenAI Completion API for a summary and write it to a file.

    Args:
        summary_text: The text to summarize (here, the merged chunk summaries).
        output_file: Path of the file to create or overwrite.
    """
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=f"Write a summary of this text:\n{summary_text}",
        max_tokens=1024,
        temperature=0.5,
        top_p=1,
        frequency_penalty=1,
        presence_penalty=1
    )
    # The generated text is in the first choice; the response object itself
    # has no top-level ``text`` field.
    summary = response["choices"][0]["text"]
    # Explicit UTF-8 so non-ASCII model output does not depend on the
    # platform's default encoding.
    with open(output_file, 'w', encoding="utf-8") as f:
        f.write(summary)
- # Step 6: Generate key notes from the summary
def generate_key_notes(summary_text):
    """Generate key notes from a summary using the OpenAI Completion API.

    Args:
        summary_text: The text to derive key notes from.

    Returns:
        The generated key notes as a string.
    """
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=f"Generate key notes from this text:\n{summary_text}",
        max_tokens=1024,
        temperature=0.5,
        top_p=1,
        frequency_penalty=1,
        presence_penalty=1
    )
    # The generated text is in the first choice; the response object itself
    # has no top-level ``text`` field.
    return response["choices"][0]["text"]
- # Step 7: Create a step-by-step guide from the key notes
def create_step_by_step_guide(key_notes):
    """Generate a step-by-step guide from key notes via the OpenAI Completion API.

    Args:
        key_notes: The key notes text to turn into a guide.

    Returns:
        The generated step-by-step guide as a string.
    """
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=f"Create a step-by-step guide from these key notes:\n{key_notes}",
        max_tokens=1024,
        temperature=0.5,
        top_p=1,
        frequency_penalty=1,
        presence_penalty=1
    )
    # The generated text is in the first choice; the response object itself
    # has no top-level ``text`` field.
    return response["choices"][0]["text"]
- # Step 8: Summarize the notes into the bare essentials of the book
def summarize_to_bare_essentials(summary_text):
    """Condense text to its bare essentials using the OpenAI Completion API.

    Args:
        summary_text: The text to condense.

    Returns:
        The condensed text as a string.
    """
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=f"Summarize this text to the bare essentials:\n{summary_text}",
        max_tokens=1024,
        temperature=0.5,
        top_p=1,
        frequency_penalty=1,
        presence_penalty=1
    )
    # The generated text is in the first choice; the response object itself
    # has no top-level ``text`` field.
    return response["choices"][0]["text"]
- # Step 9: Write a blog post from the notes
def write_blog_post(key_notes, output_file):
    """Generate a blog post from key notes and write it to a file.

    Args:
        key_notes: The key notes text the post is based on.
        output_file: Path of the file to create or overwrite.
    """
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=f"Write a blog post from these key notes:\n{key_notes}",
        max_tokens=1024,
        temperature=0.5,
        top_p=1,
        frequency_penalty=1,
        presence_penalty=1
    )
    # The generated text is in the first choice; the response object itself
    # has no top-level ``text`` field.
    blog_post = response["choices"][0]["text"]
    # Explicit UTF-8 so non-ASCII model output does not depend on the
    # platform's default encoding.
    with open(output_file, 'w', encoding="utf-8") as f:
        f.write(blog_post)
- # Step 10: Generate some mid-journey prompts from the notes
def generate_mid_journey_prompts(key_notes):
    """Generate mid-journey prompts from key notes via the OpenAI Completion API.

    Args:
        key_notes: The key notes text to derive prompts from.

    Returns:
        The generated prompts as a string.
    """
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=f"Generate some mid-journey prompts from these key notes:\n{key_notes}",
        max_tokens=1024,
        temperature=0.5,
        top_p=1,
        frequency_penalty=1,
        presence_penalty=1
    )
    # The generated text is in the first choice; the response object itself
    # has no top-level ``text`` field.
    return response["choices"][0]["text"]
# Driver: PDF -> text -> word chunks -> per-chunk summaries -> merged summary,
# then derive key notes, a guide, a condensed summary, a blog post and prompts.

# Read the PDF file
pdf_file = "mlops.pdf"
text = pdf_to_text(pdf_file)
# Slice the text into chunks of 500 words
chunks = slice_text(text, 500)
print("Slicing completed. Summarizing chunks...")
# Summarize the chunks
summaries = summarize_chunks(chunks)
print("Summarizing completed. Merging summaries...")
# Merge the summaries
summary_text = merge_summaries(summaries)
# Write the summary to a file
output_file = "summary.txt"
write_summary(summary_text, output_file)
# Generate key notes from the summary. This and the guide below are each
# generated once; repeating the calls only discards the first results and
# spends extra API requests.
key_notes = generate_key_notes(summary_text)
# Create a step-by-step guide from the key notes
step_by_step_guide = create_step_by_step_guide(key_notes)
# Summarize the notes to the bare essentials
bare_essentials = summarize_to_bare_essentials(summary_text)
# Write a blog post from the notes
output_file = "blog_post.txt"
write_blog_post(key_notes, output_file)
# Generate some mid-journey prompts from the notes
prompts = generate_mid_journey_prompts(key_notes)
# Print the step-by-step guide, bare essentials, and prompts
print(step_by_step_guide)
print(bare_essentials)
print(prompts)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement