Advertisement
beppemila

Python Code/pdf to summary using OpenAI APIs

Dec 26th, 2022 (edited)
3,536
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.47 KB | Source Code | 0 0
  1. import PyPDF2
  2. import re
  3. import openai
  4.  
  5. # Step 1: Convert the PDF file into a text file using a Python script
  6. def pdf_to_text(pdf_file):
  7.     # Open the PDF file of your choice
  8.     with open(pdf_file, 'rb') as pdf:
  9.         reader = PyPDF2.PdfReader(pdf, strict=False)
  10.         # no_pages = len(reader.pages)
  11.         pdf_text = ""
  12.  
  13.         for page in reader.pages:
  14.             content = page.extract_text()
  15.             pdf_text += content
  16.  
  17.     #return the text
  18.     return pdf_text
  19.    
  20.    
# Step 2: Slice the text (70,000+ words) into chunks of words
  22. def slice_text(text, chunk_size):
  23.   # Split the text into a list of words
  24.   words = re.split('\W+', text)
  25.  
  26.   # Initialize a list to hold the chunks
  27.   chunks = []
  28.  
  29.   # Iterate over the list of words
  30.   for i in range(0, len(words), chunk_size):
  31.     # Append a chunk of the specified size to the list
  32.     chunks.append(words[i:i+chunk_size])
  33.  
  34.   # Return the list of chunks
  35.   return chunks
  36.  
  37. # Step 3: Summarize each of the chunks
  38. def summarize_chunks(chunks):
  39.   # Initialize a list to hold the summaries
  40.   summaries = []
  41.  
  42.   # Set the OpenAI API key
  43.   openai.api_key = "API KEY"
  44.  
  45.   # Iterate over the chunks
  46.   for chunk in chunks:
  47.     i=0
  48.     # Use the OpenAI API to summarize the chunk
  49.     summary = openai.Completion.create(
  50.         engine="text-davinci-002",
  51.         prompt=f"Summarize this text:\n{chunk}",
  52.         max_tokens=1024,
  53.         temperature=0.5,
  54.         top_p=1,
  55.         frequency_penalty=1,
  56.         presence_penalty=1
  57.     )
  58.     i += 1
  59.     # Append the summary to the list
  60.     summaries.append(summary)
  61.  
  62.   # Return the list of summaries
  63.   return summaries
  64.  
# Step 4: Merge all of the chunk summaries into one string
  66. def merge_summaries(summaries):
  67.   # Initialize a string to hold the merged summaries
  68.   merged_summaries = ""
  69.  
  70.   # Iterate over the summaries
  71.   for summary in summaries:
  72.     # Add the summary to the merged summaries
  73.     merged_summaries += summary #!!!! I get an error here
  74.  
  75.   # Return the merged summaries
  76.   return merged_summaries
  77.  
  78. # Step 5: Write a new summary from the merged chunks of text
  79. def write_summary(summary_text, output_file):
  80.   # Use the OpenAI API to generate a summary from the merged text
  81.   summary = openai.Completion.create(
  82.       engine="text-davinci-002",
  83.       prompt=f"Write a summary of this text:\n{summary_text}",
  84.       max_tokens=1024,
  85.       temperature=0.5,
  86.       top_p=1,
  87.       frequency_penalty=1,
  88.       presence_penalty=1
  89.   )
  90.  
  91.   # Open the output file in write mode
  92.   with open(output_file, 'w') as f:
  93.     # Write the summary to the file
  94.     f.write(summary.text)
  95.  
  96. # Step 6: Generate key notes from the summary
  97. def generate_key_notes(summary_text):
  98.   # Use the OpenAI API to generate key notes from the summary text
  99.   key_notes = openai.Completion.create(
  100.       engine="text-davinci-002",
  101.       prompt=f"Generate key notes from this text:\n{summary_text}",
  102.       max_tokens=1024,
  103.       temperature=0.5,
  104.       top_p=1,
  105.       frequency_penalty=1,
  106.       presence_penalty=1
  107.   )
  108.  
  109.   # Return the key notes
  110.   return key_notes.text
  111.  
  112. # Step 7: Create a step-by-step guide from the key notes
  113. def create_step_by_step_guide(key_notes):
  114.   # Use the OpenAI API to generate a step-by-step guide from the key notes
  115.   step_by_step_guide = openai.Completion.create(
  116.       engine="text-davinci-002",
  117.       prompt=f"Create a step-by-step guide from these key notes:\n{key_notes}",
  118.       max_tokens=1024,
  119.       temperature=0.5,
  120.       top_p=1,
  121.       frequency_penalty=1,
  122.       presence_penalty=1
  123.   )
  124.  
  125.   # Return the step-by-step guide
  126.   return step_by_step_guide.text
  127.  
  128. # Step 8: Summarize the notes into the bare essentials of the book
  129. def summarize_to_bare_essentials(summary_text):
  130.   # Use the OpenAI API to summarize the text to the bare essentials
  131.   bare_essentials = openai.Completion.create(
  132.       engine="text-davinci-002",
  133.       prompt=f"Summarize this text to the bare essentials:\n{summary_text}",
  134.       max_tokens=1024,
  135.       temperature=0.5,
  136.       top_p=1,
  137.       frequency_penalty=1,
  138.       presence_penalty=1
  139.   )
  140.  
  141.   # Return the bare essentials
  142.   return bare_essentials.text
  143.  
  144. # Step 9: Write a blog post from the notes
  145. def write_blog_post(key_notes, output_file):
  146.   # Use the OpenAI API to generate a blog post from the key notes
  147.   blog_post = openai.Completion.create(
  148.       engine="text-davinci-002",
  149.       prompt=f"Write a blog post from these key notes:\n{key_notes}",
  150.       max_tokens=1024,
  151.       temperature=0.5,
  152.       top_p=1,
  153.       frequency_penalty=1,
  154.       presence_penalty=1
  155.   )
  156.  
  157.   # Open the output file in write mode
  158.   with open(output_file, 'w') as f:
  159.     # Write the blog post to the file
  160.     f.write(blog_post.text)
  161.  
  162. # Step 10: Generate some mid-journey prompts from the notes
  163. def generate_mid_journey_prompts(key_notes):
  164.   # Use the OpenAI API to generate mid-journey prompts from the key notes
  165.   prompts = openai.Completion.create(
  166.       engine="text-davinci-002",
  167.       prompt=f"Generate some mid-journey prompts from these key notes:\n{key_notes}",
  168.       max_tokens=1024,
  169.       temperature=0.5,
  170.       top_p=1,
  171.       frequency_penalty=1,
  172.       presence_penalty=1
  173.   )
  174.  
  175.   # Return the prompts
  176.   return prompts.text
  177.  
  178. # Read the PDF file
  179. pdf_file = "mlops.pdf"
  180. text = pdf_to_text(pdf_file)
  181.  
  182. # Slice the text into chunks
  183. chunks = slice_text(text, 500)
  184. print ("Slicing completed. Summarizing chunks...")
  185.  
  186. # Summarize the chunks
  187. summaries = summarize_chunks(chunks)
  188. print ("Summarizing completed. Merging summaries...")
  189.  
  190. # Merge the summaries
  191. summary_text = merge_summaries(summaries)
  192.  
  193. # Write the summary to a file
  194. output_file = "summary.txt"
  195. write_summary(summary_text, output_file)
  196.  
  197. # Generate key notes from the summary
  198. key_notes = generate_key_notes(summary_text)
  199.  
  200. # Create a step-by-step guide from the key notes
  201. step_by_step_guide = create_step_by_step_guide(key_notes)
  202.  
  203. # Generate key notes from the summary
  204. key_notes = generate_key_notes(summary_text)
  205.  
  206. # Create a step-by-step guide from the key notes
  207. step_by_step_guide = create_step_by_step_guide(key_notes)
  208.  
  209. # Summarize the notes to the bare essentials
  210. bare_essentials = summarize_to_bare_essentials(summary_text)
  211.  
  212. # Write a blog post from the notes
  213. output_file = "blog_post.txt"
  214. write_blog_post(key_notes, output_file)
  215.  
  216. # Generate some mid-journey prompts from the notes
  217. prompts = generate_mid_journey_prompts(key_notes)
  218.  
  219. # Print the step-by-step guide, bare essentials, and prompts
  220. print(step_by_step_guide)
  221. print(bare_essentials)
  222. print(prompts)
  223.  
  224.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement