incomestreamsurfer

update for simple mode

Apr 21st, 2024

# MIT License

# Copyright (c) [2024] [Hamish Davison]

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import os
import openai
import time
import csv
import requests
from tqdm import tqdm
import concurrent.futures
import json

# Load configuration from a JSON file
with open('config.json') as config_file:
    config = json.load(config_file)

# Set your OpenAI API key from the config file
OPENAI_API_TOKEN = config["OPENAI_API_TOKEN"]
print("Setting OpenAI API Key...")
os.environ["OPENAI_API_KEY"] = OPENAI_API_TOKEN

# Update your Freeimage.host API Key here from the config file
FREEIMAGE_HOST_API_KEY = config["FREEIMAGE_HOST_API_KEY"]
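
# For reference, a config.json containing every key this script reads might
# look like the following. All values below are illustrative placeholders,
# not real credentials or paths:
#
# {
#     "OPENAI_API_TOKEN": "sk-...",
#     "FREEIMAGE_HOST_API_KEY": "your-freeimage-host-key",
#     "PERPLEXITY_API_KEY": "pplx-...",
#     "business_name": "Example Store",
#     "business_type": "ecommerce store",
#     "country": "the UK",
#     "language": "English",
#     "tone": "informative",
#     "page_type": "pillar page",
#     "path_to_example_file": "example_article.txt",
#     "path_to_links_file": "brandimagesandlinks.txt",
#     "path_to_website_images": "images.txt"
# }
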
# Initialize the OpenAI client
print("Initializing OpenAI client...")
client = openai.OpenAI()

# Global list to store image URLs
image_urls = []

def upload_to_freeimage_host(image_path, Keyword):
    """
    Uploads an image to Freeimage.host with the keyword in the filename.
    Also stores the image URL in a global list.
    """
    print(f"Uploading {image_path} to Freeimage.host...")
    with open(image_path, 'rb') as image_file:
        files = {'source': image_file}
        data = {
            'key': FREEIMAGE_HOST_API_KEY,
            'action': 'upload',
            'format': 'json',
            'name': f'{Keyword}_image.png'  # Include the keyword in the filename
        }

        response = requests.post('https://freeimage.host/api/1/upload', files=files, data=data)

    if response.status_code == 200:
        url = response.json().get('image', {}).get('url', '')
        if url:
            print(f"Uploaded successfully: {url}")
            image_urls.append({'idea': Keyword, 'url': url})  # Store both idea and URL
            return url
        else:
            print("Upload successful but no URL returned, something went wrong.")
    else:
        print(f"Failed to upload to Freeimage.host: {response.status_code}, {response.text}")
    return None

def upload_file(file_path, purpose):
    print(f"Uploading file: {file_path} for purpose: {purpose}")
    with open(file_path, "rb") as file:
        response = client.files.create(file=file, purpose=purpose)
    print(f"File uploaded successfully, ID: {response.id}")
    return response.id

def clear_image_urls(Keyword=None):
    """
    Clears stored image URLs. If a Keyword is given, only that keyword's
    entries are removed, so concurrent posts don't wipe each other's images.
    """
    global image_urls
    if Keyword is None:
        image_urls.clear()
    else:
        image_urls = [img for img in image_urls if img['idea'] != Keyword]
    print(f"Cleared image URLs for: {Keyword or 'all keywords'}")

  85. print("Commencing file uploads...")
  86. # Upload your files using paths from the config file
  87. internal_links_file_id = upload_file(config["path_to_example_file"], 'assistants')
  88. brand_plan_file_id = upload_file(config["path_to_example_file"], 'assistants')
  89. images_file_id = upload_file(config["path_to_website_images"], 'assistants')
  90.  
  91. print("Creating OpenAI Assistant...")
  92. assistant = client.beta.assistants.create(
  93. name="Content Creation Assistant",
  94. model="gpt-4-turbo-preview",
  95. instructions=f"You are writing for {config['business_name']}. Choose product images and internal links from {config['path_to_website_images']} and {config['path_to_links_file']} and embed them with markdown in the final article. You must never EVER invent internal links or image links as this can destroy my SEO. YOU MUST INCLUDE INTERNAL LINKS FROM {config['path_to_links_file']} - read this first and make sure to include real internal links in the final article in the blog post When told to use retrieval use retrieval, when told to use code_interpreter use code interpreter. The final content should include internal links and embedded product images from {config['path_to_website_images']} and should include formatting. Your basic steps are: 1. read {config['path_to_website_images']}, get the image, create some visualizations of data, store these for the final article. 2. Find relevant brand images {config['path_to_website_images']}, create an outline, then write an article with all of this data you've either created or found Copy the tone from {config['path_to_example_file']} EXACTLY. Read {config['path_to_example_file']}. Use this as a guide to shape the final {config['page_type']}. The {config['page_type']} should follow the length and tone of {config['path_to_example_file']}. You are SEOGPT, aiming to create in-depth and interesting blog posts for {config['business_name']}, an {config['business_type']} in {config['country']}, you should write at a grade 7 level {config['language']} Every blog post should include at least 3 product images and links to their other pages from {config['business_name']}.. Ensure the brand image links are accurate. Choose only relevant brand pages. Do not invent image links. Pick 5 strictly relevant brand images and internal links for the articles. First, read the attached files, then create a detailed outline for a {config['page_type']}, including up to 5 highly relevant internal collection links and brand image links.",
  96. tools=[{"type": "file_search"}, {"type": "code_interpreter"}]
  97. )
  98.  
  99. print("Assistant created successfully.")
  100.  
# Create a thread
thread_id = client.beta.threads.create().id

# Provide the file information to the assistant
client.beta.threads.messages.create(
    thread_id=thread_id,
    role="assistant",
    content="Here are the required files for your task:",
    attachments=[
        {
            "file_id": internal_links_file_id,
            "tools": [{"type": "file_search"}]
        },
        {
            "file_id": brand_plan_file_id,
            "tools": [{"type": "file_search"}]
        },
        {
            "file_id": images_file_id,
            "tools": [{"type": "code_interpreter"}]
        }
    ]
)

def wait_for_run_completion(thread_id, run_id, timeout=300):
    print(f"Waiting for run completion, thread ID: {thread_id}, run ID: {run_id}")
    start_time = time.time()
    while time.time() - start_time < timeout:
        run_status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
        if run_status.status == 'completed':
            print("Run completed successfully.")
            return run_status
        if run_status.status in ('failed', 'cancelled', 'expired'):
            # Bail out early instead of polling a run that can no longer complete
            raise RuntimeError(f"Run ended with status: {run_status.status}")
        time.sleep(10)
    raise TimeoutError("Run did not complete within the specified timeout.")

def perplexity_research(Keyword, max_retries=3, delay=5):
    """
    Conducts Perplexity research with retries on failure.
    Args:
        Keyword (str): The blog post idea to research.
        max_retries (int): Maximum number of retries.
        delay (int): Delay in seconds before retrying.
    Returns:
        dict or None: The response from the API, or None if all attempts failed.
    """
    print(f"Starting perplexity research for: {Keyword}")
    url = "https://api.perplexity.ai/chat/completions"
    payload = {
        "model": "sonar-medium-online",
        "messages": [
            {
                "role": "system",
                "content": "Be precise and concise."
            },
            {
                "role": "user",
                "content": f"Find highly specific generalised data about {Keyword} in 2024. Do not give me any information about specific brands."
            }
        ]
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {config['PERPLEXITY_API_KEY']}"
    }

    for attempt in range(max_retries):
        response = requests.post(url, json=payload, headers=headers)
        if response.status_code == 200:
            print("Perplexity research completed successfully.")
            try:
                return response.json()
            except ValueError:
                print("JSON decoding failed")
                return None
        else:
            print(f"Perplexity research failed with status code: {response.status_code}. Attempt {attempt + 1} of {max_retries}.")
            time.sleep(delay)

    print("Perplexity research failed after maximum retries.")
    return None

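# Note: the Perplexity chat completions endpoint follows the OpenAI response
# schema, so the generated text can typically be pulled out of the returned
# dict like this (hypothetical usage):
#
#     results = perplexity_research("lawnmowers")
#     if results:
#         text = results["choices"][0]["message"]["content"]
#
# process_blog_post() below simply stringifies the whole response instead.
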
def get_internal_links(thread_id, Keyword):
    print(f"Fetching internal links relevant to: {Keyword}")
    get_request = f"Use Retrieval. Read brandimagesandlinks.txt and choose 5 relevant pages, their links and their respective images, that are relevant to {Keyword}. Don't have more than 5. Now read images.txt and choose 5 product images relevant to this article."
    client.beta.threads.messages.create(thread_id=thread_id, role="user", content=get_request)
    get_request_run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=assistant.id)
    wait_for_run_completion(thread_id, get_request_run.id)
    messages = client.beta.threads.messages.list(thread_id=thread_id)
    print("Internal links fetched successfully.")
    return next((m.content for m in messages.data if m.role == "assistant"), None)

def create_data_vis(thread_id, research_info, Keyword):
    print("Creating data visualizations...")
    for i in range(3):  # Loop to generate 3 visualizations
        get_request = f"Use Code Interpreter - invent a VERY simple visualization of some interesting data from {research_info}."
        client.beta.threads.messages.create(thread_id=thread_id, role="user", content=get_request)
        get_request_run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=assistant.id)
        wait_for_run_completion(thread_id, get_request_run.id)

        messages = client.beta.threads.messages.list(thread_id=thread_id)

        if hasattr(messages.data[0].content[0], 'image_file'):
            file_id = messages.data[0].content[0].image_file.file_id

            image_data = client.files.content(file_id)
            image_data_bytes = image_data.read()

            # Include the keyword in the filename so concurrent posts don't overwrite each other's files
            image_path = f"./visualization_{Keyword}_{i}.png"
            with open(image_path, "wb") as file:
                file.write(image_data_bytes)

            print(f"Visualization {i+1} created, attempting upload...")
            upload_to_freeimage_host(image_path, Keyword)
        else:
            print(f"No image file found in response for visualization {i+1}. Attempt aborted.")

def process_blog_post(thread_id, Keyword):
    print(f"Processing blog post for: {Keyword}")
    research_results = perplexity_research(Keyword)
    research_info = str(research_results)

    create_data_vis(thread_id, research_info, Keyword)

    internal_links = get_internal_links(thread_id, Keyword)

    # Only include relevant image URLs for the current blog post idea
    relevant_image_urls = [img['url'] for img in image_urls if img['idea'] == Keyword]
    images_for_request = " ".join(relevant_image_urls)

    outline_request = f"Use retrieval. Look at brandimagesandlinks.txt. Create a SHORT outline for a {config['page_type']} about {Keyword} based on {research_info}. Do not invent image links. Use the product images and internal links from {internal_links}, and include the custom graphs from {images_for_request}. In the outline do not use sources or footnotes; just add a relevant product image in a relevant section, and a relevant internal link in a relevant section. There is no need for a lot of sources; each article needs a minimum of 5 brand images and internal links."

    client.beta.threads.messages.create(thread_id=thread_id, role="user", content=outline_request)
    outline_run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=assistant.id)
    wait_for_run_completion(thread_id, outline_run.id)
    messages = client.beta.threads.messages.list(thread_id=thread_id)
    outline = next((m.content for m in messages.data if m.role == "assistant"), None)

    article = None
    if outline:
        article_request = f"Write a short, snappy article in {config['language']} at a grade 7 level, based on the following outline:\n{outline}\nONLY USE INTERNAL LINKS FROM {internal_links}; never invent internal links or image links. Include the data visualization images from {images_for_request} and real internal links from brandimagesandlinks.txt. Make sure to use a mix of the custom graphs from {images_for_request}, with the image name inside [] and the link from {images_for_request}, and brand images. Include highly specific information from {research_info}. Do not use overly creative or crazy language; use a {config['tone']} tone of voice and write as if writing for The Guardian newspaper. Just give information; don't write like a magazine. Use simple language. You are writing from a first person plural perspective for the business, so refer to it in the first person plural. Add a key takeaway table at the very top of the article summarizing the main points, and include lists to make the content more engaging. Choose 5 internal links and 5 brand images from {outline} that are relevant, and include them naturally and with relevance inside the {config['page_type']}, with links to other pillar pages. The title should be around 60 characters. Use markdown formatting, with tables and lists to enrich the {config['page_type']}. The end product should look like {config['path_to_example_file']} as an example."
        client.beta.threads.messages.create(thread_id=thread_id, role="user", content=article_request)
        article_run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=assistant.id)
        wait_for_run_completion(thread_id, article_run.id)
        messages = client.beta.threads.messages.list(thread_id=thread_id)
        article = next((m.content for m in messages.data if m.role == "assistant"), None)

        if article:
            print("Article created successfully.")
            clear_image_urls(Keyword)  # Clear only this keyword's image URLs
        else:
            print("Failed to create an article.")
    return outline, article

def process_keywords_concurrent():
    input_file = 'keywords.csv'
    output_file = 'processed_keywords.csv'

    # Fieldnames for the output CSV
    fieldnames = ['Keyword', 'Outline', 'Article', 'Processed']

    # Read all rows to be processed
    with open(input_file, newline='', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        rows_to_process = [row for row in reader]

    # Process each blog post idea concurrently, giving each keyword its own thread
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        future_to_row = {executor.submit(process_blog_post, client.beta.threads.create().id, row['Keyword']): row for row in rows_to_process}

        # Initialize tqdm progress bar
        progress = tqdm(concurrent.futures.as_completed(future_to_row), total=len(rows_to_process), desc="Processing Keywords")

        # Collect results first to avoid writing to the file inside the loop
        results = []
        for future in progress:
            row = future_to_row[future]
            try:
                outline, article = future.result()  # process_blog_post returns (outline, article)
                # Create a new dictionary for CSV output to ensure it matches the specified fieldnames
                processed_row = {
                    'Keyword': row['Keyword'],
                    'Outline': outline,
                    'Article': article,
                    'Processed': 'Yes'
                }
                results.append(processed_row)
            except Exception as exc:
                print(f'Keyword {row["Keyword"]} generated an exception: {exc}')
                # Mark failed processing as 'Failed' while still matching the fieldnames
                processed_row = {
                    'Keyword': row['Keyword'],
                    'Outline': '',  # or a placeholder such as 'N/A'
                    'Article': '',
                    'Processed': 'Failed'
                }
                results.append(processed_row)

    # Write all results to the output file after processing
    with open(output_file, 'w', newline='', encoding='utf-8') as f_output:  # 'w' overwrites or creates the file
        writer = csv.DictWriter(f_output, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)
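
# The input keywords.csv is expected to have a 'Keyword' header with one
# keyword per row, since process_keywords_concurrent() reads row['Keyword'].
# A hypothetical example:
#
#     Keyword
#     best running shoes for beginners
#     how to choose trail running shoes
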
# Example usage
if __name__ == "__main__":
    process_keywords_concurrent()