Advertisement
karim0209

THAu

Oct 31st, 2020
1,084
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.86 KB | None | 0 0
  1. #!/data/data/com.termux/files/usr/bin/python3
  2. # -*- coding: utf-8 -*-
  3. import requests
  4. from bs4 import BeautifulSoup
  5. from gtts import gTTS
  6. import re
  7.  
  8. URL = input("Enter your The Hindu Articles url name:")
  9. EX1 = ".txt"
  10. EX2 = ".mp3"
  11. Fn = "New"
  12. page = requests.get(URL)
  13. soup = BeautifulSoup(page.content, 'html.parser')
  14. body = soup.find('div', {'class' : 'paywall'})
  15. H1=soup.h1.text.strip()
  16. H2=soup.h2.text.strip()
  17. out_file = open(Fn+H1+EX1, "w")
  18. out_file.write("\n"+H1)
  19. out_file.write("\n"+H2)
  20. print(""+H1)
  21. for paras in body.find_all('p'):
  22.     out_file.write("\n"+paras.text.strip())
  23. out_file.close()
  24. with open(Fn+H1+EX1, 'r') as inp:
  25.     Pfile =open(H1+EX1, "w")
  26.     for line in inp:
  27.         if re.search('\S', line):
  28.             Pfile.write(line)
  29.     Pfile.close()
  30. with open (H1+EX1, encoding="utf-8") as file:
  31.     file=file.read()
  32. speak = gTTS(file,lang='en')
  33. speak.save (H1+EX2)
  34.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement