Advertisement
Guest User

Untitled

a guest
Oct 22nd, 2018
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.06 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import requests
  3. import csv
  4.  
  5. minURLLength = int(input("Start post: "))
  6. maxURLLength = int(input("End post: "))
  7.  
  8. baseURL = "https://www.strawpoll.me/"
  9.  
  10. keywords = ['teamspeak', 'ban', 'server', 'dipsy']
  11.  
  12. f = csv.writer(open(f'strawpoll/2/strawpoll-dump-{minURLLength}-{maxURLLength}.csv', 'w'))
  13. f.writerow(['ID', 'Title'])
  14.  
  15. def main():
  16. for i in range(minURLLength, maxURLLength):
  17. r = requests.get(baseURL + "%d" % i)
  18. data = r.text.encode('utf-8').decode('ascii', 'ignore')
  19. soup = BeautifulSoup(data, "lxml")
  20. for content in soup.find_all("form", {"class": "poll"}):
  21. pollTitle = content.find("h2").text.lower()
  22. if any(x in pollTitle for x in keywords):
  23. print(f'ID: {i} Title: {pollTitle}')
  24. f.writerow([i, pollTitle])
  25. else:
  26. print(f'ID: {i} No match')
  27. if __name__ == '__main__':
  28. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement