Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from bs4 import BeautifulSoup
- import requests
- import csv
# Range of numeric poll IDs to scan, entered interactively at start-up.
minURLLength = int(input("Start post: "))
maxURLLength = int(input("End post: "))

# Poll pages are addressed as baseURL followed by the numeric poll ID.
baseURL = "https://www.strawpoll.me/"

# Lower-case substrings looked for in each (lower-cased) poll title.
keywords = ['teamspeak', 'ban', 'server', 'dipsy']

# CSV writer for the matches. The file is opened with newline='' as the csv
# module requires; without it every row is followed by a blank line on
# platforms that translate '\n' to '\r\n'.
# NOTE(review): the underlying file object is not kept, so it is only closed
# when the interpreter exits; the 'strawpoll/2/' directory must already exist
# because open() does not create directories.
f = csv.writer(
    open(
        f'strawpoll/2/strawpoll-dump-{minURLLength}-{maxURLLength}.csv',
        'w',
        newline='',
    )
)
f.writerow(['ID', 'Title'])
def main():
    """Scan poll pages by numeric ID and record titles matching a keyword.

    For every ID in ``range(minURLLength, maxURLLength)`` (the end value
    itself is not fetched) the page at ``baseURL`` + ID is downloaded and
    parsed. For each ``<form class="poll">`` found, the lower-cased ``<h2>``
    text is checked against ``keywords``; matches are printed and appended
    to the CSV writer ``f``, everything else is reported as "No match".
    Pages that contain no poll form produce no output.

    A failed or timed-out request is reported and skipped so that one bad
    page does not abort the rest of the range.
    """
    for i in range(minURLLength, maxURLLength):
        try:
            # Without a timeout, requests waits indefinitely on a stalled
            # connection and the whole crawl hangs.
            r = requests.get(f"{baseURL}{i}", timeout=30)
        except requests.RequestException as exc:
            print(f'ID: {i} Request failed: {exc}')
            continue

        # Drop every non-ASCII character from the page before parsing.
        data = r.text.encode('utf-8').decode('ascii', 'ignore')
        soup = BeautifulSoup(data, "lxml")

        for content in soup.find_all("form", {"class": "poll"}):
            heading = content.find("h2")
            # find() returns None when the form has no <h2>; treat that as
            # an empty title instead of crashing on `.text`.
            pollTitle = heading.text.lower() if heading is not None else ''
            if any(x in pollTitle for x in keywords):
                print(f'ID: {i} Title: {pollTitle}')
                f.writerow([i, pollTitle])
            else:
                print(f'ID: {i} No match')
# Start the crawl only when this file is executed as a script.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement