Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import re
import time
from datetime import date

import bs4
import requests
def checkNeg(s):
    """Report whether *s* contains a minus sign.

    Args:
        s: Text to inspect (for example the text of a vote-count element).

    Returns:
        -1 when the character "-" occurs anywhere in *s*, otherwise None.
        Callers only test the result for truthiness, so the -1 / None pair
        is kept as-is for backward compatibility.
    """
    if "-" in s:
        return -1
    return None
# Scrape Stack Overflow search results for "android features" (newest first),
# collecting question titles, links and vote counts page by page until a
# question dated on or before the cutoff day is seen.  Afterwards expose the
# ten most recent questions and the ten highest-voted ones.

MAX_PAGES = 1000        # hard upper bound on the number of result pages fetched
RESULTS_PER_PAGE = 15   # at most this many results are read from each page
CUTOFF_MONTH = "Dec"    # month abbreviation that marks the stop date
CUTOFF_DAY = 6          # stop once a result is dated on or before this day
REQUEST_TIMEOUT = 30    # seconds; keeps a stalled connection from hanging forever
LINK_PREFIX = "https://www.stackoverflow.com"


def _parse_vote_count(text):
    """Return the first (optionally negative) integer in *text*, or 0 if none."""
    match = re.search(r"-?\d+", text)
    return int(match.group()) if match else 0


def _reached_cutoff(timestamp):
    """Return True when *timestamp* names a CUTOFF_MONTH day <= CUTOFF_DAY."""
    # NOTE(review): only month and day are compared, not the year -- confirm
    # that this is acceptable for the date range being scraped.
    match = re.search(re.escape(CUTOFF_MONTH) + r"\s+(\d+)", timestamp)
    return match is not None and int(match.group(1)) <= CUTOFF_DAY


pagenum = 0
ques = []
link = []
votes = []
over = 0

for pagenum in range(1, MAX_PAGES + 1):
    pageurl = (
        "https://stackoverflow.com/search?page=" + str(pagenum)
        + "&tab=Newest&q=android%20features"
    )
    res = requests.get(pageurl, timeout=REQUEST_TIMEOUT)  # data from Stack Overflow
    # Fail loudly on an HTTP error instead of parsing an error page as results.
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, 'html.parser')  # parse with the stdlib HTML parser

    # Parallel element lists: question anchors, vote boxes and timestamps.
    question_ref = soup.select('.question-hyperlink')
    vote_ref = soup.select('.vote')
    time_ref = soup.select('.relativetime')

    # An empty page means there is nothing further to read.
    if not question_ref:
        break

    # zip() stops at the shortest list, so a short final page (or a page where
    # the selectors match different numbers of elements) cannot raise
    # IndexError.  Assumes the three lists line up one-to-one per result --
    # TODO confirm against the live markup.
    page_rows = zip(question_ref[:RESULTS_PER_PAGE], vote_ref, time_ref)
    for question_el, vote_el, time_el in page_rows:
        ques.append(question_el.text.lstrip())
        link.append(LINK_PREFIX + question_el.get('href'))
        votes.append(_parse_vote_count(vote_el.text))
        times = time_el.text
        print(times)
        # The result that triggers the cutoff is itself kept (it was appended
        # above) before the scan stops.
        if _reached_cutoff(times):
            over = 1
            break
    if over == 1:
        break
    time.sleep(1)  # pause between page requests

# Results arrive newest-first, so the head of the lists is the most recent.
most_recent_data = ques[:10]
most_recent_link = link[:10]

# Reorder the three parallel lists by descending vote count.  sorted() is
# stable even with reverse=True, so equally voted questions keep their
# original (newest-first) relative order.
order = sorted(range(len(votes)), key=lambda idx: votes[idx], reverse=True)
votes = [votes[idx] for idx in order]
ques = [ques[idx] for idx in order]
link = [link[idx] for idx in order]

top_voted_ques = ques[:10]
top_voted_link = link[:10]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement