Advertisement
naimul64

QuoraBeautifulSoupComplete

Jul 19th, 2017
236
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.06 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. from operator import itemgetter
  3. import urllib
  4.  
  5.  
  6. def round(decimal_place, floating_no):
  7.     floating_no_backup = floating_no
  8.  
  9.     if type(floating_no) == type(9):
  10.         floating_no = float(str(floating_no) + '.0')
  11.  
  12.     divby2 = 100
  13.     while divby2 != 0:
  14.         divby2 = floating_no / 2
  15.         divby2 = int(divby2)
  16.         if divby2 >= 1:
  17.             floating_no -= divby2
  18.     floating_no -= 1
  19.  
  20.     for i in range(decimal_place + 1):
  21.         floating_no *= 10
  22.     fractionIntoInt = int(floating_no)
  23.     if fractionIntoInt % 10 > 4:
  24.         fractionIntoInt = int(fractionIntoInt / 10)
  25.         fractionIntoInt += 1
  26.     else:
  27.         fractionIntoInt = int(fractionIntoInt / 10)
  28.     back_to_fraction = float(fractionIntoInt)
  29.     for i in range(decimal_place):
  30.         back_to_fraction /= 10
  31.  
  32.     rounded_float = float(int(floating_no_backup))
  33.     rounded_float += back_to_fraction
  34.  
  35.     return rounded_float
  36.  
  37.  
  38.  
  39. r = urllib.urlopen("file:///home/insan/Desktop/gurbaze/What%20is%20your%20most%20powerful%20tip_%20-%20Quora.html").read()
  40. soup = BeautifulSoup(r,"lxml")
  41. mydivs = soup.findAll("div", { "class" : "Answer AnswerBase" })
  42. print str(len(mydivs)) + " answers found"
  43. howAmI = []
  44. for div in mydivs:
  45.     try:
  46.         user = div.findAll("a", { "class" : "user" })[0].string
  47.     except:
  48.         user = "ANONYMOUS!!!!!!!!!"
  49.     # print user.strip
  50.  
  51.     try:
  52.         view = div.findAll("span", { "class" : "meta_num" })[0].string
  53.  
  54.         view = view.strip().replace(",","")
  55.         if  view[-1] == 'k':
  56.             view = view[:-1]
  57.             view = int(float(view) * 1000)
  58.         elif view[-1] == 'm':
  59.             view = view[:-1]
  60.             view = int(float(view) * 1000000)
  61.     except Exception as e:
  62.         #print e
  63.         view = 0
  64.     view = int(str(view))
  65.     #print str(view) + " views"
  66.  
  67.  
  68.     try:
  69.         upvote = div.findAll("a", { "class" : "VoterListModalLink" })[0].string
  70.         upvote = upvote.strip().replace(",","")
  71.         upvote = upvote.split(' ')[0]
  72.  
  73.         upvote = upvote.strip()
  74.         if  upvote[-1] == 'k':
  75.             upvote = upvote[:-1]
  76.             upvote = int(float(upvote) * 1000)
  77.         elif upvote[-1] == 'm':
  78.             upvote = upvote[:-1]
  79.             upvote = int(float(upvote) * 1000000)
  80.     except Exception as e:
  81.         #print e
  82.         upvote = 0
  83.     upvote = int(str(upvote))
  84.     # print str(upvote) + " upvotes"
  85.  
  86.     if upvote == 0 :
  87.         # print "No upvotes."
  88.         viewPerUpvote = 100000000000000
  89.     else:
  90.         viewPerUpvote = float(view)/float(upvote)
  91.         # print str(viewPerUpvote) + " views per upvote ===============>\n\n"
  92.  
  93.  
  94.  
  95.     try:
  96.         answer_link = div.findAll("a", { "class" : "answer_permalink" })[0]
  97.         answer_link = answer_link['href']
  98.  
  99.     except Exception as e:
  100.         answer_link =""
  101.  
  102.  
  103.     lisht = [user, view, upvote, round(decimal_place=2, floating_no=viewPerUpvote), answer_link]
  104.     howAmI.append(lisht)
  105.  
  106. sort_index = 3
  107. howAmI = sorted(howAmI, key=itemgetter(sort_index))
  108. for l in howAmI:
  109.     print l
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement