Advertisement
Guest User

Untitled

a guest
Jul 2nd, 2015
265
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.22 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import operator
  4.  
  5. def start (url):
  6.     word_list = []
  7.     source_code = requests.get(url).text
  8.     soup = BeautifulSoup(source_code)
  9.     for post_text in soup.findAll('a',{'index_singleListingTitles'}):
  10.         content = post_text.string
  11.         words = content.lower().split()
  12.         for each_word in words:
  13.            
  14.             print (each_word)
  15.     clean_up_list(word_list)
  16.    
  17. def clean_up_list(word_list):
  18.     clean_word_list = []
  19.     for word in word_list:
  20.         symbols = "!@#$%^&*\"()_?.,><;'+|}{][]-=;'"
  21.         for i in range (0,len(symbols)):
  22.             word = word.replace(symbols[i],"")
  23.         if len(word) > 0:
  24.             print(word)
  25.             clean_word_list.append(word)
  26.            
  27.     create_dictionary(clean_word_list)
  28.  
  29. def create_dictionary(clean_word_list):
  30.     word_count = {}
  31.     for word in clean_word_list:
  32.         if word in word_count:
  33.             word_count[word] += 1
  34.         else:
  35.             word_count[word] = 1
  36.            
  37.     for key, value in sorted(word_count.items(), key=operator.itemgetter(1)):
  38.        
  39.         print (key,value)
  40.        
  41. start('https://buckysroom.org/tops.php?type=text&period=this-month')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement