Advertisement
Guest User

Untitled

a guest
Jan 26th, 2015
171
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.19 KB | None | 0 0
  1. __author__ = 'SAJMON'
  2.  
  3. import urllib2
  4. from bs4 import BeautifulSoup
  5. from pymongo import MongoClient
  6. from PIL import Image #https://pypi.python.org/pypi/Pillow/2.7.0
  7. import StringIO
  8.  
  9.  
  10. def Parser(state):
  11. link = 'http://en.wikipedia.org/wiki/'+state
  12. page = urllib2.urlopen(link)
  13. soup = BeautifulSoup(page)
  14. text = ""
  15. for link in soup.find_all('p'):
  16. text = text+link.text
  17. sparsowany_tekst = " ".join(text.split()) #bez wielokrotnych spacji
  18. #return sparsowany_tekst.encode('ascii','ignore')
  19. return sparsowany_tekst
  20.  
  21. def SentencesSpecifiedByTag(text,tag):
  22. endOfSentence = text.find(".")
  23. startOfSentence = 0
  24. result = ""
  25. lista=[]
  26.  
  27. while True:
  28. wynik = text.find(tag, startOfSentence, endOfSentence)
  29. if ( wynik != -1):
  30. result = text[startOfSentence:endOfSentence+1]
  31. print result
  32. lista.append(result)
  33. startOfSentence=endOfSentence+1
  34. endOfSentence=text.find(".", startOfSentence)
  35. if(endOfSentence == -1):
  36. break
  37. else:
  38. if(endOfSentence == len(text)):
  39. break
  40. startOfSentence = endOfSentence+1
  41. endOfSentence=text.find(".", startOfSentence+1)
  42. if(endOfSentence == -1):
  43. endOfSentence=len(text)
  44.  
  45. return lista
  46.  
  47. def DbConnectAndGetCollection(): #returns collection
  48. client = MongoClient('mongodb://szymo1993:sajmonek93@ds031741.mongolab.com:31741/projekt')
  49. db=client.projekt
  50. collection=db.panstwa
  51. return collection
  52.  
  53. def Check(country): #returns None if country not found or item when found
  54. panstwa = DbConnectAndGetCollection()
  55. return panstwa.find_one({"name": country})
  56.  
  57. def Insert(state):
  58. panstwa = DbConnectAndGetCollection()
  59. text = Parser(state)
  60. panstwa.insert({
  61. "name" : state,
  62. "text" : text,
  63. "flag" : "http://www.mapsofworld.com/images/world-countries-flags/"+state.lower()+"-flag.gif"
  64. })
  65.  
  66. def CompareImages(url1,url2):
  67. img1 = Image.open(StringIO(urllib2.urlopen(url1).read()))
  68. img2 = Image.open(StringIO(urllib2.urlopen(url2).read()))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement