Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- __author__ = 'SAJMON'
- import urllib2
- from bs4 import BeautifulSoup
- from pymongo import MongoClient
- from PIL import Image #https://pypi.python.org/pypi/Pillow/2.7.0
- import StringIO
def Parser(state):
    """Download the English Wikipedia article for *state* and return its text.

    The text of every ``<p>`` element on the page is concatenated and every
    run of whitespace is collapsed to a single space.

    :param state: article title; appended as-is to
        ``http://en.wikipedia.org/wiki/``.
    :return: the whitespace-normalised paragraph text of the article.
    """
    url = 'http://en.wikipedia.org/wiki/' + state
    page = urllib2.urlopen(url)
    try:
        soup = BeautifulSoup(page)
    finally:
        # Release the HTTP connection even when parsing fails.
        page.close()

    # Join once instead of growing a string paragraph by paragraph.
    text = "".join(paragraph.text for paragraph in soup.find_all('p'))

    # Collapse repeated whitespace into single spaces.
    sparsowany_tekst = " ".join(text.split())
    return sparsowany_tekst
def SentencesSpecifiedByTag(text, tag):
    """Return every sentence of *text* that contains *tag*.

    A sentence is the span from the character after the previous ``"."`` up
    to and including the next ``"."``. Text after the last period is treated
    as a final, unterminated sentence. Sentences are returned exactly as they
    appear in *text* (leading whitespace is not stripped), and each match is
    also printed as it is found.

    :param text: the text to scan.
    :param tag: substring to look for inside each sentence (the terminating
        period itself is not part of the searched span).
    :return: list of matching sentences, in order of appearance.
    """
    lista = []
    textLength = len(text)
    startOfSentence = 0

    while startOfSentence < textLength:
        endOfSentence = text.find(".", startOfSentence)
        if endOfSentence == -1:
            # No further period: the remainder is one trailing sentence.
            # Never let -1 leak into find()/slicing as a bound.
            endOfSentence = textLength

        if text.find(tag, startOfSentence, endOfSentence) != -1:
            # +1 keeps the terminating period; slicing past the end is safe
            # for the unterminated trailing sentence.
            result = text[startOfSentence:endOfSentence + 1]
            print(result)
            lista.append(result)

        # Matching and non-matching sentences advance the same way.
        startOfSentence = endOfSentence + 1

    return lista
def DbConnectAndGetCollection(): #returns collection
    """Connect to MongoDB and return the ``panstwa`` collection of ``projekt``.

    The connection URI is taken from the ``PROJEKT_MONGODB_URI`` environment
    variable when it is set; otherwise the built-in URI below is used.

    :return: the ``panstwa`` collection object of the ``projekt`` database.
    """
    import os  # local import: only this function needs it

    # NOTE(review): the fallback URI embeds a username and password in the
    # source. Prefer setting PROJEKT_MONGODB_URI and rotate this credential.
    uri = os.environ.get(
        'PROJEKT_MONGODB_URI',
        'mongodb://szymo1993:sajmonek93@ds031741.mongolab.com:31741/projekt')
    client = MongoClient(uri)
    return client.projekt.panstwa
def Check(country): #returns None if country not found or item when found
    """Look up *country* by its ``name`` field in the countries collection.

    :param country: value matched against the ``name`` field.
    :return: whatever ``find_one`` yields for that query (the stored item,
        or ``None`` when nothing matches).
    """
    query = {"name": country}
    collection = DbConnectAndGetCollection()
    return collection.find_one(query)
def Insert(state):
    """Fetch the article text for *state* and store it as a new document.

    The stored document has three fields: ``name`` (the given *state*),
    ``text`` (the result of ``Parser(state)``) and ``flag`` (a flag-image URL
    built from the lower-cased *state*).

    :param state: country name; used as the document name, the article title
        and part of the flag URL.
    """
    collection = DbConnectAndGetCollection()
    article_text = Parser(state)

    flag_prefix = "http://www.mapsofworld.com/images/world-countries-flags/"
    flag_suffix = "-flag.gif"
    flag_url = flag_prefix + state.lower() + flag_suffix

    document = {
        "name": state,
        "text": article_text,
        "flag": flag_url,
    }
    collection.insert(document)
def _LoadImageFromUrl(url):
    """Download *url* and open the downloaded bytes as a PIL image."""
    response = urllib2.urlopen(url)
    try:
        data = response.read()
    finally:
        # Release the HTTP connection once the bytes are in memory.
        response.close()
    # ``import StringIO`` binds the module, so the class must be reached as
    # ``StringIO.StringIO``; calling the module itself raises TypeError.
    return Image.open(StringIO.StringIO(data))


def CompareImages(url1,url2):
    """Download the images at *url1* and *url2* and open both with PIL.

    NOTE(review): the visible code only loads the two images; no comparison
    is performed and nothing is returned. Confirm the intended result before
    relying on this function.

    :param url1: URL of the first image.
    :param url2: URL of the second image.
    """
    img1 = _LoadImageFromUrl(url1)
    img2 = _LoadImageFromUrl(url2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement