Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import tweepy
- import re
- import requests
- import untangle
# Candidate place-name patterns, tried in order; the first pattern that
# matches a tweet wins. Each entry is (label printed for debugging, regex).
_PLACE_PATTERNS = (
    # Capitalised word, whitespace, digits, whitespace, trailing word chars.
    ('most complex', re.compile(r'[A-Z]\w*\s+\d+\s+\w*')),
    # Capitalised word, whitespace, digits, one whitespace character.
    ('medium', re.compile(r'[A-Z]\w*\s+\d+\s')),
    # An uppercase letter followed by at least one non-whitespace character.
    ('another try', re.compile(r'[A-Z]\S+')),
    # Shortest span starting at an uppercase letter between word boundaries.
    ('least complex', re.compile(r'\b[A-Z].*?\b')),
)


def _find_place_candidates(text):
    """Return ``(label, matches)`` for the first pattern matching *text*.

    Returns ``(None, [])`` when none of the patterns match.
    """
    for label, pattern in _PLACE_PATTERNS:
        matches = pattern.findall(text)
        if matches:
            return label, matches
    return None, []


def getTwitterResults(screen_name='rogalandops', count=5):
    """Fetch recent tweets from one account and extract possible place names.

    Each tweet is printed, then searched with the patterns in
    ``_PLACE_PATTERNS``; the matches of the first pattern that hits are
    printed (followed by the pattern's label) and collected.

    Args:
        screen_name: Twitter account whose timeline is read.
        count: Number of tweets to request.

    Returns:
        A list with one entry per tweet that produced a match; each entry is
        the list of matched strings for that tweet (i.e. a nested list).
    """
    listOfPossibleLocations = []

    # Authentication keys necessary to use the Twitter API.
    # NOTE(review): these look like placeholder values - supply real
    # credentials before running.
    consumer_key = 'Key'
    consumer_secret = 'Secret'
    access_token = 'Token'
    access_token_secret = 'Secret'

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    # Which user's timeline to scrape, and how many tweets.
    # `screen_name=` is used instead of `id=` so the call works on Tweepy
    # versions that dropped the `id` keyword.
    timeline = api.user_timeline(screen_name=screen_name, count=count)

    for status in timeline:
        update = status.text + '\n'
        print(update)  # Printing the tweet

        label, matches = _find_place_candidates(update)
        if label is None:
            print('No match')
            continue

        print(matches)
        print(label)
        listOfPossibleLocations.append(matches)

    return listOfPossibleLocations
def getPlaceCoordinates(listOfPossibleLocations):
    """Look up coordinates for candidate place names.

    Takes the nested list of possible place names produced by
    getTwitterResults, flattens it, and queries Kartverket's place-name
    search (the geonorge.no SSR endpoint) once per name. Every successful
    lookup is printed and collected.

    Args:
        listOfPossibleLocations: Iterable of lists of place-name strings.

    Returns:
        A list of ``(kommunenavn, stedsnavn, aust, nord)`` tuples, one per
        name for which the response exposed a single ``stedsnavn`` record
        with those fields. Names whose lookup fails are skipped.
    """
    # Local import so this function brings its own dependency into scope.
    from urllib.parse import quote

    # Converting nested list to single list
    singlelist = [item for sublist in listOfPossibleLocations for item in sublist]
    print(singlelist)

    found = []
    for words in singlelist:
        # Percent-encode the name: matches may contain spaces or non-ASCII
        # letters, which are not valid in a raw URL.
        # NOTE(review): assumes the service accepts UTF-8 percent-encoding
        # (the default of quote) - confirm against the API.
        url = 'https://ws.geonorge.no/SKWS3Index/ssr/sok?navn=' + quote(words, safe='')
        try:
            obj = untangle.parse(url)
            hit = obj.sokRes.stedsnavn
            # NOTE(review): encode(..., 'replace') turns non-ASCII characters
            # into '?' and yields bytes on Python 3 - kept as in the
            # original; confirm whether this is still wanted.
            kommunenavn = hit.kommunenavn.cdata.encode('ascii', 'replace')
            stedsnavn = hit.stedsnavn.cdata.encode('ascii', 'replace')
            aust = hit.aust.cdata.encode('ascii', 'replace')
            nord = hit.nord.cdata.encode('ascii', 'replace')
        except (AttributeError, UnicodeEncodeError, IndexError) as e:
            # Best effort: report the failed lookup and move on to the next
            # candidate name.
            print(e)
        else:
            # Print each hit once (the original printed it twice).
            print(kommunenavn, stedsnavn, aust, nord)
            found.append((kommunenavn, stedsnavn, aust, nord))

    return found
# Script body: collect candidate place names from the tweets, then try to
# resolve each of them to coordinates.
locations = getTwitterResults()
coordinates = getPlaceCoordinates(listOfPossibleLocations=locations)
# Debug aid: print(locations, coordinates) to inspect both results.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement