Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- import wikitools as wt
- from wikitools import wiki
- from wikitools import api
- import simplejson as json
- from sets import Set
- import re
- import csv
# Wiki handle that every API request in this script is issued against.
site = wt.Wiki('http://en.wikipedia.org/w/api.php')

# Output file: titles whose API response lists the Coord template are
# appended here, one per line.
savefile = '../enwiki_geotagged.txt'
# Input file: read line by line; each non-blank line is used as a page title.
articlelist = '../enwiki_noredirect.txt'

# Load the whole article list up front. The context manager closes the
# handle as soon as the lines are read instead of leaving it open for the
# entire run. (A parenthesised single-argument print behaves the same as the
# Python 2 print statement.)
print("Importing article list...")
with open(articlelist, "r") as rf:
    k = rf.readlines()
def call(params):
    """Run one API query against ``site`` and return its result.

    params -- dict of request parameters handed to ``wt.APIRequest``.

    Returns whatever ``APIRequest.query()`` returns, or the empty string
    when the request raises ``wt.APIError``. Callers detect the failure
    by checking the length of the returned value.
    """
    request = wt.APIRequest(site, params)
    try:
        return request.query()
    except wt.APIError:
        # Failure is signalled to the caller by an empty string.
        return ''
# Query template shared by every request: ask for the templates of a page,
# restricted to Template:Coord and capped at one result. The 'titles' entry
# is filled in per article inside the loop below.
par1 = {
    'action': 'query',
    'prop': 'templates',
    'tllimit': '1',
    'tltemplates': 'Template:Coord',
}


def _has_coord_template(res):
    """Return True when the first page in the API response lists a template.

    res -- non-empty result returned by ``call``; indexed as
           ``res['query']['pages']``, a mapping of page id to page data.

    Because the request is restricted to Template:Coord, the mere presence
    of a 'templates' entry on the page means the template is used there.
    A response without 'query'/'pages', or with an empty page map, is
    treated as "no template" instead of raising KeyError/IndexError.
    """
    try:
        pages = res['query']['pages']
    except KeyError:
        return False
    page_ids = list(pages.keys())
    if not page_ids:
        return False
    # Only one title is requested at a time, so only the first page matters.
    return 'templates' in pages[page_ids[0]]


total = len(k)
# Walk the article list. Blank lines still advance the counter so that the
# progress figure stays comparable with the total, which is len(k).
for rdone, rev in enumerate(k, 1):
    rev = rev.strip()
    if not rev:
        continue
    par1['titles'] = rev
    print("--- Checking article: " + rev + "(" + str(rdone) + "/" + str(total) + ")")
    res = call(par1)
    if not res:
        # call() hands back an empty string when the API request failed.
        print("Could not retrieve a response from the API")
        continue
    if _has_coord_template(res):
        print(rev)
        # Append and close per hit so results already found are on disk
        # if the run is interrupted; 'with' closes the handle even when
        # the write raises.
        with open(savefile, "a") as f:
            f.write(rev + "\n")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement