Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python3
"""Turn a SEDE "Wikitravel mentions" CSV export into a Markdown list.

Usage: run the SEDE query
  http://data.stackexchange.com/travel%20answers/query/97659/wikitravel-mentions-for-offline-postprocessing
then download the results to QueryResults.csv and run
  csv2markdown <QueryResults.csv

Each output line names one Wikitravel page, links to it on Wikitravel and
Wikivoyage, and lists every post or tag wiki whose HTML mentions it.
"""
import csv
import re
import sys
from urllib.parse import unquote

# Everything after this prefix is treated as the wiki page name.
WIKITRAVEL_PREFIX = 'http://wikitravel.org/en/'

# A Wikitravel link runs until a quote, fragment, tag bracket, query or space.
WIKITRAVEL_LINK = re.compile(r'http://wikitravel\.org/en/[^"#<>? ]*')


def collect_hits(rows):
    """Group Wikitravel mentions by page name.

    Args:
        rows: iterable of 5-field rows ``(n, parent, ty, html, tagname)``.
            Empty rows (e.g. blank lines in the CSV) are skipped; any other
            row that does not have exactly five fields raises ValueError.

    Returns:
        dict mapping the raw (still URL-quoted) page name to a tuple
        ``(pretty_name, locations)`` where ``locations`` is the set of
        Markdown-ready strings identifying where the page was mentioned.
    """
    hits = {}
    for row in rows:
        if not row:
            continue
        (n, parent, ty, html, tagname) = row

        # The location depends only on the row, so build it once per row.
        if tagname == '':
            # Rows with a parent are addressed as <parent>/#<n>.
            slug = n if parent == '' else '%s/#%s' % (parent, n)
            location = 'http://travel.stackexchange.com/q/' + slug
        else:
            # Escaped outer brackets keep Markdown from nesting the link.
            location = (
                r'\[[%s](http://travel.stackexchange.com/tags/%s/info)\]'
                % (tagname, tagname)
            )

        for link in WIKITRAVEL_LINK.findall(html):
            # Keep the full path after /en/ so sub-pages such as
            # "Paris/1st_arrondissement" still point at the right article.
            place = link[len(WIKITRAVEL_PREFIX):]
            pretty_place = unquote(place.replace('_', ' '))
            hits.setdefault(place, (pretty_place, set()))[1].add(location)
    return hits


def format_lines(hits):
    """Render the mapping built by collect_hits as Markdown lines.

    Pages are emitted in sorted order of their raw name, and the locations
    of each page are sorted as well, so the output is deterministic.
    """
    lines = []
    for place in sorted(hits):
        pretty_place, locations = hits[place]
        leader = (
            '%s ([travel](http://wikitravel.org/en/%s), '
            '[voyage](http://en.wikivoyage.org/wiki/%s))'
            % (pretty_place, place, place)
        )
        # NOTE(review): the trailing single space is kept as found; it may
        # originally have been two spaces (a Markdown hard line break).
        lines.append(' — '.join([leader] + sorted(locations)) + " ")
    return lines


def main():
    """Read the CSV export from stdin and print the Markdown list."""
    for line in format_lines(collect_hits(csv.reader(sys.stdin))):
        print(line)


if __name__ == '__main__':
    main()
Add Comment
Please, Sign In to add comment