import json
import urllib
from collections import defaultdict

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2: urlopen lives directly on the urllib module
    from urllib import urlopen
# Download the MediaWiki API response (JSON) for the page "List of programming
# languages by type" and decode it into ``j`` for the code below.
f = urlopen("http://en.wikipedia.org/w/api.php?format=json&action=query&titles=List%20of%20programming%20languages%20by%20type&prop=revisions&rvprop=content")
try:
    s = f.read()
finally:
    # Release the connection even when the read fails part-way through.
    f.close()
# Decode explicitly so json.loads receives text on both Python 2 and 3.
j = json.loads(s.decode("utf-8"))
# Sentinel that distinguishes "key not found" from a stored value of None.
_NOT_FOUND = object()


def _search_for_key(key, node):
    """Depth-first search of ``node``; return the value or ``_NOT_FOUND``."""
    if isinstance(node, dict):
        for k, v in node.items():
            if k == key:
                return v
            found = _search_for_key(key, v)
            if found is not _NOT_FOUND:
                return found
    elif isinstance(node, list):
        for item in node:
            found = _search_for_key(key, item)
            if found is not _NOT_FOUND:
                return found
    # Scalars (and exhausted containers) cannot hold the key.
    return _NOT_FOUND


def find_value_for_key(key, d):
    """Return the first value stored under ``key`` anywhere inside ``d``.

    ``d`` may be an arbitrarily nested structure of dicts and lists. The
    search is depth-first: within a dict each entry is checked in iteration
    order, and a nested container is searched before later entries are
    looked at. Unlike a search that stops at the first nested container,
    every sibling is visited until a match is found.

    Returns ``None`` when ``key`` does not occur, or when ``d`` is neither
    a dict nor a list.
    """
    found = _search_for_key(key, d)
    return None if found is _NOT_FOUND else found
def parse_category(s):
    """Return the title of a wiki heading line.

    All leading and trailing ``=`` characters and spaces are removed, so
    ``"== Wirth languages =="`` becomes ``"Wirth languages"``.
    """
    heading_decoration = "= "
    title = s.strip(heading_decoration)
    return title
def _strip_bullet(text):
    """Drop leading ``*`` bullet markers and surrounding whitespace."""
    # Only strip '*' on the left: a trailing '*' can be part of the name.
    return text.lstrip("* \t").rstrip()


def parse_lang(s):
    """Extract a language name from one bulleted wikitext line.

    Recognised forms, tried in this order:

    * ``* [[Pascal (programming language)|Pascal]]`` -> ``"Pascal"``
      (the label after ``|``; the link target when there is no label,
      e.g. ``* [[XPath]]`` -> ``"XPath"``)
    * ``* [http://xmlmosaic.codeplex.com XMLmosaic]`` -> ``"XMLmosaic"``
      (the words after the URL; the URL itself when there is no label)
    * ``* Nemerle (compiled into ...)`` -> ``"Nemerle"``
      (the text before the first parenthesis)
    * ``* Sculptor`` -> ``"Sculptor"``

    A link whose closing bracket is missing is read to the end of the line
    instead of silently losing its last character.
    """
    wiki_start = s.find("[[")
    if wiki_start != -1:
        wiki_end = s.find("]]", wiki_start + 2)
        if wiki_end == -1:
            wiki_end = len(s)
        parts = s[wiki_start + 2:wiki_end].split("|")
        return parts[1] if len(parts) > 1 else parts[0]

    ext_start = s.find("[")
    if ext_start != -1:
        ext_end = s.find("]", ext_start + 1)
        if ext_end == -1:
            ext_end = len(s)
        words = s[ext_start + 1:ext_end].split()
        # words[0] is the URL; fall back to it when the link has no label.
        return " ".join(words[1:] or words)

    paren = s.find("(")
    if paren != -1:
        return _strip_bullet(s[:paren])
    return _strip_bullet(s)
def parse(s):
    """Map each language named in the wikitext ``s`` to its categories.

    The text is scanned line by line:

    * a line starting with ``==`` is a heading and becomes the current
      category (via ``parse_category``);
    * a line starting with ``*`` is a list item; the name extracted by
      ``parse_lang`` gets the current category appended;
    * the "See also" heading ends the scan, with or without a space after
      the ``==`` marker.

    List items seen before any heading are filed under ``"?"``. Items that
    yield an empty name are skipped.

    Returns a ``defaultdict(list)`` of ``name -> [category, ...]``; the
    categories are in order of appearance.
    """
    langs = defaultdict(list)
    category = "?"
    for line in s.splitlines():
        # Both "== See also ==" and "==See also==" are valid wikitext.
        if line.startswith(("== See also", "==See also")):
            break
        if line.startswith("=="):
            category = parse_category(line)
        elif line.startswith("*"):
            name = parse_lang(line)
            if name:
                langs[name].append(category)
    return langs
def _to_native_str(text):
    """Return ``text`` as the interpreter's native ``str`` type.

    On Python 2 a ``unicode`` value is encoded to UTF-8 bytes; a value that
    is already ``str`` (always the case for text on Python 3) is returned
    unchanged.
    """
    if isinstance(text, str):
        return text
    return text.encode("utf-8")


def format_textile(d):
    """Print the mapping ``d`` (language name -> categories) as Textile.

    Writes an ``h1``/``h2`` header to standard output, then one ``h3``
    section per language in sorted name order, with each of its categories
    as a ``*`` bullet. Returns ``None``.

    Uses only single-argument ``print(...)`` calls so the function runs
    unchanged on Python 2 and Python 3.
    """
    print("h1. Categories of programming languages")
    print("")
    print("h2. Generated from 'List of programming languages by type' from Wikipedia: http://en.wikipedia.org/wiki/Categorical_list_of_programming_languages")
    for name in sorted(d):
        print("")
        print("h3. " + _to_native_str(name))
        for category in d[name]:
            print("* " + _to_native_str(category))
# Pipeline: take the value stored under the "*" key of the decoded API
# response, parse it into a name -> categories mapping, and print the report.
wikitext = find_value_for_key("*", j)
languages_by_name = parse(wikitext)
format_textile(languages_by_name)