Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: a guest on Aug 22nd, 2012  |  syntax: None  |  size: 2.21 KB  |  hits: 9  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. import urllib
  2. import json
  3. from collections import defaultdict
  4.  
  5. #f = urllib.urlopen("http://en.wikipedia.org/wiki/Categorical_list_of_programming_languages")
  6. f = urllib.urlopen("http://en.wikipedia.org/w/api.php?format=json&action=query&titles=List%20of%20programming%20languages%20by%20type&prop=revisions&rvprop=content")
  7.  
  8. s = f.read()
  9.  
  10. f.close()
  11.  
  12. j = json.loads(s)
  13.  
  14. def find_value_for_key(key, d):
  15.     """ returns a value for 'key' in a dict 'd' """
  16.     if type(d) == type([]):
  17.         for x in d:
  18.             return find_value_for_key(key, x)
  19.     for k,v in d.items():
  20.         if k == key:
  21.             return v
  22.         if type(v) == type({}) or type(v) == type([]):
  23.             return find_value_for_key(key, v)
  24.  
  25. def parse_category(s):
  26.     """ == Wirth languages == """
  27.     return s.strip("= ")
  28.  
  29. def parse_lang(s):
  30.     """
  31.     * [[Pascal (programming language)|Pascal]]
  32.     * [[XPath]]
  33.     * [http://xmlmosaic.codeplex.com XMLmosaic]
  34.     * [http://www.ozonehouse.com/mark/codeworks.html Glyphic Script]
  35.     * Nemerle (compiled into Intermediate Language bytecode)
  36.     * Sculptor
  37.     """
  38.     if "[[" in s:
  39.         ss = s[s.find("[[") + 2 : s.find("]]")].split("|")
  40. #        return s
  41.         return ss[1] if len(ss) > 1 else ss[0]
  42.     elif "[" in s:
  43.         ss = s[s.find("[") + 1 : s.find("]")].split()
  44.         return " ".join(ss[1:])
  45.     elif "(" in s:
  46.         return s[:s.find("(")].strip("* ")
  47.        
  48.     return s.strip("* ")
  49.    
  50. def parse(s):
  51.     langs = defaultdict(list)
  52.     category = "?"
  53.    
  54.     for line in s.splitlines():
  55.         if line.startswith("== See also"):
  56.             break
  57.         elif line.startswith("=="):
  58.             category = parse_category(line)
  59.         elif line.startswith("*"):
  60.             langs[parse_lang(line)].append(category)
  61.    
  62.     return langs
  63.  
  64. def format_textile(d):
  65.     print "h1. Categories of programming languages"
  66.     print
  67.     print "h2. Generated from 'List of programming languages by type' from Wikipedia: http://en.wikipedia.org/wiki/Categorical_list_of_programming_languages"
  68.     for k in sorted(d.iterkeys()):
  69.         print
  70.         print "h3. " + k.encode("utf-8")
  71.         for category in d[k]:
  72.             print "* " + category.encode("utf-8")
  73.    
# Script entry point: take the value stored under the "*" key of the decoded
# API response `j` (presumably the revision's wikitext -- confirm against the
# API output), parse it into a language -> categories mapping, and print that
# mapping as Textile.
format_textile(parse(find_value_for_key("*", j)))