Advertisement
pradyunsg

StackExchange API Types extractor

Mar 20th, 2013
121
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.33 KB | None | 0 0
  1. from bs4 import BeautifulSoup as bs
  2. import urllib2, re, bs4
  3.  
  4. def parse_type(link):
  5.     html = urllib2.urlopen(link).read()
  6.     soup = bs(html)
  7.     methods = soup.find_all('div',{'class':'method'})
  8.     retval = []
  9.     for i in methods:
  10.         name = i.find('div',{'class':'method-name'}).get_text()
  11.         desc = i.find('div',{'class':'method-description'}).get_text()
  12.         name = re.sub('\s+',' ',name.replace('\r\n','\n').replace('\n','')).strip()
  13.         desc = re.sub('\s+',' ',desc.replace('\r\n','\n').replace('\n',''))
  14.         desc = desc.replace('may be absent',' (1)').replace('unchanged in unsafe filters','(2)').strip()
  15.         retval.append([name,desc])
  16.     return retval
  17.  
  18. def return_type(s):
  19.     s = s.replace('string','str')
  20.     s = s.replace('integer','int')
  21.     s = s.replace('boolean','bool')
  22.     if 'array' in s:
  23.         return 'array(%s)' % (s[s.find('array')+9:].split()[0],)
  24.     return s
  25.    
  26. url = 'https://api.stackexchange.com/docs?tab=type#docs'
  27. url_base = 'https://api.stackexchange.com'
  28. html = urllib2.urlopen(url).read()
  29. soup = bs(html)
  30.  
  31. obj_re = re.compile(r'.*Each of these methods returns (.+) objects..*')
  32. url1_re = re.compile(r'/docs/types/(.+)')
  33. url2_re = re.compile(r'/docs/(.+)')
  34. links = []
  35. l1 = soup.find_all('a',{'href':url1_re})
  36. l2 = [url_base+i['href'] for i in l1]
  37. l3 = [i.get_text().replace('objects','').strip() for i in l1]
  38.  
  39. two_1 = {}
  40. w1 = max(map(len,l3))
  41. w2 = 28
  42.  
  43. for i in range(len(l2)):
  44.     first = True
  45.     url, obj, info = l2[i], l3[i], parse_type(l2[i])
  46.     for i in info:
  47.         if '2.1' in i[0]:
  48.             i[0] = i[0].replace('2.1','').strip()
  49.             if obj not in two_1:
  50.                 two_1[obj] = []
  51.             two_1[obj].append(i)
  52.         elif first:
  53.             print obj.ljust(w1),'|',i[0].ljust(w2),'|',return_type(i[1])
  54.             first = False
  55.         else:
  56.             print ' '*w1,'|',i[0].ljust(w2),'|',return_type(i[1])
  57.     if not first:
  58.         print '-'*(w1+w2+30)
  59. print '</pre>'
  60. print 'v2.1 Specific methods'
  61. print '<pre>'
  62. for obj in two_1:
  63.     first = True
  64.     for i in two_1[obj]:
  65.         if first:
  66.             print obj.ljust(w1),'|',i[0].ljust(w2),'|',return_type(i[1])
  67.             first = False
  68.         else:
  69.             print ' '*w1,'|',i[0].ljust(w2),'|',return_type(i[1])
  70.     if not first:
  71.         print '-'*(w1+w2+30)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement