Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from bs4 import BeautifulSoup as bs
- import urllib2, re, bs4
def parse_type(link):
    """Download a documentation page and extract its method entries.

    Every ``<div class="method">`` on the page contributes one entry built
    from the text of its ``method-name`` and ``method-description`` divs.
    In both texts line breaks are removed and remaining whitespace runs are
    collapsed to a single space. In the description, the phrase
    ``may be absent`` is abbreviated to `` (1)`` and
    ``unchanged in unsafe filters`` to ``(2)``.

    Args:
        link: URL of the page to fetch.

    Returns:
        A list of ``[name, description]`` two-item lists, in page order.
    """
    response = urllib2.urlopen(link)
    try:
        html = response.read()
    finally:
        # Always release the connection, even when the read fails.
        response.close()

    soup = bs(html)
    retval = []
    for method in soup.find_all('div', {'class': 'method'}):
        name = method.find('div', {'class': 'method-name'}).get_text()
        desc = method.find('div', {'class': 'method-description'}).get_text()

        # Newlines are dropped outright (not turned into spaces) before the
        # remaining whitespace is collapsed.
        name = name.replace('\r\n', '\n').replace('\n', '')
        name = re.sub(r'\s+', ' ', name).strip()

        desc = desc.replace('\r\n', '\n').replace('\n', '')
        desc = re.sub(r'\s+', ' ', desc)
        # Shorten the two recurring remarks to footnote-style markers.
        desc = desc.replace('may be absent', ' (1)')
        desc = desc.replace('unchanged in unsafe filters', '(2)')
        desc = desc.strip()

        retval.append([name, desc])
    return retval
def return_type(s):
    """Abbreviate a textual type description.

    ``string``, ``integer`` and ``boolean`` are shortened to ``str``,
    ``int`` and ``bool`` wherever they occur. If the result contains
    ``array``, the first word following the ``array of `` prefix is taken
    as the element type and ``array(<element>)`` is returned.

    Args:
        s: The type description text.

    Returns:
        The abbreviated description. When ``array`` is present but no word
        follows the ``array of `` prefix, the abbreviated text is returned
        unchanged instead of raising ``IndexError``.
    """
    for long_name, short_name in (('string', 'str'),
                                  ('integer', 'int'),
                                  ('boolean', 'bool')):
        s = s.replace(long_name, short_name)

    start = s.find('array')
    if start != -1:
        # Skip past the "array of " prefix; the next word is the element type.
        words = s[start + len('array of '):].split()
        if words:
            return 'array(%s)' % (words[0],)
    return s
# Fetch the API documentation index, follow every "/docs/types/..." link and
# print a text table of each type's method entries (name | abbreviated type).
# Entries whose name contains "2.1" are held back and printed in a second
# table under the "v2.1 Specific methods" heading.
url = 'https://api.stackexchange.com/docs?tab=type#docs'
url_base = 'https://api.stackexchange.com'

response = urllib2.urlopen(url)
try:
    html = response.read()
finally:
    # Always release the connection, even when the read fails.
    response.close()
soup = bs(html)

# NOTE: obj_re, url2_re and links are not referenced by the rest of this
# script; they are kept so the module-level names stay available.
obj_re = re.compile(r'.*Each of these methods returns (.+) objects..*')
url1_re = re.compile(r'/docs/types/(.+)')
url2_re = re.compile(r'/docs/(.+)')
links = []

type_links = soup.find_all('a', {'href': url1_re})
type_urls = [url_base + anchor['href'] for anchor in type_links]
type_names = [anchor.get_text().replace('objects', '').strip()
              for anchor in type_links]

two_1 = {}          # object name -> list of [name, description] for "2.1" entries
two_1_order = []    # object names in first-seen order, for deterministic output
# Column widths: w1 fits the longest object name (0 when no links were found,
# instead of max() failing on an empty sequence); w2 is fixed.
w1 = max([len(type_name) for type_name in type_names] or [0])
w2 = 28


def _print_group(label, rows):
    """Print one object's rows, labelling only the first, then a rule line."""
    for index, (name, desc) in enumerate(rows):
        lead = label.ljust(w1) if index == 0 else ' ' * w1
        print('%s | %s | %s' % (lead, name.ljust(w2), return_type(desc)))
    if rows:
        print('-' * (w1 + w2 + 30))


for type_url, obj in zip(type_urls, type_names):
    rows = []
    for entry in parse_type(type_url):
        if '2.1' in entry[0]:
            # Strip the version tag and defer the entry to the second table.
            entry[0] = entry[0].replace('2.1', '').strip()
            if obj not in two_1:
                two_1[obj] = []
                two_1_order.append(obj)
            two_1[obj].append(entry)
        else:
            rows.append(entry)
    _print_group(obj, rows)

# The opening <pre> for the first table is not emitted by this script; only
# its closing tag is.
print('</pre>')
print('v2.1 Specific methods')
print('<pre>')
for obj in two_1_order:
    _print_group(obj, two_1[obj])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement