- import urllib2, re, os, logging, unicodedata
- from BeautifulSoup import BeautifulSoup
- #import MySQLdb
class PHP:
    """Scraper for the PHP function pages linked from php.net's quick reference.

    ``mineData`` downloads the quick-reference index, follows every link whose
    href matches ``/manual/en/<what>`` and extracts the function's name,
    version info, descriptions, return type and parameter list from each page.
    Pages on which a field cannot be located are reported to the ``mine``
    logger (file ``mine.log``) and skipped.

    The code is kept syntactically valid for the Python 2 runtime the file
    targets (``urllib2``, BeautifulSoup 3).
    """

    _BASE_URL = 'http://php.net'
    _INDEX_URL = 'http://php.net/quickref.php'
    _LOG_NAME = 'mine'
    _LOG_FILE = 'mine.log'
    _LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s'

    def __init__(self):
        """Create a scraper without touching the network.

        The index used to be downloaded and parsed here, but the result was
        bound to a local and thrown away; ``mineData`` fetches its own fresh
        copy, so construction no longer performs that wasted request.
        """

    def mineData(self, what):
        """Scrape every quick-reference entry whose href matches ``what``.

        Args:
            what: Regular-expression fragment appended to ``/manual/en/`` and
                matched (anchored at the start) against each link's href.
                It is NOT escaped, so regex metacharacters are live.

        Returns:
            A list with one dict per successfully parsed page, in index
            order.  Keys: ``name``, ``kind`` (``'method'`` when the name
            contains ``::``, otherwise ``''``), ``version``,
            ``long_description``, ``return_type``,
            ``return_value_description``, ``short_description``,
            ``parameters`` and ``url``.  Optional sections that are absent
            from a page are stored as ``None``.  (Earlier versions returned
            ``None`` and discarded everything they scraped.)

        Raises:
            Whatever ``urllib2.urlopen`` raises on network failure, and
            ``re.error`` when ``what`` is not a valid pattern.
        """
        logger = self._get_logger()
        index = BeautifulSoup(urllib2.urlopen(self._INDEX_URL))

        link_row = index.find('tr', valign='top')
        if link_row is None:
            # Layout changed or an error page was served: report it instead of
            # dying with an AttributeError on None.
            logger.error('Quick Reference Index: %s', self._INDEX_URL)
            return []

        # Compiled once; the pattern does not change between links.
        wanted = re.compile('/manual/en/' + what)

        records = []
        for link in link_row.findAll('a'):
            href = link.get('href')  # anchors without href must not raise
            if not href or not wanted.match(href):
                continue
            # Join with exactly one slash whether or not href has a leading one.
            url = self._BASE_URL + '/' + href.lstrip('/')
            print(url)
            page = BeautifulSoup(urllib2.urlopen(url))
            record = self._parse_function_page(page, url, logger)
            if record is not None:
                records.append(record)
        return records

    def _get_logger(self):
        """Return the 'mine' logger, attaching its file handler only once."""
        logger = logging.getLogger(self._LOG_NAME)
        already_attached = any(
            isinstance(handler, logging.FileHandler)
            for handler in logger.handlers
        )
        if not already_attached:
            # Adding a handler per call would duplicate every log line and
            # leak one open file per call.
            handler = logging.FileHandler(self._LOG_FILE)
            handler.setFormatter(logging.Formatter(self._LOG_FORMAT))
            logger.addHandler(handler)
        logger.setLevel(logging.WARNING)
        return logger

    def _parse_function_page(self, page, url, logger):
        """Extract all fields from one page; log and return None on the first miss."""
        # Order matters: the first extractor that fails decides which message
        # is logged for the page.
        steps = (
            ('name', 'Function Name: ', self._extract_name),
            ('version', 'PHP Version: ', self._extract_version),
            ('long_description', 'Long Description: ',
             self._extract_long_description),
            ('return_type', 'Function Return Type: ',
             self._extract_return_type),
            ('return_value_description', 'Return Value Description: ',
             self._extract_return_value_description),
            ('short_description', 'Short Description: ',
             self._extract_short_description),
            ('parameters', 'Parameters: ', self._extract_parameters),
        )
        record = {'url': url}
        for key, label, extract in steps:
            try:
                record[key] = extract(page)
            except (AttributeError, TypeError):
                # AttributeError: an expected tag was missing (find() gave
                # None).  TypeError: a tag was present but had no text, so
                # None reached a string operation.
                logger.error('%s%s', label, url)
                return None
        record['kind'] = 'method' if '::' in record['name'] else ''
        return record

    @staticmethod
    def _collapse_whitespace(text):
        """Collapse every whitespace run (newlines included) to one space and trim."""
        # Newlines are deliberately NOT deleted first: doing so glued together
        # words that were separated only by a line break.
        return re.sub(r"\s+", " ", text).strip()

    def _extract_name(self, page):
        """Return the stripped first text node of the ``h1.refname`` heading."""
        return page.find('h1', 'refname').find(text=True).strip()

    def _extract_version(self, page):
        """Return the stripped first text node of the ``p.verinfo`` paragraph."""
        return page.find('p', 'verinfo').find(text=True).strip()

    def _extract_long_description(self, page):
        """Return the raw text of the description paragraphs, or None if absent."""
        section = page.find('div', 'description')
        if section is None:
            return None
        paragraphs = section.findAll(
            'p', {'class': re.compile("^(|sim)para")})
        return ''.join(
            text
            for paragraph in paragraphs
            for text in paragraph.findAll(text=True)
        )

    def _extract_return_type(self, page):
        """Return the first text of the synopsis' first span, or None if absent."""
        synopsis = page.find('div', 'methodsynopsis')
        if synopsis is None:
            return None
        return synopsis.span.find(text=True).strip()

    def _extract_return_value_description(self, page):
        """Return the normalized first paragraph of the return-values section."""
        section = page.find('div', 'returnvalues')
        if section is None:
            return None
        pieces = section.find('p', 'para').findAll(text=True)
        return self._collapse_whitespace(''.join(pieces))

    def _extract_short_description(self, page):
        """Return the normalized text of the ``p.refpurpose`` paragraph."""
        pieces = page.find('p', 'refpurpose').findAll(text=True)
        return self._collapse_whitespace(''.join(pieces))

    def _extract_parameters(self, page):
        """Return 'void', a 'type name, type name' string, or None if no params."""
        first = page.find('span', 'methodparam')
        if first is None:
            # This is the case the original's unreachable ``else`` branch was
            # evidently meant to cover.
            return None
        if first.find(text=True) == 'void':
            return 'void'

        described = []
        for parameter in page.findAll('span', 'methodparam'):
            # NOTE(review): the tuple argument makes BeautifulSoup match on
            # tag *name* ('span' or 'type'; 'tt' or 'parameter'), not on the
            # class -- presumably find('span', 'type') / find('tt',
            # 'parameter') was intended.  Left as-is because the page markup
            # cannot be verified from here; confirm before tightening.
            type_text = parameter.find(('span', 'type')).find(text=True)
            name_text = parameter.find(('tt', 'parameter')).find(text=True)
            described.append(type_text + ' ' + name_text)
        # join() instead of appending ', ' and rstrip(', '): rstrip strips a
        # character set and could eat trailing characters of the last name.
        return ', '.join(described)