Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import scrapy
- import pprint
- from scrapy.spider import BaseSpider
- from scrapy.selector import Selector
- from FYP.items import FypItem
- import time
- import MySQLdb
class ExampleSpider(scrapy.Spider):
    """Scrape "contentBox" pages and store them in the ``sideEffect`` table.

    Start URLs are read from ``side-effects.txt`` (one URL per line) when the
    class body is executed.  For every ``<div class="contentBox">`` found on
    a downloaded page, ``parse`` inserts one row (title, urlname, para1) into
    ``brandoao_tengtengfyp.sideEffect``.
    """

    name = "sideBody"

    # Read the URL list once, at class-definition time.  Blank lines are
    # skipped because an empty URL cannot be turned into a request.
    with open("side-effects.txt", 'r') as f:
        start_urls = [url.strip() for url in f if url.strip()]

    custom_settings = {
        "DOWNLOAD_DELAY": 5,
        "CONCURRENT_REQUESTS_PER_DOMAIN": 2,
    }

    # Parameterized statement: the driver escapes the values, so quotes in
    # scraped text can neither break the statement nor inject SQL.
    _INSERT_SQL = ("INSERT IGNORE into brandoao_tengtengfyp.sideEffect "
                   "(title, urlname,para1) VALUES (%s, %s, %s)")

    # Fragments removed, in this exact order, from the repr() of the
    # paragraph list to turn it into plain text.
    _PARA_NOISE = (
        "u'", ", '", ", ,", "',", "'",
        ' ", ', ' "u ', ' " ', ' &', "u2122", " ') ",
    )

    @staticmethod
    def _clean_paragraphs(paragraphs):
        """Return the list of paragraph strings flattened into one string.

        The list is stringified and the list/quote punctuation is stripped
        by successive replacements.
        NOTE(review): the ``[3:-2]`` slice assumes the Python 2 repr of a
        list of unicode strings (``[u'...']``) -- confirm before porting.
        """
        text = str(paragraphs)[3:-2]
        for fragment in ExampleSpider._PARA_NOISE:
            text = text.replace(fragment, "")
        return text

    def parse(self, response):
        """Insert one database row per ``contentBox`` div of *response*.

        Nothing is yielded or returned; the only effect is the database
        write.  The connection is closed even if extraction or the insert
        raises.
        """
        hxs = Selector(response)
        content_boxes = hxs.xpath('//div[@class="contentBox"]')

        # NOTE(review): credentials are hard-coded in source; consider moving
        # them to Scrapy settings or the environment.
        db = MySQLdb.connect(host="brandonkzw.ecorp.ga", user="brandoao_att",
                             password="tengteng", db="brandoao_tengtengfyp",
                             charset="utf8", use_unicode=True)
        try:
            db.set_character_set('utf8')
            cursor = db.cursor()

            for box in content_boxes:
                item = FypItem()
                item['h1text'] = box.xpath('h1/text()').extract()
                # Absolute xpath: takes the first <link href> of the whole
                # page, not of this div.  Raises IndexError if there is none.
                item['urlname'] = box.xpath('/html//link/@href')[0].extract()
                # '//p' is also document-wide: every class-less paragraph.
                item['para1'] = box.xpath("//p[not(@class)]/text()").extract()

                # Same repr-stripping trick as in _clean_paragraphs.
                title = str(item['h1text'])[3:-2]
                # Presumably drops a fixed 22-character URL prefix and a
                # 5-character suffix such as ".html" -- TODO confirm.
                urlname = str(item['urlname'])[22:-5]
                para1 = self._clean_paragraphs(item['para1'])

                cursor.execute(self._INSERT_SQL, (title, urlname, para1))
                # Commit per row so rows already written survive a later
                # failure on the same page.
                db.commit()
        finally:
            db.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement