Advertisement
Guest User

Untitled

a guest
Feb 8th, 2017
100
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.62 KB | None | 0 0
  1. import scrapy
  2. import pprint
  3. from scrapy.spider import BaseSpider
  4. from scrapy.selector import Selector
  5. from FYP.items import FypItem
  6. import time
  7.  
  8. import MySQLdb
  9.  
  class ExampleSpider(scrapy.Spider):
      """Scrape pages listed in ``side-effects.txt`` and store them in MySQL.

      For every ``<div class="contentBox">`` found on a response, one row
      (title, urlname, para1) is inserted into
      ``brandoao_tengtengfyp.sideEffect`` with ``INSERT IGNORE``.
      """

      name = "sideBody"

      # One start URL per line; blank lines are skipped so that no empty
      # URL ends up in start_urls.
      with open("side-effects.txt", 'r') as f:
          start_urls = [line.strip() for line in f if line.strip()]

      custom_settings = {
          "DOWNLOAD_DELAY": 5,
          "CONCURRENT_REQUESTS_PER_DOMAIN": 2,
      }

      def parse(self, response):
          """Insert one ``sideEffect`` row per content box of *response*.

          Nothing is returned or yielded; the only effect is the database
          write. The connection is opened per response and is always closed,
          even when an insert fails.
          """
          hxs = Selector(response)
          content_boxes = hxs.xpath('//div[@class="contentBox"]')
          if not content_boxes:
              # Nothing to store: do not open a database connection at all.
              return

          # These two XPaths are absolute, so they select from the whole
          # document no matter which content box they are evaluated on.
          # Evaluate them once instead of once per box.
          link_hrefs = hxs.xpath('/html//link/@href').extract()
          if not link_hrefs:
              self.logger.warning(
                  "No <link href> found on %s; page skipped", response.url)
              return
          # Drop the first 22 and the last 5 characters of the href, exactly
          # as before -- presumably a fixed site prefix and a ".html"
          # suffix; TODO confirm against the URLs in side-effects.txt.
          url_name = link_hrefs[0][22:-5]
          # Store the text as extracted. Values are passed to the driver as
          # query parameters, so quotes and non-ASCII characters no longer
          # need to be stripped out of the text by hand.
          paragraphs = " ".join(
              hxs.xpath("//p[not(@class)]/text()").extract())

          insert_sql = (
              "INSERT IGNORE into brandoao_tengtengfyp.sideEffect "
              "(title, urlname,para1) VALUES (%s, %s, %s)"
          )

          # NOTE(review): credentials are hard-coded in source; consider
          # moving them to the Scrapy settings or the environment.
          db = MySQLdb.connect(host="brandonkzw.ecorp.ga", user="brandoao_att",
                               password="tengteng", db="brandoao_tengtengfyp",
                               charset="utf8", use_unicode=True)
          try:
              db.set_character_set('utf8')
              cursor = db.cursor()
              try:
                  for box in content_boxes:
                      # The heading is the only value looked up relative to
                      # the current content box.
                      title = " ".join(box.xpath('h1/text()').extract())
                      # Parameterized query: the driver does the escaping,
                      # so scraped text cannot break or alter the statement.
                      cursor.execute(insert_sql, (title, url_name, paragraphs))
                      # Commit per row, so rows stored before a later
                      # failure are kept.
                      db.commit()
              finally:
                  cursor.close()
          finally:
              db.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement