Guest User

Python 爬蟲

a guest
Sep 6th, 2016
297
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.26 KB | None | 0 0
  # -*- coding: UTF-8 -*-
"""Crawl a railway news listing and store each item's URL and title in MySQL.

For every page in ``links`` the script downloads the HTML, picks out the
table rows carrying the news-item CSS classes, and collects the first link
and the first titled ``<span>`` of each row.  Every (URL, title) pair is then
printed and inserted into the ``RAILWAY`` table of the local ``crawl``
database.

The code is written to run unchanged on both Python 2 and Python 3.
"""
from bs4 import BeautifulSoup
import urllib
import requests
import MySQLdb


# Prefix prepended to the hrefs found in the listing rows.
BASE_URL = 'http://www.railway.gov.tw/tw/'

train_link = []
train_news_title = []
links = ['http://www.railway.gov.tw/tw/news.aspx?n=6807']

for link in links:
    res = requests.get(link)
    # res.text is already decoded text, so it can be handed to the parser
    # directly; re-encoding it first only makes the parser detect it again.
    soup = BeautifulSoup(res.text, "html.parser")
    train_table = soup.findAll('tr', {'class': 'gray01 text_12_1pt form01'})

    # Collect URL and title from the same row in one pass so the two lists
    # always stay aligned, and accumulate across every page in ``links``.
    for row in train_table:
        anchors = row.findAll('a', {'href': True})
        titled_spans = row.findAll('span', {'title': True})
        if not anchors or not titled_spans:
            # Not a complete news row; skip it instead of raising IndexError.
            continue
        train_link.append(BASE_URL + anchors[0]['href'])
        train_news_title.append(titled_spans[0]['title'])

# The scraped titles are expected to contain non-ASCII text, so ask for a
# UTF-8 connection rather than relying on the driver's default charset.
conn = MySQLdb.connect(host='localhost', user='root', passwd='', db='crawl',
                       charset='utf8')
cur = conn.cursor()

try:
    for url, title in zip(train_link, train_news_title):
        print("%s %s" % (url, title))
        try:
            # Let the driver quote the values: scraped text must never be
            # spliced into the SQL string itself.
            cur.execute("INSERT INTO RAILWAY(URL,TITLE) VALUES (%s,%s)",
                        (url, title))
            conn.commit()
        except MySQLdb.Error as e:
            # One bad row should not stop the remaining inserts.
            print(e)
            conn.rollback()
finally:
    # Release the cursor and connection even if something above fails.
    cur.close()
    conn.close()
Advertisement
Add Comment
Please, Sign In to add comment