Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: UTF-8 -*-
"""Scrape the railway.gov.tw news listing and store each item in MySQL.

For every listing page in ``links`` the script collects the URL and the
title of each news row, then inserts the (URL, TITLE) pairs into the
``RAILWAY`` table of the local ``crawl`` database, committing row by row.

The syntax used here is valid on both Python 2.6+ and Python 3.
"""
from bs4 import BeautifulSoup
import urllib
import requests
import MySQLdb

# Prefix prepended to the hrefs found in the listing to form each stored URL.
BASE_URL = 'http://www.railway.gov.tw/tw/'
# Exact ``class`` attribute value of the <tr> elements that hold a news item.
ROW_CLASS = 'gray01 text_12_1pt form01'
# Placeholders are filled in by the driver, which quotes/escapes the values.
INSERT_SQL = "INSERT INTO RAILWAY(URL,TITLE) VALUES (%s,%s)"

links = ['http://www.railway.gov.tw/tw/news.aspx?n=6807']

# Parallel lists: train_link[i] is the URL of the item titled
# train_news_title[i].
train_link = []
train_news_title = []

for page_url in links:
    # A timeout keeps the script from hanging forever on a stalled server.
    res = requests.get(page_url, timeout=30)
    soup = BeautifulSoup(res.text.encode("utf-8"), "html.parser")
    for row in soup.findAll('tr', {'class': ROW_CLASS}):
        anchor = row.find('a', {'href': True})
        title_span = row.find('span', {'title': True})
        # Take URL and title from the same row in a single pass so the two
        # lists can never drift out of step; rows missing either are skipped.
        if anchor is None or title_span is None:
            continue
        train_link.append(BASE_URL + anchor['href'])
        train_news_title.append(title_span['title'])

# charset='utf8' lets non-ASCII titles be sent over the connection.
conn = MySQLdb.connect(host='localhost', user='root', passwd='', db='crawl',
                       charset='utf8')
try:
    cur = conn.cursor()
    try:
        for url, title in zip(train_link, train_news_title):
            print("%s %s" % (url, title))
            try:
                # Parameterized query: a quote inside a title can no longer
                # break (or inject into) the statement.
                cur.execute(INSERT_SQL, (url, title))
                conn.commit()
            except Exception as e:
                # Best effort per row: report the failure, undo the partial
                # work and carry on with the remaining items.
                print(e)
                conn.rollback()
    finally:
        cur.close()
finally:
    # Always release the connection, even if something above raised.
    conn.close()
Advertisement
Add Comment
Please, Sign In to add comment