Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: UTF-8 -*-
"""Print the link and title of every news row on the railway.gov.tw news page.

For each URL in ``links`` the page is downloaded, every ``<tr>`` whose CSS
class is ``gray01`` or ``text_12_1pt`` is inspected, and the row's first
``<a href=...>`` and first ``<span title=...>`` are collected into the
parallel lists ``train_link`` and ``train_news_title``. Finally each
(link, title) pair is printed on its own line, separated by a space.
"""
try:
    from urllib.parse import urljoin  # Python 3
except ImportError:
    from urlparse import urljoin  # Python 2

from bs4 import BeautifulSoup
import requests

# Base that the relative hrefs found in the news table are resolved against.
BASE_URL = 'http://www.railway.gov.tw/tw/'
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

train_link = []
train_news_title = []
links = ['http://www.railway.gov.tw/tw/news.aspx?n=6807']

for page_url in links:
    res = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx rather than silently parsing an error page.
    res.raise_for_status()
    # res.text is already Unicode; handing it over directly avoids a
    # pointless encode/decode round trip and a second encoding guess.
    soup = BeautifulSoup(res.text, "html.parser")
    # A non-dict second argument is treated as a CSS class filter; a list
    # matches rows carrying any one of the listed classes.
    train_table = soup.find_all('tr', ['gray01', 'text_12_1pt'])
    for tr in train_table:
        anchor = tr.find('a', {'href': True})
        title_span = tr.find('span', {'title': True})
        if anchor is None or title_span is None:
            # Row has the right class but no link or no titled span
            # (e.g. a header or spacer row): nothing to report for it.
            continue
        # urljoin gives the same result as concatenation for relative
        # hrefs and also copes with absolute or root-relative ones.
        train_link.append(urljoin(BASE_URL, anchor['href']))
        train_news_title.append(title_span['title'])

# Both lists are appended in lockstep above, so they always line up.
assert len(train_link) == len(train_news_title)

for news_url, news_title in zip(train_link, train_news_title):
    # Single formatted argument so the output is identical on Python 2 and 3.
    print("%s %s" % (news_url, news_title))
Advertisement
Add Comment
Please, Sign In to add comment