Guest User

Untitled

a guest
Sep 7th, 2016
134
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.76 KB | None | 0 0
  1. # -*- coding: UTF-8 -*-
  2. from bs4 import BeautifulSoup
  3. import requests
  4.  
  5. train_link = []
  6. train_news_title = []
  7. links = ['http://www.railway.gov.tw/tw/news.aspx?n=6807']
  8.  
  9. for link in links:
  10.     res = requests.get(link)
  11.     soup = BeautifulSoup(res.text.encode("utf-8"), "html.parser")
  12.     train_table = soup.findAll('tr', ['gray01', 'text_12_1pt'])
  13.  
  14. for tr in train_table:
  15.     link = 'http://www.railway.gov.tw/tw/' + \
  16.            tr.findAll('a', {'href': True})[0]['href']
  17.     news_title = tr.findAll('span', {'title': True})[0]['title']
  18.  
  19.     train_link.append(link)
  20.     train_news_title.append(news_title)
  21.  
  22. assert len(train_link) == len(train_news_title)
  23.  
  24. for j in range(len(train_link)):
  25.     a = train_link[j]
  26.     b = train_news_title[j]
  27.     print a, b
Advertisement
Add Comment
Please sign in to add a comment.