sunsexsurf

rss_parser

Apr 10th, 2020
224
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.74 KB | None | 0 0
  1. import re
  2. import requests as req
  3.  
  4. re_coin_link = re.compile(r'(http\S+coins?.htm)', re.DOTALL)
  5.  
  6. rss_url = 'http://www.cbr.ru/rss/RssPress'
  7. resp_rss = req.get(rss_url)
  8. press_relise_coin_url = re_coin_link.findall(resp_rss.text)
  9. dates = []
  10.  
  11. for i in press_relise_coin_url:
  12.     resp_html_press_relise = req.get(i)
  13.     html_press_relise = resp_html_press_relise.text
  14.     clear_html_press_relise = re.sub(r' |«|»', ' ', html_press_relise,flags = re.DOTALL)
  15.  
  16.     re_date = re.compile(r'Банк России\s(\d*)\s([а-яА-Я]*)\s(\d*).*выпускает в обращение', re.DOTALL)
  17.     get_date = re_date.findall(clear_html_press_relise)
  18.     # print(get_date)
  19.  
  20.     dates.append(list({"date": x[0], 'month': x[1], "year": x[2]} for x in get_date))
  21.  
  22. # print(cont)
  23. print(dates)
Add Comment
Please, Sign In to add comment