sunsexsurf

Globus

Apr 11th, 2020
312
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.73 KB | None | 0 0
  1. import re
  2. import os
  3. import requests as req
  4.  
  5. from functions.supportfunctions import dump
  6. from functions.SQLHelper import SQLHelper
  7.  
  8.  
  9. bank_name = 'globus_bank'
  10.  
  11. name_db = 'coins.db'
  12. cur_dir = '/Users/sergeyilyin/Google Drive/cbr/coins'
  13. path_db = os.path.join(cur_dir, name_db)
  14.  
  15. db = SQLHelper(path_db)
  16.  
  17. bank = db["banks_list"].find({"eng_name": bank_name})
  18. urls = db["bank_info"].find({"bank_id":bank[0]["id"]})
  19. htmls = [url["href_page"] for url in urls]
  20.  
  21.  
  22. # print(htmls)
  23.  
  24. def parser(html_list):
  25.     # cont=[]
  26.     cat_list = []
  27.     name_list = []
  28.     sell_price_list = []
  29.     buy_price_list = []
  30.     for i in html_list:
  31.         # print(i)
  32.         resp = req.get(i)
  33.         html = resp.text
  34.         html = re.sub(r'<!--.*?-->', '', html, flags=re.DOTALL)
  35.         # print('Обращаемся к серверу банка')
  36.         # print(html)
  37.         re_cat = re.compile(r"product-artikul.*?(\d{4}-\d{4})", re.DOTALL)
  38.         re_name = re.compile(r"product-name.*?(\d*?[-А-Яа-я()\s]*)</a>", re.DOTALL)
  39.         re_sell_price = re.compile(r"class=\"price giftd-price\">([а-яА-Я\s]+|\d+?\s+?\d+)", re.DOTALL)
  40.         re_buy_price = re.compile(r"span class=\"price\">([а-яА-Я\s]+|\d+?\s+?\d+)", re.DOTALL)
  41.  
  42.         cat_list.extend(re_cat.findall(html))
  43.         name_list.extend(re_name.findall(html))
  44.         sell_price_list.extend(re_sell_price.findall(html))
  45.         for i in sell_price_list:
  46.             if float(i.strip().replace(' ','')) == True:
  47.                 float(i.strip().replace(' ',''))
  48.             else: None
  49.             print(i)
  50.         buy_price_list.extend(re_buy_price.findall(html))
  51.  
  52.         # print(sell_price_list)
  53.  
  54.     cont = {'sell': [{"cat_number": c, "coin_name": cn, "price": p}
  55.                      for c, cn, p in zip(cat_list, name_list, sell_price_list)],
  56.             'buy': [{"cat_number": c, "coin_name": cn, "price": p}
  57.                      for c, cn, p in zip(cat_list, name_list, buy_price_list)]}
  58.             # print(cat_list)
  59.  
  60.     # print(cont)
  61.  
  62.     return cont
  63.  
  64.  
  65.         # cont = {'sell': [{"cat_number": i[0], "price": float(i[1])} for i in re_cat.findall(html)]}
  66.  
  67.         # cont = {"sell": [{"cat_number": c, "price": p}
  68.         #                  for c, p in zip(cat_number, price)]}
  69.  
  70.         # cont.extend(list(re_cat.findall(html)))
  71.  
  72.     # print('Формируем пары значений: каталожный номер - цена')
  73.  
  74.     # cont=set(cont)
  75.  
  76.     # cont = {"sell":[{"cat_number":(i[0]),"price": float(i[1].replace(' ',''))} for i in cont]}
  77.     # return cont
  78.  
  79. # print(parser(htmls))
  80.  
  81. cont = parser(htmls)
  82.  
  83. # print(cont)
  84.  
  85. # print('Сохраняем дамп')
  86.  
  87. # dump(cont, bank_name)
  88.  
  89. print('Парсер закончил работу')
Advertisement
Add Comment
Please, Sign In to add comment