Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import os
- import requests as req
- from functions.supportfunctions import dump
- from functions.SQLHelper import SQLHelper
# --- Scraper configuration -------------------------------------------------
# English name of the bank whose pages are scraped; used as the lookup key
# in the "banks_list" table.
bank_name = 'globus_bank'
name_db = 'coins.db'
# NOTE(review): absolute, machine-specific directory - adjust per environment.
cur_dir = '/Users/sergeyilyin/Google Drive/cbr/coins'
path_db = os.path.join(cur_dir, name_db)

# --- Resolve the page URLs registered for this bank -------------------------
db = SQLHelper(path_db)
bank = db["banks_list"].find({"eng_name": bank_name})
# The first matching bank row supplies the id used to select its pages.
bank_id = bank[0]["id"]
urls = db["bank_info"].find({"bank_id": bank_id})
# Each "bank_info" row carries the page address in its "href_page" field.
htmls = [page_row["href_page"] for page_row in urls]
# Patterns are compiled once at import time instead of once per fetched page.
# NOTE: the price patterns only recognise numbers written with a whitespace
# thousands separator (e.g. "12 500") or a run of Cyrillic text.
_RE_HTML_COMMENT = re.compile(r'<!--.*?-->', re.DOTALL)
_RE_CAT = re.compile(r"product-artikul.*?(\d{4}-\d{4})", re.DOTALL)
_RE_NAME = re.compile(r"product-name.*?(\d*?[-А-Яа-я()\s]*)</a>", re.DOTALL)
_RE_SELL_PRICE = re.compile(
    r"class=\"price giftd-price\">([а-яА-Я\s]+|\d+?\s+?\d+)", re.DOTALL
)
_RE_BUY_PRICE = re.compile(
    r"span class=\"price\">([а-яА-Я\s]+|\d+?\s+?\d+)", re.DOTALL
)


def _to_price(raw):
    """Convert a scraped price string to ``float``; return ``None`` if it is not numeric.

    The price patterns also capture Cyrillic text (the alternative branch of
    the regex), which cannot be converted and therefore maps to ``None``.
    """
    # str.split() with no argument splits on every Unicode whitespace
    # character, so non-breaking-space thousands separators are removed too.
    compact = "".join(raw.split())
    try:
        return float(compact)
    except ValueError:
        return None


def _build_rows(cat_numbers, coin_names, prices):
    """Pair catalogue numbers, names and prices positionally into row dicts."""
    return [
        {"cat_number": cat, "coin_name": name, "price": price}
        for cat, name, price in zip(cat_numbers, coin_names, prices)
    ]


def parser(html_list, *, fetch=None, timeout=30):
    """Scrape catalogue numbers, coin names and prices from the given pages.

    Args:
        html_list: iterable of page URLs to download and parse.
        fetch: optional callable ``fetch(url) -> str`` returning the page
            HTML. When omitted, pages are downloaded with ``req.get``.
        timeout: timeout in seconds passed to ``req.get`` by the default
            fetcher, so an unresponsive server cannot block the run forever.
            Ignored when a custom ``fetch`` is supplied.

    Returns:
        A dict ``{'sell': [...], 'buy': [...]}``. Each list holds dicts with
        the keys ``"cat_number"``, ``"coin_name"`` and ``"price"``. ``price``
        is a ``float``, or ``None`` when the page shows text instead of a
        number. Values are paired positionally across all pages with ``zip``,
        so each list is truncated to the shortest of its three inputs.
    """
    if fetch is None:
        def fetch(url):
            return req.get(url, timeout=timeout).text

    cat_list = []
    name_list = []
    sell_price_list = []
    buy_price_list = []

    for url in html_list:
        # Drop HTML comments first so commented-out markup is never matched.
        html = _RE_HTML_COMMENT.sub('', fetch(url))

        cat_list.extend(_RE_CAT.findall(html))
        name_list.extend(_RE_NAME.findall(html))
        # Convert only the prices found on this page; previously the whole
        # accumulated list was re-walked per page and the result discarded.
        sell_price_list.extend(
            _to_price(raw) for raw in _RE_SELL_PRICE.findall(html)
        )
        buy_price_list.extend(
            _to_price(raw) for raw in _RE_BUY_PRICE.findall(html)
        )

    return {
        'sell': _build_rows(cat_list, name_list, sell_price_list),
        'buy': _build_rows(cat_list, name_list, buy_price_list),
    }
# Run the scraper over every page registered for the configured bank.
cont = parser(htmls)

# NOTE: persisting the result with dump(cont, bank_name) is currently
# disabled; only the completion message below is emitted.
print('Парсер закончил работу')
Advertisement
Add Comment
Please, Sign In to add comment