Advertisement
Korotkodul

flow

Nov 30th, 2022 (edited)
877
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 3.73 KB | None | 0 0
from threading import Lock, Thread
from time import sleep

import requests
from bs4 import BeautifulSoup
  6.  
  7. #link = "https://edition.cnn.com/"
  8. goto =    "https://www.povarenok.ru/recipes/kitchen/77/"
  9. #link = "https://www.povarenok.ru/recipes/show/70127/"
  10.  
  11. pages = [''] * 13
  12. pages[0] = goto
  13.  
  14. s = 'https://www.povarenok.ru/recipes/kitchen/77/~2/'
  15. pages[1] = s
  16.  
  17.  
  18.  
  19. for i in range(2, 13):
  20.     s = pages[i - 1]
  21.     s = goto + '~' + str(i + 1) + '/'
  22.     pages[i] = s
  23. print(pages)
  24. """в конце изменить goto:
  25.  
  26. """
  27. #for i in range(13):
  28.  
  29. names = []
  30. recep = []
  31. photo = []
  32. #<div class="ingredients-bl">
  33. debug = False
  34.  
  35. def ingrid(goto):
  36.     #print("goto = ", goto)
  37.     html = requests.get(goto)
  38.     html.encoding = 'windows-1251'
  39.     sp = BeautifulSoup(html.text, 'lxml')
  40.     ing = sp.find('div', "ingredients-bl")
  41.     all = ing.find_all('span')
  42.     #print("all")
  43.     #print(all)
  44.     ing_list = []
  45.  
  46.     for i in range(0, len(all), 2):
  47.         if i + 1 >= len(all):
  48.             break
  49.         one_ing = all[i]
  50.         one_ing = str(one_ing)
  51.         since = one_ing.find('n>') + 2
  52.         till = one_ing.find('</')
  53.         ing_str = one_ing[since: till]
  54.         amount = all[i + 1]
  55.         amount = str(amount)
  56.         since = amount.find('n>') + 2
  57.         till = amount.find('</')
  58.         amount_str = amount[since: till]
  59.         res = ing_str + "  " + amount_str
  60.         ing_list.append(res)
  61.         #print(one_ing, amount)
  62.     #print("ing list")
  63.     #print(ing_list)
  64.     recep.append(ing_list)
  65.     #print(ing)
  66.  
  67. def work(goto):
  68.     print("WORK")
  69.     html = requests.get(goto)
  70.     html.encoding = 'windows-1251'
  71.     sp = BeautifulSoup(html.text, 'lxml')
  72.     all = sp.find_all('div', "m-img desktop-img conima")
  73.     #print(all)
  74.     for rec in all:
  75.         if debug:
  76.             print("NEW REC")
  77.             print(rec)
  78.         #print(rec)
  79.         raw = rec.find('img')
  80.         #отсюда название и  ссылка на картинку
  81.         #print("IMAGE")
  82.         #print(img)
  83.         raw = str(raw)
  84.         since = raw.find('Ре')
  85.         till = raw.find('src')
  86.         name = raw[since: till]
  87.         names.append(name)
  88.  
  89.         since = raw.find('htt')
  90.         till = raw.find('/>') - 1
  91.         photo_link = raw[since: till]
  92.         photo.append(photo_link)
  93.         #КАК получить список ингридиентов???
  94.         link = rec.find('a')
  95.         if debug:
  96.             print("link")
  97.             print(link)
  98.             print("end link")
  99.         link = str(link)
  100.         till = link.find(">")
  101.         since = link.find("htt")
  102.         link = link[since: till - 1]
  103.         ingrid(link)
  104.         if debug:
  105.             print("new link")
  106.             print(link)
  107.         #print()
  108.         #print()
  109.         #break
  110.  
  111. cnt = -1
  112. def flow():
  113.     global cnt
  114.  
  115.     while cnt + 1 < len(pages):
  116.         print("flow1")
  117.         cnt += 1
  118.         work(pages[cnt])
  119.  
  120. flow1 = Thread(target=flow)
  121. flow1.start()
  122.  
  123. while cnt + 1 < len(pages):
  124.     print("flow0")
  125.     cnt += 1
  126.     work(pages[cnt])
  127.  
  128.  
  129. print("photo", len(photo))
  130. print(photo)
  131. print("recep", len(recep))
  132. print(recep)
  133. print("names", len(names))
  134. print(names)
  135.  
  136. import lxml
  137. from xlwt import *
  138. workbook = Workbook(encoding = 'utf-8')
  139. table = workbook.add_sheet('data')
  140. table.write(0, 0, 'Название')
  141. table.write(0, 1, 'Рецепт')
  142. table.write(0, 2, 'Фото')
  143.  
  144. N = len(recep)
  145. line = 0
  146. for i in range(N):
  147.     line += 1
  148.     table.write(line, 0, names[i])
  149.     table.write(line, 2, photo[i])
  150.     table.write(line, 1, 'ингридиенты')
  151.     for j in range(len(recep[i])):
  152.         line += 1
  153.         table.write(line, 1, recep[i][j])
  154.     line += 1
  155.  
  156. workbook.save('recep4.xls')
  157. print("FILE SAVED")
  158.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement