Advertisement
Guest User

Untitled

a guest
Dec 7th, 2019
156
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 13.77 KB | None | 0 0
import re
import time
import urllib
import urllib.parse
import urllib.request
from urllib.parse import urlparse

import mysql.connector
import pymysql.cursors
import requests
from bs4 import BeautifulSoup, SoupStrainer
from selenium import webdriver

  11. chromedriver = 'G:\selenium\chromedriver'
  12. options = webdriver.ChromeOptions()
  13. options.add_argument('headless')
  14. browser = webdriver.Chrome(executable_path=chromedriver, chrome_options=options)
  15. #browser.set_page_load_timeout(100)
  16. browser.get("https://art-dtex.ru/catalog/komplekty_postelnogo_belya/")
  17.  
  18. for element in browser.find_elements_by_css_selector('div.b-catalog-section.bx_catalog_line'):
  19. cnt=element.text
  20. #print (element.text)
  21. with open('C:\workfile.csv', 'a') as f:
  22. f.write("{}\n".format(cnt))
  23. bigtext = element.get_attribute('innerHTML')
  24. newlink = re.findall('<a href="([^"]*)">', bigtext)
  25. newestlnk=[]
  26. for elem in newlink:
  27. if elem[0] == "/":
  28. lnk='https://art-dtex.ru'+elem
  29. newestlnk.append(lnk)
  30. #print('Newestlnk=', newestlnk)
  31. for elemlnk in newestlnk:
  32. browser.get(elemlnk)
  33. time.sleep(3)
  34. podcat=browser.find_elements_by_xpath('/html/body/section/div/div/div[2]/div/div[3]/div[2]/div[2]/ul/li[1]')
  35. if len(podcat)>0:
  36. for element1 in browser.find_elements_by_css_selector('div.b-catalog-section.bx_catalog_line'):
  37. bigtext2 = element1.get_attribute('innerHTML')
  38. newlink2 = re.findall('<a href="([^"]*)">', bigtext2)
  39. newestlnk2=[]
  40. for elem2 in newlink2:
  41. lnk3='https://art-dtex.ru'+elem2
  42. newestlnk2.append(lnk3)
  43. for elemlnk2 in newestlnk2:
  44. #print('elemlnk2=', elemlnk2)
  45. browser.get(elemlnk2)
  46. time.sleep(3)
  47. for pgsweb in browser.find_elements_by_xpath("/html/body/section/div/div/div[2]/div/div[3]/div[2]/div[2]/div[4]/div"):
  48. pgs=pgsweb.text.split()
  49. #print('pgs= ', pgs)
  50. pg=pgs[-1]
  51. pgint = int(pg)
  52. #print('pg= ', pgint)
  53. i=1
  54. while i<=pgint:
  55. browser.get(elemlnk2 +"?PAGEN_1=%s" % i)
  56. time.sleep(3)
  57. #print('I`m on page ',i)
  58. kol = len(browser.find_elements_by_class_name('b-catalog-items-el'))
  59. #print('Постелек на этой странице ', kol)
  60. #print("https://art-dtex.ru/catalog/velyur/" +"?PAGEN_1=%s" % i)
  61.  
  62. k=0
  63. while k<kol:
  64. postelki=[]
  65. for postelka in browser.find_elements_by_class_name('b-catalog-items-el'):
  66. bigtext4 = postelka.get_attribute('innerHTML')
  67. #print(bigtext4)
  68. newlink4= re.findall('<a href="([^"]*)">', bigtext4)
  69. #print(type(newlink4))
  70. #print('Pstlk: ', newlink4)
  71. for elem4 in newlink4:
  72. postelki.append(elem4)
  73. #if elem2[0] == "/":
  74. for postelk in postelki:
  75. #print(type(postelk))
  76. print(postelk)
  77. browser.get('https://art-dtex.ru'+postelk)
  78. time.sleep(3)
  79. #print('Я пришёл')
  80. naims=[]
  81. for naim in browser.find_elements_by_xpath('/html/body/section/div/div/div[2]/div/div/div[1]/div[2]/div[1]/div'):
  82. naim=naim.text
  83. naims.append(naim)
  84. #with open('C:\workfile.csv', 'a') as f:
  85. #f.write("{}\n".format(naim))
  86. pic = browser.find_element_by_css_selector('div.medium-5.small-12.columns.product__img')
  87. pictext = pic.get_attribute('innerHTML')
  88. #print(pictext)
  89. piclink = re.search('<img src="([^"]*)">', pictext)
  90. if piclink == None:
  91. continue
  92. else:
  93. #print(piclink.group(1))
  94. ppic = urllib.request.urlopen(piclink.group(1)).read()
  95. picout = open("C:\Postelki\%s.jpg" % naim, "wb")
  96. picout.write(ppic)
  97. picout.close
  98. prov=browser.find_elements_by_css_selector('td.sizes-td-art')
  99. #print(prov)
  100. artiks=[]
  101. razms=[]
  102. kompls=[]
  103. if len(prov)!=0:
  104. for artik in browser.find_elements_by_css_selector('td.sizes-td-art'):
  105. artik=artik.text
  106. artiks.append(artik)
  107. #with open('C:\workfile.csv', 'a') as f:
  108. #f.write("{}\n".format(artik))
  109. for razm in browser.find_elements_by_css_selector('td.sizes-td-size'):
  110. razm=razm.text
  111. razms.append(razm)
  112. #with open('C:\workfile.csv', 'a') as f:
  113. #f.write("{}\n".format(razm))
  114. for kompl in browser.find_elements_by_css_selector('td.sizes-td-kompl'):
  115. kompl=kompl.text
  116. kompls.append(kompl)
  117. #with open('C:\workfile.csv', 'a') as f:
  118. #f.write("{}\n".format(kompl))
  119. j=0
  120. mydb = mysql.connector.connect(host='localhost', user='root', password='njkmrjnfr', db='new')
  121. mycursor = mydb.cursor()
  122. while j<len(artiks):
  123. sql="INSERT INTO postelki (Наименование, Артикул, Размер, Комплектация) VALUES (%s, %s, %s, %s)"
  124. val=(naims[0], artiks[j], razms[j], kompls[j])
  125. mycursor.execute(sql, val)
  126. mydb.commit()
  127.  
  128. j+=1
  129. else:
  130. mydb = mysql.connector.connect(host='localhost', user='root', password='njkmrjnfr', db='new')
  131. mycursor = mydb.cursor()
  132. sql="INSERT INTO postelki (Наименование, Артикул) VALUES (%s, %s)"
  133. skoro='Скоро в продаже'
  134. val=(naims[0], skoro)
  135. mycursor.execute(sql, val)
  136. mydb.commit()
  137.  
  138. k+=1
  139. i+=1
  140. else:
  141. print("Эта страница особенная")
  142. for pgsweb in browser.find_elements_by_xpath("/html/body/section/div/div/div[2]/div/div[3]/div[2]/div[2]/div[4]/div"):
  143. pgs=pgsweb.text.split()
  144. #print('pgs= ', pgs)
  145. pg=pgs[-1]
  146. pgint = int(pg)
  147.  
  148. i=1
  149. while i<=pgint:
  150. browser.get(elemlnk2 +"?PAGEN_1=%s" % i)
  151. time.sleep(3)
  152.  
  153. kol = len(browser.find_elements_by_class_name('b-catalog-items-el'))
  154. k=0
  155. while k<kol:
  156. postelki=[]
  157. for postelka in browser.find_elements_by_class_name('b-catalog-items-el'):
  158. bigtext4 = postelka.get_attribute('innerHTML')
  159.  
  160. newlink4= re.findall('<a href="([^"]*)">', bigtext4)
  161.  
  162. for elem4 in newlink4:
  163. postelki.append(elem4)
  164.  
  165. for postelk in postelki:
  166.  
  167. #print(postelk)
  168. browser.get('https://art-dtex.ru'+postelk)
  169. time.sleep(3)
  170.  
  171. naims=[]
  172. for naim in browser.find_elements_by_xpath('/html/body/section/div/div/div[2]/div/div/div[1]/div[2]/div[1]/div'):
  173. naim=naim.text
  174. naims.append(naim)
  175.  
  176. pic = browser.find_element_by_css_selector('div.medium-5.small-12.columns.product__img')
  177. pictext = pic.get_attribute('innerHTML')
  178.  
  179. piclink = re.search('<img src="([^"]*)">', pictext)
  180.  
  181. ppic = urllib.request.urlopen(piclink.group(1)).read()
  182. picout = open("C:\Postelki\%s.jpg" % naim, "wb")
  183. picout.write(ppic)
  184. picout.close
  185. prov=browser.find_elements_by_css_selector('td.sizes-td-art')
  186.  
  187. artiks=[]
  188. razms=[]
  189. kompls=[]
  190. if len(prov)!=0:
  191. for artik in browser.find_elements_by_css_selector('td.sizes-td-art'):
  192. artik=artik.text
  193. artiks.append(artik)
  194.  
  195. for razm in browser.find_elements_by_css_selector('td.sizes-td-size'):
  196. razm=razm.text
  197. razms.append(razm)
  198.  
  199. for kompl in browser.find_elements_by_css_selector('td.sizes-td-kompl'):
  200. kompl=kompl.text
  201. kompls.append(kompl)
  202.  
  203. j=0
  204. mydb = mysql.connector.connect(host='localhost', user='root', password='njkmrjnfr', db='new')
  205. mycursor = mydb.cursor()
  206. while j<len(artiks):
  207. sql="INSERT INTO postelki (Наименование, Артикул, Размер, Комплектация) VALUES (%s, %s, %s, %s)"
  208. val=(naims[0], artiks[j], razms[j], kompls[j])
  209. mycursor.execute(sql, val)
  210. mydb.commit()
  211.  
  212. j+=1
  213. else:
  214. mydb = mysql.connector.connect(host='localhost', user='root', password='njkmrjnfr', db='new')
  215. mycursor = mydb.cursor()
  216. sql="INSERT INTO postelki (Наименование, Артикул) VALUES (%s, %s)"
  217. skoro='Скоро в продаже'
  218. val=(naims[0], skoro)
  219. mycursor.execute(sql, val)
  220. mydb.commit()
  221.  
  222. k+=1
  223. i+=1
  224. browser.quit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement