Advertisement
Guest User

Untitled

a guest
Mar 5th, 2018
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.97 KB | None | 0 0
  1.  
import errno
import os
import traceback
import urllib.request as urllib
from difflib import SequenceMatcher

import imageio as IO
import numpy as np
import pandas as ps
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
  11.  
  12.  
  13.  
  14. def similar(a, b):
  15. return SequenceMatcher(None, a, b).ratio()
  16.  
  17. UrlLogTo = "https://dam.ledvance.info/servlet/login"
  18. Username = "ledvance2016"
  19. Password = "ledvance2016"
  20.  
  21. ClassName = "layout-preview pblink"
  22.  
  23.  
  24.  
  25.  
  26.  
  27. def main():
  28. fileObject = open("size.txt", "r")
  29. Size = fileObject.readline()
  30. Size = int(Size)
  31. fileObject.close()
  32.  
  33. driver = webdriver.Chrome(executable_path="C:/Users/mstan/Downloads/chromedriver_win32/chromedriver.exe")
  34.  
  35. driver.get(UrlLogTo)
  36.  
  37. fillingUsername = driver.find_element_by_id("user")
  38. fillingUsername.click()
  39. fillingUsername.send_keys(Username)
  40.  
  41. fillingPassword = driver.find_element_by_id("password")
  42. fillingPassword.click()
  43. fillingPassword.send_keys(Password)
  44.  
  45. fillingPassword.send_keys(Keys.ENTER)
  46.  
  47.  
  48.  
  49. dataFrame = ps.read_excel("LEDVANCE-kartoteka.xlsx")
  50. ids = dataFrame['Kod producenta']
  51. description = dataFrame['Nazwa']
  52.  
  53. np.savetxt("description.txt", description, fmt='%s')
  54.  
  55.  
  56.  
  57.  
  58. for i in range(1,Size):
  59. firstId = ids[i]
  60. firstName = description[i]
  61.  
  62. wordsToSearch = firstName.split()
  63.  
  64. fillingSearchFor = driver.find_element_by_name("simpleValue")
  65. fillingSearchFor.click()
  66. fillingSearchFor.send_keys(firstId)
  67.  
  68. eansearch = driver.find_element_by_name("eansearch")
  69. eansearch.click()
  70.  
  71. originalURL = driver.current_url
  72. elements = driver.find_elements_by_tag_name("a")
  73. elements.__len__()
  74. z = 0
  75. #kombinować z foorlopem
  76. if not os.path.exists('{0}'.format(i)):
  77. os.makedirs('{0}'.format(i))
  78.  
  79. for y in range(elements.__len__()):
  80. try:
  81. elements = driver.find_elements_by_tag_name("a")
  82.  
  83.  
  84. print("iterator {0}".format(y))
  85.  
  86. splittedName = elements[y].get_attribute('name').split('_')
  87. fullname = ''
  88. for word in splittedName:
  89. fullname = fullname+' '+word
  90.  
  91. print (fullname)
  92.  
  93. print(similar(fullname,firstName))
  94. if((elements[y].get_attribute('class') == 'layout-preview pblink') and (similar(elements[y].get_attribute('name'),firstName)>0.1) ):
  95. z = z + 1
  96. Url = elements[y].get_attribute('href')
  97. slicedUrl = Url[70:]
  98. Name = elements[y].get_attribute('name')
  99. slicedName = Name[10:]
  100. driver.get(Url)
  101. print("entered")
  102. filename = '{0}.png'.format(z)
  103.  
  104. driver.save_screenshot(filename)
  105. urlToPhoto = driver.find_element_by_tag_name('img')
  106. size = urlToPhoto.size
  107. location = urlToPhoto.location
  108. im = Image.open(filename)
  109. left = location['x']
  110. top = location['y']
  111. right = location['x'] + size['width']
  112. bottom = location['y'] + size['height']
  113. im = im.crop((left, top, right, bottom))
  114. directoryOfFileToCreate = '{0}/'.format(i)
  115.  
  116. if not os.path.exists(directoryOfFileToCreate):
  117. os.makedirs(directoryOfFileToCreate)
  118.  
  119. directoryOfScreen = directoryOfFileToCreate + '{0}.png'.format(z)
  120.  
  121.  
  122. im.save(directoryOfScreen)
  123.  
  124. driver.get(originalURL)
  125.  
  126. except Exception:
  127. print("some error")
  128.  
  129.  
  130. print(i)
  131. print("iteration -------------------")
  132.  
  133.  
  134.  
  135. input("")
  136.  
  137.  
  138. if __name__ == '__main__':
  139. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement