Advertisement
ikov34

Untitled

Jan 15th, 2021
1,027
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.75 KB | None | 0 0
  1. import unittest
  2. import os
  3. import sys
  4. import time
  5. from selenium import webdriver
  6. from selenium.webdriver.support import expected_conditions as EC
  7. from selenium.webdriver.common.by import By
  8. from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from urllib.parse import urlparse
  11.  
  12.  
  13. class GenericTest(unittest.TestCase):
  14.    
  15.     def setUp(self):
  16.         try:
  17.             # Ensure existance of urls.txt
  18.             if not os.path.exists("urls.txt"):
  19.                 print(f'Please provide an "urls.txt" file in working directory, containing the URLs to all tested pages.')
  20.                 sys.exit(-1)
  21.  
  22.             # Read all urls
  23.             urls_in = open('urls.txt', 'r')            
  24.             self.pages_urls = urls_in.read().split('\n')
  25.             urls_in.close()
  26.  
  27.             # Setup seen urls queue
  28.             self.seen_links = []
  29.  
  30.             # Array for storing unique URL types that denote specific page types
  31.             self.seen_parts = []
  32.  
  33.             # Init web driver
  34.             self.driver = webdriver.Firefox()        
  35.  
  36.             # Flag for sucessful/failed unit test
  37.             self.any_failed = False
  38.  
  39.             self.wait_timeout = 5
  40.  
  41.             self.driver.implicitly_wait(self.wait_timeout)
  42.  
  43.             self.base_url = ""
  44.  
  45.             self.err_counter = 0
  46.  
  47.         except:
  48.             print("Error in initialisation!")
  49.             sys.exit(-1)
  50.    
  51.    
  52.     def remove_duplicates(self, urls):        
  53.        
  54.         filtered = []
  55.  
  56.         #Filtering step
  57.         for url in urls:
  58.             if not url is None and '/' in url:
  59.                 ending = url.split("?")[0] if '?' in url else url
  60.                 if self.base_url in url and not ending in self.seen_parts:
  61.                     filtered.append(url)
  62.                     self.seen_parts.append(ending)
  63.                
  64.         return filtered
  65.  
  66.  
  67.  
  68.  
  69.     def process_url(self, url, depth=0, max_depth=4):      
  70.  
  71.         if 'uploads' in url: # Is a file, must skip
  72.             return
  73.  
  74.         if urlparse(url).netloc != urlparse(self.base_url).netloc: # Is other domain site
  75.             return
  76.        
  77.         if '#' in url: # Is empty link
  78.             return
  79.  
  80.         if 'stran' in url: # Is generic next page button
  81.             return
  82.  
  83.         driver = self.driver
  84.  
  85.         driver.get(url)
  86.         if "Napaka" in driver.page_source: # An url failed
  87.             print(f"FAILED {url}: Error {driver.find_element(By.ID, 'errorCode').text} thrown on line {driver.find_element(By.ID, 'errorLine').text} of file {driver.find_element(By.ID, 'errorFile').text}")
  88.             self.err_counter += 1
  89.             self.any_failed = True # Since an url failed, the generic test case will not be successful
  90.  
  91.         else:
  92.             #print(f"OK {url}")
  93.             link_elts = driver.find_elements_by_css_selector(".content-wrap a")
  94.             urls = [link_el.get_attribute("href") for link_el in link_elts] # Find all links on page
  95.  
  96.             urls = self.remove_duplicates(urls)                  
  97.  
  98.             if depth < max_depth: # If max url depth not exceeded
  99.                 for link in urls:
  100.                     try:
  101.                         #print(f'Handling url {link}.....')
  102.                         if not link is None and link not in self.seen_links: # link on page has not been crawled yet
  103.                             self.seen_links.append(link)
  104.                             if "inpis" in link: # Crawl only if is link on same domain..
  105.                                 self.process_url(link, depth+1, max_depth)
  106.                     except StaleElementReferenceException as e1:                        
  107.                         print(e1.msg)
  108.                     except NoSuchElementException as e2:
  109.                         print(e2.msg)
  110.  
  111.     def test_generic(self):                
  112.         # Get all webpage urls that will be crawled and tested
  113.         for page_url in self.pages_urls:  
  114.  
  115.             self.err_counter = 0 # Reset error counter for this navbar link page        
  116.  
  117.             # For each web page, process all links from navbar
  118.             print(f'Processing {page_url}....',end='')
  119.             self.seen_links.append(page_url)
  120.             self.driver.get(page_url)
  121.  
  122.             if "Napaka" in self.driver.page_source: # An url failed
  123.                 print(f"FAILED {page_url}: Error {self.driver.find_element(By.ID, 'errorCode').text} thrown on line {self.driver.find_element(By.ID, 'errorLine').text} of file {self.driver.find_element(By.ID, 'errorFile').text}")
  124.                 self.any_failed = True # Since an url failed, the generic test case will not be successful
  125.                 self.err_counter+=1
  126.             else:
  127.                 nav_links_xpath =  '//*[@id="wrapper"]/header//a[@href]'
  128.                 nav_links_elts = self.driver.find_elements_by_xpath(nav_links_xpath)
  129.                 nav_links_urls = [nav_link_elt.get_attribute("href") for nav_link_elt in nav_links_elts]
  130.                 for nav_links_url in nav_links_urls:                
  131.                     if not nav_links_url in self.seen_links:
  132.                         self.base_url = nav_links_url
  133.                         self.process_url(nav_links_url)
  134.                         self.seen_links.append(nav_links_url)
  135.            
  136.             if self.err_counter == 0:
  137.                 print('OK!')
  138.             else:
  139.                 print(f'Failed with {self.err_counter} errors')
  140.                    
  141.            
  142.  
  143.         assert not self.any_failed, "URLs with errors exist!"
  144.  
  145.     def tearDown(self):
  146.         self.driver.close()
  147.  
  148. if __name__ == "__main__":
  149.     try:
  150.         unittest.main()
  151.     except AssertionError as msg:
  152.         print(msg)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement