ikov34

Untitled

Jan 15th, 2021
869
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import unittest
  2. import os
  3. import sys
  4. import time
  5. from selenium import webdriver
  6. from selenium.webdriver.support import expected_conditions as EC
  7. from selenium.webdriver.common.by import By
  8. from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from urllib.parse import urlparse
  11.  
  12.  
  13. class GenericTest(unittest.TestCase):
  14.    
  15.     def setUp(self):
  16.         try:
  17.             # Ensure existance of urls.txt
  18.             if not os.path.exists("urls.txt"):
  19.                 print(f'Please provide an "urls.txt" file in working directory, containing the URLs to all tested pages.')
  20.                 sys.exit(-1)
  21.  
  22.             # Read all urls
  23.             urls_in = open('urls.txt', 'r')            
  24.             self.pages_urls = urls_in.read().split('\n')
  25.             urls_in.close()
  26.  
  27.             # Setup seen urls queue
  28.             self.seen_links = []
  29.  
  30.             # Array for storing unique URL types that denote specific page types
  31.             self.seen_parts = []
  32.  
  33.             # Init web driver
  34.             self.driver = webdriver.Firefox()        
  35.  
  36.             # Flag for sucessful/failed unit test
  37.             self.any_failed = False
  38.  
  39.             self.wait_timeout = 5
  40.  
  41.             self.driver.implicitly_wait(self.wait_timeout)
  42.  
  43.             self.base_url = ""
  44.  
  45.             self.err_counter = 0
  46.  
  47.         except:
  48.             print("Error in initialisation!")
  49.             sys.exit(-1)
  50.    
  51.    
  52.     def remove_duplicates(self, urls):        
  53.        
  54.         filtered = []
  55.  
  56.         #Filtering step
  57.         for url in urls:
  58.             if not url is None and '/' in url:
  59.                 ending = url.split("?")[0] if '?' in url else url
  60.                 if self.base_url in url and not ending in self.seen_parts:
  61.                     filtered.append(url)
  62.                     self.seen_parts.append(ending)
  63.                
  64.         return filtered
  65.  
  66.  
  67.  
  68.  
  69.     def process_url(self, url, depth=0, max_depth=4):      
  70.  
  71.         if 'uploads' in url: # Is a file, must skip
  72.             return
  73.  
  74.         if urlparse(url).netloc != urlparse(self.base_url).netloc: # Is other domain site
  75.             return
  76.        
  77.         if '#' in url: # Is empty link
  78.             return
  79.  
  80.         if 'stran' in url: # Is generic next page button
  81.             return
  82.  
  83.         driver = self.driver
  84.  
  85.         driver.get(url)
  86.         if "Napaka" in driver.page_source: # An url failed
  87.             print(f"FAILED {url}: Error {driver.find_element(By.ID, 'errorCode').text} thrown on line {driver.find_element(By.ID, 'errorLine').text} of file {driver.find_element(By.ID, 'errorFile').text}")
  88.             self.err_counter += 1
  89.             self.any_failed = True # Since an url failed, the generic test case will not be successful
  90.  
  91.         else:
  92.             #print(f"OK {url}")
  93.             link_elts = driver.find_elements_by_css_selector(".content-wrap a")
  94.             urls = [link_el.get_attribute("href") for link_el in link_elts] # Find all links on page
  95.  
  96.             urls = self.remove_duplicates(urls)                  
  97.  
  98.             if depth < max_depth: # If max url depth not exceeded
  99.                 for link in urls:
  100.                     try:
  101.                         #print(f'Handling url {link}.....')
  102.                         if not link is None and link not in self.seen_links: # link on page has not been crawled yet
  103.                             self.seen_links.append(link)
  104.                             if "inpis" in link: # Crawl only if is link on same domain..
  105.                                 self.process_url(link, depth+1, max_depth)
  106.                     except StaleElementReferenceException as e1:                        
  107.                         print(e1.msg)
  108.                     except NoSuchElementException as e2:
  109.                         print(e2.msg)
  110.  
  111.     def test_generic(self):                
  112.         # Get all webpage urls that will be crawled and tested
  113.         for page_url in self.pages_urls:  
  114.  
  115.             self.err_counter = 0 # Reset error counter for this navbar link page        
  116.  
  117.             # For each web page, process all links from navbar
  118.             print(f'Processing {page_url}....',end='')
  119.             self.seen_links.append(page_url)
  120.             self.driver.get(page_url)
  121.  
  122.             if "Napaka" in self.driver.page_source: # An url failed
  123.                 print(f"FAILED {page_url}: Error {self.driver.find_element(By.ID, 'errorCode').text} thrown on line {self.driver.find_element(By.ID, 'errorLine').text} of file {self.driver.find_element(By.ID, 'errorFile').text}")
  124.                 self.any_failed = True # Since an url failed, the generic test case will not be successful
  125.                 self.err_counter+=1
  126.             else:
  127.                 nav_links_xpath =  '//*[@id="wrapper"]/header//a[@href]'
  128.                 nav_links_elts = self.driver.find_elements_by_xpath(nav_links_xpath)
  129.                 nav_links_urls = [nav_link_elt.get_attribute("href") for nav_link_elt in nav_links_elts]
  130.                 for nav_links_url in nav_links_urls:                
  131.                     if not nav_links_url in self.seen_links:
  132.                         self.base_url = nav_links_url
  133.                         self.process_url(nav_links_url)
  134.                         self.seen_links.append(nav_links_url)
  135.            
  136.             if self.err_counter == 0:
  137.                 print('OK!')
  138.             else:
  139.                 print(f'Failed with {self.err_counter} errors')
  140.                    
  141.            
  142.  
  143.         assert not self.any_failed, "URLs with errors exist!"
  144.  
  145.     def tearDown(self):
  146.         self.driver.close()
  147.  
  148. if __name__ == "__main__":
  149.     try:
  150.         unittest.main()
  151.     except AssertionError as msg:
  152.         print(msg)
RAW Paste Data