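#prnt.sc screenshot scraper: generates random six-character prnt.sc codes
#(optionally starting with user-chosen letters), checks whether each page
#hosts a real screenshot, and downloads the image. Visited page URLs and
#direct image URLs are logged to 1.txt in the working directory.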
import random
import urllib.request as urll
import urllib.error
#import time
#uncomment the import above if sleep() is needed
from pathlib import Path

url = 'https://prnt.sc/'
visual_divider = '-----------------'

request_headers = {'User-Agent': 'Mozilla/5.0'}
#The headers below often gave 403; the headers above usually give positive results.
#Just comment one block and uncomment the other if you want to change headers.
'''
request_headers = {
"Accept-Language": "en-US,en;q=0.5",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Referer": "https://google.com",
"Connection": "keep-alive"
}'''

FullRandomMode = False
FirstLettersMode = False

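#Fragment of the placeholder image that prnt.sc serves for nonexistent
#screenshots; if it appears in a fetched page, the URL is treated as invalid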
noimage = 'st.prntscr.com/2018/10/13/2048/img/0_173a7b_211be8ff.png'

fullrandomrange = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
#Note: only fullrandomrange is used below; the two lists that follow are unused
randomletters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
randomnumbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

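#Ask the user which mode to run in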
print('\nFull Random Mode - 1\n\nFirst Letters Mode - 2\n')
decision = input()

if decision == '1':
    print('\nEntering Full Random Mode!\n')
    FullRandomMode = True
elif decision == '2':
    print('\nFirst Letters Mode chosen:\n\nEnter your first letters (up to 6):')
    first_letters = input()
    letters_count = len(first_letters)
    if 0 < letters_count < 7:
        print('\nYou have entered '+str(letters_count)+' characters.')
        #Assuming no symbols were entered
        first_letters = first_letters.casefold()
        need_to_generate = 6-letters_count
        print('\nWe will need to generate an additional '+str(need_to_generate)+' characters to complete the link')
        print('\nEntering First Letters Mode\n')
        FirstLettersMode = True
    else:
        print('\nYou have entered a wrong number of characters.\nStart the script again and enter from 1 to 6 characters.')
else:
    print('\nRestart the script and enter either 1 or 2')

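#Core scraper: builds a candidate prnt.sc URL from an optional user-chosen
#prefix plus random characters, checks that the page hosts a real screenshot,
#then parses out the direct image URL and file name and saves the image.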
def scrape(prefix=''):
    #Generating the random part of the code (6 characters total, minus the prefix)
    random_characters = ''.join(random.choice(fullrandomrange) for _ in range(6-len(prefix)))
    full_url = url+prefix+random_characters
    #Getting the HTML code
    request = urll.Request(full_url, headers=request_headers)
    page = urll.urlopen(request).read()
    #From here done without BeautifulSoup; str(page) keeps the bytes-literal
    #representation that the offset-based parsing below was tuned against
    page_str = str(page)
    #Seeing if it's a valid URL (pages without a screenshot embed the placeholder image)
    if noimage in page_str:
        print(full_url+' - '+'Invalid URL\n'+visual_divider)
        return
    #Searching for the direct image URL: split the HTML into space-separated
    #tokens and find the one that closes the class="...screenshot-image" attribute
    page_list = page_str.replace(' ', '\n').split('\n')
    try:
        page_list_image_url_number = page_list.index('screenshot-image"')
    except ValueError:
        #No image tag was detected after all (the detection itself could be improved instead)
        print(full_url+' - '+'Invalid URL\n'+visual_divider)
        return
    print(full_url+' - '+'Valid URL')
    #The next token is src="...", so strip the leading src=" and the trailing quote
    page_list_image_url_number += 1
    almost_image_url = page_list[page_list_image_url_number]
    image_url = almost_image_url[5:][:-1]
    print(image_url)
    #Determining what extension the file has (defaulting to .png if nothing matches)
    extension = '.png'
    for ext in ('jpeg', 'jpg', 'gif', 'png'):
        if ext in image_url:
            extension = '.'+ext
            break
    #Parsing a name for the image: it sits five tokens after the image URL
    page_list_image_name_number = page_list_image_url_number+5
    almost_image_name = page_list[page_list_image_name_number]
    image_name = almost_image_name[10:][:-3]
    print('('+image_name+extension+')')
    #Skipping the download if the image was already saved earlier
    image_file = Path(image_name+extension)
    if image_file.is_file():
        print('\nFile already exists!\nSkipping that image.\n'+visual_divider)
        return
    #Writing the page URL in the text file
    with open('1.txt', 'a') as file:
        file.write('\n'+full_url)
    image_request = urll.Request(image_url, headers=request_headers)
    try:
        image_resourced = urll.urlopen(image_request)
    except urllib.error.URLError:
        print('\nAn error occurred (e.g. HTTP Error 404), continuing running the script!\n'+visual_divider)
        return
    #Saving the image
    with open(image_name+extension, 'wb') as image_output:
        image_output.write(image_resourced.read())
    print('Image downloaded!\n'+visual_divider)
    #Writing the direct image URL in the text file
    with open('1.txt', 'a') as file:
        file.write('\n'+image_url+'\n')
    #Using sleep() to not get banned right away (if needed, uncomment the line below)
    #time.sleep(1)

while FullRandomMode:
    scrape()
while FirstLettersMode:
    scrape(first_letters)
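#Both loops run until interrupted (e.g. with Ctrl+C); if neither mode was
#selected above, the flags stay False and the script simply exits.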