import random
import urllib.request as urll
import urllib.error
#import time
#Uncomment the import above and the time.sleep() call at the end of scrape() if throttling is needed
from pathlib import Path
url = 'https://prnt.sc/'
visual_divider = '-----------------'
request_headers = {'User-Agent': 'Mozilla/5.0'}
#The headers below often returned 403; the headers above usually work.
#Comment out one block and uncomment the other to switch headers.
'''
request_headers = {
    "Accept-Language": "en-US,en;q=0.5",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Referer": "https://google.com",
    "Connection": "keep-alive"
}'''
FullRandomMode = False
FirstLettersMode = False
#Placeholder image that prnt.sc serves when a code does not resolve to a screenshot
noimage = 'st.prntscr.com/2018/10/13/2048/img/0_173a7b_211be8ff.png'
#Characters a prnt.sc code is drawn from
fullrandomrange = 'abcdefghijklmnopqrstuvwxyz0123456789'
print('\nFull Random Mode - 1\n\nFirst Letters Mode - 2\n')
decision = input()
if decision == '1':
    print('\nEntering Full Random Mode!\n')
    FullRandomMode = True
elif decision == '2':
    print('\nFirst Letters Mode chosen.\n\nEnter your first letters (up to 6):')
    first_letters = input()
    letters_count = len(first_letters)
    if 0 < letters_count < 7:
        print('\nYou have entered ' + str(letters_count) + ' characters.')
        first_letters = first_letters.casefold()
        #Assuming no invalid symbols were entered
        need_to_generate = 6 - letters_count
        print('\nWe will need to generate ' + str(need_to_generate) + ' more characters to complete each link')
        print('\nEntering First Letters Mode\n')
        FirstLettersMode = True
    else:
        print('\nYou have entered the wrong number of characters.\nRestart the script and enter from 1 to 6 characters.')
else:
    print('\nRestart the script and enter either 1 or 2')
def scrape(prefix=''):
    #Generate the random part of the code (6 characters total, minus any user prefix);
    #Full Random Mode is an empty prefix, First Letters Mode passes the user's letters
    random_characters = ''.join(random.choice(fullrandomrange) for _ in range(6 - len(prefix)))
    full_url = url + prefix + random_characters
    #Getting the html code
    request = urll.Request(full_url, headers=request_headers)
    try:
        page = urll.urlopen(request).read()
    except urllib.error.URLError:
        print(full_url + ' - request failed\n' + visual_divider)
        return
    #From here done without BeautifulSoup, on the raw markup
    page_str = str(page)
    #Seeing if it's a valid URL (invalid codes are served the known placeholder image)
    if noimage in page_str:
        print(full_url + ' - Invalid URL\n' + visual_divider)
        return
    #Searching for the direct image URL: the screenshot <img> tag is assumed to read
    #class="no-click screenshot-image" src="https://...", so after splitting on spaces
    #the token right after 'screenshot-image"' is the src attribute
    page_list = page_str.replace(' ', '\n').split('\n')
    try:
        image_url_index = page_list.index('screenshot-image"') + 1
    except ValueError:
        #No image tag was found (the detection above could be made more robust)
        print(full_url + ' - Invalid URL\n' + visual_divider)
        return
    print(full_url + ' - Valid URL')
    #Strip the leading src=" and the trailing quote
    image_url = page_list[image_url_index][5:-1]
    print(image_url)
    #Determining what extension the file has, defaulting to .png if nothing matches
    if 'jpeg' in image_url:
        extension = '.jpeg'
    elif 'jpg' in image_url:
        extension = '.jpg'
    elif 'gif' in image_url:
        extension = '.gif'
    else:
        extension = '.png'
    #Parsing a name for the image from a nearby attribute token
    image_name = page_list[image_url_index + 5][10:-3]
    print('(' + image_name + extension + ')')
    #Skipping the image if a file with that name was already downloaded
    if Path(image_name + extension).is_file():
        print('\nFile already exists!\nSkipping that image.\n' + visual_divider)
        return
    #Writing the page URL in the text file
    with open('1.txt', 'a') as file:
        file.write('\n' + full_url)
    image_request = urll.Request(image_url, headers=request_headers)
    try:
        image_resource = urll.urlopen(image_request)
    except urllib.error.URLError:
        print('\nAn error occurred (e.g. HTTP 404); continuing with the next link!\n' + visual_divider)
        return
    #Saving the image
    with open(image_name + extension, 'wb') as image_output:
        image_output.write(image_resource.read())
    print('Image downloaded!\n' + visual_divider)
    #Writing the direct image URL in the text file
    with open('1.txt', 'a') as file:
        file.write('\n' + image_url + '\n')
    #Using sleep() to not get banned right away (if needed, just uncomment the line below)
    #time.sleep(1)
#Run the chosen mode until interrupted (Ctrl+C); invalid input falls through and exits
while FullRandomMode:
    scrape()
while FirstLettersMode:
    scrape(first_letters)