Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import random
import urllib.request as urll
# import time
# Uncomment the import above if sleep() throttling is needed.

# Base URL of the screenshot host; a six-character code is appended
# to form a candidate page address.
url = 'https://prnt.sc/'

# Minimal headers: a bare Mozilla UA usually avoids a 403.
request_headers = {'User-Agent': 'Mozilla/5.0'}

# The richer header set below often gave 403, the one above usually
# gives positive results. Swap the comments to change headers.
# request_headers = {
#     "Accept-Language": "en-US,en;q=0.5",
#     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0",
#     "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
#     "Referer": "https://google.com",
#     "Connection": "keep-alive"
# }

# Mode flags; the menu below switches exactly one of them on.
FullRandomMode = False
FirstLettersMode = False

# Fragment of the placeholder image served for nonexistent screenshots;
# its presence in a page marks the URL as invalid.
noimage = 'st.prntscr.com/2018/10/13/2048/img/0_173a7b_211be8ff.png'

# Character pools used when generating random screenshot codes.
fullrandomrange = list('abcdefghijklmnopqrstuvwxyz0123456789')
randomletters = list('abcdefghijklmnopqrstuvwxyz')
randomnumbers = list('0123456789')
# Mode menu: '1' scans fully random codes, '2' fixes the first letters
# of each code. The original used `while ...: ... break` as a clumsy
# `if`; rewritten as a plain if/elif/else. Also fixes the typos
# "Enterig" and "eiter" in the user-facing messages.
print('\nFull Random Mode - 1\n\nEnter first letters Mode - 2\n')
decision = input()

if decision == '1':
    print('\nEntering Full Random Mode!')
    FullRandomMode = True
elif decision == '2':
    print('\nFirst Letters Mode chosen:\n\nEnter your first letters (up to 6):')
    # Codes are six characters long, so enforce the advertised limit.
    first_letters = input()[:6]
    print('\nEntering First Letters Mode')
    FirstLettersMode = True
else:
    print('\nRestart the script and enter either 1 or 2')
def scrapefullrandom():
    """Probe one fully random prnt.sc URL and download its image if valid.

    Builds a random six-character code, fetches the page with the module's
    ``request_headers`` and, when the page is not the 'no image'
    placeholder, logs the URL to ``1.txt``, extracts the direct image link
    from the raw HTML and saves the image as a PNG file.

    Raises whatever ``urllib`` raises on network/HTTP errors (e.g. 403).
    """
    # Generate the random six-character code in one pass instead of six
    # separate choice variables.
    code = ''.join(random.choice(fullrandomrange) for _ in range(6))
    full_url = url + code

    # Fetch the page HTML (kept as str(bytes), matching the original
    # substring checks).
    request = urll.Request(full_url, headers=request_headers)
    page = urll.urlopen(request).read()
    page_str = str(page)

    # A page containing the placeholder image means the code is unused.
    if noimage in page_str:
        print(full_url+'\n'+'Invalid URL')
        return

    print(full_url+'\n'+'Valid URL')
    with open('1.txt', 'a') as file:
        file.write('\n'+full_url)

    # Crude parsing without BeautifulSoup: split the markup on spaces and
    # locate the token that carries the screenshot-image class attribute.
    page_list = page_str.replace(' ', '\n').split('\n')
    try:
        page_list_image_url_number = page_list.index('screenshot-image"')
    except ValueError:
        # Valid URL but the expected markup is missing; log and move on so
        # the scraping loop keeps running.
        print('\nAn error occured, valid URL but no image detected, continuing running the script!\n')
        with open('1.txt', 'a') as file:
            file.write('\nAn Error Occured - No image detected\n')
        return

    # The next token holds src="..."; strip the leading `src="` (5 chars)
    # and the trailing quote.
    almost_image_url = page_list[page_list_image_url_number + 1]
    image_url = almost_image_url[5:][:-1]
    print(image_url)

    # Five tokens after the URL token sits the image-name attribute; strip
    # its fixed prefix/suffix the same way.
    almost_image_name = page_list[page_list_image_url_number + 6]
    image_name = almost_image_name[10:][:-3]
    print(image_name)

    # Download the image and save it with a context manager so the handle
    # is closed even if the write fails (original leaked it on error).
    image_request = urll.Request(image_url, headers=request_headers)
    image_resourced = urll.urlopen(image_request)
    with open(image_name + '.png', 'wb') as image_output:
        image_output.write(image_resourced.read())
    print('Image downloaded!')

    # Record the direct image URL in the log file.
    with open('1.txt', 'a') as file:
        file.write('\nDownloaded! Direct URL at:\n'+image_url+'\n')

    # Uncomment to throttle requests and avoid getting banned right away.
    # time.sleep(1)
def scrapeletters():
    """Probe one prnt.sc URL that starts with the user-chosen letters.

    Uses the global ``first_letters`` (entered in the mode menu) as the
    fixed prefix of the six-character code and fills the remaining
    positions with random characters, then checks whether the resulting
    page hosts an image and logs valid URLs to ``1.txt``.

    Bug fix: the original generated all six characters at random and never
    used ``first_letters`` at all, making this mode identical to the fully
    random one.
    """
    # Fixed prefix (at most six characters) plus a random tail to reach
    # the full six-character code length.
    prefix = first_letters[:6]
    tail = ''.join(random.choice(fullrandomrange) for _ in range(6 - len(prefix)))
    full_url = url + prefix + tail

    # Fetch the page and test for the 'no image' placeholder.
    source = urll.urlopen(urll.Request(full_url, headers={'User-Agent': 'Mozilla/5.0'}))
    html = source.read()
    if noimage in str(html):
        print(full_url+'\n'+'Invalid URL')
    else:
        print(full_url+'\n'+'Valid URL')
        with open('1.txt', 'a', encoding='utf-8') as file:
            file.write('\n'+full_url)
# Main driver: at most one flag is True, so at most one loop runs, and it
# repeats its scrape forever (stop with Ctrl+C). The original compared
# `== True` and carried dead `while/else: pass` clauses, both removed.
while FullRandomMode:
    scrapefullrandom()

while FirstLettersMode:
    scrapeletters()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement