Advertisement
iama_alpaca

ArmorGamesSwfFinder.py

Jul 26th, 2017
2,930
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.13 KB | None | 0 0
  1. from bs4 import BeautifulSoup as bs4
  2. import requests, re, sys
  3.  
  4. # Function used for downloading images
  5. def download(url, file_name):
  6.     with open(file_name, "wb") as file:
  7.         response = requests.get(url, headers={'user-agent': 'Mozilla/5.0 (Windows NT 6.1; rv:50.0) Gecko/20100101 Firefox/50.0'})
  8.         file.write(response.content)
  9.  
  10. open('armorgames_rip.txt', 'ab+')
  11.  
  12. for i in ['action', 'adventure', 'arcade', 'shooting', 'puzzle-skill', 'strategy', 'sports']:
  13.     category = i
  14.     print("Ripping Category: {}".format(category))
  15.     url = "http://armorgames.com/category/{}".format(i)
  16.     res = requests.get(url, headers={'user-agent': 'Mozilla/5.0 (Windows NT 6.1; rv:50.0) Gecko/20100101 Firefox/50.0'})
  17.     soup = bs4(res.text, 'html.parser')
  18.     pages = soup.select_one('div[class="pagination"]').text.split()
  19.     lastpage = int(pages[-3])
  20.     print(str(lastpage)+" pages")
  21.  
  22.     page = 1
  23.     while page <= lastpage:
  24.         print("\n----------{}: Page {}----------\n".format(category, str(page)))
  25.         url = "http://armorgames.com/category/{}/date/{}".format(category, str(page))
  26.         print('URL: {}'.format(url))
  27.         res = requests.get(url, headers={'user-agent': 'Mozilla/5.0 (Windows NT 6.1; rv:50.0) Gecko/20100101 Firefox/50.0'})
  28.         soup = bs4(res.text, 'html.parser')
  29.         gamelinks = soup.select('div[id="categorylisting"] ul[class="gamelisting"] h5 a')
  30.         for i in gamelinks:
  31.             if i['href'].startswith("/play/"):
  32.                 url = 'http://armorgames.com'+i['href']
  33.                 #print(url)
  34.                 res = requests.get(url,headers={'user-agent': 'Mozilla/5.0 (Windows NT 6.1; rv:50.0) Gecko/20100101 Firefox/50.0'})
  35.                 soup = bs4(res.text, 'html.parser')
  36.                 sfile = soup.select_one('embed[id="gamefileEmbed"]')
  37.                 if sfile is not None:
  38.                     if sfile['src'].startswith("//"):
  39.                         swf = 'http:'+sfile['src']
  40.                     elif sfile['src'].startswith('/files/'):
  41.                         swf = 'http://cache.armorgames.com'+sfile['src']
  42.                     else:
  43.                         swf = 'http://'+sfile['src']
  44.                     if swf.split('?')[0] not in open('armorgames_rip.txt', 'r').read():
  45.                         with open('armorgames_rip.txt', 'a+') as f:
  46.                             f.write(swf.split('?')[0]+'\n')
  47.                     print(swf.split('?')[0])
  48.                 #download(swf, title+'.swf')
  49.         page += 1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement