# ArmorGamesSwfFinder.py

from bs4 import BeautifulSoup as bs4
import requests, re, sys

def download(url, file_name, timeout=30):
    """Download *url* and save the raw response body to *file_name*.

    The request is sent with a desktop Firefox user agent. The response is
    fetched and validated *before* the destination file is opened, so a
    failed request never creates or truncates ``file_name``.

    Args:
        url: Address of the resource to fetch.
        file_name: Path of the file to (over)write with the response body.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(
        url,
        headers={'user-agent': 'Mozilla/5.0 (Windows NT 6.1; rv:50.0) Gecko/20100101 Firefox/50.0'},
        timeout=timeout,
    )
    # Do not save an HTML error page under the target file name.
    response.raise_for_status()
    with open(file_name, "wb") as file:
        file.write(response.content)

# --- Scraper configuration ---------------------------------------------------
RIP_FILE = 'armorgames_rip.txt'
BASE_URL = 'http://armorgames.com'
CACHE_URL = 'http://cache.armorgames.com'
CATEGORIES = ['action', 'adventure', 'arcade', 'shooting', 'puzzle-skill', 'strategy', 'sports']
HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; rv:50.0) Gecko/20100101 Firefox/50.0'}
REQUEST_TIMEOUT = 30  # seconds; without it a stalled server blocks the run forever


def _fetch_soup(url):
    """Request *url* with the shared headers and return the parsed HTML."""
    res = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    return bs4(res.text, 'html.parser')


def _last_page(soup):
    """Return the highest page number shown in the pagination bar, or None.

    The page count is read from the third-from-last whitespace-separated
    token of the ``div.pagination`` text. None is returned when the element
    is missing or that token is not a number, so the caller can decide how
    to proceed instead of crashing.
    """
    pagination = soup.select_one('div[class="pagination"]')
    if pagination is None:
        return None
    tokens = pagination.text.split()
    try:
        return int(tokens[-3])
    except (IndexError, ValueError):
        return None


def _swf_url(src):
    """Turn the ``src`` attribute of the game embed into an absolute URL."""
    if src.startswith('//'):
        # Protocol-relative URL.
        return 'http:' + src
    if src.startswith('/files/'):
        # Site-relative file paths are served from the cache host.
        return CACHE_URL + src
    if src.startswith(('http://', 'https://')):
        # Already absolute: prefixing a scheme would yield "http://http://...".
        return src
    return 'http://' + src


def _load_known(path):
    """Return the set of URLs already recorded in *path*, creating the file if needed."""
    # 'a+' creates a missing file without truncating an existing one.
    with open(path, 'a+') as f:
        f.seek(0)
        return set(line.strip() for line in f if line.strip())


# URLs recorded by earlier runs. Kept in memory so that each new URL is
# compared by exact match instead of re-reading the file and doing a substring
# search, which would wrongly skip a URL that is a prefix of a recorded one.
known = _load_known(RIP_FILE)

for category in CATEGORIES:
    print("Ripping Category: {}".format(category))
    listing = _fetch_soup("{}/category/{}".format(BASE_URL, category))
    lastpage = _last_page(listing)
    if lastpage is None:
        # NOTE(review): assumes a category without a readable pagination bar
        # has a single page of results -- confirm against the live site.
        print("Could not determine page count for {}; assuming 1 page".format(category))
        lastpage = 1
    print(str(lastpage)+" pages")

    for page in range(1, lastpage + 1):
        print("\n----------{}: Page {}----------\n".format(category, page))
        page_url = "{}/category/{}/date/{}".format(BASE_URL, category, page)
        print('URL: {}'.format(page_url))
        page_soup = _fetch_soup(page_url)

        for link in page_soup.select('div[id="categorylisting"] ul[class="gamelisting"] h5 a'):
            href = link.get('href', '')
            if not href.startswith("/play/"):
                continue

            embed = _fetch_soup(BASE_URL + href).select_one('embed[id="gamefileEmbed"]')
            if embed is None or not embed.get('src'):
                # No embedded game file found on this page.
                continue

            # Drop the query string so the same file is recorded only once.
            swf = _swf_url(embed['src']).split('?')[0]
            if swf not in known:
                known.add(swf)
                # Append immediately so progress survives an interrupted run.
                with open(RIP_FILE, 'a') as f:
                    f.write(swf+'\n')
            print(swf)