Advertisement
Guest User

Untitled

a guest
Feb 13th, 2016
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.00 KB | None | 0 0
  1. import aiohttp
  2. import re
  3. import bs4
  4. import os
  5. import sys
  6. import asyncio
  7.  
# Maximum number of simultaneous image downloads allowed at once.
max_connection = 10
# Shared event loop: every coroutine below and the final run_until_complete use it.
loop = asyncio.get_event_loop()
# Semaphore capping in-flight image requests at max_connection (acquired per image).
sem = asyncio.Semaphore(value=max_connection)
  11.  
  12. async def request_and_write(path, img_name, img_request):
  13. """
  14. Get the image from request and write to disk
  15. :param path: absolute path to folder
  16. :param img_name: image name
  17. :param img_request: the image request to get data from
  18. :return: None
  19. """
  20. os.makedirs(path, exist_ok=True)
  21. with open(os.path.join(path, img_name), 'wb') as image_file:
  22. while True:
  23. chunk = await img_request.content.read(100000)
  24. if not chunk:
  25. break
  26. image_file.write(chunk)
  27. print('===> Done: {0} !'.format(img_name))
  28.  
  29. async def get_one_chapter(link, path):
  30. """
  31. Download one chapter ~ 20-25 .jpg images
  32. :param link: the link to one chapter
  33. :param path:
  34. :return: None
  35. """
  36. async with aiohttp.ClientSession(loop=loop).get(link) as res_local:
  37. assert res_local.status == 200
  38. html_local = await res_local.read()
  39. soup = bs4.BeautifulSoup(html_local, 'html.parser')
  40. imgRegex = re.compile('.*.jpg')
  41.  
  42. # extract the src links
  43. for img in soup.select("a.img-link img[src]"):
  44. imgLink = img['src']
  45. img_name = re.search(imgRegex, os.path.basename(imgLink)).group()
  46. #request the image
  47. async with sem:
  48. async with aiohttp.ClientSession(loop=loop).get(imgLink) as img_res:
  49. await request_and_write(path, img_name, img_res)
  50.  
  51. async def get_all(seed, manga_name, folder):
  52. """
  53. Performs downloading all chapters of each version of the manga
  54. :param seed: the seed link (http://mangapark.me/manga/berserk)
  55. :param manga_name: manga name (Berserk)
  56. :param folder: save destination
  57. :return: None
  58. """
  59. async with aiohttp.ClientSession(loop=loop).get(seed) as res:
  60. assert res.status == 200
  61. html = await res.read()
  62. soup = bs4.BeautifulSoup(html, 'html.parser')
  63.  
  64. # get List of versions
  65. versions = [ver for ver in soup.select('a.st')]
  66. print('There are %s versions' %len(versions))
  67. versionTags = ['s%s' %(x+1) for x in range(len(versions))]
  68. print(versionTags)
  69.  
  70. #start downloading all chapters
  71. for link in soup.find_all('a', target = '_blank', text = re.compile('all')):
  72. #link to the 'all' button in web page
  73. chapLink = 'http://mangapark.me' + link.get('href')
  74. chapName = os.path.basename(chapLink)
  75.  
  76. # iterate through each version in all links
  77. for ver in versionTags:
  78. if ver in chapLink:
  79. # download
  80. filePath = os.path.join(folder, manga_name, ver, chapName)
  81. await get_one_chapter(chapLink, filePath)
  82. print('@@@@@ DONE CHAPTER {1} OF VERSION {0}'.format(ver, chapName))
  83.  
  84.  
  85. # main operation
  86. script, seed_link, manga_name, folder = sys.argv
  87. loop.run_until_complete(get_all(seed_link, manga_name, folder))
  88. loop.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement