# Light novel picture scraper.

from urllib.request import urlopen, Request
from urllib.error import HTTPError, URLError
from bs4 import BeautifulSoup
from os import mkdir


# Article id of the novel to scrape, kept as a string because it is
# concatenated directly into the reader URL and the picture URL below.
AID = "1508"
# Echo the configured id so the run log shows which article is being scraped.
print("AID =", AID)


def buildpath(path):
    """Create every directory component of *path* beneath the current directory.

    Only the part of *path* before its last '/' is treated as directories;
    the final component (normally a file name) is not created. A path that
    contains no '/' creates nothing. Directories that already exist are left
    untouched, so the function is safe to call repeatedly.

    Args:
        path: Slash-separated relative path, e.g. ``"a/b/c.jpg"``.

    Raises:
        OSError: If a directory cannot be created for a reason other than
            it already existing (for example, missing permissions).
    """
    # Function-scope import: the module itself only imports ``mkdir``.
    from os import makedirs

    directory, separator, _ = path.rpartition('/')
    if not separator:
        # No directory part at all; nothing to create.
        return

    # The earlier ``try: mkdir(...) finally: continue`` loop discarded every
    # exception, not just "already exists". ``exist_ok=True`` tolerates only
    # the expected case and lets real failures surface.
    makedirs('./' + directory, exist_ok=True)


# Index page of the article; getCID() scans it for illustration links.
articleUrl = f'https://www.wenku8.net/modules/article/reader.php?aid={AID}'


def cmp(e):
    """Return *e* converted to ``int``, for use as a numeric sort key."""
    numeric_value = int(e)
    return numeric_value


def getCID():
    """Collect the ids behind every "插图" link on the article index page.

    Fetches ``articleUrl``, finds each ``<a>`` whose text is exactly "插图",
    and takes whatever follows the last '=' in its ``href`` as the id.

    Returns:
        A list of id strings sorted by their integer value (ascending).

    Raises:
        urllib.error.URLError: If the index page cannot be fetched.
        ValueError: If an extracted id is not a valid integer string.
    """
    req = Request(articleUrl, headers={'User-Agent': 'Mozilla/5.0'})
    # Parse inside the ``with`` so the HTTP response is always closed.
    with urlopen(req) as response:
        bs = BeautifulSoup(response, 'html.parser')

    CID = []
    for link in bs.find_all('a'):
        if link.text != "插图":
            continue
        # An anchor without an href has no id to extract; skip it rather
        # than raising KeyError.
        href = link.get('href')
        if not href:
            continue
        CID.append(href[href.rfind('=') + 1:])

    CID.sort(key=cmp)
    print("CID =", CID)
    return CID


# Common URL prefix for this article's pictures; getImage() appends
# "<cid>/<pid>.jpg" to it.
pictureUrlBase = f"http://picture.wenku8.com/pictures/1/{AID}/"


def getImage(cid, pid):
    """Download one picture and save it under the current directory.

    The picture is requested from ``pictureUrlBase + cid + '/' + pid + '.jpg'``
    and written to the URL's path made relative to the working directory
    (``pictures/1/<AID>/<cid>/<pid>.jpg`` with the current base URL).
    Missing directories are created via ``buildpath``.

    Args:
        cid: Id string as returned by ``getCID``.
        pid: Picture number as a string.

    Returns:
        True if the picture was fetched and written to disk, False if the
        request failed with an ``HTTPError`` or ``URLError``.
    """
    # Function-scope import: the module does not import urllib.parse.
    from urllib.parse import urlsplit

    url = pictureUrlBase + cid + '/' + pid + '.jpg'
    try:
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        response = urlopen(req)
    except URLError:
        # HTTPError is a subclass of URLError, so this covers both.
        return False

    # ``with`` guarantees the response and the output file are closed even
    # if reading or writing fails part-way through.
    with response:
        print(url, " ... success")
        # Derive the local path from the URL instead of slicing at a fixed
        # offset, which silently breaks if the host part ever changes length.
        local_path = urlsplit(url).path.lstrip('/')
        buildpath(local_path)
        with open(local_path, 'wb') as out:
            out.write(response.read())
    return True


# Picture numbers are probed in steps of PROBE_STRIDE until one exists.
PROBE_STRIDE = 10
# Upper bound on probes per id. getImage() returns False for network errors
# as well as missing pictures, so without a bound the probe loop would spin
# forever when the site is unreachable or an id has no pictures.
# NOTE(review): the bound is a guess; raise it if real gaps between
# consecutive ids' picture numbers can exceed PROBE_STRIDE * MAX_PROBES.
MAX_PROBES = 1000

# ``cur`` is deliberately carried over between ids: the search for the next
# id's pictures starts where the previous id's run ended.
cur = 1
for cid in getCID():
    print("now scraping:", cid)

    # Phase 1: jump ahead in strides until some picture of this id is found.
    probe = cur
    for _ in range(MAX_PROBES):
        if getImage(cid, str(probe)):
            break
        probe += PROBE_STRIDE
    else:
        # Nothing found; leave ``cur`` untouched so the next id starts its
        # search from the same position.
        print("no pictures found for:", cid)
        continue
    cur = probe

    # Phase 2: walk backwards to pick up pictures the stride jumped over.
    # Picture ``cur`` is already saved, so start one below it; never ask for
    # negative picture numbers.
    forward = cur - 1
    while forward >= 0 and getImage(cid, str(forward)):
        forward -= 1

    # Phase 3: walk forwards until the first missing picture.
    cur += 1
    while getImage(cid, str(cur)):
        cur += 1