# Element14 EAGLE CAD libraries downloader tool

#!/usr/bin/env python3

import urllib.request
import urllib.parse
import urllib.error
import html.parser
import http.cookiejar
import re
import time

## CONFIG START
# Account credentials submitted in the login form; fill these in before running.
USERNAME = ""
PASSWORD = ""
## CONFIG END

# Scheme and host that every site-relative path below is appended to.
SITE_ROOT = "https://www.element14.com"
# Path the login form is POSTed to.
SITE_LOGIN = "/community/cs_login"
# Path whose presence in the login response body is taken as a successful login.
SITE_INBOX = "/community/inbox"
# Page that is scanned for links to the individual library pages.
SITE_LIBS = "/community/community/cadsoft_eagle/eagle_cad_libraries"
# href prefix that identifies a link to a library page.
SITE_LIB_PREFIX = "/community/docs/DOC-"
# href prefix that identifies a downloadable file; the library number is
# appended to it when matching links on a library page.
SITE_DOWNLOAD_PREFIX = "/community/servlet/JiveServlet/download/"

class LinkCollector(html.parser.HTMLParser):
    """Collect ``<a>`` elements whose ``href`` matches a regular expression.

    After HTML has been fed to the parser, ``links`` holds one
    ``(url, text)`` tuple per matching anchor, in document order. ``url`` is
    ``SITE_ROOT`` followed by the raw ``href`` value; ``text`` is the
    concatenation of every text chunk found inside the anchor, or ``None``
    when the anchor contained no text.
    """

    # Parser states: outside a matching anchor / inside a matching anchor.
    STATE_INIT = 1
    STATE_LINK = 2

    def __init__(self, regex):
        """Create a collector.

        Args:
            regex: Compiled pattern; an anchor is collected when
                ``regex.search(href)`` finds a match.
        """
        super().__init__(convert_charrefs=True)
        self.state = self.STATE_INIT
        self.regex = regex
        self.href = None
        self.name = None
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Start collecting when a matching ``<a>`` tag opens."""
        if tag != 'a':
            return
        # A valueless attribute (``<a href>``) is reported with value None,
        # which must not reach regex.search(); treat it like a missing href.
        href = dict(attrs).get('href') or ''
        if self.regex.search(href):
            self.state = self.STATE_LINK
            self.href = SITE_ROOT + href
            self.name = None

    def handle_data(self, data):
        """Accumulate the text content of the anchor being collected."""
        if self.state != self.STATE_LINK:
            return
        # Text may arrive in several chunks (e.g. around nested tags), so
        # append instead of overwriting the earlier pieces.
        self.name = data if self.name is None else self.name + data

    def handle_endtag(self, tag):
        """Record the collected link when its ``</a>`` tag closes."""
        if tag == 'a' and self.state == self.STATE_LINK:
            self.links.append((self.href, self.name))
            self.state = self.STATE_INIT

#cj = None

def init():
    """Build a URL opener that keeps cookies across requests.

    The cookie jar is also published as the module-level ``cj`` so that it
    can be inspected for debugging.

    Returns:
        An opener with an ``HTTPCookieProcessor`` bound to a fresh
        ``CookieJar``.
    """
    global cj  # exposed only so main() can print it while debugging
    jar = http.cookiejar.CookieJar()
    cj = jar
    cookie_handler = urllib.request.HTTPCookieProcessor(jar)
    return urllib.request.build_opener(cookie_handler)

def login(opener):
    """POST the configured credentials to the site's login endpoint.

    Args:
        opener: Opener used to send the request.

    Returns:
        The response object returned by ``opener.open`` for the login POST.
    """
    form_fields = {'username': USERNAME, 'password': PASSWORD, 'autoLogin': 'true'}
    encoded_form = urllib.parse.urlencode(form_fields).encode('utf-8')
    form_headers = {'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8'}
    login_request = urllib.request.Request(
        SITE_ROOT + SITE_LOGIN,
        data=encoded_form,
        headers=form_headers,
    )
    return opener.open(login_request)

def list_libs(opener):
    """Fetch the library index page and return the library links on it.

    Args:
        opener: Opener used to fetch the page.

    Returns:
        A list of ``(url, text)`` tuples, one for every anchor whose href
        starts with ``SITE_LIB_PREFIX``.
    """
    # The prefix is literal text, so escape it before building the pattern.
    parser = LinkCollector(re.compile('^' + re.escape(SITE_LIB_PREFIX)))
    # Close the HTTP response as soon as the body has been read.
    with opener.open(SITE_ROOT + SITE_LIBS) as response:
        charset = response.info().get_param('charset', 'utf-8')
        page = response.read().decode(charset)
    parser.feed(page)
    return parser.links

def download_lib(opener, liburl):
    """Download every file linked from one library page.

    The page at ``liburl`` is scanned for anchors whose href starts with
    ``SITE_DOWNLOAD_PREFIX`` followed by the library number (the text after
    the last ``-`` in ``liburl``). Each distinct URL is fetched once and
    written to the current directory, named after the last path segment of
    the URL.

    Args:
        opener: Opener used for all requests.
        liburl: Absolute URL of the library page.
    """
    libnumber = liburl.split('-')[-1]
    # Close the page response as soon as its body has been read.
    with opener.open(liburl) as response:
        charset = response.info().get_param('charset', 'utf-8')
        page = response.read().decode(charset)
    # libnumber comes from a URL, so escape it: characters such as '?' or '.'
    # must match literally rather than act as regex operators.
    pattern = re.compile('^' + re.escape(SITE_DOWNLOAD_PREFIX + libnumber))
    parser = LinkCollector(pattern)
    parser.feed(page)
    processed = set()  # URLs already fetched; the page may repeat a link
    for url, _name in parser.links:
        if url in processed:
            continue
        print(' -> Downloading', url)
        filename = url.split('/')[-1]
        with opener.open(url) as resp, open(filename, 'wb') as file:
            file.write(resp.read())
        processed.add(url)

def main():
    """Log in, then download the files of every listed library.

    Prints the login response code and the cookie jar, reports whether the
    login succeeded (the response body must contain ``SITE_INBOX``), and
    stops early when it did not.
    """
    global cj
    session = init()
    login_response = login(session)
    print(login_response.code)
    print(cj)
    raw_body = login_response.read()
    encoding = login_response.info().get_param('charset', 'utf-8')
    body = raw_body.decode(encoding)
    if SITE_INBOX not in body:
        print('Login failed')
        return
    print('Login succeeded')
    libraries = list_libs(session)
    for lib_url, lib_name in libraries:
        print('Processing library', lib_name)
        download_lib(session, lib_url)

# Run the downloader only when this file is executed as a script.
if __name__ == '__main__':
    main()