# hitomi.py

import sqlite3
import argparse
import json
import glob


# Module-level SQLite connection and cursor used by update_db(), find() and
# fetch(). The connection is opened as soon as this module is loaded, against
# the relative path 'galleries.db' (i.e. resolved from the current working
# directory).
conn = sqlite3.connect('galleries.db')
c = conn.cursor()


def update_db():
    """Load every ``galleries*.json`` file into the ``galleries`` table.

    The table is created on first use. Each JSON file found in the current
    working directory is expected to hold a list of gallery objects; for every
    object the keys ``id, type, a, g, p, t, c, l, n`` are read (a missing key
    is stored as an empty string) and list values are flattened into a single
    ``'|'``-separated string. Rows are written with ``INSERT OR REPLACE`` so
    re-importing a file updates existing galleries instead of failing.

    All inserts are committed once at the end, after which the total number of
    rows in the table is printed.
    """
    c.execute('''
        CREATE TABLE IF NOT EXISTS galleries
        (
            id INTEGER PRIMARY KEY,
            type TEXT,
            artists TEXT,
            groups TEXT,
            parodys TEXT,
            tags TEXT,
            characters TEXT,
            language TEXT,
            galleryname TEXT,
            UNIQUE (id)
        )
        ''')
    # JSON keys, in the same order as the table columns above.
    db_keys = ('id', 'type', 'a', 'g', 'p', 't', 'c', 'l', 'n')

    def to_row(entry):
        """Turn one gallery object into a list of column values."""
        values = (entry.get(key, '') for key in db_keys)
        return ['|'.join(v) if isinstance(v, list) else v for v in values]

    files = glob.glob('galleries*.json')
    for i, filename in enumerate(files, start=1):
        with open(filename, 'r', encoding='utf8') as f:
            data = json.load(f)
        print('({}/{}) reading file {} with {} entries'.format(i, len(files), filename, len(data)))
        # One batched statement per file instead of one execute() per row.
        c.executemany(
            "INSERT OR REPLACE INTO galleries VALUES (?,?,?,?,?,?,?,?,?)",
            (to_row(entry) for entry in data),
        )
    conn.commit()
    c.execute('SELECT Count(*) FROM galleries')
    num, = c.fetchone()
    print('DB has {} entries'.format(num))


def find(q):
    """Execute a gallery SELECT restricted by the WHERE clause *q*.

    The full statement is echoed to stdout before it is executed on the
    module-level cursor, and that same cursor is returned so the caller can
    fetch the rows.

    NOTE: *q* is inserted into the SQL text verbatim, so callers are
    responsible for passing a well-formed, trusted WHERE clause.
    """
    columns = 'id, type, artists, groups, parodys, tags, characters, language, galleryname'
    statement = 'SELECT {} FROM galleries WHERE {}'.format(columns, q)
    print(statement)
    c.execute(statement)
    return c


def print_info(title, arr):
    """Print ``title: a, b, c`` for a ``'|'``-separated string *arr*.

    Nothing is printed when *arr* is empty or None.
    """
    if not arr:
        return
    label = '{}:'.format(title)
    values = ', '.join(arr.split('|'))
    print(label, values)


def print_gallery(gallery_data):
    """Print one gallery row in a human-readable form.

    *gallery_data* is a 9-item sequence in the column order selected by
    find(): id, type, artists, groups, parodys, tags, characters, language,
    galleryname. The first line shows the type and the gallery URL; every
    non-empty field follows on its own line.
    """
    # Local names avoid shadowing the builtins ``id`` and ``type``.
    (gallery_id, gallery_type, artists, groups, parodys,
     tags, characters, language, galleryname) = gallery_data
    print('{} https://hitomi.la/galleries/{}.html'.format(gallery_type, gallery_id))
    # The title is one string, not a '|'-joined list, so it is printed as-is;
    # sending it through print_info() would rewrite any '|' inside a title.
    if galleryname:
        print('Title:', galleryname)
    for label, packed in (
        ('Artists', artists),
        ('Groups', groups),
        ('Parodys', parodys),
        ('Tags', tags),
        ('Characters', characters),
        ('Language', language),
    ):
        print_info(label, packed)


def fetch(q):
    """Run a search and page the matching galleries to stdout.

    *q* is a SQL WHERE clause handed to find(). Rows are printed ten at a
    time with print_gallery(), separated by rules of ``=``. When another page
    is available the user is prompted; typing ``q`` stops the listing early.
    """
    cursor = find(q)
    rule = '=' * 80
    page_size = 10

    print(rule)
    page = cursor.fetchmany(page_size)
    while page:
        for row in page:
            print_gallery(row)
            print(rule)
        page = cursor.fetchmany(page_size)
        # Compare by value: whether two equal strings are the same object is
        # an interpreter implementation detail, so ``is`` must not be used.
        if page and input('Press Enter for more, type "q" to exit\n') == 'q':
            break
    print('=' * 35, 'END', '=' * 40)


def format_query(field, vals=None):
    """Build SQL ``LIKE`` conditions for *field*, one per search term.

    Each term in *vals* becomes ``field LIKE '%term%'``; a term prefixed with
    ``!`` becomes ``field NOT LIKE '%term%'`` (the ``!`` is dropped). Returns a
    list of condition strings, or an empty list when *vals* is None or empty.

    Terms are emitted as standard single-quoted SQL string literals with any
    embedded single quote doubled, so a quote in a search term cannot end the
    literal early. ``%`` and ``_`` keep their LIKE wildcard meaning.
    """
    if not vals:
        return []
    conditions = []
    for term in vals:
        # startswith() is safe for an empty term, unlike indexing term[0].
        negated = term.startswith('!')
        text = (term[1:] if negated else term).replace("'", "''")
        operator = 'NOT LIKE' if negated else 'LIKE'
        conditions.append("{} {} '%{}%'".format(field, operator, text))
    return conditions


def search_galleries(args):
    """Translate parsed command-line arguments into a search and run it.

    When ``args.id`` is given, the gallery with that ID is looked up and all
    other options are ignored. Otherwise the ``type``, ``language`` and
    ``name`` options plus the list-valued ``artists``, ``groups``,
    ``parodys``, ``tags`` and ``characters`` options (via format_query()) are
    combined with ``AND`` and passed to fetch().

    Returns True when a search was executed and False when no search criteria
    were supplied, so the caller can fall back to showing usage help.
    """
    def sql_text(value):
        """Escape *value* for use inside a single-quoted SQL string literal."""
        return str(value).replace("'", "''")

    # ``is not None`` rather than truthiness so that an ID of 0 still counts
    # as an ID search; int() keeps anything non-numeric out of the SQL text.
    if args.id is not None:
        fetch('id={}'.format(int(args.id)))
        return True

    conditions = []

    if args.type != 'all':
        conditions.append("type='{}'".format(sql_text(args.type)))

    if args.language != 'all':
        conditions.append("language='{}'".format(sql_text(args.language)))

    if args.name:
        conditions.append("galleryname LIKE '%{}%'".format(sql_text(args.name)))

    for field, values in (
        ('artists', args.artists),
        ('groups', args.groups),
        ('parodys', args.parodys),
        ('tags', args.tags),
        ('characters', args.characters),
    ):
        conditions.extend(format_query(field, values))

    if not conditions:
        return False

    fetch(' AND '.join(conditions))
    return True


if __name__ == '__main__':
    # RawDescriptionHelpFormatter keeps the line breaks of the description
    # (the default formatter would re-wrap the URL list into one paragraph).
    # "%(prog)s" is expanded by argparse to the name the script was run as,
    # so the usage instructions always show the real file name.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''Search for galleries in local copy of hitomi.la database.
To use this script, first you have to manually download galleries*.json files from hitomi.la at urls:
https://ltn.hitomi.la/galleries0.json,
https://ltn.hitomi.la/galleries1.json,
https://ltn.hitomi.la/galleries2.json,
...,
https://ltn.hitomi.la/galleries19.json, etc.
(there are currently 20 files, but there can be more in the future)
Save them in the same folder as this script.
Then, call "python %(prog)s update" to create SQLite database. After that, you can use "python %(prog)s search ..." for quick and simple search.
To update galleries with new entries, re-download last galleriesXX.json (and add new JSONs if there are any) file and run "python %(prog)s update" again.
You can delete all JSON files after update is done.
''',
        epilog='Use "!" in search queries for exclusion, e.g.: "search -t TAG1 !TAG2"')

    parser.add_argument('action', type=str, choices=['search', 'update'],
                        help='update reads all galleries*.json files in directory and populates database, search will return results from query to database (see other options)')

    parser.add_argument('-id', type=int,
                        help='search by ID (other options are ignored)')

    parser.add_argument('--tags', '-t', metavar='T', type=str, nargs='+',
                        help='search tags')

    parser.add_argument('--artists', '-a', metavar='A', type=str, nargs='+',
                        help='search for artists')

    parser.add_argument('--groups', '-g', metavar='G', type=str, nargs='+',
                        help='search gallery groups')

    parser.add_argument('--parodys', '-p', metavar='P', type=str, nargs='+',
                        help='search parodys')

    parser.add_argument('--characters', '-c', metavar='C', type=str, nargs='+',
                        help='search for characters')

    parser.add_argument('--language', '-l', metavar='L', default='all', choices=['all', 'czech', 'italian', 'persian', 'polish', 'thai', 'esperanto', 'tagalog', 'ukrainian', 'danish', 'french', 'arabic', 'portuguese', 'romanian', 'english', 'korean', 'russian', 'vietnamese', 'slovak', 'hungarian', 'turkish', 'dutch', 'latin', 'spanish', 'greek', 'german', 'indonesian', 'norwegian', 'japanese', 'hebrew', 'catalan', 'finnish', 'albanian', 'estonian', 'chinese', 'swedish'],
                        help='search media language')

    parser.add_argument('--name', '-n', metavar='N', type=str,
                        help='search for title')

    parser.add_argument('--type', '-T', default='all', choices=['all', 'doujinshi', 'manga', 'artistcg', 'gamecg', 'anime'],
                        help='search media type')

    args = parser.parse_args()

    # argparse has already restricted ``action`` to 'search' or 'update', so
    # no third branch is needed. The connection is closed even if the action
    # raises.
    try:
        if args.action == 'update':
            update_db()
        elif not search_galleries(args):
            # No search criteria were given: show usage instead.
            parser.print_help()
    finally:
        conn.close()