Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sqlite3
- import argparse
- import json
- import glob
# Shared connection to the on-disk index file and the one cursor that the
# update and query functions of this script read and write through.
conn = sqlite3.connect('galleries.db')
c = conn.cursor()
def update_db():
    """Create the ``galleries`` table if missing and load all dump files into it.

    Every ``galleries*.json`` file in the current working directory is parsed
    as a list of objects. Each object is mapped onto one table row and written
    with ``INSERT OR REPLACE``, so re-importing a file overwrites rows that
    share an ``id``. Changes are committed once per file, and the total row
    count is printed at the end.
    """
    c.execute('''
        CREATE TABLE IF NOT EXISTS galleries
        (
            id INTEGER PRIMARY KEY,
            type TEXT,
            artists TEXT,
            groups TEXT,
            parodys TEXT,
            tags TEXT,
            characters TEXT,
            language TEXT,
            galleryname TEXT,
            UNIQUE (id)
        )
    ''')
    # JSON keys listed in the same order as the table columns above.
    db_keys = ['id', 'type', 'a', 'g', 'p', 't', 'c', 'l', 'n']
    files = glob.glob('galleries*.json')
    for i, filename in enumerate(files):
        # Only the parse needs the file handle; release it before writing.
        with open(filename, 'r', encoding='utf8') as f:
            data = json.load(f)
        print('({}/{}) reading file {} with {} entries'.format(i + 1, len(files), filename, len(data)))
        rows = []
        for entry in data:
            # Missing keys become '', list values are flattened to 'a|b|c'.
            values = [entry.get(key, '') for key in db_keys]
            rows.append(['|'.join(v) if isinstance(v, list) else v for v in values])
        # One batched statement per file instead of one execute() per row.
        c.executemany("INSERT OR REPLACE INTO galleries VALUES (?,?,?,?,?,?,?,?,?)", rows)
        conn.commit()
    c.execute('SELECT Count(*) FROM galleries')
    num, = c.fetchone()
    print('DB has {} entries'.format(num))
def find(q):
    """Select all gallery columns for rows matching the WHERE clause ``q``.

    ``q`` is pasted into the statement verbatim, so it must already be valid
    SQL. The complete statement is echoed to stdout before it runs.

    Returns the module-level cursor ``c`` after executing the statement on it.
    """
    statement = (
        'SELECT id, type, artists, groups, parodys, tags, characters, '
        'language, galleryname FROM galleries WHERE {0}'
    ).format(q)
    print(statement)
    c.execute(statement)
    return c
def print_info(title, arr):
    """Print ``title: a, b, c`` for a ``|``-separated string ``arr``.

    Nothing is printed when ``arr`` is empty or ``None``.
    """
    if not arr:
        return
    # Swapping the separator gives the same text as split('|') + ', '.join().
    print('{}:'.format(title), arr.replace('|', ', '))
def print_gallery(gallery_data):
    """Print one gallery row: a type/URL headline followed by its labelled fields.

    ``gallery_data`` must unpack into exactly nine values in the column order
    id, type, artists, groups, parodys, tags, characters, language,
    galleryname. Empty fields are omitted by ``print_info``.
    """
    (gallery_id, kind, artists, groups, parodys,
     tags, characters, language, galleryname) = gallery_data
    print('{} https://hitomi.la/galleries/{}.html'.format(kind, gallery_id))
    labelled_fields = (
        ('Title', galleryname),
        ('Artists', artists),
        ('Groups', groups),
        ('Parodys', parodys),
        ('Tags', tags),
        ('Characters', characters),
        ('Language', language),
    )
    for label, value in labelled_fields:
        print_info(label, value)
def fetch(q):
    """Run the WHERE clause ``q`` and page through the matches on stdout.

    Results are printed ten at a time, each followed by a separator line.
    When another page is available the user is prompted; entering ``q``
    stops the listing early. An ``END`` banner is always printed last.
    """
    # Use the cursor that find() hands back rather than reaching for a global.
    cursor = find(q)
    separator = '=' * 80
    print(separator)
    page_size = 10
    page = cursor.fetchmany(page_size)
    while page:
        for result in page:
            print_gallery(result)
            print(separator)
        page = cursor.fetchmany(page_size)
        # Compare by value: `is` tests object identity, which is not
        # guaranteed for a freshly read string.
        if page and input('Press Enter for more, type "q" to exit\n') == 'q':
            break
    print('=' * 35, 'END', '=' * 40)
def format_query(field, vals=None):
    """Build SQL ``LIKE`` / ``NOT LIKE`` conditions for one column.

    Args:
        field: Column name to test.
        vals: Search terms, or ``None``/empty for no conditions. A term
            starting with ``!`` excludes rows containing the rest of the
            term; any other term requires rows to contain it.

    Returns:
        A list of SQL condition strings, one per non-empty term. Terms that
        are empty (or just ``!``) are skipped because they constrain nothing.
    """
    conditions = []
    for v in vals or ():
        negate = v.startswith('!')
        term = v[1:] if negate else v
        if not term:
            continue
        operator = 'NOT LIKE' if negate else 'LIKE'
        # Standard SQL string literal: single quotes, with embedded single
        # quotes doubled, so quotes inside a term cannot break the statement.
        escaped = term.replace("'", "''")
        conditions.append("{} {} '%{}%'".format(field, operator, escaped))
    return conditions
def search_galleries(args):
    """Translate parsed command-line options into a query and display the matches.

    Args:
        args: Namespace with the attributes ``id``, ``type``, ``language``,
            ``name``, ``artists``, ``groups``, ``parodys``, ``tags`` and
            ``characters``.

    Returns:
        ``True`` when a search was executed, ``False`` when no criterion was
        supplied (so the caller can show usage help instead).
    """
    def quoted(value):
        # SQL string literal: single quotes, embedded single quotes doubled.
        return "'{}'".format(str(value).replace("'", "''"))

    if args.id is not None:
        # An ID lookup ignores every other option. Report success so the
        # caller does not print the help text after the results.
        fetch('id={}'.format(args.id))
        return True

    q = []
    if args.type != 'all':
        q.append('type={}'.format(quoted(args.type)))
    if args.language != 'all':
        q.append('language={}'.format(quoted(args.language)))
    if args.name:
        q.append('galleryname LIKE {}'.format(quoted('%{}%'.format(args.name))))
    for field in ('artists', 'groups', 'parodys', 'tags', 'characters'):
        q.extend(format_query(field, getattr(args, field)))

    if not q:
        return False
    fetch(' AND '.join(q))
    return True
if __name__ == '__main__':
    # Command-line entry point: build the parser, dispatch on the chosen
    # action, and always release the database connection afterwards.
    parser = argparse.ArgumentParser(description='''Search for galleries in local copy of hitomi.la database.
To use this script, first you have to manually download galleries*.json files from hitomi.la at urls:
https://ltn.hitomi.la/galleries0.json,
https://ltn.hitomi.la/galleries1.json,
https://ltn.hitomi.la/galleries2.json,
...,
https://ltn.hitomi.la/galleries19.json, etc.
(there are currently 20 files, but there can be more in the future)
Save them in the same folder as this script.
Then, call "python hitomy.py update" to create SQLite database. After that, you can use "python hitomy.py search ..." for quick and simple search.
To update galleries with new entries, re-download last galleriesXX.json (and add new JSONs if there are any) file and run "python hitomy.py update" again.
You can delete all JSON files after update is done.
''',
                                     epilog='Use "!" in search queries for exclusion, e.g.: "search -t TAG1 !TAG2"')
    parser.add_argument('action', type=str, choices=['search', 'update'],
                        help='update reads all galleries*.json files in directory and populates database, search will return results from query to database (see other options)')
    parser.add_argument('-id', type=int,
                        help='search by ID (other options are ignored)')
    parser.add_argument('--tags', '-t', metavar='T', type=str, nargs='+',
                        help='search tags')
    parser.add_argument('--artists', '-a', metavar='A', type=str, nargs='+',
                        help='search for artists')
    parser.add_argument('--groups', '-g', metavar='G', type=str, nargs='+',
                        help='search gallery groups')
    parser.add_argument('--parodys', '-p', metavar='P', type=str, nargs='+',
                        help='search parodys')
    parser.add_argument('--characters', '-c', metavar='C', type=str, nargs='+',
                        help='search for characters')
    parser.add_argument('--language', '-l', metavar='L', default='all', choices=['all', 'czech', 'italian', 'persian', 'polish', 'thai', 'esperanto', 'tagalog', 'ukrainian', 'danish', 'french', 'arabic', 'portuguese', 'romanian', 'english', 'korean', 'russian', 'vietnamese', 'slovak', 'hungarian', 'turkish', 'dutch', 'latin', 'spanish', 'greek', 'german', 'indonesian', 'norwegian', 'japanese', 'hebrew', 'catalan', 'finnish', 'albanian', 'estonian', 'chinese', 'swedish'],
                        help='search media language')
    parser.add_argument('--name', '-n', metavar='N', type=str,
                        help='search for title')
    parser.add_argument('--type', '-T', default='all', choices=['all', 'doujinshi', 'manga', 'artistcg', 'gamecg', 'anime'],
                        help='search media type')
    try:
        args = parser.parse_args()
        if args.action == 'search':
            # A falsy result means no usable search criterion was given.
            if not search_galleries(args):
                parser.print_help()
        elif args.action == 'update':
            # `choices` restricts action to 'search' or 'update', so no
            # further fallback branch is needed.
            update_db()
    finally:
        # Close the connection even when a command raises or argparse exits.
        conn.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement