Advertisement
Guest User

hitomi.py

a guest
Jan 24th, 2019
228
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import sqlite3
  2. import argparse
  3. import json
  4. import glob
  5.  
  6.  
# Module-wide SQLite connection, opened as soon as this module is loaded.
# 'galleries.db' is a relative path, so it resolves against the current working directory.
conn = sqlite3.connect('galleries.db')
# Single shared cursor; update_db(), find() and fetch() all operate on it.
c = conn.cursor()
  9.  
  10.  
  11. def update_db():
  12.     c.execute('''
  13.        CREATE TABLE IF NOT EXISTS galleries
  14.        (
  15.            id INTEGER PRIMARY KEY,
  16.            type TEXT,
  17.            artists TEXT,
  18.            groups TEXT,
  19.            parodys TEXT,
  20.            tags TEXT,
  21.            characters TEXT,
  22.            language TEXT,
  23.            galleryname TEXT,
  24.            UNIQUE (id)
  25.        )
  26.        ''')
  27.     db_keys = ['id', 'type', 'a', 'g', 'p', 't', 'c', 'l', 'n']
  28.     files = glob.glob('galleries*.json')
  29.     for i, filename in enumerate(files):
  30.         with open(filename, 'r', encoding='utf8') as f:
  31.             data = json.load(f)
  32.             print('({}/{}) reading file {} with {} entries'.format(i + 1, len(files), filename, len(data)))
  33.             for entry in data:
  34.                 current_obj = [entry[key] if key in entry else '' for key in db_keys]
  35.                 current_obj = ['|'.join(elem) if type(elem) is list else elem for elem in current_obj]
  36.                 c.execute("INSERT OR REPLACE INTO galleries VALUES (?,?,?,?,?,?,?,?,?)", current_obj)
  37.     conn.commit()
  38.     c.execute('SELECT Count(*) FROM galleries')
  39.     num, = c.fetchone()
  40.     print('DB has {} entries'.format(num))
  41.  
  42.  
  43. def find(q):
  44.     q = 'SELECT id, type, artists, groups, parodys, tags, characters, language, galleryname FROM galleries WHERE {0}'.format(q)
  45.     print(q)
  46.     c.execute(q)
  47.     return c
  48.  
  49.  
  50. def print_info(title, arr):
  51.     if arr:
  52.         arr = arr.split('|')
  53.         print('{}:'.format(title), ', '.join(arr))
  54.  
  55.  
  56. def print_gallery(gallery_data):
  57.     id, type, artists, groups, parodys, tags, characters, language, galleryname = gallery_data
  58.     print('{} https://hitomi.la/galleries/{}.html'.format(type, id))
  59.     print_info('Title', galleryname)
  60.     print_info('Artists', artists)
  61.     print_info('Groups', groups)
  62.     print_info('Parodys', parodys)
  63.     print_info('Tags', tags)
  64.     print_info('Characters', characters)
  65.     print_info('Language', language)
  66.  
  67.  
  68. def fetch(q):
  69.     с = find(q)
  70.     print('=' * 80)
  71.     n = 10
  72.     data = c.fetchmany(n)
  73.     while len(data):
  74.         for result in data:
  75.             print_gallery(result)
  76.             print('=' * 80)
  77.         data = c.fetchmany(n)
  78.         if len(data) and input('Press Enter for more, type "q" to exit\n') is 'q':
  79.             break
  80.     print('=' * 35, 'END', '=' * 40)
  81.  
  82.  
  83. def format_query(field, vals=[]):
  84.     if vals and len(vals):
  85.         return ['{} NOT LIKE "%{}%"'.format(field, v[1:]) if v[0] == '!' else '{} LIKE "%{}%"'.format(field, v) for v in vals]
  86.     return []
  87.  
  88.  
  89. def search_galleries(args):
  90.     if args.id:
  91.         fetch('id={}'.format(args.id))
  92.     else:
  93.         q = []
  94.  
  95.         if args.type != 'all':
  96.             q.append('type="{}"'.format(args.type))
  97.  
  98.         if args.language != 'all':
  99.             q.append('language="{}"'.format(args.language))
  100.  
  101.         if args.name:
  102.             q.append('galleryname LIKE "%{}%"'.format(args.name))
  103.  
  104.         q.extend(format_query('artists', args.artists))
  105.         q.extend(format_query('groups', args.groups))
  106.         q.extend(format_query('parodys', args.parodys))
  107.         q.extend(format_query('tags', args.tags))
  108.         q.extend(format_query('characters', args.characters))
  109.  
  110.         if len(q):
  111.             fetch(' AND '.join(q))
  112.             return True
  113.         else:
  114.             return False
  115.  
  116.  
  117. if __name__ == '__main__':
  118.     parser = argparse.ArgumentParser(description='''Search for galleries in local copy of hitomi.la database.
  119. To use this script, first you have to manually download galleries*.json files from hitomi.la at urls:
  120. https://ltn.hitomi.la/galleries0.json,
  121. https://ltn.hitomi.la/galleries1.json,
  122. https://ltn.hitomi.la/galleries2.json,
  123. ...,
  124. https://ltn.hitomi.la/galleries19.json, etc.
  125. (there are currently 20 files, but there can be more in the future)
  126. Save then in the same folder as this script.
  127. Then, call "python hitomy.py update" to create SQLite database. After that, you can use "python hitomy.py search ..." for quick and simple search.
  128. To update galleries with new entries, re-download last galleriesXX.json (and add new JSONs if there are any) file and run "python hitomy.py update" again.
  129. You can delete all JSON files after update is done.
  130. ''',
  131.                                      epilog='Use "!" in search queries for exclusion, i.e.: "search -t shota !yaoi"')
  132.  
  133.     parser.add_argument('action', type=str, choices=['search', 'update'],
  134.                         help='update reads all galleries*.json files in directory and populates database, search will return results from query to database (see other options)')
  135.  
  136.     parser.add_argument('-id', type=int,
  137.                         help='search by ID (other options are ignored)')
  138.  
  139.     parser.add_argument('--tags', '-t', metavar='T', type=str, nargs='+',
  140.                         help='search tags')
  141.  
  142.     parser.add_argument('--artists', '-a', metavar='A', type=str, nargs='+',
  143.                         help='search for artists')
  144.  
  145.     parser.add_argument('--groups', '-g', metavar='G', type=str, nargs='+',
  146.                         help='search gallery groups')
  147.  
  148.     parser.add_argument('--parodys', '-p', metavar='P', type=str, nargs='+',
  149.                         help='search parodys')
  150.  
  151.     parser.add_argument('--characters', '-c', metavar='C', type=str, nargs='+',
  152.                         help='search for characters')
  153.  
  154.     parser.add_argument('--language', '-l', metavar='L', default='all', choices=['all', 'czech', 'italian', 'persian', 'polish', 'thai', 'esperanto', 'tagalog', 'ukrainian', 'danish', 'french', 'arabic', 'portuguese', 'romanian', 'english', 'korean', 'russian', 'vietnamese', 'slovak', 'hungarian', 'turkish', 'dutch', 'latin', 'spanish', 'greek', 'german', 'indonesian', 'norwegian', 'japanese', 'hebrew', 'catalan', 'finnish', 'albanian', 'estonian', 'chinese', 'swedish'],
  155.                         help='search media language')
  156.  
  157.     parser.add_argument('--name', '-n', metavar='N', type=str,
  158.                         help='search for title')
  159.  
  160.     parser.add_argument('--type', '-T', default='all', choices=['all', 'doujinshi', 'manga', 'artistcg', 'gamecg', 'anime'],
  161.                         help='search media type')
  162.  
  163.     args = parser.parse_args()
  164.  
  165.     if args.action == 'search':
  166.         r = search_galleries(args)
  167.         if not r:
  168.             parser.print_help()
  169.     elif args.action == 'update':
  170.         update_db()
  171.     else:
  172.         parser.print_help()
  173.  
  174.     conn.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement