View difference between Paste ID: 5C9A6GFc and sSQMh414
SHOW: | | - or go back to the newest paste.
1
import sqlite3
2
import argparse
3
import json
4
import glob
5
6
7
# Module-wide SQLite handles shared by every function below.
# The database path is relative, so it resolves against the current
# working directory; the connection is opened as soon as the module loads.
conn = sqlite3.connect(database='galleries.db')
c = conn.cursor()
9
10
11
def update_db():
    """Create the ``galleries`` table if needed and load every JSON dump into it.

    Reads each file matching ``galleries*.json`` in the current directory,
    inserts (or replaces, keyed on ``id``) one row per entry, commits once
    after all files are processed and prints the resulting row count.
    """
    c.execute('''
        CREATE TABLE IF NOT EXISTS galleries
        (
            id INTEGER PRIMARY KEY,
            type TEXT,
            artists TEXT,
            groups TEXT,
            parodys TEXT,
            tags TEXT,
            characters TEXT,
            language TEXT,
            galleryname TEXT,
            UNIQUE (id)
        )
        ''')

    # JSON keys in the same positional order as the table columns above.
    json_keys = ('id', 'type', 'a', 'g', 'p', 't', 'c', 'l', 'n')
    insert_sql = "INSERT OR REPLACE INTO galleries VALUES (?,?,?,?,?,?,?,?,?)"

    paths = glob.glob('galleries*.json')
    total = len(paths)
    for position, path in enumerate(paths, start=1):
        with open(path, 'r', encoding='utf8') as handle:
            entries = json.load(handle)
            print('({}/{}) reading file {} with {} entries'.format(position, total, path, len(entries)))
            for entry in entries:
                row = []
                for key in json_keys:
                    # Missing keys are stored as an empty string.
                    value = entry[key] if key in entry else ''
                    # List values are flattened into one '|'-separated string.
                    if type(value) is list:
                        value = '|'.join(value)
                    row.append(value)
                c.execute(insert_sql, row)

    conn.commit()

    c.execute('SELECT Count(*) FROM galleries')
    (count,) = c.fetchone()
    print('DB has {} entries'.format(count))
41
42
43
def find(q):
    """Run a SELECT over ``galleries`` restricted by the WHERE clause *q*.

    The full statement is echoed to stdout before it is executed.

    NOTE: *q* is spliced into the SQL text verbatim, so callers are
    responsible for passing a well-formed, trusted condition.

    Returns the shared module-level cursor, positioned on the result set.
    """
    columns = (
        'id', 'type', 'artists', 'groups', 'parodys',
        'tags', 'characters', 'language', 'galleryname',
    )
    q = 'SELECT {} FROM galleries WHERE {}'.format(', '.join(columns), q)
    print(q)
    c.execute(q)
    return c
48
49
50
def print_info(title, arr):
    """Print ``title: a, b, c`` for a '|'-separated string *arr*.

    Nothing is printed when *arr* is empty or None.
    """
    if not arr:
        return
    label = '{}:'.format(title)
    items = arr.split('|')
    print(label, ', '.join(items))
54
55
56
def print_gallery(gallery_data):
    """Print one gallery row: a type/URL header followed by its fields.

    *gallery_data* must unpack into exactly nine values, in the order
    id, type, artists, groups, parodys, tags, characters, language,
    galleryname. Empty fields are skipped by ``print_info``.
    """
    (gallery_id, kind, artists, groups, parodys,
     tags, characters, language, galleryname) = gallery_data

    print('{} https://hitomi.la/galleries/{}.html'.format(kind, gallery_id))

    sections = (
        ('Title', galleryname),
        ('Artists', artists),
        ('Groups', groups),
        ('Parodys', parodys),
        ('Tags', tags),
        ('Characters', characters),
        ('Language', language),
    )
    for heading, packed in sections:
        print_info(heading, packed)
66
67
68
def fetch(q):
    """Run the WHERE clause *q* and page through the results on stdout.

    Results are printed in batches of ten, each gallery followed by a
    separator line. When another batch is available the user is prompted;
    entering ``q`` stops early. A closing ``END`` banner is always printed.
    """
    # Use the cursor that find() returns instead of reaching for the global;
    # the original bound it to a stray non-ASCII character (not a valid
    # identifier) and then ignored it.
    cursor = find(q)
    separator = '=' * 80
    print(separator)

    batch_size = 10
    data = cursor.fetchmany(batch_size)
    while data:
        for result in data:
            print_gallery(result)
            print(separator)
        data = cursor.fetchmany(batch_size)
        # Compare by value: `is 'q'` tested object identity, which only
        # works by accident of string interning.
        if data and input('Press Enter for more, type "q" to exit\n') == 'q':
            break
    print('=' * 35, 'END', '=' * 40)
81
82
83
def format_query(field, vals=None):
    """Build SQL ``LIKE`` / ``NOT LIKE`` conditions for *field*.

    Args:
        field: Column name to match against.
        vals: Iterable of search terms, or None. A term starting with ``!``
            is an exclusion and produces ``NOT LIKE`` for the rest of the term.

    Returns:
        A list with one condition string per term, or an empty list when
        *vals* is None or empty.
    """
    # `None` replaces the original mutable `[]` default; both mean "no terms".
    if not vals:
        return []

    conditions = []
    for v in vals:
        # startswith() is safe on an empty term, where v[0] raised IndexError.
        negated = v.startswith('!')
        term = v[1:] if negated else v
        # Double any embedded quote so the term cannot end the SQL literal.
        term = term.replace('"', '""')
        template = '{} NOT LIKE "%{}%"' if negated else '{} LIKE "%{}%"'
        conditions.append(template.format(field, term))
    return conditions
87
88
89
def search_galleries(args):
    """Translate parsed command-line options into a query and show results.

    Reads the attributes ``id``, ``type``, ``language``, ``name``,
    ``artists``, ``groups``, ``parodys``, ``tags`` and ``characters``
    from *args*. When ``id`` is given, all other options are ignored.

    Returns:
        True when a search was executed, False when no search criteria
        were supplied (the caller uses this to decide whether to show help).
    """
    # `is not None` so that an explicit id of 0 is still honoured.
    if args.id is not None:
        fetch('id={}'.format(args.id))
        # The original fell through returning None here, which made the
        # caller print the help text after a successful id search.
        return True

    q = []

    if args.type != 'all':
        q.append('type="{}"'.format(args.type))

    if args.language != 'all':
        q.append('language="{}"'.format(args.language))

    if args.name:
        # Double any embedded quote so the title cannot end the SQL literal.
        escaped_name = args.name.replace('"', '""')
        q.append('galleryname LIKE "%{}%"'.format(escaped_name))

    for field in ('artists', 'groups', 'parodys', 'tags', 'characters'):
        q.extend(format_query(field, getattr(args, field)))

    if not q:
        return False

    fetch(' AND '.join(q))
    return True
115
116
117
if __name__ == '__main__':
    # RawDescriptionHelpFormatter keeps the line breaks of the description
    # (the URL list) instead of reflowing it into a single paragraph.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''Search for galleries in local copy of hitomi.la database.
To use this script, first you have to manually download galleries*.json files from hitomi.la at urls:
https://ltn.hitomi.la/galleries0.json,
https://ltn.hitomi.la/galleries1.json,
https://ltn.hitomi.la/galleries2.json,
...,
https://ltn.hitomi.la/galleries19.json, etc.
(there are currently 20 files, but there can be more in the future)
Save them in the same folder as this script.
Then, call "python hitomy.py update" to create SQLite database. After that, you can use "python hitomy.py search ..." for quick and simple search.
To update galleries with new entries, re-download last galleriesXX.json (and add new JSONs if there are any) file and run "python hitomy.py update" again.
You can delete all JSON files after update is done.
''',
        epilog='Use "!" in search queries for exclusion, e.g.: "search -t tag1 !tag2"')

    parser.add_argument('action', type=str, choices=['search', 'update'],
                        help='update reads all galleries*.json files in directory and populates database, search will return results from query to database (see other options)')

    parser.add_argument('-id', type=int,
                        help='search by ID (other options are ignored)')

    parser.add_argument('--tags', '-t', metavar='T', type=str, nargs='+',
                        help='search tags')

    parser.add_argument('--artists', '-a', metavar='A', type=str, nargs='+',
                        help='search for artists')

    parser.add_argument('--groups', '-g', metavar='G', type=str, nargs='+',
                        help='search gallery groups')

    parser.add_argument('--parodys', '-p', metavar='P', type=str, nargs='+',
                        help='search parodys')

    parser.add_argument('--characters', '-c', metavar='C', type=str, nargs='+',
                        help='search for characters')

    parser.add_argument('--language', '-l', metavar='L', default='all', choices=['all', 'czech', 'italian', 'persian', 'polish', 'thai', 'esperanto', 'tagalog', 'ukrainian', 'danish', 'french', 'arabic', 'portuguese', 'romanian', 'english', 'korean', 'russian', 'vietnamese', 'slovak', 'hungarian', 'turkish', 'dutch', 'latin', 'spanish', 'greek', 'german', 'indonesian', 'norwegian', 'japanese', 'hebrew', 'catalan', 'finnish', 'albanian', 'estonian', 'chinese', 'swedish'],
                        help='search media language')

    parser.add_argument('--name', '-n', metavar='N', type=str,
                        help='search for title')

    parser.add_argument('--type', '-T', default='all', choices=['all', 'doujinshi', 'manga', 'artistcg', 'gamecg', 'anime'],
                        help='search media type')

    # try/finally guarantees the connection is closed even when argument
    # parsing exits or a command raises.
    try:
        args = parser.parse_args()

        if args.action == 'search':
            # A falsy result means no search was run; show usage.
            if not search_galleries(args):
                parser.print_help()
        elif args.action == 'update':
            update_db()
        # No further branch: `choices` above restricts action to these two.
    finally:
        conn.close()