Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- import mysql.connector as db
- import re
dbhost = 'xxx'
dbuser = 'xxx'
dbpassword = 'xxx'
# Selects the obfuscated release names to process: an optional extra "u4a-"
# prefix (used on u4a posts) followed only by consonants, digits, dots and
# hyphens (vowels removed).
releases_regex = r'^(?:u4a-)?[b-df-hj-np-tv-z0-9\.\-]+$'
releases_groupid = 1000040

# Inserted between every character of the cleaned name so that the removed
# vowels (and anything else) may appear in the predb title. A stricter class
# such as [a-z0-9\.\-_]* would be notionally complete for scene releases but
# is harder to read.
WILDCARD = '.*'

# Resolution assumed when the name carries no recognised resolution suffix.
DEFAULT_RESOLUTION = '1080p'

# Irregular site abbreviations mapped to their regular vowel-stripped form.
# Only the first matching prefix is rewritten.
_SITE_PREFIXES = (
    (re.compile(r'^mmgs\.'), 'mmysgrl.'),
    (re.compile(r'^1kf\.'), '1000fcls.'),
    (re.compile(r'^3rd\.'), 'thrdmvs.'),
    (re.compile(r'^ztd\.'), 'zrtlrnc.'),
    # only so it isn't confused for a group name prefix later on
    (re.compile(r'^pssn-hd\.'), 'pssnhd.'),
)

# Trailing resolution markers and the resolution each one stands for.
_RESOLUTION_SUFFIXES = (
    (re.compile(r'\.(720p|hd)$'), '720p'),
    (re.compile(r'\.(1080p|fllhd)$'), '1080p'),
    (re.compile(r'\.4k$'), '2160p'),
)

_TWO_D_SUFFIX_RE = re.compile(r'\.2d$')
_GSH_DOT_RE = re.compile(r'^(gsh)\.')  # these guys use "gsh." not "gsh-"
_U4A_GROUP_RE = re.compile(r'^(u4a)-')
_LETTER_GROUP_RE = re.compile(r'^([a-z]+)-')
_GROUP_PREFIX_RE = re.compile(r'^[a-z0-9]+-')
_DISC_SUFFIX_RE = re.compile(r'c?d([12])$')
_UHD_TAG_RE = re.compile(r'\.2160p\.')

_RELEASES_QUERY = (
    "select `id`,`searchname`,`postdate` from `releases` "
    "where `groups_id`=%s and `searchname` REGEXP %s "
    "order by `postdate` desc;"
)
_PREDB_QUERY = (
    "select `title`,`source` from `predb` "
    "where `created` >= date_sub(%s, interval 1 day) "
    "and `created` <= date_add(%s, interval 1 day) "
    "and `title` like '%.XXX.%' and `title` REGEXP %s;"
)
_RENAME_QUERY = "update `releases` set `searchname`=%s where `id`=%s;"


def _expand_site_prefix(name):
    """Rewrite the first matching irregular site prefix; return the name."""
    for prefix_re, replacement in _SITE_PREFIXES:
        name, hits = prefix_re.subn(replacement, name)
        if hits:
            break
    return name


def _split_resolution(name):
    """Strip a trailing resolution marker; return (resolution, name)."""
    for suffix_re, resolution in _RESOLUTION_SUFFIXES:
        stripped, hits = suffix_re.subn('', name)
        if hits:
            return resolution, stripped
    return DEFAULT_RESOLUTION, name


def _split_group(name):
    """Strip a leading release-group prefix; return (group, name).

    When no group prefix is found the name is returned untouched and the
    group is None. Separator and disc-number normalisation is only applied
    when a group prefix was found.
    """
    match = _GSH_DOT_RE.search(name)
    if match:
        name = _GSH_DOT_RE.sub('gsh-', name)
    else:
        match = _U4A_GROUP_RE.search(name) or _LETTER_GROUP_RE.search(name)
    if match is None:
        return None, name
    # remove group name (it is appended to the end of the pattern instead)
    name = _GROUP_PREFIX_RE.sub('', name)
    # normalize separators
    name = name.replace('-', '.')
    # attempt to fix disc numbers: trailing cd1/d1 -> disc1, cd2/d2 -> disc2
    name = _DISC_SUFFIX_RE.sub(r'disc\1', name)
    return match.group(1), name


def build_search_pattern(searchname):
    """Derive the predb lookup pattern for an obfuscated release name.

    Args:
        searchname: the release's current (vowel-stripped) search name.

    Returns:
        A tuple ``(cleansearchname, resolution, grpname, pattern)`` where
        ``cleansearchname`` is the name with site prefix expanded and the
        resolution/2d/group parts removed, ``resolution`` is '720p',
        '1080p' or '2160p', ``grpname`` is the release group or None, and
        ``pattern`` is the regular expression to match predb titles with.
    """
    name = _expand_site_prefix(searchname)
    resolution, name = _split_resolution(name)
    # ignore 2d (only for names carrying the tmwvrnt. site prefix)
    if name.startswith('tmwvrnt.'):
        name = _TWO_D_SUFFIX_RE.sub('', name)
    grpname, name = _split_group(name)

    # Characters are inserted unescaped, so a '.' in the name acts as a
    # regex wildcard itself; this keeps the match deliberately loose.
    pattern = WILDCARD.join(name)
    # add .resolution. (if known)
    if resolution:
        pattern += WILDCARD + resolution
    # add -group (if known)
    if grpname:
        pattern += WILDCARD + '-' + WILDCARD + WILDCARD.join(grpname)
    return name, resolution, grpname, pattern


def _rename_releases(connection, cursor):
    """Look up every matching release in predb and rename unique matches."""
    cursor.execute(_RELEASES_QUERY, (releases_groupid, releases_regex))
    releases = cursor.fetchall()
    print('processing ' + str(len(releases)) + ' releases')
    for release_id, searchname, postdate in releases:
        cleansearchname, resolution, grpname, pattern = (
            build_search_pattern(searchname))
        print('searchname : ' + searchname)
        print('postdate : ' + str(postdate))
        print('cleansearchname: ' + cleansearchname)
        print('resolution : ' + (resolution if resolution else 'unknown'))
        print('release group : ' + (grpname if grpname else 'unknown'))
        print('pattern : ' + pattern)
        # Candidates are predb titles created within one day of the post
        # date that contain ".XXX." and match the generated pattern.
        cursor.execute(_PREDB_QUERY, (postdate, postdate, pattern))
        candidates = cursor.fetchall()
        if not candidates:
            print('rename : [no match]')
        elif len(candidates) == 1:
            title, source = candidates[0]
            # if the only match was 4k but 4k wasn't detected as a
            # resolution, change title resolution to unknown
            if resolution != '2160p':
                title = _UHD_TAG_RE.sub('.UNKNOWN.', title)
            print('rename : ' + title)
            print('source : ' + source)
            cursor.execute(_RENAME_QUERY, (title, release_id))
            # commit per release so earlier renames survive a later failure
            connection.commit()
        else:
            print('rename : [skipping, multiple matches]')
            for candidate in candidates:
                print(' ' + candidate[0])
        print('')


def main():
    """Connect to the nzedb database, rename releases, and always clean up."""
    connection = db.connect(host=dbhost, user=dbuser, password=dbpassword,
                            database='nzedb')
    try:
        cursor = connection.cursor()
        try:
            _rename_releases(connection, cursor)
        finally:
            cursor.close()
    finally:
        connection.close()


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement