Advertisement
Guest User

unmungeabe.py

a guest
Jul 26th, 2017
186
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.78 KB | None | 0 0
  1. #!/usr/bin/python
  2. import mysql.connector as db
  3. import re
  4.  
  5. dbhost = 'xxx'
  6. dbuser = 'xxx'
  7. dbpassword = 'xxx'
  8. releases_regex = '^(?:u4a-)?[b-df-hj-np-tv-z0-9\.\-]+$' # extra prefix on u4a posts, vowels removed
  9. releases_groupid = 1000040
  10.  
  11. nzedb_connection = db.connect(host=dbhost, user=dbuser, password=dbpassword, database='nzedb')
  12. nzedb_cursor = nzedb_connection.cursor()
  13. nzedb_cursor.execute("select `id`,`searchname`,`postdate` from `releases` where `groups_id`=%s and `searchname` REGEXP %s order by `postdate` desc;", (releases_groupid, releases_regex,))
  14. releases_matches = nzedb_cursor.fetchall()
  15. print 'processing ' + str(len(releases_matches)) + ' releases'
  16. for row in releases_matches:
  17.     id = row[0]
  18.     searchname = row[1]
  19.     postdate = row[2]
  20.     cleansearchname = searchname
  21.  
  22.     # irregular site names
  23.     if re.search('^mmgs\.', cleansearchname):
  24.         cleansearchname = re.sub('^mmgs\.', 'mmysgrl.', cleansearchname)
  25.     elif re.search('^1kf\.', cleansearchname):
  26.         cleansearchname = re.sub('^1kf\.', '1000fcls.', cleansearchname)
  27.     elif re.search('^3rd\.', cleansearchname):
  28.         cleansearchname = re.sub('^3rd\.', 'thrdmvs.', cleansearchname)
  29.     elif re.search('^ztd\.', cleansearchname):
  30.         cleansearchname = re.sub('^ztd\.', 'zrtlrnc.', cleansearchname)
  31.     elif re.search('^pssn-hd\.', cleansearchname):
  32.         cleansearchname = re.sub('^pssn-hd\.', 'pssnhd.', cleansearchname) # only so it isn't confused for a group name below
  33.  
  34.     # check for resolution
  35.     resolution = None
  36.     if re.search('\.(720p|hd)$', cleansearchname):
  37.         resolution = '720p'
  38.         cleansearchname = re.sub('\.(720p|hd)$', '', cleansearchname)
  39.     elif re.search('\.(1080p|fllhd)$', cleansearchname):
  40.         resolution = '1080p'
  41.         cleansearchname = re.sub('\.(1080p|fllhd)$', '', cleansearchname)
  42.     elif re.search('\.4k$', cleansearchname):
  43.         resolution = '2160p'
  44.         cleansearchname = re.sub('\.4k$', '', cleansearchname)
  45.     else:
  46.         resolution = '1080p'
  47.  
  48.     # ignore 2d
  49.     if re.search('^tmwvrnt\.', cleansearchname) and re.search('\.2d$', cleansearchname):
  50.         cleansearchname = re.sub('\.2d$', '', cleansearchname)
  51.  
  52.     # check for group prefix
  53.     grpname = None
  54.     grpmatch = re.search('^(gsh)\.', cleansearchname) # these guys use gush. not gush- for some reason
  55.     if grpmatch:
  56.         cleansearchname = re.sub('^gsh\.', 'gsh-', cleansearchname)
  57.     if not grpmatch:
  58.         grpmatch = re.search('^(u4a)-', cleansearchname)
  59.     if not grpmatch:
  60.         grpmatch = re.search('^([a-z]+)-', cleansearchname)
  61.     if grpmatch:
  62.         # remove group name (replaced at end later)
  63.         cleansearchname = re.sub('^[a-z0-9]+-', '', cleansearchname)
  64.         # normalize separators
  65.         cleansearchname = re.sub('-', '.', cleansearchname)
  66.         # attempt to fix disc numbers
  67.         cleansearchname = re.sub('cd1$', 'disc1', cleansearchname)
  68.         cleansearchname = re.sub('cd2$', 'disc2', cleansearchname)
  69.         cleansearchname = re.sub('d1$', 'disc1', cleansearchname)
  70.         cleansearchname = re.sub('d2$', 'disc2', cleansearchname)
  71.         grpname = grpmatch.group(1)
  72.  
  73.     # begin pattern
  74.     # this wildcard is notionally complete for scene releases but just harder to read
  75.     #wc = '[a-z0-9\.\-_]*'
  76.     wc = '.*'
  77.     pattern = wc.join(list(cleansearchname))
  78.  
  79.     # add .resolution. (if known)
  80.     if resolution:
  81.         pattern += wc + resolution
  82.  
  83.     # add -group (if known)
  84.     if grpname:
  85.         pattern += wc + '-' + wc + wc.join(list(grpname))
  86.  
  87.     # complete pattern
  88.     print 'searchname     : ' + searchname
  89.     print 'postdate       : ' + str(postdate)
  90.     print 'cleansearchname: ' + cleansearchname
  91.     print 'resolution     : ' + (resolution if resolution else 'unknown')
  92.     print 'release group  : ' + (grpname if grpname else 'unknown')
  93.     print 'pattern        : ' + pattern
  94.     nzedb_cursor.execute("select `title`,`source` from `predb` where `created` >= date_sub(%s, interval 1 day) and `created` <= date_add(%s, interval 1 day) and `title` like '%.XXX.%' and `title` REGEXP %s;", (postdate, postdate, pattern,))
  95.     predb_matches = nzedb_cursor.fetchall()
  96.     if predb_matches:
  97.         if len(predb_matches) == 1:
  98.             match = predb_matches[0]
  99.             title = match[0]
  100.             source = match[1]
  101.             # if the only match was 4k but 4k wasn't detected as a resolution, change title resolution to unknown
  102.             if resolution != '2160p' and re.search('\.2160p\.', title):
  103.                 title = re.sub('\.2160p\.', '.UNKNOWN.', title)
  104.             #print str(id) + ':' + searchname + ':' + title
  105.             #query = 'update `releases` set `searchname`=' + title + ' where `id`=' + str(id) + ';';
  106.             #print query
  107.             print 'rename         : ' + title
  108.             print 'source         : ' + source
  109.             nzedb_cursor.execute("update `releases` set `searchname`=%s where `id`=%s;", (title, id))
  110.             nzedb_connection.commit()
  111.             #break
  112.         else:
  113.             print 'rename         : [skipping, multiple matches]'
  114.             for match in predb_matches:
  115.                 title = match[0]
  116.                 print '                 ' + title
  117.     else:
  118.         print 'rename         : [no match]'
  119.     print ''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement