Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- import rarfile
- import os
- import shutil
- import time
- import pymysql
- import sys
- import hashlib
# Working directories, all relative to the current working directory.
ARCHTIVE_DIR = 'archive'      # scanned recursively for incoming archives
TARGET_DIR = 'target'         # each archive is unpacked into a sub-directory here
PROTECTED_DIR = 'protected'   # copies of password-protected archives
DAMAGED_DIR = 'damaged'       # created below; not referenced by the code visible here
DUPLICATE_DIR = 'duplicate'   # copies of archives whose hash is already in the DB

# Delete archives after unpacking?
REMOVE = False
# Delete (instead of copy to DUPLICATE_DIR) archives already known to the DB?
REMOVE_DUPLICATE = False

# Create every working directory up front; exist_ok avoids the
# check-then-create race of a separate isdir() test.
for _directory in (ARCHTIVE_DIR, TARGET_DIR, PROTECTED_DIR,
                   DAMAGED_DIR, DUPLICATE_DIR):
    os.makedirs(_directory, exist_ok=True)

# Run-wide counters, updated by process_rar() and the main script.
TOTAL_FILES = 0
TOTAL_PROCCESSED = 0
DUPLICATED = 0
DAMAGED = 0
PROTECTED = 0
# Progress is printed each time it has advanced by at least PERC_DIFF percent.
PERC_DIFF = 10
# Percentage at which progress was last printed.
CURRENT_PERC = 0

# Schema of the table the script reads and writes:
#
# CREATE TABLE IF NOT EXISTS `hash_arc` (
#   `id` int(11) NOT NULL AUTO_INCREMENT,
#   `date` date NOT NULL,
#   `filename` varchar(255) NOT NULL,
#   `hash` varchar(32) NOT NULL,
#   `bad` tinyint(1) DEFAULT '0',
#   PRIMARY KEY(id)
# ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
def file_as_bytes(file):
    """Return everything readable from *file* and close it.

    *file* is an already opened file object; it is used as a context
    manager, so it is closed when the function returns, including when
    ``read()`` raises.
    """
    with file:
        return file.read()
def _md5_of_file(path, chunk_size=1 << 20):
    """Return the hex MD5 digest of the file at *path*, reading it in chunks."""
    digest = hashlib.md5()
    with open(path, 'rb') as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def process_rar(rar_name, mysql_cursor, password=None):
    """Register one RAR archive in ``hash_arc`` and unpack it.

    The function works in this order:

    1. Increment ``TOTAL_PROCCESSED`` and print a progress report whenever
       progress advanced by at least ``PERC_DIFF`` percent.
    2. Compute the MD5 of the archive and look it up in ``hash_arc``.
    3. If the hash is known: count a duplicate, delete the archive
       (``REMOVE_DUPLICATE``) or copy it to ``DUPLICATE_DIR``, refresh the
       row's date and return.
    4. Otherwise insert a new row (file name + hash).
    5. If the archive needs a password: count it, copy it to
       ``PROTECTED_DIR`` with a ``dd.mm__`` prefix, optionally delete the
       original (``REMOVE``) and return.
    6. Otherwise extract into ``TARGET_DIR/<archive name without extension>``;
       a failed extraction is counted in ``DAMAGED``.

    Args:
        rar_name: Path to the archive.
        mysql_cursor: DB cursor used for every query. Queries use ``%s``
            placeholders, the parameter style PyMySQL accepts.
        password: Passed to ``extractall`` as ``pwd``.

    Returns:
        None. Results are reported through the module-level counters and
        printed messages. Any exception is caught, counted as a damaged
        archive and printed; nothing is raised to the caller.
    """
    global TOTAL_PROCCESSED, CURRENT_PERC, DUPLICATED, DAMAGED, PROTECTED

    try:
        arch_name = os.path.basename(rar_name)

        TOTAL_PROCCESSED += 1
        # Guard against TOTAL_FILES == 0 so that a missing total is not
        # misreported as a damaged archive via ZeroDivisionError.
        perc = 100.0 * TOTAL_PROCCESSED / TOTAL_FILES if TOTAL_FILES else 0.0
        if perc - CURRENT_PERC >= PERC_DIFF:
            CURRENT_PERC = perc
            print("Всего обработано файлов: %s из %s (%s%%)" % (TOTAL_PROCCESSED, TOTAL_FILES, int(perc)))
            print("Защищенных архивов: %s" % (PROTECTED))
            print("Поврежденных архивов %s" % (DAMAGED))
            print("Дубликатов %s" % (DUPLICATED))

        file_hash = _md5_of_file(rar_name)

        # Look the hash up through the cursor that was passed in (not a
        # module-level one). A failed lookup is printed and treated as
        # "not a duplicate", as before.
        is_duplicate = False
        try:
            mysql_cursor.execute(
                "SELECT id FROM hash_arc WHERE hash = %s", (file_hash,))
            is_duplicate = bool(mysql_cursor.fetchall())
        except Exception as e:
            print(e)

        if is_duplicate:
            DUPLICATED += 1
            if REMOVE_DUPLICATE:
                os.remove(rar_name)
            else:
                shutil.copyfile(rar_name, os.path.join(DUPLICATE_DIR, arch_name))
            print(arch_name + ' уже существует в БД')
            try:
                mysql_cursor.execute(
                    "UPDATE hash_arc SET date = date(NOW()) WHERE hash = %s",
                    (file_hash,))
            except Exception as e:
                print(e)
            return

        # New archive: remember it. The file name is external input, so it
        # is passed as a bound parameter rather than formatted into the SQL.
        try:
            mysql_cursor.execute(
                "INSERT INTO hash_arc(date, filename, hash) "
                "VALUES(date(NOW()), %s, %s)",
                (arch_name, file_hash))
        except Exception as e:
            print(e)

        with rarfile.RarFile(rar_name) as archive:
            dirname = os.path.splitext(arch_name)[0]

            # NOTE(review): a protected archive is set aside even when a
            # password was supplied, so `password` only matters for archives
            # that do not report needing one - confirm this is intended.
            if archive.needs_password():
                PROTECTED += 1
                protected_name = time.strftime("%d.%m__") + dirname + '.rar'
                shutil.copyfile(rar_name, os.path.join(PROTECTED_DIR, protected_name))
                if REMOVE:
                    os.remove(rar_name)
                return

            target = os.path.join(TARGET_DIR, dirname)
            try:
                os.mkdir(target)
            except OSError:
                # Only an already existing directory is acceptable; any
                # other failure goes to the outer handler.
                if not os.path.isdir(target):
                    raise
                print(dirname + " exists")

            try:
                archive.extractall(target, pwd=password)
            except Exception as e:
                print(e)
                DAMAGED += 1
                if REMOVE:
                    print("Removing " + rar_name)
                    os.remove(rar_name)
                return

        if REMOVE:
            os.remove(rar_name)
    except Exception as e:
        DAMAGED += 1
        print(e)
        # The archive may already be gone (removed before the failure);
        # removing it again would raise out of this handler.
        if REMOVE and os.path.exists(rar_name):
            print("Removing " + rar_name)
            os.remove(rar_name)
# Connection settings for the MySQL database that holds the hash_arc table.
DB_HOST = 'localhost'
DB_NAME = 'test'
DB_USER = 'root'
DB_PASS = ''

# pymysql.connect() raises on failure rather than returning a closed
# connection, so the failure is handled here and reported with a non-zero
# exit status.
try:
    conn = pymysql.connect(host=DB_HOST,
                           database=DB_NAME,
                           user=DB_USER,
                           password=DB_PASS)
except pymysql.MySQLError as e:
    print("Can't connect to database")
    print(e)
    sys.exit(1)
print('Connected to MySQL database')

# autocommit is a method on the PyMySQL connection; all changes are committed
# once, after every archive has been processed.
conn.autocommit(False)
cursor = conn.cursor()

try:
    # Collect every file below ARCHTIVE_DIR first so the total is known
    # before processing starts (process_rar uses it for progress output).
    files_path = []
    for root, dirs, files in os.walk(ARCHTIVE_DIR):
        files_path.extend(os.path.join(root, name) for name in files)
    TOTAL_FILES = len(files_path)

    for file in files_path:
        print("Processing " + file)
        process_rar(file, cursor)

    # Commit before printing so a console/encoding error in the summary
    # cannot lose the recorded hashes.
    conn.commit()

    # Report the real final percentage, not the last progress threshold.
    if TOTAL_FILES:
        final_perc = int(100.0 * TOTAL_PROCCESSED / TOTAL_FILES)
    else:
        final_perc = 0
    print("Всего обработано файлов: %s из %s (%s%%)" % (TOTAL_PROCCESSED, TOTAL_FILES, final_perc))
    print("Защищенных архивов: %s" % (PROTECTED))
    print("Поврежденных архивов %s" % (DAMAGED))
    print("Дубликатов %s" % (DUPLICATED))
finally:
    cursor.close()
    conn.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement