Advertisement
Guest User

Untitled

a guest
Dec 24th, 2018
119
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.80 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2.  
  3. import rarfile
  4. import os
  5. import shutil
  6. import time
  7. import pymysql
  8. import sys
  9. import hashlib
  10. import zipfile
  11.  
  12. ARCHTIVE_DIR = 'archive'
  13. TARGET_DIR = 'target'
  14. PROTECTED_DIR = 'protected'
  15. DAMAGED_DIR = 'damaged'
  16. DUPLICATE_DIR = 'duplicate'
  17.  
  18. # удалять архивы после распаковки?
  19. REMOVE = False
  20. REMOVE_DUPLICATE = False
  21.  
  22. if not os.path.isdir(ARCHTIVE_DIR):
  23. os.mkdir(ARCHTIVE_DIR)
  24. if not os.path.isdir(TARGET_DIR):
  25. os.mkdir(TARGET_DIR)
  26. if not os.path.isdir(PROTECTED_DIR):
  27. os.mkdir(PROTECTED_DIR)
  28. if not os.path.isdir(DAMAGED_DIR):
  29. os.mkdir(DAMAGED_DIR)
  30. if not os.path.isdir(DUPLICATE_DIR):
  31. os.mkdir(DUPLICATE_DIR)
  32.  
  33. TOTAL_FILES = 0
  34. TOTAL_PROCCESSED = 0
  35. DUPLICATED = 0
  36. DAMAGED = 0
  37. PROTECTED = 0
  38. PERC_DIFF = 50
  39. CURRENT_PERC = 0
  40.  
'''
Schema of the table that stores the archive hashes:

CREATE TABLE IF NOT EXISTS `hash_arc` (
    `id` int(11) NOT NULL AUTO_INCREMENT,
    `date` date NOT NULL,
    `filename` varchar(255) NOT NULL,
    `hash` varchar(32) NOT NULL,
    `bad` tinyint(1) DEFAULT '0',
    PRIMARY KEY(id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
'''
  51.  
  52.  
  53. def file_as_bytes(file):
  54. with file:
  55. return file.read()
  56.  
  57. def process_rar(rar_name, mysql_cursor, password=None):
  58. #print(rar_name)
  59. global TOTAL_FILES, TOTAL_PROCCESSED, DAMAGED, PROTECTED, PERC_DIFF, CURRENT_PERC, REMOVE_DUPLICATE, DUPLICATED
  60. #return
  61. try:
  62. fname = os.path.basename(rar_name)
  63. TOTAL_PROCCESSED += 1
  64. perc = float(TOTAL_PROCCESSED / TOTAL_FILES) * 100
  65. if perc - CURRENT_PERC >= PERC_DIFF:
  66. CURRENT_PERC = perc
  67. print("Всего обработано файлов: %s из %s (%s%%)" %(TOTAL_PROCCESSED,TOTAL_FILES, int(perc)))
  68. print("Защищенных архивов: %s" % (PROTECTED))
  69. print("Поврежденных архивов %s" % (DAMAGED))
  70. print("Дубликатов %s" % (DUPLICATED))
  71. hash = hashlib.md5(file_as_bytes(open(rar_name, 'rb'))).hexdigest()
  72. #print(hash)
  73. query = ("SELECT * FROM hash_arc where hash = '%s'" % hash)
  74. data = {}
  75. try:
  76. mysql_cursor.execute(query)
  77. # conn.commit()
  78. except Exception as e:
  79. print(e)
  80. for data in cursor:
  81. #print(data)
  82. pass
  83.  
  84. arch_name = os.path.basename(rar_name)
  85. if len(data) < 1:
  86. # no duplicate in database
  87. query = "INSERT INTO hash_arc(date, filename, hash) VALUES(date(NOW()), '%s', '%s')" % (arch_name, hash)
  88. try:
  89. mysql_cursor.execute(query)
  90. # conn.commit()
  91. except Exception as e:
  92. print(e)
  93. else:
  94. # duplicated in database
  95. DUPLICATED += 1
  96. if REMOVE_DUPLICATE == True:
  97. os.remove(rar_name)
  98. else:
  99. shutil.copyfile(rar_name, os.path.join(DUPLICATE_DIR, fname))
  100. print(arch_name + ' уже существует в БД')
  101. query = "UPDATE hash_arc SET date = date(NOW()) where hash = '%s'" % (hash)
  102.  
  103. try:
  104. mysql_cursor.execute(query)
  105. return
  106. # conn.commit()
  107. except Exception as e:
  108. print(e)
  109. return
  110.  
  111. with rarfile.RarFile(rar_name) as archive:
  112. #print(archive.namelist)
  113. fname = os.path.basename(rar_name)
  114. fname = os.path.splitext(fname)[0]
  115. #print(fname)
  116. dirname = fname
  117. #print(dirname)
  118. #return
  119. if archive.needs_password():
  120. PROTECTED += 1
  121. fname = (time.strftime("%d.%m__")) + fname + '.rar'
  122. shutil.copyfile(rar_name, os.path.join(PROTECTED_DIR, fname))
  123. if REMOVE == True:
  124. #os.rename(rar_name, os.path.join(PROTECTED_DIR, fname))
  125. os.remove(rar_name)
  126. return
  127. # if REMOVE == True:
  128. # os.remove(rar_name)
  129. try:
  130. os.mkdir(os.path.join(TARGET_DIR, dirname))
  131. except Exception as e:
  132. print(dirname + " exists")
  133. try:
  134. #shutil.rmtree(os.path.join(TARGET_DIR, dirname))
  135. archive.extractall(os.path.join(TARGET_DIR, dirname), pwd=password)
  136. except Exception as e:
  137. print(e)
  138. DAMAGED += 1
  139. if REMOVE == True:
  140. # shutil.rmtree(os.path.join(TARGET_DIR, dirname))
  141. print("Removing " + rar_name)
  142. os.remove(rar_name)
  143. return
  144. if REMOVE == True:
  145. os.remove(rar_name)
  146. except Exception as e:
  147. DAMAGED += 1
  148. print(e)
  149. if REMOVE == True:
  150. os.remove(rar_name)
  151. print("Removing " + rar_name)
  152.  
  153.  
  154. def process_zip(rar_name, mysql_cursor, password=None):
  155. #print(rar_name)
  156. global TOTAL_FILES, TOTAL_PROCCESSED, DAMAGED, PROTECTED, PERC_DIFF, CURRENT_PERC, REMOVE_DUPLICATE, DUPLICATED
  157. #return
  158. try:
  159. fname = os.path.basename(rar_name)
  160. TOTAL_PROCCESSED += 1
  161. perc = float(TOTAL_PROCCESSED / TOTAL_FILES) * 100
  162. if perc - CURRENT_PERC >= PERC_DIFF:
  163. CURRENT_PERC = perc
  164. print("Всего обработано файлов: %s из %s (%s%%)" %(TOTAL_PROCCESSED,TOTAL_FILES, int(perc)))
  165. print("Защищенных архивов: %s" % (PROTECTED))
  166. print("Поврежденных архивов %s" % (DAMAGED))
  167. print("Дубликатов %s" % (DUPLICATED))
  168. hash = hashlib.md5(file_as_bytes(open(rar_name, 'rb'))).hexdigest()
  169. #print(hash)
  170. query = ("SELECT * FROM hash_arc where hash = '%s'" % hash)
  171. data = {}
  172. try:
  173. mysql_cursor.execute(query)
  174. # conn.commit()
  175. except Exception as e:
  176. print(e)
  177. for data in cursor:
  178. #print(data)
  179. pass
  180.  
  181. arch_name = os.path.basename(rar_name)
  182. if len(data) < 1:
  183. # no duplicate in database
  184. query = "INSERT INTO hash_arc(date, filename, hash) VALUES(date(NOW()), '%s', '%s')" % (arch_name, hash)
  185. try:
  186. mysql_cursor.execute(query)
  187. # conn.commit()
  188. except Exception as e:
  189. print(e)
  190. else:
  191. # duplicated in database
  192. DUPLICATED += 1
  193. if REMOVE_DUPLICATE == True:
  194. os.remove(rar_name)
  195. else:
  196. shutil.copyfile(rar_name, os.path.join(DUPLICATE_DIR, fname))
  197. print(arch_name + ' уже существует в БД')
  198. query = "UPDATE hash_arc SET date = date(NOW()) where hash = '%s'" % (hash)
  199.  
  200. try:
  201. mysql_cursor.execute(query)
  202. return
  203. # conn.commit()
  204. except Exception as e:
  205. print(e)
  206. return
  207.  
  208. with zipfile.ZipFile(rar_name) as archive:
  209. #print(archive.namelist)
  210. fname = os.path.basename(rar_name)
  211. fname = os.path.splitext(fname)[0]
  212. #print(fname)
  213. dirname = fname
  214. #print(dirname)
  215. for zinfo in archive.infolist():
  216. is_encrypted = zinfo.flag_bits & 0x1
  217. if is_encrypted:
  218. PROTECTED += 1
  219. fname = (time.strftime("%d.%m__")) + fname + '.zip'
  220. shutil.copyfile(rar_name, os.path.join(PROTECTED_DIR, fname))
  221. if REMOVE == True:
  222. #os.rename(rar_name, os.path.join(PROTECTED_DIR, fname))
  223. os.remove(rar_name)
  224. return
  225. # if REMOVE == True:
  226. # os.remove(rar_name)
  227. #return
  228. # if archive.needs_password():
  229. # PROTECTED += 1
  230. # fname = (time.strftime("%d.%m__")) + fname + '.rar'
  231. # shutil.copyfile(rar_name, os.path.join(PROTECTED_DIR, fname))
  232. # if REMOVE == True:
  233. # #os.rename(rar_name, os.path.join(PROTECTED_DIR, fname))
  234. # os.remove(rar_name)
  235. # return
  236. # # if REMOVE == True:
  237. # # os.remove(rar_name)
  238. try:
  239. os.mkdir(os.path.join(TARGET_DIR, dirname))
  240. except Exception as e:
  241. print(dirname + " exists")
  242. try:
  243. #shutil.rmtree(os.path.join(TARGET_DIR, dirname))
  244. archive.extractall(os.path.join(TARGET_DIR, dirname), pwd=password)
  245. except Exception as e:
  246. print(e)
  247. DAMAGED += 1
  248. if REMOVE == True:
  249. # shutil.rmtree(os.path.join(TARGET_DIR, dirname))
  250. print("Removing " + rar_name)
  251. os.remove(rar_name)
  252. return
  253. if REMOVE == True:
  254. os.remove(rar_name)
  255. except Exception as e:
  256. DAMAGED += 1
  257. print(e)
  258. if REMOVE == True:
  259. os.remove(rar_name)
  260. print("Removing " + rar_name)
  261.  
  262.  
  263.  
  264. DB_HOST = 'localhost'
  265. DB_NAME = 'test'
  266. DB_USER = 'root'
  267. DB_PASS = ''
  268.  
  269. conn = pymysql.connect(host=DB_HOST,
  270. database=DB_NAME,
  271. user=DB_USER,
  272. password=DB_PASS)
  273. if conn.open:
  274. print('Connected to MySQL database')
  275. else:
  276. print("Can't connect to database")
  277. sys.exit()
  278.  
  279. conn.autocommit = False
  280. cursor = conn.cursor();
  281.  
  282. files_path = []
  283. for root, dirs, files in os.walk(ARCHTIVE_DIR):
  284. for name in files:
  285. files_path.append(os.path.join(root, name))
  286. # print("Processing " + os.path.join(root, name))
  287. # print(files_path)
  288. TOTAL_FILES = len(files_path)
  289. for file in files_path:
  290. print("Processing " + file)
  291. if file.lower().find('.rar') != -1:
  292. process_rar(file, cursor)
  293. elif file.lower().find('.zip') != -1:
  294. process_zip(file, cursor)
  295. else:
  296. print("Not archive")
  297.  
  298. print("Всего обработано файлов: %s из %s (%s%%)" % (TOTAL_PROCCESSED, TOTAL_FILES, int(CURRENT_PERC)))
  299. print("Защищенных архивов: %s" % (PROTECTED))
  300. print("Поврежденных архивов %s" % (DAMAGED))
  301. print("Дубликатов %s" % (DUPLICATED))
  302. conn.commit()
'''
Earlier version of the main loop, kept for reference only. It calls
process_rar() without the cursor argument, so it does not match the
function's current signature.

for root, dirs, files in os.walk(ARCHTIVE_DIR):
    for name in files:
        try:
            print("Processing " + os.path.join(root, name))
            process_rar(os.path.join(root, name))
        except Exception as e:
            print(e)
            continue
'''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement