jcunews

DeDup.vbs

Mar 14th, 2020
616
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. 'DeDup v1.0.1
  2. 'https://pastebin.com/u/jcunews
  3. 'https://greasyfork.org/en/users/85671-jcunews
  4. 'https://www.reddit.com/u/jcunews1
  5. '
  6. 'Convert duplicate files into hardlinks or symlinks to save disk space.
  7. 'Requires Windows Vista or newer version.
  8. '
  9. 'Usage: dedup [options] {path} [options]
  10. '
  11. 'Options:
  12. '/c  Continue to next file even if errors occur.
  13. '/m  Use symlinks instead of hardlinks.
  14. '/s  Process subdirectories.
  15. '/t  Test/simulation mode. Does not actually change anything.
  16. '
  17. 'Notes:
'- This script cannot detect hardlinks. They will be seen as separate files.
'  So, this script will keep converting files if the same directory is
'  processed multiple times.
  21. '- Hardlink file dates will always be the same as the link target file's.
  22. '  Symlink file dates will always be set to the current time.
'- Once hardlinks are moved to a different drive, the links will break and
'  become separate copies. Once symlinks or their target files are moved to a
'  different directory, the links will no longer point to an existing file.
  26.  
  27. sub help
  28.   set f = fs.opentextfile(wscript.scriptfullname)
  29.   do while true
  30.     s = f.readline
  31.     if s = "" then wscript.quit 1
  32.     wscript.stdout.writeline mid(s, 2)
  33.   loop
  34.   wscript.quit 1
  35. end sub
  36.  
  37. 'treat number as 64-bit integer and convert it to binary string
  38. function qwordAsStr(n)
  39.   dim r, i
  40.   r = ""
  41.   for i = 0 to 7
  42.     r = r & chr(n - int(n / 256) * 256)
  43.     n = int(n / 256)
  44.   next
  45.   qwordasstr = r
  46. end function
  47.  
  48. 'returns binary string of: {8 chars file size},{20 chars sha1}
  49. function calcHash(path)
  50.   dim h
  51.   ds.loadfromfile path
  52.   ds.position = 0
  53.   hs.position = 0
  54.   hs.seteos
  55.   hs.type = 1 'binary
  56.  hs.write cr.computehash_2(ds.read)
  57.   hs.position = 0
  58.   hs.type = 2 'text
  59.  hs.charset = "x-user-defined"
  60.   calchash = qwordasstr(ds.size) & hs.readtext
  61. end function
  62.  
  63. 'calculate hash on specified directory
  64. sub processDir(path)
  65.   dim l, f
  66.   set l = fs.getfolder(path)
  67.   for each f in l.files
  68.     if (f.attributes and 1024) = 0 then
  69.       redim preserve hashfiles(ubound(hashfiles) + 1)
  70.       hashfiles(ubound(hashfiles)) = calchash(f.path) & f.path
  71.     end if
  72.     wscript.stdout.write "."
  73.   next
  74.   if not recurs then exit sub
  75.   for each f in l.subfolders
  76.     processdir f.path
  77.   next
  78. end sub
  79.  
  80. 'compare binary string
  81. function binComp(s1, s2)
  82.   dim i, a, b
  83.   for i = 1 to 28
  84.     a = asc(mid(s1, i, 1))
  85.     b = asc(mid(s2, i, 1))
  86.     if a < b then
  87.       bincomp = -1
  88.       exit function
  89.     elseif a > b then
  90.       bincomp = 1
  91.       exit function
  92.     end if
  93.   next
  94.   bincomp = 0
  95. end function
  96.  
  97. 'array quicksort. modified for binary string array.
  98. 'original author: Christopher J. Scharer
  99. sub array_quicksort(byref rarr_arraytosort(), byval rlng_low, _
  100.   byval rlng_high)
  101.   dim var_pivot, lng_swap, lng_low, lng_high
  102.   lng_low = rlng_low
  103.   lng_high = rlng_high
  104.   var_pivot = rarr_arraytosort((rlng_low + rlng_high) / 2)
  105.   do while lng_low <= lng_high
  106.     do while bincomp(rarr_arraytosort(lng_low), var_pivot) < 0 and _
  107.       lng_low < rlng_high
  108.       lng_low = lng_low + 1
  109.     loop
  110.     do while bincomp(var_pivot, rarr_arraytosort(lng_high)) < 0 and _
  111.       lng_high > rlng_low
  112.       lng_high = lng_high - 1
  113.     loop
  114.     if lng_low <= lng_high then
  115.       lng_swap = rarr_arraytosort(lng_low)
  116.       rarr_arraytosort(lng_low) = rarr_arraytosort(lng_high)
  117.       rarr_arraytosort(lng_high) = lng_swap
  118.       lng_low = lng_low + 1
  119.       lng_high = lng_high - 1
  120.     end if
  121.   loop
  122.   if rlng_low < lng_high then
  123.     array_quicksort rarr_arraytosort, rlng_low, lng_high
  124.   end if
  125.   if lng_low < rlng_high then
  126.     array_quicksort rarr_arraytosort, lng_low, rlng_high
  127.   end if
  128. end sub
  129.  
  130. 'format number with thousand separator
  131. function comma(byval n)
  132.   dim r, i
  133.   n = cstr(int(n))
  134.   i = len(n) - 2
  135.   r = ""
  136.   do while i > 1
  137.     r = "," & mid(n, i, 3) & r
  138.     i = i - 3
  139.   loop
  140.   comma = left(n, i + 2) & r
  141. end function
  142.  
  143. function strSize(n, byval sign)
  144.   if sign and (n > 0) then
  145.     sign = "+"
  146.   else
  147.     sign = ""
  148.   end if
  149.   if n >= 1073741824 then
  150.     strsize = "(" & sign & comma(n / 1073741824) & " GB) "
  151.   elseif n >= 1048576 then
  152.     strsize = "(" & sign & comma(n / 1048576) & " MB) "
  153.   elseif n >= 1024 then
  154.     strsize = "(" & sign & comma(n / 1024) & " KB) "
  155.   else
  156.     strsize = ""
  157.   end if
  158. end function
  159.  
  160.  
  161.  
  162. 'process command line parameters
  163. set fs = createobject("scripting.filesystemobject")
  164. path = ""
  165. igerr = false
  166. test = false
  167. slink = false
  168. recurs = false
  169. for each s in wscript.arguments
  170.   if left(s, 1) = "/" then
  171.     select case ucase(s)
  172.       case "/C" igerr = true
  173.       case "/M" slink = true
  174.       case "/S" recurs = true
  175.       case "/T" test = true
  176.       case else help
  177.     end select
  178.   elseif path = "" then
  179.     path = s
  180.   else
  181.     help
  182.   end if
  183. next
  184. if path = "" then help
  185. df = fs.getfolder(path).drive.freespace
  186.  
  187. set ds = createobject("adodb.stream")
  188. ds.open
  189. ds.type = 1 'binary
  190. set hs = createobject("adodb.stream")
  191. hs.open
  192. on error resume next
  193. set cr = createobject("system.security.cryptography.sha1cryptoserviceprovider")
  194. if err.number <> 0 then
  195.   wscript.stdout.writeline _
  196.     "This script requires .NET Framework of any version."
  197.   wscript.quit 2
  198. end if
  199. on error goto 0
  200.  
  201. wscript.stdout.write "Gathering file information"
  202. redim hashfiles(-1) 'hash+path
  203. processdir path
  204. wscript.stdout.writeline
  205.  
  206. wscript.stdout.writeline "Sorting file information..."
  207. array_quicksort hashfiles, 0, ubound(hashfiles)
  208.  
  209. set ws = createobject("wscript.shell")
  210. uniqcount = 0
  211. dupecount = 0
  212. okcount = 0
  213. errcount = 0
  214. freed = 0
  215. redim dups(-1) '[[{8 chars file size},{20 chars sha1}], ...]
  216. prevhash = ""
  217. prevfile = ""
  218. for each s in hashfiles
  219.   h = left(s, 28)
  220.   f = mid(s, 29)
  221.   if h <> prevhash then
  222.     if ubound(dups) >= 0 then
  223.       uniqcount = uniqcount + 1
  224.       dupecount = dupecount + ubound(dups)
  225.       freed = freed + ubound(dups) * fs.getfile(dups(0)(1)).size
  226.       wscript.stdout.writeline vbcrlf & "Uniq: " & dups(0)(1)
  227.       on error resume next
  228.       for i = 1 to ubound(dups)
  229.         wscript.stdout.writeline "Link: " & dups(i)(1)
  230.         if not test then
  231.           err.clear
  232.           set sf = fs.getfile(dups(i)(1))
  233.           sn = sf.name
  234.           sf.name = sn & ".todelete"
  235.           if err.number = 0 then
  236.             if slink then
  237.               s = ""
  238.             else
  239.               s = "/h "
  240.             end if
  241.             set xc = ws.exec("cmd.exe /c mklink " & s & """" & dups(i)(1) & _
  242.               """ """ & dups(0)(1) & """")
  243.             if err.number = 0 then
  244.               do while xc.status = 0
  245.                 wscript.sleep 50
  246.               loop
  247.               if xc.exitcode = 0 then
  248.                 okcount = okcount + 1
  249.                 fs.deletefile dups(i)(1) & ".todelete"
  250.               else
  251.                 do while not xc.stdout.atendofstream
  252.                   wscript.stdout.writeline xc.stdout.readline
  253.                 loop
  254.                 sf.name = sn
  255.                 errcount = errcount + 1
  256.                 if not igerr then wscript.quit
  257.               end if
  258.             else
  259.               wscript.stdout.writeline err.description
  260.               sf.name = sn
  261.               errcount = errcount + 1
  262.               if not igerr then wscript.quit
  263.             end if
  264.           else
  265.             wscript.stdout.writeline err.description
  266.             errcount = errcount + 1
  267.             if not igerr then wscript.quit
  268.           end if
  269.         end if
  270.       next
  271.       on error goto 0
  272.       redim dups(-1)
  273.     end if
  274.   elseif ubound(dups) >= 0 then
  275.     redim preserve dups(ubound(dups) + 1)
  276.     dups(ubound(dups)) = array(h, f)
  277.   else
  278.     redim preserve dups(ubound(dups) + 2)
  279.     dups(ubound(dups) - 1) = array(prevhash, prevfile)
  280.     dups(ubound(dups)) = array(h, f)
  281.   end if
  282.   prevhash = h
  283.   prevfile = f
  284. next
  285. df = fs.getfolder(path).drive.freespace - df
  286. if df > 0 then
  287.   df = "+" & comma(df) & " Bytes " & strsize(df, true)
  288. else
  289.   df = comma(df) & " Bytes " & strsize(df, false)
  290. end if
  291. wscript.stdout.writeline vbcrlf & _
  292.   "Found " & comma(dupecount) & " duplicates of " & comma(uniqcount) & _
  293.   " unique files." & _
  294.   vbcrlf & _
  295.   comma(okcount) & " duplicates has been successfully linked. " & _
  296.   comma(errcount) & " have failed." & _
  297.   vbcrlf & _
  298.   comma(freed) & " Bytes " & strsize(freed, false) & _
  299.   "of disk space is supposedly be freed." & _
  300.   vbcrlf & _
  301.   "Actual disk free space difference: " & df
RAW Paste Data