Advertisement
Guest User

Untitled

a guest
Nov 21st, 2019
189
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.58 KB | None | 0 0
  1. #!/usr/bin/perl -w
  2. #
  3. # This parses mp4, wmv, mkv top level chunks, displays offset and length,
  4. # and if invalid data is added at the end, it can be removed with the -f
  5. # flag.
  6. #
  7. # To use this perl script on Windows, install Strawberry Perl from
  8. # strawberryperl.com
  9. #
  10. # perl D:\torrents\videocleaner.pl D:\torrents\clip.mp4
  11. #
  12. # If it shows anything invalid at the end of the file, running it again with
  13. # -f will remove those extra bytes.
  14. #
  15. # perl D:\torrents\videocleaner.pl -f D:\torrents\clip.mp4
  16. #
  17.  
  18. use File::Copy;
  19. use Fcntl qw(:flock SEEK_END);
  20.  
  # Set to 1 by the -f flag: back up and actually strip the trailing bytes.
  my $force = 0;
  # Safety limit passed to findtagbytes(): more junk than this is never removed.
  my $bytescutoff = 1000;

  my $usage = "Usage: $0 [-f] <media filename>\n";

  # Command line handling.  Fall back to '' when no argument was given so the
  # comparisons below do not warn about an uninitialized value under -w.
  my $firstarg = $ARGV[0] // '';
  if ($firstarg eq "-h") {
      print $usage;
      print " -f splits invalid trailing bytes out into a separate file.\n";
      exit;
  } elsif ($firstarg eq '-f') {
      $force = 1;
      shift @ARGV;
  }

  # Without a filename there is nothing to do; say so instead of failing in open().
  if (!defined $ARGV[0] || $ARGV[0] eq '') {
      print STDERR $usage;
      exit 1;
  }
  my $cfn = $ARGV[0];

  # Keep a read handle on the media file; it is used later to copy out the
  # trailing bytes before they are removed.
  open(my $fh, "<", $cfn) or die "Unable to open $cfn: $!\n";
  binmode($fh);
  # size of the media file in bytes
  my $cfs = -s $fh;
  40.  
  # Shorten a file by removing bytes from its end.
  #
  # Arguments:
  #   $cfn        - path of the file to truncate
  #   $cfs        - current size of the file in bytes
  #   $truncbytes - number of bytes to remove from the end
  #
  # Dies if the byte count is out of range, or if the file cannot be stat'ed,
  # made writable or truncated.  Returns a true value on success.
  sub binTrunc {
      my ($cfn, $cfs, $truncbytes) = @_;

      my $newbytes = $cfs - $truncbytes;
      # refuse nonsensical requests instead of handing truncate() a negative length
      die "Refusing to truncate $cfn: cannot remove $truncbytes of $cfs bytes\n"
          if $truncbytes < 0 || $newbytes < 0;

      print "truncating $cfn to $newbytes\n";

      # make sure the file is writable: add owner read/write to its current mode
      my @st = stat($cfn);
      die "Unable to stat file, $!\n" unless @st;
      my $perm = $st[2] & 07777;
      chmod($perm | 0600, $cfn) or die "Unable to make file writable, $!\n";

      # truncate
      truncate($cfn, $newbytes) or die "Unable to truncate file, $!\n";
      return 1;
  }
  55.  
  56.  
  # Save the trailing junk bytes to a backup file.
  #
  # Arguments:
  #   $tagfile - preferred name of the backup file
  #   $c4stag  - the raw bytes to save
  #
  # If $tagfile already exists, ".1", ".2", ... is appended until an unused
  # name is found, so an earlier backup is never overwritten.
  #
  # Dies if the backup cannot be written completely: the caller truncates the
  # media file afterwards, so a silently failed backup would lose the data.
  # Returns the name of the file that was written.
  sub backupTag {
      my ($tagfile, $c4stag) = @_;

      my $tagfn = $tagfile;
      my $eidx = 1;

      # find a name that doesn't exist
      while ( -e $tagfn ) {
          $tagfn = "$tagfile.$eidx";
          $eidx++;
      }

      open(my $fho, ">", $tagfn) or die "Unable to create $tagfn: $!\n";
      binmode($fho);
      print {$fho} $c4stag or die "Unable to write $tagfn: $!\n";
      # buffered write errors (e.g. a full disk) only surface at close
      close($fho) or die "Unable to finish writing $tagfn: $!\n";

      print "Backed up trailing data to ($tagfn)\n";
      return $tagfn;
  }
  76.  
  # Identify the container type from the first bytes of a file.
  #
  # Argument:
  #   $header - the leading bytes of the file (pass at least 8 bytes)
  #
  # Returns "mp4", "wmv", "mkv", or "unknown" when no signature matches.
  sub filetype {
      my ($header) = @_;

      # nothing was read (e.g. an empty file)
      return "unknown" unless defined $header;

      # examples
      # ASF "\x30\x26\xb2\x75\x8e\x66\xcf\x11\xa6\xd9\x00\xaa\x00\x62\xce\x6c"
      # MP4 "\x00\x00\x00.ftypisom"
      # Matroska 1a 45 df a3 93 42 82 88 6d 61 74 72 6f 73 6b 61
      # = 00011010b 45 df a3 10010011b (19)...
      #
      # 00000001b 42 85 81 01 18 53 80 (id)
      # = 67 01 00 = 01100111b 00000001b = 9985

      # The '.' positions are bytes of the leading chunk size, which can hold
      # any value.  /s lets '.' match 0x0a too; without it a size containing
      # that byte made the file look unknown.
      if (($header =~ /\A\x00\x00\x00.ftyp(?:isom|iso2|qt |mp4[12]|M4V )/s) ||
          ($header =~ /\A\x00...moov/s)) {
          return "mp4";
      }

      my $magic = substr($header, 0, 4);
      return "wmv" if $magic eq "\x30\x26\xb2\x75";
      return "mkv" if $magic eq "\x1a\x45\xdf\xa3";
      return "unknown";
  }
  105.  
  106.  
  # Return how many bytes of chunk header to read for the given file type.
  #
  # Argument:
  #   $ftype - "mp4", "wmv" or "mkv"
  #
  # Returns the header size in bytes; dies for any other file type.
  sub chunk_min {
      my ($ftype) = @_;

      my %chunkmin_for = (
          mp4 => 8,     # 4 byte size + 4 byte chunk name
          wmv => 24,    # 16 byte guid + 8 byte chunk length
          mkv => 16,    # room for the variable length id and size fields
      );

      die "Unknown file header type\n"
          unless defined $ftype && exists $chunkmin_for{$ftype};

      # explicit return instead of relying on the value of the last statement
      return $chunkmin_for{$ftype};
  }
  122.  
  # Decode the type and total length of one top-level chunk and print them.
  #
  # Arguments:
  #   $ftype     - "mp4", "wmv" or "mkv"
  #   $contfh    - file handle positioned right after $chunkdata; only read
  #                from when an mp4 chunk carries a 64 bit size
  #   $pos       - file offset of the chunk (used for the printed line only)
  #   $chunkdata - the chunk header bytes already read from the file
  #
  # Returns the list (chunktype, chunklength, chunkextra):
  #   chunktype   - wmv: the guid as 32 hex digits; mp4: the 4 character chunk
  #                 name; mkv: the id value from ebml_size_unpack() in hex,
  #                 or "----" for a corrupt header
  #   chunklength - length of the whole chunk including its header, or 0 when
  #                 the header could not be decoded
  #   chunkextra  - number of additional header bytes read from $contfh
  #
  # Dies on an unknown file type or a read error.
  sub decode_chunk_type_length {
      my ($ftype, $contfh, $pos, $chunkdata) = @_;

      my ($chunklength, $chunktype);
      my $chunkextra = 0;

      if ($ftype eq "wmv") {
          # ASF uses little-endian for number storage
          ($chunktype, $chunklength) = unpack('(H32Q)<', $chunkdata);
      } elsif ($ftype eq "mp4") {
          # ISO mpeg uses big-endian for number storage
          ($chunklength, $chunktype) = unpack('(LA4)>', $chunkdata);
          if ($chunklength == 1) {
              # 32 bit size 1 in mp4 means 64 bit size after the chunk type
              my $chunk64size;
              my $got = read($contfh, $chunk64size, 8);
              die "Unable to read 64 bit chunk size, $!\n" unless defined $got;
              if ($got == 8) {
                  ($chunklength) = unpack('(Q)>', $chunk64size);
                  $chunkextra += 8;
              } else {
                  # the file ends inside the size field: report an invalid
                  # chunk so the caller can treat the rest as junk
                  $chunklength = 0;
              }
          }
      } elsif ($ftype eq "mkv") {
          # unpack mkv chunk id
          my ($vint1len, $idvalue) = ebml_size_unpack($chunkdata);
          my ($vint2len, $datasize) = (-1, -1);
          if ($vint1len > 0) {
              # unpack mkv chunk size; it starts right after the id and may
              # use every remaining header byte
              ($vint2len, $datasize) = ebml_size_unpack(substr($chunkdata, $vint1len));
          }
          if ($vint1len < 1 || $vint2len < 1) {
              print "corrupt mkv block (invalid block size)\n";
              $chunklength = 0;
              # keep the placeholder as is; formatting it with %x would turn
              # it into "0" and warn about a non-numeric argument
              $chunktype = "----";
          } else {
              # the encoded size excludes the id and size fields themselves
              $chunklength = $datasize + $vint2len + $vint1len;
              $chunktype = sprintf("%x", $idvalue);
          }
      } else {
          die "Unknown file type\n";
      }

      printf("offset %11i length %11i (type %s)\n", $pos, $chunklength, $chunktype);
      return ($chunktype, $chunklength, $chunkextra);
  }
  167.  
  168.  
  # Decide whether a decoded chunk length is plausible.
  #
  # Arguments:
  #   $ftype          - file type; 'mkv' gets its own lower bound
  #   $chunkmin       - smallest valid chunk length for the other file types
  #   $chunklength    - the decoded chunk length
  #   $remainingbytes - bytes left in the file from the start of the chunk
  #
  # Returns a true value when the length is acceptable, a false one otherwise.
  sub is_chunk_size_ok {
      my ($ftype, $chunkmin, $chunklength, $remainingbytes) = @_;

      # a chunk cannot extend past the end of the file
      return 0 if $chunklength > $remainingbytes;

      # an mkv length is 0 if there's a problem with the chunk
      my $smallest = ($ftype eq 'mkv') ? 2 : $chunkmin;
      return $chunklength >= $smallest;
  }
  183.  
  # Variable length integer unpacker for matroska (mkv) files only.
  #
  # Argument:
  #   $chunkdata - binary string that starts with a variable length integer
  #
  # The position of the highest set bit in the first byte gives the length of
  # the integer in bytes (1 to 8).  That marker bit is masked off and the
  # remaining bits, followed by the remaining bytes, are read big endian.
  #
  # Returns the list (length in bytes, decoded value), or (-1, -1) when the
  # first byte has no bit set or $chunkdata is shorter than the integer.
  sub ebml_size_unpack {
      my ($chunkdata) = @_;

      # nothing to decode
      return (-1, -1) if !defined $chunkdata || length($chunkdata) == 0;

      my $rawbytei = ord(substr($chunkdata, 0, 1));

      # check for missing set bit
      return (-1, -1) if $rawbytei == 0;

      # Find the marker bit with integer arithmetic; a floating point
      # log()/log(2) can round the wrong way.
      my $vibytes = 1;
      my $marker = 0x80;
      while (!($rawbytei & $marker)) {
          $marker >>= 1;
          $vibytes++;
      }

      # not enough header bytes
      return (-1, -1) if $vibytes > length($chunkdata);

      # bits below the marker are the most significant bits of the value
      my $accu = $rawbytei & ($marker - 1);
      for my $i (1 .. $vibytes - 1) {
          $accu = $accu * 256 + ord(substr($chunkdata, $i, 1));
      }
      return ($vibytes, $accu);
  }
  215.  
  216.  
  # Walk the top-level chunks of a media file, printing each one, and look
  # for a broken or incomplete chunk at the end.
  #
  # Arguments:
  #   $contname    - path of the media file
  #   $bytescutoff - safety limit: when an invalid chunk leaves more than this
  #                  many bytes in the file, 0 is returned so that the caller
  #                  does not truncate half of a file
  #
  # Returns the number of junk bytes found at the end of the file, or 0 when
  # every chunk is valid or the invalid part is larger than $bytescutoff.
  # Dies when the file cannot be opened or read, or its type is unknown.
  sub findtagbytes {
      # ISO media file top level structure
      # Series of chunks, 4 byte size (BE), 4 byte chunk name, and data

      # ASF v1 media file top level structure
      # Series of chunks, 16 byte guid, 8 byte chunk length (LE), and data

      my ($contname, $bytescutoff) = @_;

      open(my $contfh, "<", $contname) or die "Unable to open $contname: $!\n";
      binmode($contfh);

      # detect file type
      my $header;
      defined(read($contfh, $header, 16)) or die "Unable to read $contname: $!\n";
      my $ftype = filetype($header);
      my $chunkmin = chunk_min($ftype);

      my $contsize = -s $contfh;

      printf("File type %s, size (%12i)\n", $ftype, $contsize);

      my $pos = 0;
      # reset file position
      seek($contfh, $pos, 0); # 0 is SEEK_SET in fcntl module
      while ($pos < $contsize) {
          my $remainingbytes = $contsize - $pos;

          # failsafe position check, in case code is modified and file seeking
          # doesn't work correctly, avoid corrupting files
          my $testpos = tell($contfh);
          if ($testpos != $pos) {
              die "Mismatch between real pos $testpos and assumed pos $pos\n";
          }

          # If not enough room for a valid chunk, print warning and abort
          if ($remainingbytes < $chunkmin) {
              print "small junk section at end ($remainingbytes bytes)\n";
              close($contfh);
              return $remainingbytes;
          }

          # read the chunk header
          my $chunkdata;
          my $got = read($contfh, $chunkdata, $chunkmin);
          die "Unable to read chunk header at offset $pos: $!\n" unless defined $got;
          # $! is meaningless at end of file, so report that case separately
          die "Unexpected end of file at offset $pos\n" if $got == 0;

          # decode and print chunk header info
          my ($chunktype, $chunklength, $chunkextra) =
              decode_chunk_type_length($ftype, $contfh, $pos, $chunkdata);

          # lexical, so it no longer leaks into the package namespace
          my $bytes_after = $remainingbytes - $chunklength;
          printf(" (%12i) bytes remaining", $bytes_after);
          print "\n";

          # check for problems
          if (! is_chunk_size_ok($ftype, $chunkmin + $chunkextra, $chunklength, $remainingbytes)) {
              if ($remainingbytes > $bytescutoff) {
                  # if there are too many remaining bytes, do nothing, to avoid corruption
                  print "CAUTION: invalid chunk with >$bytescutoff ($remainingbytes) bytes left in file\n" .
                        "corruption probably unrelated to trailing junk bytes... not modifying\n";
                  # set to zero to avoid truncate
                  $remainingbytes = 0;
              } else {
                  print " Junk detected at end of file\n";
              }

              close($contfh);
              return $remainingbytes;
          }

          # defensive: a zero length would never advance $pos
          last if ($chunklength == 0);

          $pos += $chunklength;
          seek($contfh, $pos, 0); # 0 is SEEK_SET in module fcntl
      }
      close($contfh);
      return 0;
  }
  298.  
  299.  
  300. # This section looks for junk or broken top layer chunks
  301. # In addition to what some clip sites add
  302. # Some files uploaded to file locker sites have a few added null characters
  303.  
  my $tagbytes = findtagbytes($cfn, $bytescutoff);

  print "\n";

  if ($tagbytes > 0) {
      # findtagbytes() found between 1 and $bytescutoff junk bytes at the end of
      # the file not belonging to a valid media file chunk, so remove it

      # nothing is modified without -f
      die "Run with '-f' flag to actually truncate the file." unless $force;

      # Read the trailing bytes into a variable for backup purposes, and make
      # sure all of them arrived: they are about to be cut from the media file.
      my $c4stag;
      seek($fh, -$tagbytes, SEEK_END) or die "Unable to seek in $cfn: $!\n";
      my $got = read($fh, $c4stag, $tagbytes);
      die "Unable to read trailing data from $cfn: $!\n" unless defined $got;
      die "Read only $got of $tagbytes trailing bytes from $cfn\n" if $got != $tagbytes;

      # release the read handle before the file is modified
      close($fh);

      # Back up the trailing data and remove it by truncating the media file
      backupTag($cfn . ".tag", $c4stag);
      binTrunc($cfn, $cfs, $tagbytes);
      print "cleaned $cfn\n";
  } else {
      print "No trailing junk data detected (limited to 1-$bytescutoff bytes)\n";
      close($fh);
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement