Vearie

bz2pak.py - BZ2 Pak Archive Parser

Feb 6th, 2022 (edited)
247
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 31.89 KB | None | 0 0
  1. """This module provides a full-featured pak archive reader/writer (PAK_Archive) and a simple, efficient pak reader called PAK. Basic usage example: [print(f) for f in PAK("data.pak")]"""
  2. VERSION = 1.03
  3.  
import os
import zlib

from collections import namedtuple
from ctypes import Structure, c_uint32, c_char, c_ubyte, sizeof
from tempfile import NamedTemporaryFile, SpooledTemporaryFile, gettempdir
  10.  
  11. PAK_TEMP_FILE_DIRECTORY = os.path.join(os.environ["tmp"], "bz2pak")
  12.  
  13. class Header(Structure):
  14.     _fields_ = [
  15.         ("magic", c_char*4),
  16.         ("version", c_uint32),
  17.         ("folder_count", c_uint32),
  18.         ("folder_offset", c_uint32),
  19.         ("file_count", c_uint32),
  20.         ("file_offset", c_uint32),
  21.         ("junk", c_ubyte*32)
  22.     ]
  23.  
  24. class FileInfo(Structure):
  25.     _fields_ = [
  26.         ("offset", c_uint32),
  27.         ("comp_size", c_uint32),
  28.         ("real_size", c_uint32)
  29.     ]
  30.  
  31. File = namedtuple("File", "folder_index name data_offset comp_size real_size") # Reference to internal pak file
  32. FileExtern = namedtuple("FileExtern", "folder_index name compression external_path mtime") # Reference to external file
  33. FileTemp = namedtuple("FileTemp", "folder_index name compression temporary_file") # File stored in memory or on disk when too large
  34.  
  35. class PAK:
  36.     def __init__(self, pak_file_path):
  37.         self.pak_file = None # Prevent AttributeError in __del__ if exception is raised
  38.        
  39.         if not os.path.exists(pak_file_path):
  40.             raise FileNotFoundError("PAK %r does not exist" % pak_file_path)
  41.        
  42.         if not os.path.isfile(pak_file_path):
  43.             raise IsADirectoryError("PAK %r is not a file" % pak_file_path)
  44.        
  45.         self.path_is_bytes = isinstance(pak_file_path, bytes)
  46.         self.pak_file = open(pak_file_path, "rb")
  47.         self.header = Header()
  48.        
  49.         self.pak_file.readinto(self.header)
  50.        
  51.         if self.header.magic != b"DOCP":
  52.             raise RuntimeError("invalid header identifier %r" % header.magic)
  53.        
  54.         if self.header.version != 2:
  55.             raise RunTimeError("PAK version %d not supported" % header.version)
  56.        
  57.         self.file_offset = self.header.file_offset
  58.         self.files = []
  59.        
  60.         self.folder_offset = self.header.folder_offset
  61.         self.folders = [type(pak_file_path)()] # 0th is root which is the default construction of path input b"" or ""
  62.    
  63.     def __del__(self):
  64.         if self.pak_file is not None:
  65.             self.pak_file.close()
  66.    
  67.     def __getitem__(self, index):
  68.         if isinstance(index, slice):
  69.             sl = index
  70.             return [self[i] for i in range(self.header.file_count)[sl.start:sl.stop:sl.step]]
  71.        
  72.         elif not isinstance(index, int):
  73.             raise TypeError("expected %r for file index by got %r" % (int, type(index)))
  74.        
  75.         if index < 0:
  76.             index = self.header.file_count + index # negative goes in reverse (e.g. [-1] is final item)
  77.        
  78.         if index >= self.header.file_count or index < 0:
  79.             raise IndexError("index out of range")
  80.        
  81.         self.pak_file.seek(self.file_offset)
  82.        
  83.         while len(self.files) <= index:
  84.             folder_index = c_uint32()
  85.             self.pak_file.readinto(folder_index)
  86.            
  87.             name_length = c_ubyte()
  88.             self.pak_file.readinto(name_length)
  89.             name = self.pak_file.read(name_length.value)
  90.             if not self.path_is_bytes:
  91.                 name = name.decode()
  92.            
  93.             file_info = FileInfo()
  94.             self.pak_file.readinto(file_info)
  95.            
  96.             try:
  97.                 self.file_offset = self.pak_file.tell()
  98.                 self.files += [_PAK_File(self, os.path.join(self._folder(folder_index.value), name), file_info)]
  99.             except IndexError:
  100.                 raise IndexError("file %r has out of range folder index %d > %d" % (name, folder_index.value, self.header.folder_count))
  101.        
  102.         return self.files[index]
  103.    
  104.     def __iter__(self):
  105.         for index in range(self.header.file_count):
  106.             yield self[index]
  107.    
  108.     def __len__(self):
  109.         return self.header.file_count
  110.    
  111.     def _folder(self, index):
  112.         if index >= self.header.folder_count+1: # +1 for root or 'non-folder'
  113.             raise IndexError("index out of range")
  114.        
  115.         restore_offset = self.pak_file.tell()
  116.         self.pak_file.seek(self.folder_offset, 0)
  117.        
  118.         while len(self.folders) <= index:
  119.             name_length = c_ubyte()
  120.             self.pak_file.readinto(name_length)
  121.             name = self.pak_file.read(name_length.value)
  122.             if not self.path_is_bytes:
  123.                 name = name.decode()
  124.            
  125.             self.folder_offset = self.pak_file.tell()
  126.             self.folders += [name]
  127.        
  128.         self.pak_file.seek(restore_offset)
  129.         return self.folders[index]
  130.  
  131. class _PAK_File(os.PathLike):
  132.     def __init__(self, pak, virtual_path, file_info):
  133.         # Need to keep this reference of pak so its __del__ doesn't get called until its last file goes out of scope.
  134.         # That's a requirement for using this syntax: with open(PAK("data.pak")[0]) as f: f.read()
  135.         self._pak = pak
  136.        
  137.         self.pak_file = pak.pak_file
  138.         self.virtual_path = virtual_path
  139.         self.file_info = file_info
  140.        
  141.         # Temp file will only be created from pak if the user calls open() on this object.
  142.         self.tempfile = None
  143.    
  144.     def __fspath__(self):
  145.         if self.tempfile is None:
  146.             if not os.path.exists(PAK_TEMP_FILE_DIRECTORY):
  147.                 os.makedirs(PAK_TEMP_FILE_DIRECTORY)
  148.            
  149.             self.tempfile = NamedTemporaryFile(suffix=os.path.basename(self.virtual_path), delete=False, dir=PAK_TEMP_FILE_DIRECTORY)
  150.            
  151.             self.pak_file.seek(self.file_info.offset)
  152.             is_compressed = (self.file_info.comp_size != self.file_info.real_size)
  153.             if is_compressed:
  154.                 self.tempfile.write(zlib.decompress(self.pak_file.read(self.file_info.comp_size)))
  155.             else:
  156.                 self.tempfile.write(self.pak_file.read(self.file_info.real_size))
  157.            
  158.             self.tempfile.close()
  159.        
  160.         return self.tempfile.name
  161.    
  162.     def __getitem__(self, item):
  163.         if isinstance(item, slice):
  164.             return self.virtual_path[item.start:item.stop:item.step]
  165.        
  166.         raise TypeError("unsupported type %r" % type(item))
  167.    
  168.     def __str__(self):
  169.         return str(self.virtual_path)
  170.    
  171.     def __repr__(self):
  172.         return "%r" % str(self)
  173.    
  174.     def __del__(self):
  175.         # Unfortunately python being terminated can leave stranded temp files.
  176.         if self.tempfile is not None:
  177.             if os.path.exists(self.tempfile.name) and os.path.isfile(self.tempfile.name):
  178.                 os.unlink(self.tempfile.name)
  179.  
  180. # These are used in the older PAK_Archive class
  181. class PakError(Exception): pass
  182. class PakFileError(PakError): pass
  183. class PakFileConflict(PakFileError):
  184.     def __init__(self, msg, index=None):
  185.         self.msg = msg
  186.         self.index = index # Index of conflicting item
  187.    
  188.     def __str__(self):
  189.         return self.msg
  190.  
  191. class PAK_Archive:
  192.     """Full archive class with methods to read, write and modify contents in a multitude of ways."""
  193.     magic =  b"DOCP" # 1346588484
  194.     version = 2
  195.     default_compression = 6 # 6 is the default used in pak explorer
  196.     max_path = 255
  197.     encoding = "ascii"
  198.    
  199.     def __init__(self, pak_file_path=None):
  200.         self.pak_file_file_last_mtime = None
  201.         self.pak_file_path = None
  202.         self.folders = [""] # 0th is root ("") which always exists
  203.         self.files = []
  204.        
  205.         if pak_file_path:
  206.             self.load(pak_file_path)
  207.    
  208.     def __str__(self):
  209.         return "<%s %r %d folder paths %d files>" % (
  210.             __class__.__name__,
  211.             self.pak_file_path,
  212.             len(self.folders) - 1,
  213.             len(self.files)
  214.         )
  215.  
  216.     def _check(self):
  217.         """If the pak file has changed or been deleted this will raise an exception."""
  218.         if self.pak_file_path is None:
  219.             return
  220.        
  221.         if not os.path.exists(self.pak_file_path):
  222.             # Raises exception if file cannot be opened for reading.
  223.             with open(self.pak_file_path, "rb"): pass
  224.        
  225.         if os.path.getmtime(self.pak_file_path) != self.pak_file_file_last_mtime:
  226.             raise PakFileError("%r was changed")
  227.    
  228.     def _validate_file_path(self, path, match_full_path=True):
  229.         """Raises detailed exception if file already exists."""
  230.         if match_full_path:
  231.             if self.file_exists(path, match_full_path=True):
  232.                 raise PakFileConflict("file %r already exists" % path, self.get_file_index(path, match_full_path=True))
  233.         else:
  234.             file_name = os.path.basename(path)
  235.             if self.file_exists(file_name, match_full_path=False):
  236.                 conflict_index = self.get_file_index(file_name)
  237.                 conflict_file = self.files[conflict_index]
  238.                 conflict_folder = self.folders[conflict_file.folder_index]
  239.                 conflict_path = os.path.join(conflict_folder, conflict_file.name)
  240.                 raise PakFileConflict("file %r already exists as %r" % (file_name, conflict_path), conflict_index)
  241.    
  242.     def get_comparable_path(self, path):
  243.         if not path:
  244.             return ""
  245.        
  246.         path = os.path.normpath(path)
  247.         path = os.path.normcase(path)
  248.         path = path.replace("/", "\\")
  249.        
  250.         while path[0] == "\\":
  251.             path = path[1::] # Remove any trailing slashes
  252.        
  253.         while path[-1] == "\\":
  254.             path = path[0:-1] # Remove any leading slashes
  255.        
  256.         return path
  257.    
  258.     def load(self, pak_file_path):
  259.         """Sets the pak file for this object. The file may be overwritten by save()."""
  260.         self.pak_file_file_last_mtime = None
  261.         self.pak_file_file_last_mtime = os.path.getmtime(pak_file_path)
  262.         self.pak_file_path = pak_file_path
  263.        
  264.         with open(self.pak_file_path, "rb") as f:
  265.             length = c_ubyte()
  266.             header = Header()
  267.             f.readinto(header)
  268.            
  269.             if header.magic != __class__.magic:
  270.                 raise PakError("invalid magic %r" % header.magic)
  271.            
  272.             if header.version != __class__.version:
  273.                 raise PakError("unsupported version %r" % header.version)
  274.            
  275.             # Paths of folders
  276.             f.seek(header.folder_offset)
  277.             self.folders = [""] # "" is root of pak
  278.             for folder_index in range(header.folder_count):
  279.                 f.readinto(length)
  280.                 self.folders += [f.read(length.value).decode(encoding=__class__.encoding)]
  281.            
  282.             # Paths and offets of files
  283.             f.seek(header.file_offset)
  284.             self.files = []
  285.             for file_index in range(header.file_count):
  286.                 folder_index = c_uint32()
  287.                 info = FileInfo()
  288.                 comp_size = c_uint32()
  289.                 real_size = c_uint32()
  290.                
  291.                 f.readinto(folder_index)
  292.                 f.readinto(length)
  293.                 name = f.read(length.value).decode(encoding=__class__.encoding)
  294.                 f.readinto(info)
  295.                
  296.                 self.files += [File(folder_index.value, name, info.data_offset, info.comp_size, info.real_size)]
  297.    
  298.     def save(self, save_to_file=None, keep_backup=True):
  299.         """Create final pak file on disk.
  300.        
  301.         save_to_file - Location to save pak file to on disk. Defaults to currently opened pak.
  302.         keep_backup - If True, the original pak file opened (if one was loaded) will be backed up.
  303.        
  304.         """
  305.         self._check()
  306.        
  307.         if save_to_file is None:
  308.             save_to_file = self.pak_file_path
  309.        
  310.         # Temp pak is needed because the source pak is read from in the middle of the process of writing the new one.
  311.         tmp_pak = NamedTemporaryFile(suffix=".pak", prefix="bz2pak", delete=False)
  312.        
  313.         # 1/4 - Header (placeholder)
  314.         tmp_pak.write(b"\0"*sizeof(Header))
  315.        
  316.         # 2/4 - File Content
  317.         size_info = dict() # Required for temporary/external files we have not yet compressed
  318.         file_content_offset = []
  319.         for file_index, file in enumerate(self.files):
  320.             file_content_offset += [tmp_pak.tell()]
  321.            
  322.             # Read the compressed file content out of the original pak we are working on
  323.             if type(file) is File:
  324.                 with open(self.pak_file_path, "rb") as src:
  325.                     src.seek(file.data_offset)
  326.                     tmp_pak.write(src.read(file.comp_size))
  327.            
  328.             elif type(file) in (FileExtern, FileTemp):
  329.                 # Read and compress external file into the new temp-pak
  330.                 if type(file) is FileExtern:
  331.                     with open(file.external_path, "rb") as f:
  332.                         data = f.read()
  333.                 # Read from tmp file
  334.                 else:
  335.                     file.temporary_file.seek(0)
  336.                     data = file.temporary_file.read()
  337.                
  338.                 real_size = len(data)
  339.                
  340.                 if file.compression > 0:
  341.                     data = zlib.compress(data, file.compression)
  342.                
  343.                 comp_size = len(data)
  344.                 tmp_pak.write(data)
  345.                 size_info[file] = (comp_size, real_size)
  346.            
  347.             else:
  348.                 raise TypeError("file index %d invalid type %r" % (file_index, type(file).__name__))
  349.        
  350.         # 3/4 - Folder Table
  351.         folder_offset = tmp_pak.tell()
  352.         for folder in self.folders[1::]:
  353.             name_to_bytes = folder.encode(encoding=__class__.encoding)
  354.             tmp_pak.write(c_ubyte(len(name_to_bytes)))
  355.             tmp_pak.write(name_to_bytes)
  356.        
  357.         # 4/4 - File Table
  358.         file_offset = tmp_pak.tell()
  359.         for file_index, file in enumerate(self.files):
  360.             if type(file) is File:
  361.                 name_to_bytes = file.name.encode(encoding=__class__.encoding)[0:__class__.max_path]
  362.                 tmp_pak.write(c_uint32(file.folder_index))
  363.                 tmp_pak.write(c_ubyte(len(name_to_bytes)))
  364.                 tmp_pak.write(name_to_bytes)
  365.                 tmp_pak.write(c_uint32(file_content_offset[file_index]))
  366.                 tmp_pak.write(c_uint32(file.comp_size))
  367.                 tmp_pak.write(c_uint32(file.real_size))
  368.            
  369.             elif type(file) in (FileExtern, FileTemp):
  370.                 if type(file) is FileExtern:
  371.                     if not os.path.exists(file.external_path):
  372.                         raise FileNotFoundError("external file %r not found for %r index %d" % (file.external_path, file.name, file_index))
  373.                    
  374.                     if not os.path.isfile(file.external_path):
  375.                         raise IsADirectoryError("external file %r is actually a folder but file was expected for %r index %d" % (file.external_path, file.name, file_index))
  376.                    
  377.                     if os.path.getmtime(file.external_path) != file.mtime:
  378.                         raise FileExistsError("external file %r was modified since being added for %r index %d" % (file.external_path, file.name, file_index))
  379.                
  380.                 name_to_bytes = file.name.encode(encoding=__class__.encoding)[0:__class__.max_path]
  381.                 tmp_pak.write(c_uint32(file.folder_index))
  382.                 tmp_pak.write(c_ubyte(len(name_to_bytes)))
  383.                 tmp_pak.write(name_to_bytes)
  384.                 tmp_pak.write(c_uint32(file_content_offset[file_index]))
  385.                 comp_size, real_size = size_info[file]
  386.                 tmp_pak.write(c_uint32(comp_size))
  387.                 tmp_pak.write(c_uint32(real_size))
  388.             else:
  389.                 raise TypeError("invalid type for file %r" % type(file).__name__)
  390.        
  391.         # Fill in header
  392.         header = Header(__class__.magic, __class__.version, len(self.folders)-1, folder_offset, len(self.files), file_offset, (c_ubyte*32)())
  393.         jump_back = tmp_pak.tell()
  394.         tmp_pak.seek(0)
  395.         tmp_pak.write(header)
  396.         tmp_pak.seek(jump_back)
  397.        
  398.         if keep_backup:
  399.             backup_dir = os.path.dirname(save_to_file)
  400.             backup_name, backup_ext = os.path.splitext(os.path.basename(save_to_file))
  401.             for i in range(0xFFFFFFFF):
  402.                 backup_path = os.path.join(backup_dir, backup_name + "-backup%d" % i + backup_ext)
  403.                 if not os.path.exists(backup_path):
  404.                     os.rename(save_to_file, backup_path)
  405.                     break
  406.         elif os.path.exists(save_to_file):
  407.             os.unlink(save_to_file)
  408.        
  409.         tmp_pak.close()
  410.         try:
  411.             os.rename(tmp_pak.name, save_to_file)
  412.         except OSError:
  413.             # Just copy and delete old if rename fails
  414.             with open(tmp_pak.name, "rb") as src:
  415.                 with open(save_to_file, "wb") as dst:
  416.                     dst.write(src.read())
  417.        
  418.         self.pak_file_path = save_to_file
  419.         self.pak_file_file_last_mtime = os.path.getmtime(self.pak_file_path)
  420.    
  421.     def read(self, file, match_full_path=False, decompress_data=True):
  422.         """Returns content of file in pak as bytes, decompressing data if necessary.
  423.        
  424.         file - Index, name, path (if match_full_path is True) or one of the 3 file objects: File, FileTemp, FileExtern.
  425.         match_full_path - file is an absoltue path, otherwise treated as a file name that could match in any file with the name.
  426.         decompress_data - If False, raw compressed data block will be returned, if compressed.
  427.         """
  428.         self._check()
  429.        
  430.         if type(file) is int:
  431.             file = self.files[file]
  432.        
  433.         elif type(file) is str:
  434.             file_index = self.get_file_index(file, match_full_path=match_full_path)
  435.             file = self.files[file_index]
  436.        
  437.         elif type(file) not in (File, FileExtern, FileTemp):
  438.             raise TypeError("invalid type for file %r" % type(file).__name__)
  439.        
  440.         if type(file) is File:
  441.             with open(self.pak_file_path, "rb") as f:
  442.                 is_compressed = file.comp_size != file.real_size
  443.                 f.seek(file.data_offset)
  444.                 data = f.read(file.comp_size)
  445.                 if decompress_data and is_compressed:
  446.                     return zlib.decompress(data)
  447.                
  448.                 return data
  449.        
  450.         elif type(file) is FileExtern:
  451.             with open(file.external_path, "rb") as f:
  452.                 return f.read()
  453.        
  454.         elif type(file) is FileTemp:
  455.             file.temporary_file.seek(0)
  456.             return file.temporary_file.read()
  457.        
  458.         else:
  459.             raise TypeError("Unsupported type for file %r." % type(file).__name__)
  460.    
  461.     def get_folder_index(self, folder):
  462.         """Returns index of folder (path or index). Raises exception if not found."""
  463.         if type(folder) is int:
  464.             if folder >= len(self.folders):
  465.                 raise IndexError("folder index out of range")
  466.            
  467.             return folder
  468.        
  469.         elif type(folder) != str:
  470.             raise TypeError("folder must be str (folder path) or int (folder index), not %r" % type(folder).__name__)
  471.        
  472.         comparable_folder = self.get_comparable_path(folder)
  473.         for index, compare_folder in enumerate(self.folders):
  474.             if comparable_folder == self.get_comparable_path(compare_folder):
  475.                 return index
  476.        
  477.         raise PakFileError("folder %r not found." % folder)
  478.    
  479.     def get_file_index(self, file, match_full_path=False, occurrence=0):
  480.         """Returns index of file name or full path if match_full_path is True. Raises exception if not found.
  481.        
  482.         match_full_path - file is an absoltue path, otherwise treated as a file name that could match in any file with the name.
  483.         occurrence - Which occurrence to match if more than one exist.
  484.         """
  485.         if type(file) is int:
  486.             if file >= len(self.files):
  487.                 raise IndexError("file index out of range (%d/%d)" % (file, len(self.files)))
  488.            
  489.             return file
  490.        
  491.         elif type(file) != str:
  492.             raise TypeError("file must be str (file path) or int (file index), not %r" % type(file).__name__)
  493.        
  494.         occurrence_counter = 0
  495.        
  496.         if match_full_path:
  497.             # Strings with no path will be treated as being in pak root ("")
  498.             file_folder_path = os.path.dirname(file)
  499.            
  500.             # Get folder index of this path (it must exist)
  501.             file_folder_index = self.get_folder_index(file_folder_path)
  502.            
  503.             file_name = os.path.basename(file)
  504.             file_name = os.path.normcase(file_name)
  505.            
  506.             for compare_file_index, compare_file in enumerate(self.files):
  507.                 if compare_file.folder_index == file_folder_index:
  508.                     # This file is in the same folder. This way we only have to compare ints to check for the folder portion.
  509.                     if file_name == os.path.normcase(compare_file.name):
  510.                         if occurrence < occurrence_counter:
  511.                             occurrence += 1
  512.                         else:
  513.                             return compare_file_index
  514.            
  515.             raise PakFileError("file %r not found" % file)
  516.         else:
  517.             file_name = os.path.normcase(file)
  518.             for compare_file_index, compare_file in enumerate(self.files):
  519.                 if file_name == os.path.normcase(compare_file.name):
  520.                     if occurrence < occurrence_counter:
  521.                         occurrence += 1
  522.                     else:
  523.                         return compare_file_index
  524.            
  525.             raise PakFileError("no files named %r found" % file)
  526.    
  527.     def folder_exists(self, folder):
  528.         """Returns True if folder exists, otherwise False."""
  529.         try:
  530.             self.get_folder_index(folder)
  531.             return True
  532.        
  533.         except IndexError:
  534.             return False
  535.        
  536.         except PakFileError:
  537.             return False
  538.    
  539.     def file_exists(self, file, match_full_path=False):
  540.         """Returns True if folder exists, otherwise False."""
  541.         try:
  542.             self.get_file_index(file, match_full_path=match_full_path)
  543.             return True
  544.        
  545.         except IndexError:
  546.             return False
  547.        
  548.         except PakFileError:
  549.             return False
  550.    
  551.     def get_subfolder_indices(self, target_folder=0):
  552.         """Yields index of each subfolder belonging to target_folder (can be index or path)."""
  553.         target_folder_index = self.get_folder_index(target_folder)
  554.         target_folder = self.folders[target_folder_index]
  555.         target_folder = self.get_comparable_path(target_folder)    
  556.        
  557.         for folder_index, folder in enumerate(self.folders[1::]):
  558.             folder_compare = self.get_comparable_path(folder)
  559.             folder_parent = os.path.dirname(folder_compare)
  560.             if folder_parent == target_folder:
  561.                 yield folder_index+1
  562.    
  563.     def add_folder(self, new_folder_path):
  564.         """Create new folder path in pak.
  565.        
  566.         new_folder_path - Path to create. Must not already exist.
  567.        
  568.         Returns index of newly created folder path.
  569.        
  570.         If folder already exists PakFileConflict is raised.
  571.        
  572.         Call PAK.save() to apply changes.
  573.         """
  574.         self._check()
  575.        
  576.         new_folder_path = os.path.normpath(new_folder_path)
  577.        
  578.         if len(new_folder_path) >= __class__.max_path:
  579.             raise PakFileError("folder path %r uses %d/%d characters" % (new_folder_path, len(new_folder_path), __class__.max_path))
  580.        
  581.         if len(new_folder_path) == 0:
  582.             raise PakFileError("0-length folder path")
  583.        
  584.         if self.folder_exists(new_folder_path):
  585.             raise PakFileConflict("folder %r already exists" % new_folder_path, self.get_folder_index(new_folder_path))
  586.        
  587.         self.folders += [new_folder_path]
  588.        
  589.         return len(self.folders) - 1
  590.  
  591.     def delete_folder(self, folder, delete_files=False):
  592.         """Delete folder in pak. Note that intermediate-level folders may only implicitly exist.
  593.        
  594.         folder - Index of or path to folder to delete.
  595.         delete_files - If True, all files will be deleted in all subfolders. Otherwise they are moved to root.
  596.        
  597.         If a folder is not found matching the index or exact path PakFileError is raised.
  598.        
  599.         Call PAK.save() to apply changes.
  600.         """
  601.         self._check()
  602.        
  603.         folder_index = self.get_folder_index(folder)
  604.        
  605.         if folder_index == 0:
  606.             raise PakFileError("cannot delete root")
  607.        
  608.         subfolder_indices = list(self.get_subfolder_indices(folder))
  609.        
  610.         for subfolder in subfolder_indices:
  611.             self.delete_folder(subfolder, delete_files=delete_files)
  612.        
  613.         file_indices_in_folder = []
  614.         files_indices_affected = []
  615.         for file_index, file in enumerate(self.files):
  616.             if file.folder_index == folder_index:
  617.                 file_indices_in_folder += [file_index]
  618.             elif file.folder_index > folder_index:
  619.                 # Decrement folder indices later after we're sure no exceptions were thrown
  620.                 files_indices_affected += [file]
  621.        
  622.         for file_index in file_indices_in_folder:
  623.             if delete_files:
  624.                 self.delete_file(file_index)
  625.             else:
  626.                 # Set each file objects folder index to pak root instead of deleting it
  627.                 self.files[file_index] = self.files[file_index]._replace(folder_index=0)
  628.        
  629.         del self.folders[folder_index]
  630.        
  631.         # File references to folder indices > folder_index must now be decremented by 1
  632.         for file in files_indices_affected:
  633.             file = file._replace(folder_index=file.folder_index-1)
  634.    
  635.     def add_file(self, path, external_path_or_data, compression=default_compression, allow_multiple_files=False, overwrite=False):
  636.         """Create new file in pak from data or from external file.
  637.        
  638.         path - Full path to local pak file to be created. Folders will be created automatically if necessary.
  639.         external_path_or_data - Path (must be str) to external file or bytes-like object for data.
  640.         compression - Valid values 0 (none) to 9 (high).
  641.         allow_multiple_files - If False, raises exception if a file with the name given already exists anywhere in pak.
  642.         overwrite - If True, if the file already exists in pak it will be overwritten, otherwise an exception is raised.
  643.        
  644.         Returns index of new file.
  645.        
  646.         Call PAK.save() to apply changes.
  647.         """
  648.         self._check()
  649.        
  650.         is_external_path = type(external_path_or_data) is str
  651.         if is_external_path:
  652.             external_path_or_data = os.path.abspath(external_path_or_data)
  653.            
  654.             if not os.path.exists(external_path_or_data):
  655.                 raise FileNotFoundError("external file %r not found" % external_path_or_data)
  656.            
  657.             if not os.path.isfile(external_path_or_data):
  658.                 raise IsADirectoryError("external file %r is actually a folder but file was expected" % external_path_or_data)
  659.        
  660.         folder_index = None
  661.         folder_portion = os.path.dirname(path)
  662.         file_index = None
  663.         file_name = os.path.basename(path)
  664.        
  665.         if len(file_name) >= __class__.max_path:
  666.             raise PakFileError("file name %r uses %d/%d characters" % (file_name, len(file_name), __class__.max_path))
  667.        
  668.         if len(file_name) == 0:
  669.             raise PakFileError("0-length file name")
  670.        
  671.         try:
  672.             self._validate_file_path(path, match_full_path=allow_multiple_files)
  673.         except PakFileConflict as exception:
  674.             if overwrite:
  675.                 file_index = exception.index
  676.                 folder_index = self.files[file_index].folder_index
  677.             else:
  678.                 raise exception
  679.        
  680.         if folder_index is None:
  681.             try:
  682.                 folder_index = self.get_folder_index(folder_portion)
  683.             except PakFileError as exception:
  684.                 folder_index = self.add_folder(folder_portion)
  685.        
  686.         name, ext = os.path.splitext(file_name)
  687.         if is_external_path:
  688.             file = FileExtern(folder_index, file_name, compression, external_path_or_data, os.path.getmtime(external_path_or_data))
  689.         else:
  690.             file = FileTemp(folder_index, file_name, compression, SpooledTemporaryFile(max_size=4096, suffix=ext, prefix="bz2pak"))
  691.             file.temporary_file.write(external_path_or_data)
  692.        
  693.         if file_index is not None:
  694.             self.files[file_index] = file
  695.         else:
  696.             file_index = len(self.files)
  697.             self.files += [file]
  698.        
  699.         return file_index
  700.    
  701.     def delete_file(self, file, match_full_path=False):
  702.         """Removes the file (index or path), raises exception if not found.
  703.        
  704.         file - Index, name, or full path (if match_full_path is True) of file to delete.
  705.         match_full_path - If True and file is a path, only a file matching the exact path wil be deleted.
  706.        
  707.         If the file is not found PakFileError is raised.
  708.        
  709.         Call PAK.save() to apply changes.
  710.         """
  711.         self._check()
  712.        
  713.         file_index = self.get_file_index(file, match_full_path=match_full_path)
  714.         del self.files[file_index]
  715.    
  716.     def move_file(self, file, new_path, match_full_path=False, allow_multiple_files=False, overwrite=False):
  717.         """Move existing file to new location.
  718.        
  719.         file - Index or name of file. If match_full_path is True then file is treated as absolute path instead of name.
  720.         new_path - New full path for pak file location. Folders will be created automatically where necessary.
  721.         match_full_path - If True and file is a path, only a file matching the exact path wil be moved.
  722.         compression - Valid values 0 (none) to 9 (high).
  723.         allow_multiple_files - If False, raises exception if a file with the name given already exists anywhere in pak.
  724.         overwrite - If True, if the file already exists in pak it will be overwritten, otherwise an exception is raised.
  725.        
  726.         Folders will be created automatically in new_path as needed.
  727.         An exception will be raised if file does not exist, or of new_path does exist and overwritse is False.
  728.        
  729.         Call PAK.save() to apply changes.
  730.         """
  731.         self._check()
  732.        
  733.         replace_file_index = None
  734.         file_index = self.get_file_index(file, match_full_path=match_full_path)
  735.        
  736.         folder_index = None
  737.         new_folder_portion = os.path.dirname(new_path)
  738.         new_file_name = os.path.basename(new_path)
  739.        
  740.         if len(new_file_name) >= __class__.max_path:
  741.             raise PakFileError("file name %r uses %d/%d characters" % (new_file_name, len(new_file_name), __class__.max_path))
  742.        
  743.         try:
  744.             self._validate_file_path(new_path, match_full_path=allow_multiple_files)
  745.         except PakFileConflict as exception:
  746.             if overwrite:
  747.                 replace_file_index = exception.index
  748.                 folder_index = self.files[new_file_index].folder_index
  749.             else:
  750.                 raise exception
  751.        
  752.         if folder_index is None:
  753.             try:
  754.                 folder_index = self.get_folder_index(new_folder_portion)
  755.             except PakFileError as exception:
  756.                 folder_index = self.add_folder(new_folder_portion)
  757.        
  758.         if replace_file_index:
  759.             self.files[replace_file_index] = self.files[file_index]
  760.             del self.files[file_index] # Overwrite
  761.             file_index = replace_file_index
  762.        
  763.         self.files[file_index] = self.files[file_index]._replace(folder_index=folder_index, name=new_file_name)
  764.    
  765.     def rename_file(self, file, new_name, match_full_path=False, allow_multiple_files=False, overwrite=False):
  766.         """Same as move_file, but a simple name can be given instead of a full path for the new name."""
  767.        
  768.         if any((c in "\\/") for c in new_name):
  769.             raise PakFileError("name %r contains invalid character %r" % (new_name, invalid))
  770.        
  771.         file_to_rename_index = self.get_file_index(file, match_full_path=match_full_path)
  772.         file_to_rename = self.files[file_to_rename_index]
  773.         folder = self.folders[file_to_rename.folder_index]
  774.         new_name_to_path = os.path.join(folder, new_name)
  775.         return self.move_file(file_to_rename_index, new_name_to_path, match_full_path=True, allow_multiple_files=allow_multiple_files, overwrite=overwrite)
  776.    
  777.     def dump(self, output_folder, pak_folder=0, full_paths=True, file_filter=None, overwrite=False):
  778.         """Export the file and folder structure of the pak (decompressing all files).
  779.        
  780.         output_folder - External folder to write all files and folders to.
  781.         pak_folder - Which pak folder to start exporting from. Can be str path or int (pak folder index).
  782.         full_paths - If True, pak folder paths will be created in output_folder for files to mirror pak hierarchy.
  783.         file_filter - If set, each file will be passed to the callable and only exported if it returns true.
  784.         overwrite - If True, existing files will be overwritten if they exist and files are to be written.
  785.         """
  786.         self._check()
  787.        
  788.         if not os.path.exists(output_folder):
  789.             os.makedirs(output_folder)
  790.        
  791.         if type(pak_folder) is int:
  792.             pak_folder = self.folders[pak_folder_index]
  793.        
  794.         pak_folder_comparable = self.get_comparable_path(pak_folder)
  795.        
  796.         for file in self.files:
  797.             folder = self.folders[file.folder_index]
  798.             folder_comparable = self.get_comparable_path(folder)
  799.            
  800.             if pak_folder_comparable != folder_comparable[0:len(pak_folder_comparable)]:
  801.                 continue # File is not inside of folder caller wanted to export
  802.            
  803.             if callable(file_filter) and not file_filter(file):
  804.                 continue
  805.            
  806.             if full_paths:
  807.                 file_external_path = os.path.join(output_folder, folder, file.name)
  808.                 folder_external_path = os.path.dirname(file_external_path)
  809.                 if not os.path.exists(folder_external_path):
  810.                     os.makedirs(folder_external_path)
  811.             else:
  812.                 file_external_path = os.path.join(output_folder, file.name)
  813.            
  814.             if os.path.exists(file_external_path):
  815.                 if os.path.isdir(file_external_path):
  816.                     raise IsADirectoryError("%r is a directory but should be a file" % file_external_path)
  817.                 elif not overwrite:
  818.                     raise FileExistsError("%r already exists" % file_external_path)
  819.            
  820.             with open(file_external_path, "wb") as f:
  821.                 f.write(self.read(file))
  822.  
  823. # Basic usage of PAK class (for pak reading only)
  824. if __name__ == "__main__":
  825.     # Iterate over all files in pak archive
  826.     for path in PAK(r"D:\Program Files (x86)\Battlezone II 1.2\data.pak"):
  827.         # Show 512 bytes of data from each file
  828.         if path[-4::].casefold() in [".txt"]:
  829.             with open(path, "rb") as f:
  830.                 print(path, "\n\t", f.read(512), "\n")
  831.    
  832.     # You can access files by index
  833.     with open(PAK(r"D:\Program Files (x86)\Battlezone II 1.2\data.pak")[5], "rb") as f: # Get 5th file from pak
  834.         print("Some data:", f.read(64))
  835.    
  836.     # And you can use slice notation
  837.     for path in PAK(r"D:\Program Files (x86)\Battlezone II 1.2\data.pak")[-5::]: # Get the last 5 files
  838.         print(path)
  839.  
  840. # Usage samples of PAK_Archive class
  841. if 0: #__name__ == "__main__":
  842.     # Loading pak files:
  843.     if os.path.exists("data.pak"):
  844.         pak = PAK_Archive("data.pak")
  845.  
  846.         # Exporting pak files:
  847.         pak.dump(
  848.             "Export Folder",
  849.             pak_folder="weapons/insane",
  850.             full_paths=True,
  851.             # Only export TXT and ODF files in "weapons/insane" folder (includes subfolders)
  852.             file_filter=lambda file: file.name.casefold()[-4::] in (".odf", ".txt"),
  853.             overwrite=False
  854.         )
  855.    
  856.     # Create new pak:
  857.     pak = PAK_Archive()
  858.     file_index = pak.add_file("Ancient Aliens/File.txt", b"Bytes-like-data", compression=0, overwrite=False)
  859.     pak.add_file("Code/Python/Old/bz2pak.py", __file__, compression=6, overwrite=False)
  860.     pak.move_file("bz2pak.py", "Code/Python/New/bz2pak moved.py", match_full_path=False, allow_multiple_files=False, overwrite=False)
  861.     pak.rename_file("bz2pak moved.py", "bz2pak renamed.py")
  862.    
  863.     # 3 ways to delete a pak file:
  864.     # pak.delete_file(file_index) # Index match
  865.     # pak.delete_file("file.txt") # Name match (only deletes first found)
  866.     pak.delete_file("Ancient Aliens/File.txt", match_full_path=True) # Exact path match
  867.    
  868.     pak.add_folder("Extras")
  869.     christian_folder = pak.add_folder("Extras/Christian")
  870.     f1s_file = pak.add_file("Extras/Christian/Feared_1.txt", b"God Fearing Christian Man is Saved", compression=0)
  871.     pak.rename_file(f1s_file, "Jesus is LORD.txt")
  872.     pak.delete_folder(christian_folder or "Extras/Christian", delete_files=False)
  873.    
  874.     # Iterating over and reading pak files:
  875.     for file in pak.files:
  876.         print(file)
  877.         data = pak.read(file)
  878.         print("\t", "(%d bytes)" % len(data), data[0:64], "\n")
  879.    
  880.     # Saving pak files:
  881.     pak.save("bz2pak.pak", keep_backup=False)
  882.     print(pak)
  883.  
Add Comment
Please, Sign In to add comment