dbaranchuk

io_utils

Apr 23rd, 2021 (edited)
174
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import struct
  2. import numpy as np
  3.  
  4.  
  5. """
  6.                  IO Utils
  7. """
  8.  
  9.  
  10. def read_fbin(filename, start_idx=0, chunk_size=None):
  11.     """ Read *.fbin file that contains float32 vectors
  12.    Args:
  13.        :param filename (str): path to *.fbin file
  14.        :param start_idx (int): start reading vectors from this index
  15.        :param chunk_size (int): number of vectors to read.
  16.                                 If None, read all vectors
  17.    Returns:
  18.        Array of float32 vectors (numpy.ndarray)
  19.    """
  20.     with open(filename, "rb") as f:
  21.         nvecs, dim = np.fromfile(f, count=2, dtype=np.int32)
  22.         nvecs = (nvecs - start_idx) if chunk_size is None else chunk_size
  23.         arr = np.fromfile(f, count=nvecs * dim, dtype=np.float32,
  24.                           offset=start_idx * 4 * dim)
  25.     return arr.reshape(nvecs, dim)
  26.  
  27.  
  28. def read_ibin(filename, start_idx=0, chunk_size=None):
  29.     """ Read *.ibin file that contains int32 vectors
  30.    Args:
  31.        :param filename (str): path to *.ibin file
  32.        :param start_idx (int): start reading vectors from this index
  33.        :param chunk_size (int): number of vectors to read.
  34.                                 If None, read all vectors
  35.    Returns:
  36.        Array of int32 vectors (numpy.ndarray)
  37.    """
  38.     with open(filename, "rb") as f:
  39.         nvecs, dim = np.fromfile(f, count=2, dtype=np.int32)
  40.         nvecs = (nvecs - start_idx) if chunk_size is None else chunk_size
  41.         arr = np.fromfile(f, count=nvecs * dim, dtype=np.int32,
  42.                           offset=start_idx * 4 * dim)
  43.     return arr.reshape(nvecs, dim)
  44.  
  45.  
  46. def write_fbin(filename, vecs):
  47.     """ Write an array of float32 vectors to *.fbin file
  48.    Args:s
  49.        :param filename (str): path to *.fbin file
  50.        :param vecs (numpy.ndarray): array of float32 vectors to write
  51.    """
  52.     assert len(vecs.shape) == 2, "Input array must have 2 dimensions"
  53.     with open(filename, "wb") as f:
  54.         nvecs, dim = vecs.shape
  55.         f.write(struct.pack('<i', nvecs))
  56.         f.write(struct.pack('<i', dim))
  57.         vecs.astype('float32').flatten().tofile(f)
  58.  
  59.        
  60. def write_ibin(filename, vecs):
  61.     """ Write an array of int32 vectors to *.ibin file
  62.    Args:
  63.        :param filename (str): path to *.ibin file
  64.        :param vecs (numpy.ndarray): array of int32 vectors to write
  65.    """
  66.     assert len(vecs.shape) == 2, "Input array must have 2 dimensions"
  67.     with open(filename, "wb") as f:
  68.         nvecs, dim = vecs.shape
  69.         f.write(struct.pack('<i', nvecs))
  70.         f.write(struct.pack('<i', dim))
  71.         vecs.astype('int32').flatten().tofile(f)
RAW Paste Data