dbaranchuk

io_utils

Apr 23rd, 2021 (edited)
1,198
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.60 KB | None | 0 0
  1. import struct
  2. import numpy as np
  3.  
  4.  
  5. """
  6.                  IO Utils
  7. """
  8.  
  9.  
  10. def read_fbin(filename, start_idx=0, chunk_size=None):
  11.     """ Read *.fbin file that contains float32 vectors
  12.    Args:
  13.        :param filename (str): path to *.fbin file
  14.        :param start_idx (int): start reading vectors from this index
  15.        :param chunk_size (int): number of vectors to read.
  16.                                 If None, read all vectors
  17.    Returns:
  18.        Array of float32 vectors (numpy.ndarray)
  19.    """
  20.     with open(filename, "rb") as f:
  21.         nvecs, dim = np.fromfile(f, count=2, dtype=np.int32)
  22.         nvecs = (nvecs - start_idx) if chunk_size is None else chunk_size
  23.         arr = np.fromfile(f, count=nvecs * dim, dtype=np.float32,
  24.                           offset=start_idx * 4 * dim)
  25.     return arr.reshape(nvecs, dim)
  26.  
  27.  
  28. def read_ibin(filename, start_idx=0, chunk_size=None):
  29.     """ Read *.ibin file that contains int32 vectors
  30.    Args:
  31.        :param filename (str): path to *.ibin file
  32.        :param start_idx (int): start reading vectors from this index
  33.        :param chunk_size (int): number of vectors to read.
  34.                                 If None, read all vectors
  35.    Returns:
  36.        Array of int32 vectors (numpy.ndarray)
  37.    """
  38.     with open(filename, "rb") as f:
  39.         nvecs, dim = np.fromfile(f, count=2, dtype=np.int32)
  40.         nvecs = (nvecs - start_idx) if chunk_size is None else chunk_size
  41.         arr = np.fromfile(f, count=nvecs * dim, dtype=np.int32,
  42.                           offset=start_idx * 4 * dim)
  43.     return arr.reshape(nvecs, dim)
  44.  
  45.  
  46. def write_fbin(filename, vecs):
  47.     """ Write an array of float32 vectors to *.fbin file
  48.    Args:s
  49.        :param filename (str): path to *.fbin file
  50.        :param vecs (numpy.ndarray): array of float32 vectors to write
  51.    """
  52.     assert len(vecs.shape) == 2, "Input array must have 2 dimensions"
  53.     with open(filename, "wb") as f:
  54.         nvecs, dim = vecs.shape
  55.         f.write(struct.pack('<i', nvecs))
  56.         f.write(struct.pack('<i', dim))
  57.         vecs.astype('float32').flatten().tofile(f)
  58.  
  59.        
  60. def write_ibin(filename, vecs):
  61.     """ Write an array of int32 vectors to *.ibin file
  62.    Args:
  63.        :param filename (str): path to *.ibin file
  64.        :param vecs (numpy.ndarray): array of int32 vectors to write
  65.    """
  66.     assert len(vecs.shape) == 2, "Input array must have 2 dimensions"
  67.     with open(filename, "wb") as f:
  68.         nvecs, dim = vecs.shape
  69.         f.write(struct.pack('<i', nvecs))
  70.         f.write(struct.pack('<i', dim))
  71.         vecs.astype('int32').flatten().tofile(f)
Add Comment
Please, Sign In to add comment