SHARE
TWEET

Untitled

a guest Oct 21st, 2019 77 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. class CustomIterableDatasetv2(IterableDataset):
  2.  
  3.     def __init__(self, filename_en, filename_gm):
  4.  
  5.         #Store the filenames in object's memory
  6.         self.filename_en = filename_en
  7.         self.filename_gm = filename_gm
  8.  
  9.         #And that's it, we no longer need to store the contents in the memory
  10.  
  11.     def preprocess(self, text):
  12.  
  13.         ### Do something with text here
  14.         text_pp = text.split()
  15.         ###
  16.  
  17.         return text_pp
  18.  
  19.     def line_mapper(self, line):
  20.        
  21.         #We only have the text in the file for this case
  22.         text = line
  23.         text = self.preprocess(text)
  24.         return text
  25.  
  26.  
  27.     def __iter__(self):
  28.  
  29.         #Create an iterator
  30.         en_itr = open(self.filename_en)
  31.         gm_itr = open(self.filename_gm)
  32.        
  33.         #Map each element using the line_mapper
  34.         mapped_en_itr = map(self.line_mapper, en_itr)
  35.         mapped_gm_itr = map(self.line_mapper, gm_itr)
  36.        
  37.         #Zip both iterators
  38.         zipped_itr = zip(mapped_en_itr, mapped_gm_itr)
  39.        
  40.         return zipped_itr
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top