Advertisement
Guest User

Untitled

a guest
Sep 15th, 2019
111
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.68 KB | None | 0 0
  1. DATA_DIR="."
  2.  
  3. # load Pickle file
  4. def load_ds(fname=os.path.join(DATA_DIR,'/atis.train.pkl'), verbose=True):
  5. with open(fname, 'rb') as stream:
  6. ds,dicts = pickle.load(stream)
  7. if verbose:
  8. print('Done loading: ', fname)
  9. print(' samples: {:4d}'.format(len(ds['query'])))
  10. print(' vocab_size: {:4d}'.format(len(dicts['token_ids'])))
  11. print(' slot count: {:4d}'.format(len(dicts['slot_ids'])))
  12. print(' intent count: {:4d}'.format(len(dicts['intent_ids'])))
  13. return ds,dicts
  14.  
  15. # convert Pickle file to arrays
  16. def load_atis(filename, add_start_end_token=False, verbose=True):
  17. train_ds, dicts = load_ds(os.path.join(DATA_DIR,filename), verbose)
  18. t2i, s2i, in2i = map(dicts.get, ['token_ids', 'slot_ids','intent_ids'])
  19. i2t, i2s, i2in = map(lambda d: {d[k]:k for k in d.keys()}, [t2i,s2i,in2i])
  20. query, slots, intent = map(train_ds.get, ['query', 'slot_labels', 'intent_labels'])
  21.  
  22. if add_start_end_token:
  23. i2s[178] = 'BOS'
  24. i2s[179] = 'EOS'
  25. s2i['BOS'] = 178
  26. s2i['EOS'] = 179
  27.  
  28. input_tensor = []
  29. target_tensor = []
  30. query_data = []
  31. intent_data = []
  32. slot_data = []
  33. to_show = np.random.randint(0, len(query)-1, 5)
  34. for i in range(len(query)):
  35. input_tensor.append(query[i])
  36. slot_text = []
  37. slot_vector = []
  38. for j in range(len(query[i])):
  39. slot_text.append(i2s[slots[i][j]])
  40. slot_vector.append(slots[i][j])
  41. if add_start_end_token:
  42. slot_text[0] = 'BOS'
  43. slot_vector[0] = 178
  44. slot_text[-1] = 'EOS'
  45. slot_vector[-1]= 179
  46. target_tensor.append(slot_vector)
  47. q = ' '.join(map(i2t.get, query[i]))
  48. query_data.append(q.replace('BOS', '').replace('EOS',''))
  49. intent_data.append(i2in[intent[i][0]])
  50. slot = ' '.join(slot_text)
  51. slot_data.append(slot[1:-1])
  52. if i in to_show and verbose:
  53. print('Query text:', q)
  54. print('Query vector: ', query[i])
  55. print('Intent label: ', i2in[intent[i][0]])
  56. print('Slot text: ', slot)
  57. print('Slot vector: ', slot_vector)
  58. print('*'*74)
  59. query_data = np.array(query_data)
  60. intent_data = np.array(intent_data)
  61. slot_data = np.array(slot_data)
  62. intent_data_label = np.array(intent).flatten()
  63. return t2i, s2i, in2i, i2t, i2s, i2in, input_tensor, target_tensor,
  64. query_data, intent_data, intent_data_label, slot_data
  65.  
  66. # load ATIS training dataset
  67. t2i_train, s2i_train, in2i_train, i2t_train, i2s_train, i2in_train, \
  68. input_tensor_train, target_tensor_train, \
  69. query_data_train, intent_data_train, intent_data_label_train, slot_data_train = load_atis('atis.train.pkl')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement