Advertisement
Guest User

Untitled

a guest
Jun 26th, 2019
402
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.94 KB | None | 0 0
  1. UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no
  2. predicted samples.
  3.  
  4. 'precision', 'predicted', average, warn_for)
  5. precision recall f1-score support
  6.  
  7. non-VPN 0.81 1.00 0.89 29432
  8. VPN 0.00 0.00 0.00 6973
  9.  
  10. micro avg 0.81 0.81 0.81 36405
  11. macro avg 0.40 0.50 0.45 36405
  12. weighted avg 0.65 0.81 0.72 36405
  13.  
  14. def data_generotto(path: str, batchsize: int):
  15. while True:
  16. chunks = pd.read_csv(os.path.join(
  17. path, "shuffled.csv"), sep=';', chunksize=batchsize)
  18.  
  19. for i, chunk in enumerate(chunks):
  20. X, y = preprocess.preprocess(chunk)
  21.  
  22. # X = np.array(X).reshape(X.shape[0], 1, X.shape[1])
  23.  
  24. yield (X, y)
  25.  
  26. # sorry for messy code
  27. def balance_train_data(data, fold_count=3):
  28. """Balance the data using sklearn.utils resample to max sentiment count."""
  29. balanced_data = pd.DataFrame()
  30. data_dict = dict(data['label'].value_counts())
  31.  
  32. for label in data_dict.keys():
  33. df = data[data.label == label]
  34. samples_count = int(
  35. (max(data_dict.values()) - data_dict[label])/fold_count)
  36. df_up = resample(df, replace=True,
  37. n_samples=samples_count, random_state=42)
  38.  
  39. print("Resampled {} tweets: {} + {} = {}".format(label,
  40. len(df), len(df_up), len(df)+len(df_up)))
  41. balanced_data = pd.concat([balanced_data, df, df_up])
  42. return shuffle(balanced_data, random_state=42)
  43.  
  44. def create_model(model_folder_name):
  45. global folder_name
  46. folder_name = model_folder_name
  47. model = Sequential()
  48.  
  49. model.add(Dense(8, activation='relu', input_dim=4))
  50. model.add(Dense(4, kernel_initializer='uniform', activation='relu'))
  51. model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
  52.  
  53. optimizer = optimizers.Adam(lr=0.0001)
  54.  
  55. model.compile(optimizer=optimizer, loss="binary_crossentropy",
  56. metrics=['accuracy'])
  57. model.summary(print_fn=myprint)
  58.  
  59. return model, optimizer.get_config(), "ann"
  60.  
  61. model.fit_generator(data_generotto(
  62. "./complete_csv", BS), steps_per_epoch=TRAIN_SIZE // BS, epochs=EPOCHS, callbacks=[es])
  63.  
  64. save_model(model, f"./models/{model_folder_name}/MODEL.h5")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement