Advertisement
Guest User

Untitled

a guest
Jun 20th, 2019
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.91 KB | None | 0 0
  // Builds a class-balanced training set from `trainDF` by randomly downsampling
  // (without replacement) whichever of the two label classes has more rows, then
  // unioning the sampled rows with the untouched smaller class.
  //
  // Rows whose `label` column equals "1" are treated as positives; rows whose
  // label compares unequal to "1" are treated as negatives.
  // NOTE(review): `sample` is called without a seed, so the result is presumably
  // different on every run — pass a seed if reproducibility matters.
  val resampledTrainDF = {
    val positiveLabel = "1"
    val trainDF_positives = trainDF.where(F.col(label) === positiveLabel)
    val trainDF_negatives = trainDF.where(F.col(label) =!= positiveLabel)

    // `count` is an action; evaluate it once per class instead of on every use.
    val positiveCount = trainDF_positives.count
    val negativeCount = trainDF_negatives.count

    // Fraction of the larger class to keep so that it roughly matches the smaller one.
    // The exact ratio is used for sampling: rounding it first would turn any ratio
    // below 0.0005 into 0.0 and silently drop the entire larger class.
    // The zero guard covers the case where both classes are empty (0 / 0).
    def keepFraction(smallerCount: Long, largerCount: Long): Double =
      if (largerCount == 0L) 0.0 else smallerCount.toDouble / largerCount

    // Three-decimal rounding, used only for the log message.
    def roundedForLog(fraction: Double): Double = math.round(fraction * 1000) / 1000.0

    if (positiveCount >= negativeCount) {
      // downsampling positives
      val fraction = keepFraction(negativeCount, positiveCount)
      println(s"Downsampling Positives by ${(1 - roundedForLog(fraction)) * 100} %")
      trainDF_positives
        .sample(withReplacement = false, fraction = fraction)
        .union(trainDF_negatives)
    } else {
      // downsampling negatives
      val fraction = keepFraction(positiveCount, negativeCount)
      println(s"Downsampling Negatives by ${(1 - roundedForLog(fraction)) * 100}%")
      trainDF_negatives
        .sample(withReplacement = false, fraction = fraction)
        .union(trainDF_positives)
    }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement