Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Balances the two label classes of `trainDF`: whichever class has more rows is
// randomly downsampled (without replacement) to roughly the size of the other
// class, and the two parts are unioned back together.
val resampledTrainDF = {
  val positiveLabel = "1"
  val positives = trainDF.where(F.col(label) === positiveLabel)
  val negatives = trainDF.where(F.col(label) =!= positiveLabel)

  // `count` is a Spark action, so run it once per class instead of on every use.
  val positiveCount = positives.count
  val negativeCount = negatives.count

  // Exact minority/majority ratio handed to `sample`. It is deliberately NOT
  // rounded: rounding to three decimals turns any ratio below 0.0005 into 0.0,
  // which would discard the entire majority class on heavily imbalanced data.
  // The zero guard covers the empty-input case (0 / 0), where the ratio would
  // otherwise be NaN.
  def keepFraction(minorityCount: Long, majorityCount: Long): Double =
    if (majorityCount == 0L) 0.0 else minorityCount.toDouble / majorityCount

  // Three-decimal rounding, used only for the progress message.
  def forDisplay(fraction: Double): Double = math.round(fraction * 1000) / 1000.0

  if (positiveCount >= negativeCount) {
    // Positives are the majority (or the classes are tied): downsample positives.
    val fraction = keepFraction(negativeCount, positiveCount)
    println("Downsampling Positives by " + (1 - forDisplay(fraction)) * 100 + " %")
    positives.sample(withReplacement = false, fraction = fraction).union(negatives)
  } else {
    // Negatives are the majority: downsample negatives.
    val fraction = keepFraction(positiveCount, negativeCount)
    println("Downsampling Negatives by " + (1 - forDisplay(fraction)) * 100 + "%")
    negatives.sample(withReplacement = false, fraction = fraction).union(positives)
  }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement