Advertisement
Guest User

Untitled

a guest
Mar 23rd, 2017
79
0
Never
Not a member of Pastebin yet? Sign Up — it unlocks many cool features!
text 0.57 KB | None | 0 0
  1. def setMinPartitions(sc: SparkContext, context: JobContext, minPartitions: Int) {
  2. val defaultMaxSplitBytes = sc.getConf.get(config.FILES_MAX_PARTITION_BYTES)
  3. val openCostInBytes = sc.getConf.get(config.FILES_OPEN_COST_IN_BYTES)
  4. val defaultParallelism = sc.defaultParallelism
  5. val files = listStatus(context).asScala
  6. val totalBytes = files.filterNot(_.isDirectory).map(_.getLen + openCostInBytes).sum
  7. val bytesPerCore = totalBytes / defaultParallelism
  8. val maxSplitSize = Math.min(defaultMaxSplitBytes, Math.max(openCostInBytes, bytesPerCore))
  9. super.setMaxSplitSize(maxSplitSize)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement