Advertisement
Vzzarr

Pipe

Oct 26th, 2017
132
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 2.32 KB | None | 0 0
  1. try (final BwaSparkEngine engine = new BwaSparkEngine(ctx, referenceArguments.getReferenceFileName().replaceAll("2bit", "fasta"), indexImageFile, getHeaderForReads(), getReferenceSequenceDictionary())) {
  2.             final JavaRDD<GATKRead> alignedReads = engine.alignPaired(getReads());
  3.             final JavaRDD<GATKRead> markedReadsWithOD = MarkDuplicatesSpark.mark(alignedReads, engine.getHeader(), duplicatesScoringStrategy, new OpticalDuplicateFinder(), getRecommendedNumReducers());
  4.             final JavaRDD<GATKRead> markedReads = MarkDuplicatesSpark.cleanupTemporaryAttributes(markedReadsWithOD);
  5.  
  6.  
  7.  
  8.             if (joinStrategy == JoinStrategy.BROADCAST && ! getReference().isCompatibleWithSparkBroadcast())
  9.                 throw new UserException.Require2BitReferenceForBroadcast();
  10.  
  11.             // The initial reads have already had the WellformedReadFilter applied to them, which
  12.             // is all the filtering that ApplyBQSR wants. BQSR itself wants additional filtering
  13.             // performed, so we do that here.
  14.             // NOTE: this filter does not honor read filters enabled or disabled on the command line.
  15.             final ReadFilter bqsrReadFilter = ReadFilter.fromList(BaseRecalibrator.getBQSRSpecificReadFilterList(), getHeaderForReads());
  16.             final JavaRDD<GATKRead> filteredReadsForBQSR = markedReads.filter(read -> bqsrReadFilter.test(read));
  17.  
  18.             final VariantsSparkSource variantsSparkSource = new VariantsSparkSource(ctx);
  19.             final JavaRDD<GATKVariant> bqsrKnownVariants = variantsSparkSource.getParallelVariants(baseRecalibrationKnownVariantPaths, getIntervals());
  20.  
  21.             final JavaPairRDD<GATKRead, ReadContextData> rddReadContext = AddContextDataToReadSpark.add(ctx, filteredReadsForBQSR, getReference(), bqsrKnownVariants, baseRecalibrationKnownVariantPaths, joinStrategy, getHeaderForReads().getSequenceDictionary(), readShardSize, readShardPadding);
  22.             // Note: the sequence dictionary used here is taken from the reads' header.
  23.             final RecalibrationReport bqsrReport = BaseRecalibratorSparkFn.apply(rddReadContext, getHeaderForReads(), getHeaderForReads().getSequenceDictionary(), bqsrArgs);
  24.  
  25.             final Broadcast<RecalibrationReport> reportBroadcast = ctx.broadcast(bqsrReport);
  26.             final JavaRDD<GATKRead> finalReads = ApplyBQSRSparkFn.apply(markedReads, reportBroadcast, getHeaderForReads(), applyBqsrArgs.toApplyBQSRArgumentCollection(bqsrArgs.PRESERVE_QSCORES_LESS_THAN));
  27.  
  28.             writeReads(ctx, output, finalReads);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement