Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- try (final BwaSparkEngine engine = new BwaSparkEngine(ctx, referenceArguments.getReferenceFileName().replaceAll("2bit", "fasta"), indexImageFile, getHeaderForReads(), getReferenceSequenceDictionary())) {
- final JavaRDD<GATKRead> alignedReads = engine.alignPaired(getReads());
- final JavaRDD<GATKRead> markedReadsWithOD = MarkDuplicatesSpark.mark(alignedReads, engine.getHeader(), duplicatesScoringStrategy, new OpticalDuplicateFinder(), getRecommendedNumReducers());
- final JavaRDD<GATKRead> markedReads = MarkDuplicatesSpark.cleanupTemporaryAttributes(markedReadsWithOD);
- if (joinStrategy == JoinStrategy.BROADCAST && ! getReference().isCompatibleWithSparkBroadcast())
- throw new UserException.Require2BitReferenceForBroadcast();
- // The initial reads have already had the WellformedReadFilter applied to them, which
- // is all the filtering that ApplyBQSR wants. BQSR itself wants additional filtering
- // performed, so we do that here.
- //NOTE: this filter doesn't honor enabled/disabled commandline filters
- final ReadFilter bqsrReadFilter = ReadFilter.fromList(BaseRecalibrator.getBQSRSpecificReadFilterList(), getHeaderForReads());
- final JavaRDD<GATKRead> filteredReadsForBQSR = markedReads.filter(read -> bqsrReadFilter.test(read));
- final VariantsSparkSource variantsSparkSource = new VariantsSparkSource(ctx);
- final JavaRDD<GATKVariant> bqsrKnownVariants = variantsSparkSource.getParallelVariants(baseRecalibrationKnownVariantPaths, getIntervals());
- final JavaPairRDD<GATKRead, ReadContextData> rddReadContext = AddContextDataToReadSpark.add(ctx, filteredReadsForBQSR, getReference(), bqsrKnownVariants, baseRecalibrationKnownVariantPaths, joinStrategy, getHeaderForReads().getSequenceDictionary(), readShardSize, readShardPadding);
- //note: we use the reference dictionary from the reads themselves.
- final RecalibrationReport bqsrReport = BaseRecalibratorSparkFn.apply(rddReadContext, getHeaderForReads(), getHeaderForReads().getSequenceDictionary(), bqsrArgs);
- final Broadcast<RecalibrationReport> reportBroadcast = ctx.broadcast(bqsrReport);
- final JavaRDD<GATKRead> finalReads = ApplyBQSRSparkFn.apply(markedReads, reportBroadcast, getHeaderForReads(), applyBqsrArgs.toApplyBQSRArgumentCollection(bqsrArgs.PRESERVE_QSCORES_LESS_THAN));
- writeReads(ctx, output, finalReads);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement