Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env nextflow
- /*
- * Pipeline input data and defaults.
- * Each 'params.*' value is a default that can be overridden on the command line
- * (e.g. --query, --db, --out, --chunkSize).
- */
- // query FASTA file(s) to search with
- params.query = "$baseDir/data/sample.fa"
- // BLAST database location: parent directory plus database name
- params.db = "$baseDir/blast-db/pdb/tiny"
- // name of the file that collects the matching sequences
- params.out = "result.txt"
- // number of sequences placed in each chunk of the query file
- params.chunkSize = 5
- // last path component of 'params.db', used as the database name
- db_name = file(params.db).name
- // directory holding the database files, passed to the processes as an input
- db_path = file(params.db).parent
- /*
- * Creates a channel emitting the query FASTA file(s) given by 'params.query',
- * splits each file into chunks holding 'params.chunkSize' sequences,
- * and assigns the resulting channel to the variable 'fasta'.
- */
- fasta = Channel.fromPath(params.query).splitFasta(by: params.chunkSize)
- /*
- * Executes a BLAST job for each chunk emitted by the 'fasta' channel
- * and creates as output a channel named 'top_hits' emitting the BLAST matches.
- */
- process blast {
- input:
- // each chunk is made available to the task under the fixed name 'query.fa'
- file 'query.fa' from fasta
- // directory containing the BLAST database files
- file db_path
- output:
- file top_hits
- // '-outfmt 6' is BLAST's tabular output; the second column of the first
- // 10 result lines is written to 'top_hits'
- script:
- """
- blastp -db $db_path/$db_name -query query.fa -outfmt 6 > blast_result
- head -n 10 blast_result | cut -f 2 > top_hits
- """
- }
- /*
- * Runs one extract job for each file emitted by the 'top_hits' channel,
- * producing a 'sequences' file that contains the matching sequences.
- */
- process extract {
- input:
- file top_hits
- file db_path
- output:
- file sequences
- // looks up the entries listed in 'top_hits' in the database and keeps
- // the first 10 lines of the output
- script:
- """
- blastdbcmd -db $db_path/$db_name -entry_batch top_hits | head -n 10 > sequences
- """
- }
- /*
- * Collects all the 'sequences' files into a single file named by 'params.out'
- * and prints the content of that file once it is complete.
- * Uses 'view' rather than the deprecated 'println' channel operator.
- */
- sequences
- .collectFile(name: params.out)
- .view { merged -> "matching sequences:\n ${merged.text}" }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement