Untitled

#!/usr/bin/env nextflow

/*
 * Defining pipeline input data
 */
// Default pipeline parameters.
params.query     = "$baseDir/data/sample.fa"      // query FASTA file(s)
params.db        = "$baseDir/blast-db/pdb/tiny"   // BLAST database path (directory + db name)
params.out       = "result.txt"                   // name of the merged output file
params.chunkSize = 5                              // sequences per FASTA chunk

// Resolve the database location once, then split it into the containing
// directory and the database name, which the processes use separately.
def blastDb = file(params.db)
db_path = blastDb.parent
db_name = blastDb.name
/*
 * Creates a channel that emits the query FASTA file(s) given by the 'query' parameter,
 * split into chunks containing as many sequences as defined by 'chunkSize'.
 * The resulting channel is assigned to the variable 'fasta'.
 */
// Emit the query file(s), split into chunks of 'chunkSize' sequences,
// and bind the resulting channel to 'fasta'.
fasta = Channel.fromPath(params.query).splitFasta(by: params.chunkSize)
/*
 * Executes a BLAST job for each chunk emitted by the 'fasta' channel
 * and creates as output a channel named 'top_hits' emitting BLAST matches
 */
process blast {
    // Inputs:
    //   - each item taken from the 'fasta' channel, made available to the
    //     script under the fixed name 'query.fa'
    //   - 'db_path', the parent directory of params.db (set at script level)
    input:
    file 'query.fa' from fasta
    file db_path

    // Output: the file 'top_hits' written by the script below.
    // No explicit 'into' is given; the 'extract' process consumes it by the
    // same name.
    output:
    file top_hits

    // Runs blastp against <db_path>/<db_name> with tabular output (-outfmt 6),
    // saving the full result to 'blast_result'. Then keeps the first 10 lines
    // and writes only their second tab-separated column to 'top_hits'
    // (presumably the hit/subject identifier -- confirm against BLAST+ docs).
    """
    blastp -db $db_path/$db_name -query query.fa -outfmt 6 > blast_result
    cat blast_result | head -n 10 | cut -f 2 > top_hits
    """
}
/*
 * Each time a file is emitted by the 'top_hits' channel, an extract job is executed
 * producing a file containing the matching sequences
 */
process extract {
    // Inputs:
    //   - 'top_hits', the file declared as output of the 'blast' process
    //   - 'db_path', the parent directory of params.db (set at script level)
    input:
    file top_hits
    file db_path

    // Output: the file 'sequences' written by the script below; it is
    // consumed at script level by the final collectFile step.
    output:
    file sequences

    // Passes 'top_hits' to blastdbcmd as an entry batch against
    // <db_path>/<db_name> and keeps only the first 10 lines of what it prints.
    """
    blastdbcmd -db $db_path/$db_name -entry_batch top_hits | head -n 10 > sequences
    """
}
/*
 * Collects all the sequences files into a single file
 * and prints the resulting file content when complete
 */
// Merge every file emitted by the 'sequences' channel into a single file
// named by params.out, then print that file's content.
// 'view' is used instead of the deprecated 'println' operator; it prints the
// closure's result followed by a newline, so the output text is unchanged.
// The closure parameter is named 'merged' so that it does not shadow the
// built-in file() helper.
sequences
    .collectFile(name: params.out)
    .view { merged -> "matching sequences:\n ${merged.text}" }