Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- 38 107 C 3 T 6 C/T
- 38 241 C 4 T 5 C/T
- 38 247 T 4 C 5 T/C
- 38 259 T 3 C 6 T/C
- 38 275 G 3 A 5 G/A
- 38 304 C 4 T 5 C/T
- 38 323 T 3 A 5 T/A
- 38 107 C 8 T 8 C/T
- 38 222 - 6 A 7 -/A
- 38 241 C 7 T 10 C/T
- 38 247 T 7 C 10 T/C
- 38 259 T 7 C 10 T/C
- 38 275 G 6 A 11 G/A
- 38 304 C 5 T 12 C/T
- 38 323 T 4 A 12 T/A
- 38 343 G 13 A 5 G/A
- 107
- 222
- 241
- 247
- 259
- 275
- 304
- 323
- 343
- Position File1_Ref File1_Alt File2_Ref File2_Alt
- 107 3 6 8 8
- 222 6 7
- 241 4 5 7 10
- 247 4 5 7 10
- 259 3 6 7 10
- 275 3 5 6 11
- 304 4 5 5 12
- 323 3 5 4 12
- 343 13 5
- # add file1
- $ join -e' ' -1 1 -2 2 -a 1 -o 0,2.4,2.6 <(sort -n index) <(sort -n -k2 file1) > file1.merged
- # add file2
- $ join -e' ' -1 1 -2 2 -a 1 -o 0,1.2,1.3,2.4,2.6 file1.merged <(sort -n -k2 file2) > file2.merged
- # create the header
- $ echo "Position File1_Ref File1_Alt File2_Ref File2_Alt" > report
- $ cat file2.merged >> report
- $ cat report
- Position File1_Ref File1_Alt File2_Ref File2_Alt
- 107 3 6 8 8
- 222 6 7
- 241 4 5 7 10
- 247 4 5 7 10
- 259 3 6 7 10
- 275 3 5 6 11
- 304 4 5 5 12
- 323 3 5 4 12
- 323 4 12 4 12
- 343 13 5 13 5
- #!/bin/bash
- INDEX_FILE=index # the name of the file containing the index data
- REPORT_FILE=report # the file to write the report to
- TMP_FILE=$(mktemp) # a temporary file
- header="Position" # the report header
- num_processed=0 # the number of files processed so far
- # loop over all files beginning with "file".
- # this pattern can be changed to something else e.g. *.vcf
- for file in file*
- do
- echo "Processing $file"
- if [[ $num_processed -eq 0 ]]
- then
- # it's the first file so use the INDEX file in the join
- join -e' ' -t, -1 1 -2 2 -a 1 -o 0,2.4,2.6 <(sort -n "$INDEX_FILE") <(sed 's/ +/,/g' "$file") > "$TMP_FILE"
- else
- # work out the output fields
- for ((outputFields="0",j=2; j < $((2 + $num_processed * 2)); j++))
- do
- outputFields="$outputFields,1.$j"
- done
- outputFields="$outputFields,2.4,2.6"
- # join this file with the current report
- join -e' ' -t, -1 1 -2 2 -a 1 -o "$outputFields" "$REPORT_FILE" <(sed 's/ +/,/g' "$file") > "$TMP_FILE"
- fi
- ((num_processed++))
- header="$header,File${num_processed}_Ref,File${num_processed}_Alt"
- mv "$TMP_FILE" "$REPORT_FILE"
- done
- # add the header to the report
- echo "$header" | cat - "$REPORT_FILE" > "$TMP_FILE" && mv "$TMP_FILE" "$REPORT_FILE"
- # the report is a csv file. Uncomment the line below to make it space-separated.
- # tr ',' ' ' < "$REPORT_FILE" > "$TMP_FILE" && mv "$TMP_FILE" "$REPORT_FILE"
- #!/usr/bin/perl
- use strict;
- use warnings;
- use File::Slurp qw/ slurp /;
- use Text::Table;
- my $path = '.';
- my @file = qw/ o33.txt o44.txt /;
- my @position = slurp('index.txt') =~ /d+/g;
- my %data;
- for my $filename (@file) {
- open my $fh, "$path/$filename" or die "Can't open $filename $!";
- while (<$fh>) {
- my ($pos, $ref, $alt) = (split)[1, 3, 5];
- $data{$pos}{$filename} = [$ref, $alt];
- }
- close $fh or die "Can't close $filename $!";
- }
- my @head;
- for my $file (@file) {
- push @head, "${file}_Ref", "${file}_Alt";
- }
- my $tb = Text::Table->new( map {title => $_}, "Position", @head);
- for my $pos (@position) {
- $tb->load( [
- $pos,
- map $data{$pos}{$_} ? @{ $data{$pos}{$_} } : ('', ''), @file
- ]
- );
- }
- print $tb;
Add Comment
Please, Sign In to add comment