Guest User

Untitled

a guest
Jan 19th, 2019
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.45 KB | None | 0 0
  1. 38 107 C 3 T 6 C/T
  2. 38 241 C 4 T 5 C/T
  3. 38 247 T 4 C 5 T/C
  4. 38 259 T 3 C 6 T/C
  5. 38 275 G 3 A 5 G/A
  6. 38 304 C 4 T 5 C/T
  7. 38 323 T 3 A 5 T/A
  8.  
  9. 38 107 C 8 T 8 C/T
  10. 38 222 - 6 A 7 -/A
  11. 38 241 C 7 T 10 C/T
  12. 38 247 T 7 C 10 T/C
  13. 38 259 T 7 C 10 T/C
  14. 38 275 G 6 A 11 G/A
  15. 38 304 C 5 T 12 C/T
  16. 38 323 T 4 A 12 T/A
  17. 38 343 G 13 A 5 G/A
  18.  
  19. 107
  20. 222
  21. 241
  22. 247
  23. 259
  24. 275
  25. 304
  26. 323
  27. 343
  28.  
  29. Position File1_Ref File1_Alt File2_Ref File2_Alt
  30. 107 3 6 8 8
  31. 222 6 7
  32. 241 4 5 7 10
  33. 247 4 5 7 10
  34. 259 3 6 7 10
  35. 275 3 5 6 11
  36. 304 4 5 5 12
  37. 323 3 5 4 12
  38. 343 13 5
  39.  
  40. # add file1
  41. $ join -e' ' -1 1 -2 2 -a 1 -o 0,2.4,2.6 <(sort -n index) <(sort -n -k2 file1) > file1.merged
  42.  
  43. # add file2
  44. $ join -e' ' -1 1 -2 2 -a 1 -o 0,1.2,1.3,2.4,2.6 file1.merged <(sort -n -k2 file2) > file2.merged
  45.  
  46. # create the header
  47. $ echo "Position File1_Ref File1_Alt File2_Ref File2_Alt" > report
  48. $ cat file2.merged >> report
  49.  
  50. $ cat report
  51.  
  52. Position File1_Ref File1_Alt File2_Ref File2_Alt
  53. 107 3 6 8 8
  54. 222 6 7
  55. 241 4 5 7 10
  56. 247 4 5 7 10
  57. 259 3 6 7 10
  58. 275 3 5 6 11
  59. 304 4 5 5 12
  60. 323 3 5 4 12
  61. 323 4 12 4 12
  62. 343 13 5 13 5
  63.  
  64. #!/bin/bash
  65.  
  66. INDEX_FILE=index # the name of the file containing the index data
  67. REPORT_FILE=report # the file to write the report to
  68. TMP_FILE=$(mktemp) # a temporary file
  69.  
  70. header="Position" # the report header
  71. num_processed=0 # the number of files processed so far
  72.  
  73. # loop over all files beginning with "file".
  74. # this pattern can be changed to something else e.g. *.vcf
  75. for file in file*
  76. do
  77. echo "Processing $file"
  78. if [[ $num_processed -eq 0 ]]
  79. then
  80. # it's the first file so use the INDEX file in the join
  81. join -e' ' -t, -1 1 -2 2 -a 1 -o 0,2.4,2.6 <(sort -n "$INDEX_FILE") <(sed 's/ +/,/g' "$file") > "$TMP_FILE"
  82. else
  83. # work out the output fields
  84. for ((outputFields="0",j=2; j < $((2 + $num_processed * 2)); j++))
  85. do
  86. outputFields="$outputFields,1.$j"
  87. done
  88. outputFields="$outputFields,2.4,2.6"
  89.  
  90. # join this file with the current report
  91. join -e' ' -t, -1 1 -2 2 -a 1 -o "$outputFields" "$REPORT_FILE" <(sed 's/ +/,/g' "$file") > "$TMP_FILE"
  92. fi
  93. ((num_processed++))
  94. header="$header,File${num_processed}_Ref,File${num_processed}_Alt"
  95. mv "$TMP_FILE" "$REPORT_FILE"
  96. done
  97.  
  98. # add the header to the report
  99. echo "$header" | cat - "$REPORT_FILE" > "$TMP_FILE" && mv "$TMP_FILE" "$REPORT_FILE"
  100.  
  101. # the report is a csv file. Uncomment the line below to make it space-separated.
  102. # tr ',' ' ' < "$REPORT_FILE" > "$TMP_FILE" && mv "$TMP_FILE" "$REPORT_FILE"
  103.  
  104. #!/usr/bin/perl
  105. use strict;
  106. use warnings;
  107. use File::Slurp qw/ slurp /;
  108. use Text::Table;
  109.  
  110. my $path = '.';
  111. my @file = qw/ o33.txt o44.txt /;
  112. my @position = slurp('index.txt') =~ /d+/g;
  113. my %data;
  114.  
  115. for my $filename (@file) {
  116. open my $fh, "$path/$filename" or die "Can't open $filename $!";
  117. while (<$fh>) {
  118. my ($pos, $ref, $alt) = (split)[1, 3, 5];
  119. $data{$pos}{$filename} = [$ref, $alt];
  120. }
  121. close $fh or die "Can't close $filename $!";
  122. }
  123.  
  124. my @head;
  125. for my $file (@file) {
  126. push @head, "${file}_Ref", "${file}_Alt";
  127. }
  128.  
  129. my $tb = Text::Table->new( map {title => $_}, "Position", @head);
  130.  
  131. for my $pos (@position) {
  132. $tb->load( [
  133. $pos,
  134. map $data{$pos}{$_} ? @{ $data{$pos}{$_} } : ('', ''), @file
  135. ]
  136. );
  137. }
  138. print $tb;
Add Comment
Please, Sign In to add comment