Guest User

Untitled

a guest
Jun 20th, 2018
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.65 KB | None | 0 0
  1. E00489:44:HNNVYCCXX:3:1101:24890:5616 99 22 16052014 150M
  2. E00489:44:HNNVYCCXX:1:1110:21704:27345 99 22 16052044 150M
  3. E00489:44:HNNVYCCXX:1:1217:2372:69519 163 22 16052044 150M
  4. E00489:44:HNNVYCCXX:3:2123:8948:16779 99 22 16052044 150M
  5. E00489:44:HNNVYCCXX:2:2213:2920:25534 147 22 16052054 146M4S
  6. E00489:44:HNNVYCCXX:2:2206:5020:71717 83 22 16052055 145M5S
  7. E00489:44:HNNVYCCXX:4:2206:12642:40829 99 22 16052056 144M6S
  8.  
  9. #!/bin/perl
  10.  
  11. #initialize hashes
  12. my %softhash;
  13. my %IDhash;
  14. my $file1Name = $ARGV[0] . 'all_sc_positions.txt';
  15. my $file2Name = $ARGV[0] . 'edge_sc_positions.txt';
  16. open(my $fh_all, '>', $file1Name);
  17. open(my $fh_edge, '>', $file2Name);
  18.  
  19. #for each line
  20. while (my $line = <STDIN>) {
  21.  
  22. #read in line and parse
  23. chomp($line);
  24. my @a = split("t", $line);
  25. my $start = $a[3];
  26. my $cigar = $a[4];
  27. my @b = split(/[A-Z]/, $cigar);# keeps track of numbers
  28. my @c = split(/[0-9]*/, $cigar); #keeps track of letters
  29. my $loc = $start; #distance from start of read
  30. my $var_start;
  31. my $var_end;
  32.  
  33. #loop over type of alignment in cigar string, build hashes of candidate locations
  34. for (my $i = 0; $i <= $#c; $i++) {
  35.  
  36. #if there is softclipping or an indel, find the location and store in hash
  37. if ($c[$i] eq "S" || $c[$i] eq "I" || $c[$i] eq "D") {
  38.  
  39. #find softclipping location
  40. if ($i == 1) {
  41.  
  42. $var_start = $start - $b[0];
  43. $var_end = $start;
  44.  
  45. if ($c[$i] eq "S") {
  46.  
  47. for (my $pos = $var_start; $pos < $var_end; $pos++) {
  48. $softhash{$a[2]}{$pos}++;
  49. }
  50.  
  51. $var_start = $var_end - 1;
  52. $var_end = $var_start;
  53. }
  54.  
  55. } else {
  56.  
  57. for (my $j = $i-2; $j >= 0; $j--) {# subtract 2 from i, because the first value from c will always be empty
  58. $loc = $loc + $b[$j];
  59. }
  60.  
  61. $var_start = $loc;
  62. $var_end = $loc + $b[($i-1)];
  63.  
  64. if ($c[$i] eq "S") {
  65.  
  66. for (my $pos=$var_start; $pos<$var_end; $pos++) {
  67. $softhash{$a[2]}{$pos}++;
  68. }
  69.  
  70. $var_end = $var_start;
  71. }
  72. }
  73.  
  74. $IDhash{$a[2]}{$var_start}{$var_end}{$c[$i]}++;
  75. }
  76. }
  77. }
  78.  
  79. #write out edge features
  80. foreach my $key_chr (sort(keys %IDhash)) {
  81.  
  82. foreach my $key_start (sort { $a <=> $b } (keys %{ $IDhash{$key_chr} })) {
  83.  
  84. foreach my $key_end (sort { $a <=> $b } (keys %{ $IDhash{$key_chr}{$key_start} })){
  85.  
  86. print $fh_edge "$key_chrt$key_startt$key_endt";
  87. my $sum = $IDhash{$key_chr}{$key_start}{$key_end}{I} + $IDhash{$key_chr}{$key_start}{$key_end}{D} + $IDhash{$key_chr}{$key_start}{$key_end}{S};
  88. print $fh_edge "$sum,";
  89.  
  90. for my $l ('S','I','D') {
  91.  
  92. if (defined($IDhash{$key_chr}{$key_start}{$key_end}{$l})) {
  93. print $fh_edge "$IDhash{$key_chr}{$key_start}{$key_end}{$l},";
  94. } else {
  95. print $fh_edge "0,";
  96. }
  97. }
  98.  
  99. print $fh_edge "n";
  100. }
  101. }
  102. }
  103.  
  104. #write out "all" features
  105. foreach my $key_chr (sort(keys %softhash)) {
  106.  
  107. foreach my $key_pos (sort { $a <=> $b } (keys %{ $softhash{$key_chr} })) {
  108.  
  109. print $fh_all "$key_chrt$key_post";
  110. print $fh_all "$softhash{$key_chr}{$key_pos}n";
  111. }
  112. }
Add Comment
Please, Sign In to add comment