Guest User

Untitled

a guest
Jan 21st, 2018
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.41 KB | None | 0 0
  1. 1 7100
  2. 1 35000
  3. 1 49321
  4. 1 49759
  5. 2 44842
  6. 2 52794
  7. 2 53558
  8. 3 53859
  9. 3 54013
  10. 3 55172
  11.  
  12. 1 6408 8000
  13. 1 11822 16373
  14. 1 18716 23389
  15. 1 27690 34330
  16. 1 36552 39191
  17. 1 39313 44565
  18. 2 44839 50247
  19. 2 60987 65017
  20. 2 65705 71523
  21.  
  22. 1 6408 8000
  23. 2 44839 50247
  24.  
  # Report, for every row of the "test" file, the rows of the "db" file whose
  # range contains that row's position.
  #
  # Input formats (whitespace-separated, as in the samples shown in this paste):
  #   test: <category> <position>
  #   db:   <category> <start> <stop>
  # NOTE(review): the earlier version compared against fields 3 and 4; the db
  # sample here has only three columns, so start/stop are read from fields 2
  # and 3 -- confirm against the real db layout.
  #
  # Output (stdout): for each test row, its 1-based row number followed by a
  # space, then every db row of the same category with
  # start < position < stop (both bounds exclusive).
  #
  # Arguments: $1 - test file (default: test)
  #            $2 - db file   (default: db)
  # Returns:   1 if either file is unreadable, 0 otherwise.
  find_matches() {
    local test_file=${1:-test}
    local db_file=${2:-db}
    local k=1 cato pos

    if [[ ! -r "$test_file" || ! -r "$db_file" ]]; then
      printf 'find_matches: cannot read %s and/or %s\n' \
        "$test_file" "$db_file" >&2
      return 1
    fi

    # `read` splits the row into fields itself, so no echo|awk per column.
    # The `|| [[ -n ... ]]` part keeps a final row that lacks a newline.
    while read -r cato pos _ || [[ -n "$cato" ]]; do
      # helps to keep count of test rows
      printf '%s \n' "$k"

      if [[ -n "$cato" && -n "$pos" ]]; then
        # One awk pass does the category match and the range check.
        # No digit-count prefilter: it discarded valid hits whenever start
        # and stop had different lengths than pos (e.g. 5000 in 999..10000).
        # Values are passed with -v in double quotes so awk sees the actual
        # position; "+ 0" forces numeric comparison.
        awk -v cato="$cato" -v pos="$pos" \
          '$1 == cato && pos + 0 > $2 + 0 && pos + 0 < $3 + 0' "$db_file"
      fi

      ((k = k + 1))
    done < "$test_file"
  }

  find_matches test db
  53.  
  # Print each row of the db file (<category> <start> <stop>) whose inclusive
  # range start..stop contains at least one position listed for the same
  # category in the test file (<category> <position>). A db row is printed at
  # most once, in db order.
  #
  # Arguments: $1 - test file (default: test)
  #            $2 - db file   (default: db)
  # Returns:   the exit status of awk.
  print_db_hits() {
    awk '
      # First file (test): store every position under its category.
      # A per-category counter is used so rows need not be grouped by
      # category; a single shared counter that resets on category change
      # would overwrite earlier entries when a category reappears.
      # FILENAME is compared instead of NR==FNR so an empty test file does
      # not make db rows look like test rows.
      FILENAME == ARGV[1] { pos[$1, ++count[$1]] = $2 + 0; next }

      # Remaining input (db): look only at categories seen in the test file
      # and stop at the first position that falls inside the range.
      ($1 in count) {
        for (i = 1; i <= count[$1]; i++) {
          p = pos[$1, i]
          if (p >= $2 + 0 && p <= $3 + 0) { print; break }
        }
      }
    ' "${1:-test}" "${2:-db}"
  }

  print_db_hits test db
  60.  
  61. 1 6408 8000
  62. 2 44839 50247
Add Comment
Please, Sign In to add comment