Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- 1 7100
- 1 35000
- 1 49321
- 1 49759
- 2 44842
- 2 52794
- 2 53558
- 3 53859
- 3 54013
- 3 55172
- 1 6408 8000
- 1 11822 16373
- 1 18716 23389
- 1 27690 34330
- 1 36552 39191
- 1 39313 44565
- 2 44839 50247
- 2 60987 65017
- 2 65705 71523
- 1 6408 8000
- 2 44839 50247
# Look up every position listed in the file "test" against the intervals in
# the file "db" and print the db rows whose interval contains the position.
#
# Whitespace-separated input rows:
#   test: <cato> <pos>
#   db:   <cato> <start> <stop>

#######################################
# Print the db rows whose interval strictly contains a test position
# (start < pos < stop) and whose cato equals the test row's cato.
# Arguments:
#   $1 - path of the test file (cato pos)
#   $2 - path of the db file (cato start stop)
# Outputs:
#   stdout: matching db rows, unmodified, in test-row order then db order
#   stderr: running count of test rows processed (progress indicator)
# Returns:
#   0 on success, 1 if either input file cannot be read
#######################################
match_positions() {
  local test_file=$1
  local db_file=$2
  local cato pos
  local k=1

  if [[ ! -r "$test_file" || ! -r "$db_file" ]]; then
    printf 'match_positions: cannot read "%s" and/or "%s"\n' \
      "$test_file" "$db_file" >&2
    return 1
  fi

  # "|| [[ -n ... ]]" keeps a final row that lacks a trailing newline.
  while read -r cato pos _ || [[ -n "$cato" ]]; do
    # helps to keep count of test rows; sent to stderr so stdout carries
    # only the matched db rows
    printf '%s\n' "$k" >&2

    if [[ -n "$cato" && -n "$pos" ]]; then
      # One awk pass over db per test row.  The values travel through -v in
      # double quotes so awk sees the real cato/pos.  cato is compared as a
      # string; the bounds are the 2nd and 3rd db columns.
      awk -v cato="$cato" -v apos="$pos" \
        '($1 "") == cato && apos > $2 && apos < $3' "$db_file"
    fi

    k=$((k + 1))
  done < "$test_file"
}

match_positions test db
#######################################
# Print every db row whose [start, stop] interval (endpoints included)
# contains at least one position recorded for the same cato in the test file.
# Each db row is printed at most once.
# Arguments:
#   $1 - path of the test file, rows of: cato pos
#   $2 - path of the db file, rows of: cato start stop
# Outputs:
#   stdout: matching db rows, unmodified, in db order
# Returns:
#   exit status of awk
#######################################
find_containing_intervals() {
  awk '
    # First file (test): store each position under its cato.  The counter is
    # kept per cato, so rows are never overwritten when the test file is not
    # grouped by cato.  Keys use the portable (cato, index) form rather than
    # gawk-only arrays of arrays.
    NR == FNR {
      pos[$1, ++count[$1]] = $2
      next
    }

    # Second file (db): print the row as soon as one stored position for its
    # cato lies within start..stop, then stop scanning that row.
    ($1 in count) {
      for (i = 1; i <= count[$1]; i++) {
        if (pos[$1, i] >= $2 && pos[$1, i] <= $3) {
          print
          break
        }
      }
    }
  ' "$1" "$2"
}

find_containing_intervals test db
- 1 6408 8000
- 2 44839 50247
Add Comment
Please, Sign In to add comment