Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
#
# Interactive scraper for senator profile pages.
#
# Asks how many senators to collect and which numeric ID to start from, then
# walks the IDs upwards. For every ID whose profile page and official photo
# can both be downloaded, it creates "$pasta/<name>/" containing:
#   senador.html  - the raw profile page
#   dados.txt     - the ID followed by one "<label> <value>" line per
#                   <dt>/<dd> pair of the page's "dl-horizontal" list
#   <name>.jpg    - the official photo
#
# The output directory is wiped and recreated on every run.
#
# NOTE: the main loop only ends once $max senators have been saved. If the
# site is unreachable or the IDs run out, it keeps counting upwards; stop it
# with Ctrl-C.

set -u

readonly aux="/home/jose/tmp"           # raw profile page of the current ID
readonly img="/home/jose/img.jpg"       # photo of the current ID
readonly pasta="/home/jose/senadores"   # output directory
readonly fields="${aux}.fields"         # work file: one <dt>/<dd> element per line
readonly range1="<dl class=\"dl-horizontal\">"
readonly range2="</dl>"
readonly from1="<dt>"
readonly to1="</dt>"
readonly from2="<dd>"
readonly to2="</dd>"

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# drop_lines FILE TEXT
# Remove from FILE every line that contains TEXT.
drop_lines() {
  local file=$1 text=$2
  local scratch="${file}.new"
  # -F: TEXT is scraped data and must be matched literally, not as a regex.
  # grep exits 1 when nothing is left to print; the (empty) result is still
  # the correct new content, so the status is deliberately ignored.
  grep -vF -- "$text" "$file" > "$scratch" || true
  mv -- "$scratch" "$file"
}

# pop_field FILE OPEN CLOSE
# Print the text between the first OPEN and the next CLOSE found in FILE
# (tags stripped) and remove the line(s) holding that element from FILE.
# Returns 1, printing nothing, when FILE has no OPEN...CLOSE element.
pop_field() {
  local file=$1 open=$2 close=$3 body
  body=$(< "$file")
  [[ $body == *"$open"*"$close"* ]] || return 1
  body=${body#*"$open"}
  body=${body%%"$close"*}
  drop_lines "$file" "${open}${body}${close}"
  # Strip any stray opening tag left inside the extracted text.
  printf '%s\n' "${body//"$open"/}"
}

# scrape_senator ID
# Download and store the profile of senator ID.
# Returns 0 when the senator was saved, 1 when the ID was skipped.
scrape_senator() {
  local sid=$1 page block flat label nome value dest

  wget -q "www6g.senado.leg.br/transparencia/sen/$sid" -O "$aux" || return 1
  # A page containing "Desculpe" is treated as "no such senator".
  if grep -q 'Desculpe' "$aux"; then
    return 1
  fi
  wget -q "www.senado.gov.br/senadores/img/fotos-oficiais/senador$sid.jpg" \
    -O "$img" || return 1

  # Delimit the range: keep only what sits between the data list's opening
  # and closing tags, then drop every space character.
  page=$(< "$aux")
  [[ $page == *"$range1"* ]] || return 1
  block=${page#*"$range1"}
  block=${block%%"$range2"*}
  block=${block//"$range1"/}
  block=${block// /}

  # xargs joins the remaining lines into one, collapsing whitespace. It also
  # consumes quote characters, so attribute values arrive unquoted below, and
  # it fails on an unmatched quote; such a page is skipped.
  flat=$(xargs <<< "$block") || return 1
  # One element per line (GNU sed: \r and \n in the replacement).
  sed -e 's:</dt>:</dt>\r\n:g' -e 's:</dd>:</dd>\r\n:g' <<< "$flat" > "$fields"

  # Name: the first <dt>/<dd> pair is the label and the senator's name.
  label=$(pop_field "$fields" "$from1" "$to1") || return 1
  nome=$(pop_field "$fields" "$from2" "$to2") || return 1
  # The name becomes a directory name; keep it a single path component.
  nome=${nome//\//_}
  [[ -n $nome ]] || return 1

  dest="$pasta/$nome"
  mkdir -p -- "$dest" || return 1
  mv -- "$aux" "$dest/senador.html"
  {
    echo "ID: $sid"
    printf '%s %s\n' "$label" "$nome"
  } >> "$dest/dados.txt"

  # Remaining pairs. Every successful pop removes at least one line holding
  # a <dt>, so the loop always terminates.
  while grep -qF -- "$from1" "$fields"; do
    label=$(pop_field "$fields" "$from1" "$to1") || break
    value=$(pop_field "$fields" "$from2" "$to2") || value=""
    if [[ $value == *'<a'* ]]; then
      # The value is a link: keep only its target, without "mailto:".
      value=${value#*href=}
      value=${value%%>*}
      value=${value//mailto:/}
      value=${value//href=/}
    fi
    printf '%s %s\n' "$label" "$value" >> "$dest/dados.txt"
  done

  rm -f -- "$fields"
  mv -- "$img" "$dest/$nome.jpg"
  return 0
}

# Never leave scratch files behind, whatever the exit path.
trap 'rm -f -- "$aux" "$img" "$fields" "${fields}.new"' EXIT

echo "Quantos senadores você quer stalkear?"
read -r max
echo "A partir de qual ID o programa deve começar?"
read -r id
echo "---------------"

# Validate before touching the output directory.
if ! [[ ${max:-} =~ ^[0-9]+$ && ${id:-} =~ ^[0-9]+$ ]]; then
  die "Erro: informe números inteiros não negativos."
fi
# Force base 10 so that input such as "08" is not read as invalid octal.
max=$((10#$max))
id=$((10#$id))
count=1

if [ -d "$pasta" ]; then
  rm -rf -- "$pasta"
fi
mkdir -- "$pasta" || die "Erro: não foi possível criar $pasta"

while (( count <= max )); do
  # DEBUG
  echo "COUNT: $count"
  echo "ID: $id"
  if scrape_senator "$id"; then
    (( count++ ))
  fi
  (( id++ ))
  echo "---------------"
done
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement