Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
#
# HTML clean-up script using Dave Raggett's HTML Tidy and sed.
#   http://tidy.sourceforge.net -- http://www.gnu.org/software/sed
#
# This work was assembled from several loose answers collected over many
# different Internet forums --- I do not claim authorship. Public Domain.
# José Geraldo Gouvêa -- jggouvea at gmail.com
#
# Usage: <this script> [FILE...]
#
# Each FILE (default: post.txt in the current directory) is rewritten in
# place:
#   1. every "<br />" becomes "<p>";
#   2. the markup is normalised by HTML Tidy;
#   3. the first 7 and last 2 lines of Tidy's output are dropped and the
#      remaining lines are joined into a single line;
#   4. " - ", " -- " and "..." become an en dash, an em dash and an ellipsis;
#   5. straight double quotes become curly quotes, chosen from the
#      neighbouring character.
#
# Exit status: 0 if every file was processed, 1 otherwise.

# No `set -e`: every step whose failure matters is checked explicitly, so a
# bad file is reported and skipped instead of silently aborting the run.
set -uo pipefail

# Print a diagnostic on stderr.
warn() {
  printf '%s\n' "$*" >&2
}

#######################################
# Drop the wrapper lines around Tidy's output and join the rest.
# The counts (7 leading, 2 trailing lines) are fixed; presumably they match
# the DOCTYPE/<head>/<body> scaffolding Tidy emits around a fragment.
# NOTE(review): verify the counts against the installed Tidy version.
# Inputs:  Tidy output on stdin
# Outputs: a single line on stdout
#######################################
strip_tidy_wrapper() {
  sed '1,7d' \
    | sed 'N;$!P;$!D;$d' \
    | sed -e :a -e '$b;N;s/\n//;ba'
  # 2nd sed: keeps a two-line window and deletes it at EOF (drops last 2).
  # 3rd sed: appends every following line and removes the newline between.
}

#######################################
# Replace ASCII dash/ellipsis sequences with typographic characters.
# The em dash deliberately swallows the surrounding spaces; the en dash
# keeps them.
# Inputs:  text on stdin
# Outputs: converted text on stdout
#######################################
smarten_punctuation() {
  sed \
    -e 's/ - / – /g' \
    -e 's/ -- /—/g' \
    -e 's/\.\.\./…/g'
}

#######################################
# Turn straight double quotes into curly quotes based on the adjacent
# character. Rules run in order; a quote that matches no rule stays straight.
# Inputs:  text on stdin
# Outputs: converted text on stdout
#######################################
smarten_quotes() {
  # A real tab character. The pattern "[TAB]" would be a bracket expression
  # matching the letters T, A or B and would corrupt ordinary words.
  local tab=$'\t'

  sed \
    -e 's/^"/“/' \
    -e 's/"$/”/' \
    -e 's/ "/ “/g' \
    -e 's/" /” /g' \
    -e "s/${tab}\"/${tab}“/g" \
    -e "s/\"${tab}/”${tab}/g" \
    -e 's/")/”)/g' \
    -e 's/("/(“/g' \
    -e 's/";/”;/g' \
    -e 's/":/”:/g' \
    -e 's/,"/,”/g' \
    -e 's/",/”,/g' \
    -e 's/\."/.”/g' \
    -e 's/"\./”./g'
}

#######################################
# Clean one file in place. Runs in a subshell so the EXIT trap removes the
# scratch directory on every path, and so the original file is replaced only
# after the whole pipeline has succeeded.
# Arguments: $1 - path of the file to clean
# Returns:   0 on success, 1 on failure (original file left untouched)
#######################################
clean_file() (
  src=$1

  work=$(mktemp -d) || exit 1
  trap 'rm -rf -- "$work"' EXIT

  sed 's/<br \/>/<p>/g' < "$src" > "$work/pre.html" || exit 1

  tidy --char-encoding utf8 --wrap 0 --logical-emphasis true \
    --enclose-block-text true --drop-empty-paras true \
    "$work/pre.html" > "$work/tidied.html"
  status=$?
  # Tidy exits 1 when it only has warnings, which is the normal case for a
  # bare fragment; anything higher means it could not produce usable output.
  if (( status > 1 )); then
    warn "tidy failed on ${src} (exit ${status}); file left untouched"
    exit 1
  fi

  strip_tidy_wrapper < "$work/tidied.html" \
    | smarten_punctuation \
    | smarten_quotes > "$work/result.html" || exit 1

  mv -- "$work/result.html" "$src"
)

#######################################
# Entry point: clean every file given, or post.txt when none is given.
# Arguments: optional list of files
# Returns:   0 if all files were cleaned, 1 otherwise
#######################################
main() {
  if (( $# == 0 )); then
    set -- post.txt
  fi

  if ! command -v tidy > /dev/null; then
    warn "tidy not found in PATH"
    return 1
  fi

  local f rc=0
  for f in "$@"; do
    if [[ ! -f "$f" ]]; then
      warn "${f}: no such file; skipped"
      rc=1
      continue
    fi
    clean_file "$f" || rc=1
  done
  return "$rc"
}

main "$@"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement