Advertisement
Guest User

Untitled

a guest
Dec 10th, 2016
39
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 1.63 KB | None | 0 0
  # Extract the list items from the saved HTML page Desktop/list.txt and write
  # them as plain, lower-case titles (one per line) to Desktop/templijst.txt.
  #
  # normalise_titles: filter, reads HTML lines on stdin, writes titles to stdout.
  #   1. keep lines containing <li> or </li>; drop lines containing </div> or href=
  #   2. strip every <li>, <strong>, <p>, <em> (opening and closing) and <br> tag
  #   3. replace the first " en ", " and " and " et " with " & "; decode &amp;
  #   4. trim trailing whitespace, fold accented vowels to their base letter
  #      (u-with-diaeresis variants become a plain u-umlaut), lower-case the line
  normalise_titles() {
    # Prefer gawk (what this script was written for); fall back to any awk.
    # NOTE(review): non-gawk awks may lower-case ASCII letters only - confirm
    # if the input contains other upper-case accented letters.
    local awk_bin=awk
    if command -v gawk >/dev/null 2>&1; then
      awk_bin=gawk
    fi

    # The tags are written with plain < and >: in GNU sed "\<" and "\>" are
    # word-boundary anchors, so escaping them never matches the literal tag.
    # Accents are folded with literal alternations instead of y/.../.../ so the
    # script also works in a non-UTF-8 locale, where y rejects the two lists as
    # having different (byte) lengths.
    grep -F -e '<li>' -e '</li>' |
      grep -F -v -e '</div>' -e 'href=' |
      sed -E \
        -e 's#</?(li|strong|p|em)>|<br>##g' \
        -e 's/ en / \& /' \
        -e 's/ and / \& /' \
        -e 's/ et / \& /' \
        -e 's/&amp;/\&/g' \
        -e 's/[[:space:]]*$//' \
        -e 's/ā|á|ǎ|à/a/g' \
        -e 's/ē|é|ë|ě|è/e/g' \
        -e 's/ī|í|ǐ|ì/i/g' \
        -e 's/ō|ó|ǒ|ò/o/g' \
        -e 's/ū|ú|ǔ|ù/u/g' \
        -e 's/ǖ|ǘ|ǚ|ǜ/ü/g' \
        -e 's/Ā|Á|Ǎ|À/A/g' \
        -e 's/Ē|É|Ě|È/E/g' \
        -e 's/Ī|Í|Ǐ|Ì/I/g' \
        -e 's/Ō|Ó|Ǒ|Ò/O/g' \
        -e 's/Ū|Ú|Ǔ|Ù/U/g' \
        -e 's/Ǖ|Ǘ|Ǚ|Ǜ/Ü/g' |
      "$awk_bin" '{ print tolower($0) }'
  }

  normalise_titles < Desktop/list.txt > Desktop/templijst.txt
  25.  
  # Map every title in Desktop/templijst.txt to the canonical name of its
  # series and print how many titles each series has.
  #
  # canonicalise_series: filter (stdin -> stdout). A line containing one of the
  # keywords below is replaced entirely by the series name; lines that match
  # nothing pass through unchanged. Rules are applied top to bottom.
  # NOTE(review): some keywords are very loose ("ping", "212", "mensjes",
  # "guus") and will also capture unrelated titles containing them - confirm
  # against the real list.
  canonicalise_series() {
    sed -E \
      -e 's/.*suske.*/suske \& wiske/' \
      -e 's/.*(asterix|astrix).*/asterix \& obelix/' \
      -e 's/.*michel va.*/michel vaillant/' \
      -e 's/.*robin ho.*/robin hood/' \
      -e 's/.*(robbe|spirou).*/spirou/' \
      -e 's/.*vrouwen in.*/vrouwen in het wit/' \
      -e 's/.*(tintin|kuifje).*/tintin/' \
      -e 's/.*johan &.*/johan \& pierewiet/' \
      -e 's/.*jommeke.*/jommeke/' \
      -e 's/.*(guus|gaston).*/guust flater/' \
      -e 's/.*(tuniques|blauwbloezen).*/blauwbloezen/' \
      -e 's/.*kampioenen.*/f.c. de kampioenen/' \
      -e 's/.*schtroumpf.*/smurfen/' \
      -e 's/.*mensjes.*/mini-mensjes/' \
      -e 's/.*raf zerk.*/g.raf zerk/' \
      -e 's/.*(boule|bollie).*/bollie \& billie/' \
      -e 's/.*bakeland.*/bakelandt/' \
      -e 's/.*212.*/agent 212/' \
      -e 's/.*kiekeboe.*/kiekeboe/' \
      -e 's/.*ping.*/pol, pel \& pingo/'
  }

  # count_series: filter (stdin -> stdout) printing "<count> <series>" lines.
  # The misspelling "astrix" is folded into "asterix & obelix" so that series
  # is counted under a single name. A plain sort is enough: uniq -c only needs
  # identical lines to be adjacent, and the lines are names, not numbers.
  count_series() {
    canonicalise_series | sort | uniq -c
  }

  count_series < Desktop/templijst.txt
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement