Advertisement
Guest User

NER

a guest
Oct 18th, 2013
151
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. stanford-ner/ner.sh FILENAME.txt > FILENAME_ner.txt
  2. sed 's/\/O / /g' < FILENAME_ner.txt > FILENAME_ner_clean.txt
  3. alias egrepmatch='egrep --color -f pattr'
  4. echo "[[:alpha:]]*/PERSON" > pattr
  5. echo "(([[:alnum:]]|\.)+/ORGANIZATION([[:space:]]|$))+" > orgpattr
  6. echo "(([[:alnum:]]|\.)+/LOCATION[[:space:]](,[[:space:]])?)+" > locpattr
  7. echo "(([[:alpha:]]|\.)*/PERSON([[:space:]]|$))+" > personpattr
  8. egrepmatch *clean.txt
  9. echo "([[:alpha:]]|\.)*/PERSON" > pattr
  10. egrepmatch *clean.txt
  11. echo "([[:alpha:]]|\.)*/PERSON([[:space:]]|$)" > pattr
  12. egrepmatch *clean.txt
  13. egrep -o -f personpattr FILENAME_ner_clean.txt > FILENAME_ner_pers.txt
  14. cat FILENAME_ner_pers.txt | sed 's/\/PERSON//g' | sort | uniq -c | sort -nr > FILENAME_ner_pers_freq.txt
  15. egrep -o -f orgpattr FILENAME_ner_clean.txt > FILENAME_ner_org.txt
  16. cat FILENAME_ner_org.txt | sed 's/\/ORGANIZATION//g' | sort | uniq -c | sort -nr > FILENAME_ner_org_freq.txt
  17. egrep -o -f locpattr FILENAME_ner_clean.txt > FILENAME_ner_loc.txt
  18. sed -i 's/ , /\n/g' FILENAME_ner_loc.txt
  19. sed -i '/^$/d' FILENAME_ner_loc.txt
  20. cat FILENAME_ner_loc.txt | sed 's/\/LOCATION//g' | sort | uniq -c | sort -nr > FILENAME_ner_loc_freq.txt
Advertisement
RAW Paste Data Copied
Advertisement