Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# ---------------------------------------------------------------------------
# Frequency tables of the PERSON / ORGANIZATION / LOCATION entities tagged by
# stanford-ner/ner.sh in a plain-text file.
#
# Usage:  bash THIS_FILE BASENAME        (BASENAME without the ".txt" suffix)
#
# For BASENAME=FILENAME this reads FILENAME.txt and writes
#   FILENAME_ner.txt            tagger output (presumably "token/TAG" pairs
#                               separated by spaces -- confirm against ner.sh)
#   FILENAME_ner_clean.txt      the same text with the "/O" tags removed
#   FILENAME_ner_pers.txt       matched PERSON runs, one per line
#   FILENAME_ner_org.txt        matched ORGANIZATION runs, one per line
#   FILENAME_ner_loc.txt        matched LOCATION runs, comma lists split up
#   FILENAME_ner_*_freq.txt     "count entity" tables, most frequent first
# and the pattern files pattr, personpattr, orgpattr and locpattr in the
# current directory.
#
# When run (or sourced) without arguments, only the functions are defined.
# ---------------------------------------------------------------------------

# Filter: remove the "/O" tag from every token on stdin.
# The tag is removed both before a space and at end of line; matching only
# "/O " would leave the last token of each line tagged.
strip_outside_tags() {
  sed -E 's#/O( |$)#\1#g'
}

# Write the extended-regex pattern files used by the extraction steps.
# Each pattern matches a run of consecutive tokens carrying the same tag; a
# token may be followed by whitespace or by the end of the line.
write_patterns() {
  printf '%s\n' '(([[:alnum:]]|\.)+/ORGANIZATION([[:space:]]|$))+' > orgpattr &&
    # Locations may additionally be chained by ", " (e.g. "City , Country").
    printf '%s\n' '(([[:alnum:]]|\.)+/LOCATION([[:space:]]|$)(,[[:space:]])?)+' > locpattr &&
    printf '%s\n' '(([[:alpha:]]|\.)*/PERSON([[:space:]]|$))+' > personpattr
}

# Highlight matches of the pattern(s) stored in ./pattr.
# Arguments: files (and optional extra grep options) passed through to grep.
# A function rather than an alias: bash does not expand aliases in
# non-interactive shells, so an alias would be unusable from a script.
egrepmatch() {
  grep -E --color -f pattr "$@"
}

# Show, for every *clean.txt file in the current directory, what three
# progressively stricter PERSON patterns match.  Leaves the last pattern
# in ./pattr.
preview_person_matches() {
  local pattern
  for pattern in \
    '[[:alpha:]]*/PERSON' \
    '([[:alpha:]]|\.)*/PERSON' \
    '([[:alpha:]]|\.)*/PERSON([[:space:]]|$)'; do
    printf '%s\n' "$pattern" > pattr
    egrepmatch *clean.txt
  done
  # The preview is informational only; "no match" must not look like failure.
  return 0
}

# Print every match of the patterns in a pattern file, one match per line.
# Arguments: $1 - pattern file, $2 - input file
# Returns:   0 on success (including "nothing matched"), non-zero on error
extract_entities() {
  local status=0
  grep -E -o -f "$1" -- "$2" || status=$?
  # grep exits 1 when nothing matched; an empty result is not an error here.
  (( status <= 1 ))
}

# Filter: split "A/LOCATION , B/LOCATION" lists into one location per line
# and drop empty lines.
split_location_lists() {
  awk '{
    n = split($0, parts, / , /)
    for (i = 1; i <= n; i++)
      if (length(parts[i]) > 0) print parts[i]
  }'
}

# Filter: turn matched entity runs on stdin into a frequency table on stdout.
# Arguments: $1 - tag name to strip (PERSON, ORGANIZATION or LOCATION)
# Trailing whitespace is trimmed before counting so that the same entity
# matched mid-line ("Name ") and at end of line ("Name") is counted once.
entity_freq() {
  local tag=$1
  sed -e "s#/${tag}##g" -e 's/[[:space:]]*$//' | sort | uniq -c | sort -nr
}

# Run the whole pipeline for one input file.
# Arguments: $1 - basename of the input (reads "$1.txt")
# Returns:   0 on success, non-zero if tagging or an extraction step fails
main() {
  local base=$1
  local tagged="${base}_ner.txt"
  local clean="${base}_ner_clean.txt"

  if ! stanford-ner/ner.sh "${base}.txt" > "$tagged"; then
    printf 'error: stanford-ner/ner.sh failed on %s.txt\n' "$base" >&2
    return 1
  fi
  strip_outside_tags < "$tagged" > "$clean" || return

  write_patterns || return
  preview_person_matches

  extract_entities personpattr "$clean" > "${base}_ner_pers.txt" || return
  entity_freq PERSON < "${base}_ner_pers.txt" > "${base}_ner_pers_freq.txt"

  extract_entities orgpattr "$clean" > "${base}_ner_org.txt" || return
  entity_freq ORGANIZATION < "${base}_ner_org.txt" > "${base}_ner_org_freq.txt"

  extract_entities locpattr "$clean" | split_location_lists > "${base}_ner_loc.txt"
  entity_freq LOCATION < "${base}_ner_loc.txt" > "${base}_ner_loc_freq.txt"
}

if [[ $# -gt 0 ]]; then
  main "$@"
fi
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement