eniallator

Lab 2.2 last Q SRC

Oct 13th, 2019
310
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.63 KB | None | 0 0
  1. from sussex_nltk.corpus_readers import MedlineCorpusReader, ReutersCorpusReader, TwitterCorpusReader
  2.  
  3. readers = [MedlineCorpusReader(), ReutersCorpusReader(), TwitterCorpusReader()]
  4. sample_size = 10000
  5.  
  6. for reader in readers:
  7.     sentences = map(lambda s: word_tokenize(s.lower()), reader.sample_raw_sents(sample_size))
  8.     stops = 0
  9.     others = 0
  10.    
  11.     for tokens in sentences:
  12.         for token in tokens:
  13.             if token in stop:
  14.                 stops += 1
  15.             else:
  16.                 others += 1
  17.     print(f"{round(stops / (stops + others) * 100, 2)}% were stop words with a total of {stops}/{stops + others}")
Advertisement
Add Comment
Please, Sign In to add comment