Advertisement
Guest User

Untitled

a guest
May 22nd, 2017
519
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.15 KB | None | 0 0
  1. """
  2. WordCount program: reads a text file that may contain multiple words
  3. per line and returns the 500 most frequent words with their frequencies.
  4.  
  5. Copyright 2017, University of Freiburg.
  6. Christy Nader <christy-nader@live.com>
  7. """
  8.  
  9. import re
  10. import sys
  11.  
  12. from Aufgabe1 import *
  13.  
  14. class WordCount():
  15.  
  16.     def __init__(self):
  17.  
  18.         self.hMap = HashMap(500)
  19.        
  20.  
  21.     def readTextFile(self, file_name):
  22.        
  23.         with open(file_name) as fh:
  24.             for line in fh:
  25.                 for word in re.split('\W+', line):
  26.                     word = word.lower()
  27.                     count = self.hMap.lookup(word) + 1
  28.                     self.hMap.insert(word, count)
  29.  
  30.        
  31.        
  32.     def __repr__(self):
  33.         for elem in self.hMap.map:
  34.             print(elem)
  35.    
  36.                    
  37.  
  38. if __name__ == '__main__':
  39.    
  40.     lst = list()
  41.     wordcouncounter = WordCount()
  42.     # Parse command line arguments.
  43.     if len(sys.argv) != 2:
  44.         print("Usage: python ./word_count.py <file>")
  45.         sys.exit(1)
  46.  
  47.     file_name = sys.argv[1]
  48.     wordcouncounter.readTextFile(file_name)
  49.     print(wordcouncounter.hMap.map)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement