Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- Program WordCount: reads a text file, which may contain multiple words
- per line, and returns the 500 most frequent words with their frequencies.
- Copyright 2017, University of Freiburg.
- Christy Nader <christy-nader@live.com>
- """
- import re
- import sys
- from Aufgabe1 import *
class WordCount():
    """Count how often each word occurs in a text file.

    The counts are kept in ``self.hMap``, a ``HashMap`` (star-imported from
    ``Aufgabe1``) that is created with the argument 500.
    """

    # One maximal run of word characters, i.e. a single token of a line.
    _WORD_PATTERN = re.compile(r'\w+')

    def __init__(self):
        """Create the backing ``HashMap(500)`` used to store the counts."""
        self.hMap = HashMap(500)

    def readTextFile(self, file_name):
        """Read ``file_name`` line by line and count every word in it.

        A word is a maximal run of word characters. Words are lower-cased
        before counting, so "The" and "the" share one counter.

        Args:
            file_name: Path of the text file to read.
        """
        with open(file_name) as fh:
            for line in fh:
                # findall never yields empty strings, whereas splitting on
                # r'\W+' produces one for every line ending in a newline,
                # which would be counted as a bogus "" word.
                for token in self._WORD_PATTERN.findall(line):
                    word = token.lower()
                    # NOTE(review): assumes lookup() returns 0 for a word
                    # that is not stored yet - confirm against
                    # Aufgabe1.HashMap.
                    count = self.hMap.lookup(word) + 1
                    self.hMap.insert(word, count)

    def __repr__(self):
        """Return the elements of ``self.hMap.map``, one ``str(elem)`` per line.

        ``__repr__`` has to return a string; printing the elements and
        returning ``None`` makes ``repr(obj)`` raise ``TypeError``.
        """
        return "\n".join(str(elem) for elem in self.hMap.map)
if __name__ == '__main__':
    # Parse command line arguments: exactly one is expected, the input file.
    if len(sys.argv) != 2:
        print("Usage: python ./word_count.py <file>")
        sys.exit(1)
    file_name = sys.argv[1]

    # Build the counter only once the arguments are known to be valid.
    word_counter = WordCount()
    word_counter.readTextFile(file_name)
    # Print the ``map`` attribute of the counter's HashMap.
    print(word_counter.hMap.map)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement