# Untitled

import os
import json
import re

# Swedish third-person pronouns whose usage is tallied across all tweets.
target_words = ['han', 'hon', 'den', 'det', 'denna', 'denne', 'hen']

# Running totals, keyed by target word. The (misspelled) name is kept as-is
# because it is part of the module's public surface.
occurences = {word: 0 for word in target_words}

# Compiled once at import time; matches runs of (Unicode) word characters.
_WORD_PATTERN = re.compile(r'\w+')


def occurence_count(target_word, text):
    """Return how many times *target_word* occurs as a whole word in *text*.

    The text is split into runs of word characters, so ``'han'`` is not
    counted inside ``'hans'``. The comparison is case-sensitive; callers
    are expected to normalise case beforehand.
    """
    return _WORD_PATTERN.findall(text).count(target_word)


def analyze_tweet(tweet):
    """Add the target-word counts of a single tweet to ``occurences``.

    *tweet* is a decoded tweet object (a dict) with a ``'text'`` entry.
    Raises ``KeyError`` if the tweet has no ``'text'`` key.
    """
    # Lower-case once per tweet instead of once per target word.
    text = tweet['text'].lower()
    for target_word in target_words:
        # Accumulate: a plain assignment would discard earlier tweets.
        occurences[target_word] += occurence_count(target_word, text)


def analyze_file(file):
    """Tally target words for every original tweet in *file*.

    *file* is any iterable of text lines, each holding one JSON-encoded
    tweet. Tweets carrying a ``'retweeted_status'`` key are retweets and
    are skipped so the same text is not counted twice.
    """
    for line in file:
        # Tolerate blank separator lines instead of crashing in json.loads.
        if not line.strip():
            continue
        tweet = json.loads(line)
        if 'retweeted_status' not in tweet:
            analyze_tweet(tweet)


def main():
    """Analyze every regular file in the ``tweets`` directory under the cwd."""
    tweets_dir = os.path.join(os.getcwd(), 'tweets')
    for filename in os.listdir(tweets_dir):
        # os.listdir returns bare names; join them back onto the directory.
        path = os.path.join(tweets_dir, filename)
        if not os.path.isfile(path):
            continue
        # Open read-only (mode 'w' would truncate the data) and make sure
        # the handle is closed even if parsing fails.
        with open(path, 'r', encoding='utf-8') as file:
            analyze_file(file)


# Run only after all functions above are defined, and only when executed
# as a script (not when imported).
if __name__ == '__main__':
    main()