Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Assign a keyword-based category to every post in ``master_posts.csv``.

The input is a semicolon-delimited CSV whose second column holds the post
text. For every usable row the script writes one ``post;category`` row to
``output_categories.csv``; posts that match no keyword get the category
``nothing``.
"""
import csv

INPUT_PATH = "master_posts.csv"
OUTPUT_PATH = "output_categories.csv"
# NOTE(review): the encoding of the data files is not visible from this
# script; UTF-8 is assumed -- confirm against the real input file.
FILE_ENCODING = "utf-8"
# Category written when none of the keywords occurs in a post.
NO_CATEGORY = "nothing"

# Category name -> lowercase keywords. A post belongs to the first category
# (in definition order) for which any keyword occurs as a substring of the
# lowercased post text.
categories_dict = {
    "Bundesregierung": ["bundesregierung", "merkel", "groko"],
    "Russland": ["russland", "putin", "russisch", "skripal", "moskau"],
    "Fluechtlinge": ["syrien", "fluechtling"],
    # NOTE(review): "usa, us" is a single keyword and only matches that exact
    # text; it looks like a typo for two separate keywords. It is left as-is
    # because, with substring matching, "usa" would also hit "zusammen" and
    # "us" would hit "russland" -- decide on word-boundary matching first.
    "Trump": ["trump", "usa, us"],
    "AFD": ["afd", "gauland"],
}


def categorize_post(text, keywords_by_category=None) -> str:
    """Return the first category whose keyword occurs in *text*.

    Matching is a case-insensitive substring test. *keywords_by_category*
    maps category names to lists of lowercase keywords and defaults to
    ``categories_dict``. Returns ``NO_CATEGORY`` when nothing matches.
    """
    if keywords_by_category is None:
        keywords_by_category = categories_dict
    # Lowercase once per post instead of once per keyword.
    lowered = text.lower()
    for category, keywords in keywords_by_category.items():
        if any(keyword in lowered for keyword in keywords):
            return category
    return NO_CATEGORY


def categorize_rows(rows) -> list:
    """Return one ``[post, category]`` pair per usable row in *rows*.

    The post text is taken from the second column. Rows with fewer than two
    columns (for example blank lines, which ``csv.reader`` yields as ``[]``)
    are skipped instead of raising ``IndexError``.
    """
    return [[row[1], categorize_post(row[1])] for row in rows if len(row) > 1]


def main() -> None:
    """Read the input CSV, print the categorised posts and write the output CSV."""
    # newline="" lets the csv module handle line endings itself.
    with open(INPUT_PATH, newline="", encoding=FILE_ENCODING) as f:
        categories = categorize_rows(csv.reader(f, delimiter=";"))

    print(categories)

    # Each element is a [post, category] pair, so writerows() emits exactly
    # two columns per post (a flat list of strings would be split into one
    # column per character).
    with open(OUTPUT_PATH, "w", newline="", encoding=FILE_ENCODING) as f:
        csv.writer(f, delimiter=";").writerows(categories)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement