Advertisement
Mabro106

Untitled

Apr 23rd, 2018
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.57 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3.  
  4. import csv
  5.  
  6. # read csv and save as import_file
  7. with open('master_posts.csv', 'rU') as f:
  8.     reader = csv.reader(f, delimiter=';')
  9.     import_file = list(reader)
  10.  
  11. #dictionary with categories
  12. categories_dict = { "Bundesregierung" : ["bundesregierung", "merkel", "groko"], "Russland" : ["russland", "putin", "russisch", "skripal", "moskau"], "Fluechtlinge" : ["syrien", "fluechtling"], "Trump" : ["trump", "usa, us"], "AFD" : ["afd", "gauland"] }
  13.  
  14. #list to save the categories
  15. categories = []
  16.  
  17. #check each line in import file
  18. for row in import_file:
  19.     #a new row is analyzed and the variable has to be false by default
  20.     status = False;
  21.     #save post content in list categories
  22.     categories.append(row[1])
  23.     #check for each key in the dictionary
  24.     for key in categories_dict.keys():
  25.             #check each value in the dictionary
  26.         if status == False:
  27.             for value in categories_dict[key]:
  28.                 #when value is the post, print the key of the value
  29.                 if value in row[1].lower():
  30.                     categories.append(key)
  31.                     #set status true, because a category is set and the next for loop can skip to the next row
  32.                     status = True;
  33.                     break
  34.                 else:
  35.                     break
  36.     #when no category matched, add "nothing" to list
  37.     if status == False:
  38.         categories.append("nothing")
  39.  
  40. print categories
  41.  
  42. #should write post content and "category" in one line (two columns) in csv file for each column in import_file
  43. with open("output_categories.csv", "wb") as f:
  44.     writer = csv.writer(f, delimiter=';')
  45.     writer.writerows(categories)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement