SHARE
TWEET

conta-occorrenze.py

TringaliLuca May 12th, 2018 80 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. from os import path
  4. import sys
  5. import os
  6. import csv
  7. #import re
  8.  
  9. mycsv = []
  10. mycol = 1
  11. csvfile = ""
  12. mydelimiter = '\t'
  13. words = []
  14. outputfile = ""
  15.  
  16. if len(sys.argv) < 2:
  17.     print("python3 conta-occorrenze.py FILEINPUT FILEOUTPUT COLONNA DELIMITATORE\n ES:\n \"C:\\Programs\\Python 3.6\\Python 3.6 (32-bit).lnk\" \"C:\\conta-occorrenze.py\" \"C:\\ETR Tagged.txt\" \"C:\\occorrenze.csv\" 1 '\\t'")
  18. if len(sys.argv) > 1:
  19.     csvfile = sys.argv[1]
  20. if len(sys.argv) > 2:
  21.     outputfile = sys.argv[2]
  22. if len(sys.argv) > 3:
  23.     mycol = int(sys.argv[3])
  24. if len(sys.argv) > 4:
  25.     mydelimiter = sys.argv[4]
  26.  
  27. if (len(mydelimiter) != 1):
  28.     mydelimiter = '\t'
  29.  
  30. if csvfile == "":
  31.     sys.exit()
  32.  
  33. def findIndexinCol(arr, string, col):
  34.     for i in range(len(arr)):
  35.         if (arr[i][col]) == string:
  36.             return i
  37.     return -1
  38.  
  39. csvfile = os.path.abspath(csvfile)
  40. origdict = list(csv.reader(open(csvfile), delimiter=mydelimiter)) #this is [row][column]
  41. for i in range(len(origdict)):
  42.     if (len(origdict[i]) > mycol):
  43.         mycsv.append(origdict[i][mycol])
  44.  
  45. for word in mycsv:
  46.     value_index = findIndexinCol(words,word,0)
  47.     if value_index > -1:
  48.         thiscount = words[value_index][1]
  49.         words[value_index][1] = thiscount + 1
  50.     else:
  51.         value_index = len(words)
  52.         words.append([word,1])
  53.  
  54. csvoutput = ""
  55. for i in range(len(words)):
  56.     csvoutput += words[i][0] + ";" + str(words[i][1]) + "\n"
  57.  
  58. if (outputfile != ""):
  59.     text_file = open(outputfile, "w")
  60.     text_file.write(csvoutput)
  61.     text_file.close()
  62. else:
  63.     print(csvoutput)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top