# conta-occorrenze.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from os import path
import sys
import os
import csv
#import re

# Script-wide state. The defaults below are overridden by the positional
# command-line arguments: FILEINPUT FILEOUTPUT COLONNA DELIMITATORE.
csvfile = ""          # path of the delimited input file (required)
outputfile = ""       # path of the report file; empty means print to stdout
mycol = 1             # zero-based index of the column whose values are counted
mydelimiter = '\t'    # single-character field separator of the input file
mycsv = []            # values read from the selected column
words = []            # [value, count] pairs

argc = len(sys.argv)
if argc < 2:
    # No input file given: show the usage text (the empty-path check further
    # down then terminates the script).
    print("python3 conta-occorrenze.py FILEINPUT FILEOUTPUT COLONNA DELIMITATORE\n ES:\n \"C:\\Programs\\Python 3.6\\Python 3.6 (32-bit).lnk\" \"C:\\conta-occorrenze.py\" \"C:\\ETR Tagged.txt\" \"C:\\occorrenze.csv\" 1 '\\t'")
else:
    csvfile = sys.argv[1]
    if argc >= 3:
        outputfile = sys.argv[2]
    if argc >= 4:
        mycol = int(sys.argv[3])
    if argc >= 5:
        mydelimiter = sys.argv[4]

# csv.reader only accepts a one-character delimiter; anything else
# (including a literal backslash-t typed on the command line) falls back
# to a tab.
if not len(mydelimiter) == 1:
    mydelimiter = '\t'

# Nothing to do without an input file.
if not csvfile:
    sys.exit()

def findIndexinCol(arr, string, col):
    """Return the index of the first row of *arr* whose cell *col* equals *string*.

    *arr* is a sequence of indexable rows. Rows are examined in order and the
    position of the first match is returned; -1 is returned when no row
    matches (including when *arr* is empty).
    """
    matches = (pos for pos, row in enumerate(arr) if row[col] == string)
    return next(matches, -1)

# Load the input table, count how often each value occurs in column `mycol`,
# and emit one "value;count" line per distinct value, in order of first
# appearance.
csvfile = os.path.abspath(csvfile)

# newline='' is what the csv module requires so that quoted fields containing
# line breaks are parsed correctly; the `with` block guarantees the handle is
# closed once the rows have been read.
with open(csvfile, newline='') as input_file:
    origdict = list(csv.reader(input_file, delimiter=mydelimiter))  # this is [row][column]

# Rows too short to contain the requested column are skipped.
mycsv.extend(row[mycol] for row in origdict if len(row) > mycol)

# `words` remains a list of [value, count] pairs. The side dictionary maps
# each value to its position in `words`, so every lookup is a constant-time
# hash probe instead of a linear scan over all values seen so far.
word_positions = {}
for word in mycsv:
    position = word_positions.get(word)
    if position is None:
        word_positions[word] = len(words)
        words.append([word, 1])
    else:
        words[position][1] += 1

# Build the whole report in one pass rather than by repeated concatenation.
csvoutput = "".join(word + ";" + str(count) + "\n" for word, count in words)

if outputfile != "":
    # The context manager closes the report file even if the write fails.
    with open(outputfile, "w") as text_file:
        text_file.write(csvoutput)
else:
    print(csvoutput)