Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- from os import path
- import sys
- import os
- import csv
- #import re
# Module-level state shared by the rest of the script.
mycsv = []          # values taken from the selected column of each input row
mycol = 1           # zero-based index of the column to count (default: second column)
csvfile = ""        # input file path (FILEINPUT)
mydelimiter = '\t'  # field delimiter of the input file (default: tab)
words = []          # [value, count] pairs, one per distinct value
outputfile = ""     # output file path (FILEOUTPUT); empty means print to stdout

# Positional command-line arguments, in order:
#   FILEINPUT FILEOUTPUT COLONNA DELIMITATORE
# Only FILEINPUT is required; the others keep the defaults above when omitted.
cli_args = sys.argv[1:]

if not cli_args:
    print("python3 conta-occorrenze.py FILEINPUT FILEOUTPUT COLONNA DELIMITATORE\n ES:\n \"C:\\Programs\\Python 3.6\\Python 3.6 (32-bit).lnk\" \"C:\\conta-occorrenze.py\" \"C:\\ETR Tagged.txt\" \"C:\\occorrenze.csv\" 1 '\\t'")

if len(cli_args) > 0:
    csvfile = cli_args[0]
if len(cli_args) > 1:
    outputfile = cli_args[1]
if len(cli_args) > 2:
    try:
        mycol = int(cli_args[2])
    except ValueError:
        # Report a readable error instead of an int() traceback.
        sys.exit("COLONNA must be an integer column index, got %r" % cli_args[2])
if len(cli_args) > 3:
    mydelimiter = cli_args[3]

# csv.reader only accepts a one-character delimiter. Anything else (including
# a quoted or escaped form such as '\t' typed literally on the command line)
# falls back to a real tab character.
if len(mydelimiter) != 1:
    mydelimiter = '\t'

# Nothing to do without an input file (the usage text was printed above when
# no arguments were given at all).
if csvfile == "":
    sys.exit()
def findIndexinCol(arr, string, col):
    """Return the index of the first row of *arr* whose *col* entry equals *string*.

    Args:
        arr: A sequence of indexable rows (for example a list of lists).
        string: The value to look for; compared with ``==``.
        col: The column index to inspect in every row.

    Returns:
        The zero-based index of the first matching row, or ``-1`` when no
        row matches.
    """
    for index, row in enumerate(arr):
        # Rows too short to contain `col` cannot match; skip them instead of
        # raising IndexError.
        if -len(row) <= col < len(row) and row[col] == string:
            return index
    return -1
# Count how many times each distinct value occurs in column `mycol` of the
# delimited input file, then emit one "value;count" line per distinct value.
csvfile = os.path.abspath(csvfile)

# Read every row up front; origdict is a list of rows indexed as [row][column].
# The with-block guarantees the input handle is closed, and newline="" is the
# mode the csv module requires so that quoted fields containing line breaks
# are parsed correctly.
with open(csvfile, newline="") as input_file:
    origdict = list(csv.reader(input_file, delimiter=mydelimiter))

# Keep only the rows that actually have the requested column; shorter rows
# (including blank lines, which parse as empty rows) are ignored.
mycsv.extend(row[mycol] for row in origdict if -len(row) <= mycol < len(row))

# Tally occurrences. `words` keeps [value, count] pairs in first-seen order;
# the dict maps each value to its position in `words` so every lookup is O(1)
# instead of a linear scan of the list.
word_positions = {}
for word in mycsv:
    position = word_positions.get(word)
    if position is None:
        word_positions[word] = len(words)
        words.append([word, 1])
    else:
        words[position][1] += 1

# One "value;count" line per distinct value.
# NOTE(review): values are written verbatim, so a value that itself contains
# ';' yields an ambiguous line; the format is kept unchanged for compatibility.
csvoutput = "".join(word + ";" + str(count) + "\n" for word, count in words)

if outputfile != "":
    with open(outputfile, "w") as text_file:
        text_file.write(csvoutput)
else:
    print(csvoutput)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement