# Untitled

By: a guest on May 18th, 2012  |  syntax: Python  |  size: 1.74 KB  |  views: 3  |  expires: Never
def _format_row(line):
    """Return the author names and trailing text of *line* as one tab-separated row.

    Raises IndexError when *line* contains neither a ``<digits>: `` prefix nor
    a run of at least 20 characters free of ``,``, ``;`` and ``.``.
    """
    # Element 1 is the text between the first and second separator match;
    # presumably the author list of a "<n>: <authors> <title>" line -- TODO confirm.
    names = re.split(r'\d+: |[^,;.]{20,}', line)[1]
    authors = re.findall(r'\w+,? [\w.]+', names)
    # Put a tab in front of the first long punctuation-free run so the text
    # from there to the end of the line can be split off as its own field.
    marked = re.sub(r'([^,;.]{20,}.*)', r'\t\1', line) + '\n'
    description = marked.split('\t')[1]
    # Every author is followed by a tab, so the description lands in the
    # column right after the last author.
    return ''.join(author + '\t' for author in authors) + description


def main():
    """Convert the lines of ``text`` into tab-separated rows written to OUTPUT.

    Prints the row count and maximum author count of INPUT, then formats each
    line with ``_format_row``, writes it to OUTPUT and hands it to
    ``buildMatrix``.  Any exception raised while processing is printed and
    stops the run; the output file is closed either way.
    """
    # NOTE(review): `text` is not defined in this function; it has to be
    # supplied at module level (presumably the contents of INPUT) -- confirm.
    print("Processing: %s" % (INPUT))
    # The `with` block guarantees the handle is released even when a line
    # fails to parse; previously an exception skipped the close() call.
    with open(OUTPUT, 'w') as output:
        rows = rowcount(INPUT)
        print("Total rows in file: %s" % (rows))
        columns = authorcount(INPUT)
        print("Max author count: %s" % (columns))
        matrix = Matrix(columns, rows)
        try:
            for line in text.split('\n'):
                row = _format_row(line)
                output.write(row)
                buildMatrix(matrix, row, rows, columns)

            # Close before reporting success so a failing flush is reported
            # instead of the success message; the later implicit close is a no-op.
            output.close()
            print("Wrote file: %s" % (OUTPUT))

        except Exception as err:
            print(err)
40.
def buildMatrix(matrix, row, rows, columns):
    """Print a two-element list built from *columns* and the last field of *row*.

    *row* is split on tabs and its final field is inserted at index *columns*
    into a list that initially holds only *columns*.  *matrix* and *rows* are
    accepted but not used here.  Nothing is returned.
    """
    last_field = row.split('\t')[-1]
    cells = [columns]
    # list.insert clamps the index: the field ends up after `columns` when
    # columns >= 1 and before it otherwise.
    cells.insert(columns, last_field)

    print(cells)
47.
48.
def rowcount(filename):
    """Return the number of lines in the file at *filename*.

    The file is opened in a ``with`` block so the handle is closed as soon as
    counting finishes or fails, rather than being left open until garbage
    collection.  An empty file yields 0; a final line without a trailing
    newline still counts as a line.
    """
    with open(filename) as handle:
        return sum(1 for _ in handle)
54.
55. def authorcount(filename):