Advertisement
Guest User

Untitled

a guest
May 18th, 2012
47
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.74 KB | None | 0 0
  1. def main():
  2.     text = open(INPUT).read()
  3.     print "Processing: %s" % (INPUT)
  4.     output = open(OUTPUT, 'w')
  5.     rows = rowcount(INPUT)
  6.     print "Total rows in file: %s" % (rows)
  7.     result = ''  
  8.     authors = []
  9.     row = ''
  10.     arow = ''
  11.     columns = authorcount(INPUT)
  12.     print "Max author count: %s" % (columns)
  13.     matrix = Matrix(columns, rows)
  14.     #print matrix
  15.     try:
  16.         for line in text.split('\n'):
  17.             names = re.split(r'\d+: |[^,;.]{20,}', line)[1]
  18.             authors.append(re.findall(r'\w+,? [\w.]+', names))
  19.             result += re.sub(r'([^,;.]{20,}.*)', r'\t\1', line) + '\n'
  20.             author = [a for a in authors[0]]
  21.             desc = result.split('\t')
  22.             for i in author:
  23.                 arow += i + '\t'
  24.             row += arow + desc[1]
  25.             output.write(row)
  26.             buildMatrix(matrix, row, rows, columns)
  27.             row = ''
  28.             arow = ''
  29.             result = ''
  30.             names = ''
  31.             author = []
  32.             desc = []  
  33.             authors = []  
  34.            
  35.         output.close()
  36.         print "Wrote file: %s" % (OUTPUT)
  37.            
  38.     except Exception, err:
  39.         print err
  40.    
  41. def buildMatrix(matrix, row, rows, columns):
  42.     mat = row.split('\t')
  43.     arr = [columns]
  44.     arr.insert(columns, mat[-1])
  45.    
  46.     print arr
  47.  
  48.        
  49. def rowcount(filename):
  50.     lines = 0
  51.     for line in open(filename):
  52.         lines += 1
  53.     return lines
  54.  
  55. def authorcount(filename):
  56.     getCount = open(filename).read()
  57.     authors = []
  58.     for line in getCount.split('\n'):
  59.         names = re.split(r'\d+: |[^,;.]{20,}', line)[1]
  60.         authors.append(re.findall(r'\w+,? [\w.]+', names))
  61.     return max(len(x) for x in authors) + 1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement