Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: a guest on Jul 29th, 2012  |  syntax: None  |  size: 1.74 KB  |  hits: 16  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. Combining columns of CSV files of unknown lengths but same width in Python
  2. Header1, Header2, Header3, Header4
  3. 1,2,3,4
  4. 11,22,33,44
  5. 1,2,3,4
  6.        
  7. Header1,Header3, ,Header1,Header3, ,...
  8. 1,3, ,1,3, ,...
  9. ...
  10.        
  11. from itertools import izip_longest
  12. # http://docs.python.org/library/itertools.html#itertools.izip_longest
  13.  
  14. # Open one csv.reader per input file; `csv` and `list_of_filenames` must already be defined (this snippet shows neither). The file handles are never closed explicitly.
  15. readers = [csv.reader(open(fname, "r")) for fname in list_of_filenames]
  16.  
  17. # Open the writer; the `...` is a placeholder for the output file argument, which the snippet leaves unspecified.
  18. output_csv = csv.writer(...)
  19.  
  20. for bunch_of_lines in izip_longest(*readers, fillvalue=['', '', '', '']):
  21.   # bunch_of_lines is a tuple holding one row from each reader,
  22.   # e.g. all first rows, then all second rows, and so on.
  23.   # Once a file is exhausted while others are not, izip_longest supplies fillvalue as its row.
  24.   merged_row = []
  25.   for line in bunch_of_lines:
  26.       # A real row is assumed to have 4 fields (a shorter row would raise IndexError below).
  27.       # For an exhausted file, line is the fillvalue from above,
  28.       #   which always has 4 items, all empty strings.
  29.       merged_row.extend([line[1], line[3]]) # zero-based indices 1 and 3, i.e. the 2nd and 4th fields -- NOTE(review): the example output above shows Header1/Header3 (indices 0 and 2) plus a blank separator column; confirm intent
  30.   output_csv.writerow(merged_row)
  31.        
  32. import csv
  33.  
  34. names=['test1.csv','test2.csv']  # input files, merged side by side in this order
  35. csvs = []  # one csv.reader per input file
  36. done = []  # done[i] becomes True once csvs[i] is exhausted
  37. for name in names:
  38.     csvs.append(csv.reader(open(name, 'rb')))  # binary mode, as the Python 2 csv docs recommend; the file object is never closed
  39.     done.append(False)
  40.  # Emit one merged line per round until every reader is exhausted.
  41. while not all(done):
  42.     data = []  # fields of the merged output line for this round
  43.     for i, c in enumerate(csvs):
  44.         if not done[i]:  # only advance readers that still have rows
  45.             try:
  46.                 row = c.next()  # Python 2 iterator protocol
  47.             except StopIteration:
  48.                 done[i] = True  # remember exhaustion so this reader is never advanced again
  49.         if done[i]:  # exhausted file: pad with two empty fields so later files keep their columns
  50.             data.append('')
  51.             data.append('')
  52.             # to get a blank separator column, add data.append('') here ...
  53.         else:
  54.             data.append(row[0])  # first field of the row
  55.             data.append(row[3])  # fourth field (zero-based index 3) -- NOTE(review): the example output above shows Header1/Header3, which would be index 2; confirm intent
  56.             # ... and add data.append('') here too, which yields the extra commas
  57.     if not all(done):  # skip the final round, in which every reader was exhausted and data holds only empty strings
  58.         print ','.join(data)  # Python 2 print statement; plain join, so fields containing commas or quotes are not escaped