
Untitled
By: a guest on
Jul 29th, 2012 | syntax:
None | size: 1.74 KB | hits: 16 | expires: Never
Combining columns of CSV files of unknown lengths but same width in Python
Header1, Header2, Header3, Header4
1,2,3,4
11,22,33,44
1,2,3,4
Header1,Header3, ,Header1,Header3, ,...
1,3, ,1,3, ,...
...
import csv

try:
    from itertools import izip_longest  # Python 2 name
except ImportError:
    from itertools import zip_longest as izip_longest  # Python 3 name
# http://docs.python.org/library/itertools.html#itertools.izip_longest


def merge_csv_columns(list_of_filenames, output_csv, columns=(1, 3)):
    """Write selected columns of several CSV files side by side.

    Row N of the output is built from row N of every input file, in the
    order the files are listed.  Files may have different lengths: once a
    file is past EOF it contributes one empty string per selected column,
    so the columns of the remaining files stay aligned.

    Args:
        list_of_filenames: iterable of paths of the CSV files to read.
        output_csv: object with a ``writerow(list)`` method, e.g. the result
            of ``csv.writer(open_file)``.  The caller owns and closes it.
        columns: zero-based indices of the fields copied from every row.
            The default ``(1, 3)`` picks the second and fourth field.

    Raises:
        IndexError: if a row has fewer fields than the largest index in
            ``columns`` requires.
        IOError/OSError: if one of the input files cannot be opened.

    Example::

        with open("merged.csv", "w") as out:
            merge_csv_columns(["a.csv", "b.csv"], csv.writer(out))
    """
    columns = tuple(columns)
    # Stand-in for a file that is past EOF: one '' per selected column.
    padding = [''] * len(columns)
    handles = []
    try:
        # Open one by one so that a failure half-way through still closes
        # the files that were already opened (see ``finally`` below).
        for fname in list_of_filenames:
            handles.append(open(fname, "r"))
        readers = [csv.reader(handle) for handle in handles]
        # bunch_of_lines is a tuple with one entry per reader: all first
        # rows, then all second rows, etc.  csv.reader never yields None,
        # so None safely marks "this file has no more rows".
        for bunch_of_lines in izip_longest(*readers, fillvalue=None):
            merged_row = []
            for line in bunch_of_lines:
                if line is None:
                    merged_row.extend(padding)
                else:
                    merged_row.extend(line[col] for col in columns)
            output_csv.writerow(merged_row)
    finally:
        for handle in handles:
            handle.close()
import csv
import sys

names = ['test1.csv', 'test2.csv']


def merge_rows(csvs, columns=(0, 3), spacer=False):
    """Yield rows that place the selected columns of each source side by side.

    Args:
        csvs: iterable of row iterators (e.g. ``csv.reader`` objects); every
            row is an indexable sequence of strings.
        columns: zero-based indices of the fields taken from every row.
        spacer: when true, append one extra empty field after each source's
            columns, which shows up as an extra comma in the output.

    Yields:
        One list per output row.  A source that has run out of rows
        contributes an empty string for each selected column.  Iteration
        stops once every source is exhausted; no all-blank row is produced.

    Raises:
        IndexError: if a row is too short for one of ``columns``.
    """
    columns = tuple(columns)
    readers = list(csvs)
    blanks = [''] * len(columns)
    done = [False] * len(readers)
    while True:
        data = []
        for i, reader in enumerate(readers):
            row = None
            if not done[i]:
                # Rows are never None, so None reliably signals exhaustion.
                row = next(reader, None)
                done[i] = row is None
            if row is None:
                data.extend(blanks)
            else:
                data.extend(row[col] for col in columns)
            if spacer:
                data.append('')
        if all(done):
            # This pass hit the end of the last live source; nothing to emit.
            return
        yield data


def _open_csv(name):
    """Open *name* the way the running interpreter's csv module expects."""
    if sys.version_info[0] < 3:
        return open(name, 'rb')
    return open(name, 'r', newline='')


def main():
    """Merge the files listed in ``names`` and print the result to stdout."""
    handles = []
    try:
        for name in names:
            handles.append(_open_csv(name))
        # csv.writer quotes fields containing commas or quotes, which a
        # plain ','.join would corrupt; '\n' keeps print-style line endings.
        writer = csv.writer(sys.stdout, lineterminator='\n')
        for data in merge_rows([csv.reader(handle) for handle in handles]):
            writer.writerow(data)
    finally:
        for handle in handles:
            handle.close()


if __name__ == '__main__':
    main()