Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # Loads a CSV file, assuming it has header names, and prints only the specified columns.
- # ... like selecting two columns of a 7-column table.
- #
- # Arguments:
- # - field names, comma-separated
- # - input filename
- #
- # Output:
- # - the header row for the specified columns, and those fields for all input records.
- #
- # Usage:
- # python csvextract.py "fieldName1,fieldName2" ./source.csv > output.csv
- #
- #
- #
- import sys
- import csv
- import fileinput
def parse_field_expression(expr):
    """Split a comma-separated field expression into a list of field names.

    The pieces are returned exactly as they appear between commas; no
    whitespace is stripped and empty pieces are kept.
    """
    separator = ','
    field_names = expr.split(separator)
    return field_names
def perform_extract(field_expr, filename):
    """Yield one dict per CSV record, restricted to the requested columns.

    Arguments:
    - field_expr: comma-separated field names, as accepted by
      parse_field_expression.
    - filename: the input passed to fileinput.input; its first row is
      used as the header by csv.DictReader.

    Yields:
    - a dict mapping each requested field name to that record's value.

    Raises:
    - KeyError if a requested field name is not present in a record.
    """
    wanted = parse_field_expression(field_expr)
    source = fileinput.input(filename)
    try:
        for row in csv.DictReader(source):
            yield {name: row[name] for name in wanted}
    finally:
        # Release the input stream even when the consumer stops early or
        # a lookup fails; the original never closed it.
        source.close()
def main(args):
    """Write the selected CSV columns, preceded by a header row, to stdout.

    Arguments:
    - args: [field_expression, input_filename], i.e. sys.argv[1:].

    The header and the columns follow the order given in the field
    expression. Nothing is written when the input yields no records.

    Raises:
    - SystemExit(2) with a usage message on stderr if fewer than two
      arguments are supplied.
    - ValueError re-raised from the writer, after the offending record
      has been dumped to stderr.
    """
    if len(args) < 2:
        sys.stderr.write(
            'usage: python csvextract.py "fieldName1,fieldName2" '
            './source.csv > output.csv\n')
        raise SystemExit(2)

    field_expr, filename = args[0], args[1]
    # Take the column order from the request rather than from dict key
    # order, which is arbitrary on older interpreters.
    fieldnames = parse_field_expression(field_expr)

    writer = None
    for record in perform_extract(field_expr, filename):
        if writer is None:
            # Created lazily so that a missing file or missing column on
            # the first record fails before any output is produced.
            writer = csv.DictWriter(sys.stdout, fieldnames=fieldnames)
            writer.writeheader()
        try:
            writer.writerow(record)
        except ValueError:
            sys.stderr.write(repr(record) + "\n")
            # Bare raise keeps the original traceback.
            raise


if __name__ == '__main__':
    main(sys.argv[1:])
Add Comment
Please, Sign In to add comment