Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- '''
- Python script for merging multiple csv file togethar.
- Drop the script into the working directory and run the script using CLI.
- '''
- # import necessary library
- import argparse
- import csv
- import os
- import sys
- def parse_csv(file_name, custom_delimiter=','):
- """Generator Function for parsing csv file
- Params:
- file_name(str) -> Name of csv file
- custom_delimiters(str) : ',' -> delimiter for csv file
- Return:
- (list) A single row of a file one by one
- """
- with open(file_name, 'r') as fp:
- read_as_csv = csv.reader(fp, delimiter=custom_delimiter)
- for rows in read_as_csv:
- yield rows
- def write_row(fp_name, data, custom_delimiter=','):
- """Function for write data into given file with custom delimiter
- Params:
- fp_name(str) -> File name for write data
- data(list) -> List object for write into file
- custom_delimiter(str): ',' -> delimiter for csv file
- Return:
- None
- """
- with open(fp_name, 'a') as fp:
- csv_write_obj = csv.writer(fp, delimiter=custom_delimiter)
- if not isinstance(data[0], list):
- csv_write_obj.writerows([data])
- else:
- csv_write_obj.writerows(data)
- def main(file_list, output_file, headers, custom_delimiter=',', additional_columns=None):
- """Function for run all the operation together
- Params:
- file_list(list): -> List of file name to be merge
- output_file(str): -> Output file name
- headers(list): -> Header for output file
- custom_delimiter(str): ',' -> delimiter for csv file
- additional_columns(list): None -> Additional columns for output file
- Return:
- None
- """
- # initialize variables
- if not additional_columns:
- additional_columns = []
- standard_headers = [column.lower() for column in headers]
- write_row(output_file, standard_headers, custom_delimiter)
- # itarate over all the file
- for file in file_list:
- # set header for new file
- row_generator = parse_csv(file, custom_delimiter)
- current_header = [column.lower() for column in next(row_generator)]
- # validation for columns
- all_in = [True for column in standard_headers if column in current_header]
- if len(all_in) != len(standard_headers):
- print('Mismatched the file with header row --> %s' % file)
- continue
- # write data for current file
- for row in row_generator:
- data = [row[current_header.index(column)] for column in standard_headers]
- write_row(output_file, data)
- # remove the file finished copy
- os.remove(file)
- # sorting the data by first column named id
- sorted_list = [[]]
- with open(output_file, 'r') as fp:
- reader = csv.reader(fp, delimiter=custom_delimiter)
- reader = list(reader)
- header_row = reader[0]
- reader = reader[1:]
- id_index = header_row.index('id')
- header_row += additional_columns
- sorted_list = [header_row] + sorted(reader, key=lambda row: int(row[id_index]))
- # remove exsiting file and create with sorted data
- if os.path.isfile(output_file):
- os.remove(output_file)
- write_row(output_file, sorted_list, custom_delimiter)
- if __name__ == "__main__":
- epilog = 'python csv_merge.py -f test1.csv test2.csv test3.csv' \
- ' -o output.csv -nc email post_code address'
- parser = argparse.ArgumentParser(description='Merge multiple csv file into one file.',
- epilog=epilog)
- parser.add_argument('-f', '--files', type=str, nargs='+',
- required=True,
- help='File names/path to be merge.')
- parser.add_argument('-o', '--output', type=str, required=True,
- help='Output file name.')
- parser.add_argument('-d', '--delimiter', type=str,
- default=',',
- help='Custom delimiter for CSV file, default ","')
- parser.add_argument('-nc', '--ncolumn', type=str, nargs='+',
- help='Additional columns for output file.')
- arguments = parser.parse_args()
- # check files
- if not arguments.files:
- print('Enter at least 1 file name')
- exit(True)
- not_exists = False
- for file in arguments.files:
- if not os.path.isfile(file):
- print('Invalid File name or path does not exists %s (exit: 1)' % file)
- not_exists = True
- if not_exists:
- exit(True)
- if not arguments.output or os.path.isfile(arguments.output):
- QUESTION = 'This "%s" file already exists, ' \
- 'Do you want to continue Yes/N: ' % arguments.output
- if sys.version_info.major == 2:
- # python 2 input
- confirm = raw_input(QUESTION)
- else:
- # python 3 input
- confirm = input(QUESTION)
- if confirm == 'Yes':
- os.remove(arguments.output)
- else:
- exit(True)
- if not arguments.ncolumn:
- arguments.ncolumn = []
- # list of columns to be copied or merge into new file
- list_of_columns = ['id', 'name', 'phone']
- main(arguments.files,
- arguments.output,
- list_of_columns,
- arguments.delimiter,
- arguments.ncolumn)
- print('Done..!')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement