Advertisement
Guest User

Untitled

a guest
Jul 23rd, 2019
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.28 KB | None | 0 0
  1. #!/usr/bin/python
  2.  
  3. '''
  4. Python script for merging multiple csv file togethar.
  5. Drop the script into the working directory and run the script using CLI.
  6. '''
  7.  
  8. # import necessary library
  9. import argparse
  10. import csv
  11. import os
  12. import sys
  13.  
  14. def parse_csv(file_name, custom_delimiter=','):
  15. """Generator Function for parsing csv file
  16.  
  17. Params:
  18. file_name(str) -> Name of csv file
  19. custom_delimiters(str) : ',' -> delimiter for csv file
  20.  
  21. Return:
  22. (list) A single row of a file one by one
  23.  
  24. """
  25.  
  26. with open(file_name, 'r') as fp:
  27. read_as_csv = csv.reader(fp, delimiter=custom_delimiter)
  28. for rows in read_as_csv:
  29. yield rows
  30.  
  31. def write_row(fp_name, data, custom_delimiter=','):
  32. """Function for write data into given file with custom delimiter
  33.  
  34. Params:
  35. fp_name(str) -> File name for write data
  36. data(list) -> List object for write into file
  37. custom_delimiter(str): ',' -> delimiter for csv file
  38.  
  39. Return:
  40. None
  41. """
  42. with open(fp_name, 'a') as fp:
  43. csv_write_obj = csv.writer(fp, delimiter=custom_delimiter)
  44. if not isinstance(data[0], list):
  45. csv_write_obj.writerows([data])
  46. else:
  47. csv_write_obj.writerows(data)
  48.  
  49. def main(file_list, output_file, headers, custom_delimiter=',', additional_columns=None):
  50. """Function for run all the operation together
  51.  
  52. Params:
  53. file_list(list): -> List of file name to be merge
  54. output_file(str): -> Output file name
  55. headers(list): -> Header for output file
  56. custom_delimiter(str): ',' -> delimiter for csv file
  57. additional_columns(list): None -> Additional columns for output file
  58.  
  59. Return:
  60. None
  61. """
  62.  
  63. # initialize variables
  64. if not additional_columns:
  65. additional_columns = []
  66.  
  67. standard_headers = [column.lower() for column in headers]
  68. write_row(output_file, standard_headers, custom_delimiter)
  69.  
  70. # itarate over all the file
  71. for file in file_list:
  72.  
  73. # set header for new file
  74. row_generator = parse_csv(file, custom_delimiter)
  75. current_header = [column.lower() for column in next(row_generator)]
  76.  
  77. # validation for columns
  78. all_in = [True for column in standard_headers if column in current_header]
  79. if len(all_in) != len(standard_headers):
  80. print('Mismatched the file with header row --> %s' % file)
  81. continue
  82.  
  83. # write data for current file
  84. for row in row_generator:
  85. data = [row[current_header.index(column)] for column in standard_headers]
  86. write_row(output_file, data)
  87.  
  88. # remove the file finished copy
  89. os.remove(file)
  90.  
  91. # sorting the data by first column named id
  92. sorted_list = [[]]
  93. with open(output_file, 'r') as fp:
  94. reader = csv.reader(fp, delimiter=custom_delimiter)
  95. reader = list(reader)
  96. header_row = reader[0]
  97. reader = reader[1:]
  98. id_index = header_row.index('id')
  99. header_row += additional_columns
  100. sorted_list = [header_row] + sorted(reader, key=lambda row: int(row[id_index]))
  101.  
  102. # remove exsiting file and create with sorted data
  103. if os.path.isfile(output_file):
  104. os.remove(output_file)
  105.  
  106. write_row(output_file, sorted_list, custom_delimiter)
  107.  
  108. if __name__ == "__main__":
  109. epilog = 'python csv_merge.py -f test1.csv test2.csv test3.csv' \
  110. ' -o output.csv -nc email post_code address'
  111.  
  112. parser = argparse.ArgumentParser(description='Merge multiple csv file into one file.',
  113. epilog=epilog)
  114.  
  115. parser.add_argument('-f', '--files', type=str, nargs='+',
  116. required=True,
  117. help='File names/path to be merge.')
  118. parser.add_argument('-o', '--output', type=str, required=True,
  119. help='Output file name.')
  120. parser.add_argument('-d', '--delimiter', type=str,
  121. default=',',
  122. help='Custom delimiter for CSV file, default ","')
  123. parser.add_argument('-nc', '--ncolumn', type=str, nargs='+',
  124. help='Additional columns for output file.')
  125. arguments = parser.parse_args()
  126.  
  127. # check files
  128. if not arguments.files:
  129. print('Enter at least 1 file name')
  130. exit(True)
  131.  
  132. not_exists = False
  133. for file in arguments.files:
  134. if not os.path.isfile(file):
  135. print('Invalid File name or path does not exists %s (exit: 1)' % file)
  136. not_exists = True
  137. if not_exists:
  138. exit(True)
  139.  
  140. if not arguments.output or os.path.isfile(arguments.output):
  141. QUESTION = 'This "%s" file already exists, ' \
  142. 'Do you want to continue Yes/N: ' % arguments.output
  143. if sys.version_info.major == 2:
  144. # python 2 input
  145. confirm = raw_input(QUESTION)
  146. else:
  147. # python 3 input
  148. confirm = input(QUESTION)
  149.  
  150. if confirm == 'Yes':
  151. os.remove(arguments.output)
  152. else:
  153. exit(True)
  154.  
  155. if not arguments.ncolumn:
  156. arguments.ncolumn = []
  157.  
  158. # list of columns to be copied or merge into new file
  159. list_of_columns = ['id', 'name', 'phone']
  160.  
  161. main(arguments.files,
  162. arguments.output,
  163. list_of_columns,
  164. arguments.delimiter,
  165. arguments.ncolumn)
  166.  
  167. print('Done..!')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement