Advertisement
acclivity

pyProcess-CSV-files-example

Feb 5th, 2021
196
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.99 KB | None | 0 0
  1. # Read two CSV files and combine
  2.  
  3. dvd_dict = {}
  4.  
  5. f1 = open("File1.csv")
  6. hdr = f1.readline()     # read the header line
  7.  
  8. # We could identify the various columns here
  9. idx_id, idx_prod, idx_title, idx_genre = 99, 99, 99, 99
  10. hdr_upper = hdr.upper()
  11. split_hdr = hdr_upper.split(",")
  12. line_ctr = 1
  13. for idx, field in enumerate(split_hdr):
  14.     if "ID" in field: idx_id = idx
  15.     if "PROD" in field: idx_prod = idx
  16.     if "TITLE" in field: idx_title = idx
  17.     if "GENRE" in field: idx_genre = idx
  18. # -------------------------------------------------
  19.  
  20. # Now read the data lines
  21. for line in f1:
  22.     line = line.strip()
  23.     print(line)
  24.     line_ctr += 1
  25.     split_line = line.split(",")
  26.     dvd_id = split_line[idx_id]
  27.     dvd_title = split_line[idx_title]
  28.     dvd_prod = split_line[idx_prod]
  29.     dvd_genre = split_line[idx_genre]
  30.     if dvd_id in dvd_dict:
  31.         print("Duplicate DVD ID on line: ", line_ctr, line)
  32.         continue
  33.     dvd_dict[dvd_id] = [dvd_title, dvd_prod, dvd_genre]
  34.  
  35. f1.close()
  36. print("File1 Finished reading", line_ctr, "lines. Dictionary created")
  37. f2 = open("File2.csv")
  38.  
  39. line_ctr = 0
  40. for line in f2:
  41.     line = line.strip()
  42.     line_ctr += 1
  43.     split_line = line.split(",")
  44.     # No header on this file, so we have to presume the column identities
  45.     dvd_id = split_line[0]
  46.     dvd_sales_id = split_line[1]
  47.     dvd_date_stocked = split_line[2]
  48.     dvd_date_sold = split_line[3]
  49.  
  50.     if dvd_id not in dvd_dict:
  51.         print("Unknown DVD ID on 2nd file, line: ", line_ctr, line)
  52.         continue
  53.  
  54.     dvd_dict[dvd_id].extend([dvd_sales_id, dvd_date_stocked, dvd_date_sold])
  55.  
  56. f2.close()
  57. print("File2 Finished reading", line_ctr, "lines. Dictionary updated")
  58. f3 = open("File3.csv", 'w')
  59. line_ctr = 0
  60. dvd_keys = list(dvd_dict.keys())
  61. dvd_keys.sort()
  62. for key in dvd_keys:
  63.     line_ctr += 1
  64.     dvd_data = dvd_dict[key]
  65.     lineout = key + "," + ",".join(dvd_data)
  66.     f3.write(lineout + "\n")
  67.  
  68. f3.close()
  69. print("File3 Finished writing: ", line_ctr, "lines. ")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement