Advertisement
homeworkhelp111

Comments

Apr 12th, 2023
19
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.86 KB | None | 0 0
  1. #import the csv and datetime library
  2. import csv
  3. import datetime
  4.  
  5. def main():
  6. #Open the input dataset
  7. file = open("sample.csv")
  8.  
  9. #Create a csv reader object to read data from csv file
  10. csvreader = csv.reader(file)
  11.  
  12. #Read the header values i.e. shop_id, date, n_trans
  13. header = next(csvreader)
  14.  
  15. #Empty list to store all the rows
  16. rows = []
  17.  
  18. # Dictionary to store shop and dates that appear in the CSV file
  19. # key is the shop_id and value is a list containing all dates
  20. # shop_id: list containing all dates for that shop
  21.  
  22. shops = {}
  23.  
  24. # For each row in the sample.csv file we populate the shops dictionary
  25. for row in csvreader:
  26. rows.append(row)
  27. shop_id = int(row[0])
  28. # If the shop id is not in shops dictionary it means we are encountering it the first time
  29. # and we populate it with a list but if it is already present we simply append the date to
  30. # the list
  31. if(shop_id not in shops):
  32. shops[shop_id] = [datetime.datetime(int(row[1][0:4]), int(row[1][5:7]),int(row[1][8:]))]
  33. else:
  34. shops[shop_id].append(datetime.datetime(int(row[1][0:4]), int(row[1][5:7]),int(row[1][8:])))
  35.  
  36. # Dictionary to store open and closed dates where key is shop id and value is a list
  37. # The list contains another list with two values [lower_range, upper_range]
  38. # upper_range can also be NULL
  39. closed = {}
  40. opened = {}
  41.  
  42. # Iterate through each shop
  43. for shop in shops:
  44. # Sort the date list for each shop
  45. shops[shop].sort()
  46.  
  47. # Start with index 0 of list i.e. first value
  48. i = 0
  49.  
  50. # Starting date is taken 1st January 2021
  51. opendate = datetime.datetime(2021,1,1)
  52.  
  53. # Go through all the dates
  54. while i < len(shops[shop]) - 1:
  55. # Check if difference between successive entries is more than 30 days which means shop was closed for that interval
  56. # For example if we have 2021-09-02 and 2021-10-03 entries, shop remained open till 2021-09-02
  57. # Shop was closed from 2021-09-03 till 2021-10-02
  58. # Shop again opened from 2021-10-03
  59.  
  60. if((shops[shop][i+1] - shops[shop][i]).days > 30):
  61. # Check if there is already a cloesd entry for the shop in closed dictionary
  62. # If there is no entry create a new list for that shop
  63. if(shop not in closed):
  64. # If the shop closed it means it must be open before so we add an entry in opened dictionary
  65. opened[shop] = [[opendate, shops[shop][i]]]
  66.  
  67. # We also add an entry in closed dictionary but we also add a day
  68. closed[shop] = [[shops[shop][i] + datetime.timedelta(days=1), shops[shop][i+1] - datetime.timedelta(days=1)]]
  69.  
  70. # The next date entry is the time when shop is open
  71. opendate = shops[shop][i+1]
  72. # If there is an entry simply append to that list, the logic in the below else codeblock is same as above if block code
  73. else:
  74. opened[shop].append([opendate, shops[shop][i]])
  75. closed[shop].append([shops[shop][i] + datetime.timedelta(days=1), shops[shop][i+1] - datetime.timedelta(days=1)])
  76. opendate = shops[shop][i+1]
  77. i += 1
  78.  
  79. # Check if the difference between last date of transaction record and 2022-12-31 is more than 30 days
  80. # which means the shop closed after the last entry, then we keep the upper_range as NULL
  81. if((datetime.datetime(2022,12,31) - shops[shop][i]).days > 30):
  82. if(shop not in closed):
  83. opened[shop] = [[opendate, shops[shop][i]]]
  84. closed[shop] = [[shops[shop][i] + datetime.timedelta(days=1), 'NULL']]
  85. else:
  86. opened[shop].append([opendate, shops[shop][i]])
  87. closed[shop].append([shops[shop][i] + datetime.timedelta(days=1), 'NULL'])
  88.  
  89. # Check if shop never closed which means it was open from 2021-01-01 with upper_range as NULL
  90. for shop in shops:
  91. if(shop not in closed):
  92. opened[shop] =[[datetime.datetime(2021,1,1),'NULL']]
  93.  
  94. # For all the scenarios in which upper_range of close is not NULL we open the shop
  95. for shop in closed:
  96. if(closed[shop][-1][1]!="NULL"):
  97. opened[shop].append([closed[shop][-1][1] + datetime.timedelta(days=1),'NULL'])
  98.  
  99. # Now we simply write the data to SqlDataset.csv file
  100. with open('SqlDataset.csv','w',newline='') as f:
  101. writer = csv.writer(f)
  102.  
  103. # Write the header
  104. writer.writerow(['shop_id','status','lower_range','upper_range'])
  105.  
  106. # Variable to store all the rows to be written
  107. outputdata = []
  108.  
  109. # Now we copy all the data from opened and closed dictionary to outputdata list
  110. for shop in opened:
  111. for interval in opened[shop]:
  112. if(interval[1]!="NULL"):
  113. outputdata.append([shop, 'open', datetime.datetime.strftime(interval[0],"%Y-%m-%d"), datetime.datetime.strftime(interval[1],"%Y-%m-%d")])
  114. else:
  115. outputdata.append([shop, 'open', datetime.datetime.strftime(interval[0],"%Y-%m-%d"), 'NULL'])
  116.  
  117. for shop in closed:
  118. for interval in closed[shop]:
  119. if(interval[1]!="NULL"):
  120. outputdata.append([shop, 'clsd', datetime.datetime.strftime(interval[0],"%Y-%m-%d"), datetime.datetime.strftime(interval[1],"%Y-%m-%d")])
  121. else:
  122. outputdata.append([shop, 'clsd', datetime.datetime.strftime(interval[0],"%Y-%m-%d"), 'NULL'])
  123.  
  124. # Sort first using shop id and then using the lower_range
  125. outputdata.sort(key=lambda x:(x[0],x[2]))
  126.  
  127. # Write the data
  128. writer.writerows(outputdata)
  129.  
  130. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement