Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #import the csv and datetime library
- import csv
- import datetime
- def main():
- #Open the input dataset
- file = open("sample.csv")
- #Create a csv reader object to read data from csv file
- csvreader = csv.reader(file)
- #Read the header values i.e. shop_id, date, n_trans
- header = next(csvreader)
- #Empty list to store all the rows
- rows = []
- # Dictionary to store shop and dates that appear in the CSV file
- # key is the shop_id and value is a list containing all dates
- # shop_id: list containing all dates for that shop
- shops = {}
- # For each row in the sample.csv file we populate the shops dictionary
- for row in csvreader:
- rows.append(row)
- shop_id = int(row[0])
- # If the shop id is not in shops dictionary it means we are encountering it the first time
- # and we populate it with a list but if it is already present we simply append the date to
- # the list
- if(shop_id not in shops):
- shops[shop_id] = [datetime.datetime(int(row[1][0:4]), int(row[1][5:7]),int(row[1][8:]))]
- else:
- shops[shop_id].append(datetime.datetime(int(row[1][0:4]), int(row[1][5:7]),int(row[1][8:])))
- # Dictionary to store open and closed dates where key is shop id and value is a list
- # The list contains another list with two values [lower_range, upper_range]
- # upper_range can also be NULL
- closed = {}
- opened = {}
- # Iterate through each shop
- for shop in shops:
- # Sort the date list for each shop
- shops[shop].sort()
- # Start with index 0 of list i.e. first value
- i = 0
- # Starting date is taken 1st January 2021
- opendate = datetime.datetime(2021,1,1)
- # Go through all the dates
- while i < len(shops[shop]) - 1:
- # Check if difference between successive entries is more than 30 days which means shop was closed for that interval
- # For example if we have 2021-09-02 and 2021-10-03 entries, shop remained open till 2021-09-02
- # Shop was closed from 2021-09-03 till 2021-10-02
- # Shop again opened from 2021-10-03
- if((shops[shop][i+1] - shops[shop][i]).days > 30):
- # Check if there is already a cloesd entry for the shop in closed dictionary
- # If there is no entry create a new list for that shop
- if(shop not in closed):
- # If the shop closed it means it must be open before so we add an entry in opened dictionary
- opened[shop] = [[opendate, shops[shop][i]]]
- # We also add an entry in closed dictionary but we also add a day
- closed[shop] = [[shops[shop][i] + datetime.timedelta(days=1), shops[shop][i+1] - datetime.timedelta(days=1)]]
- # The next date entry is the time when shop is open
- opendate = shops[shop][i+1]
- # If there is an entry simply append to that list, the logic in the below else codeblock is same as above if block code
- else:
- opened[shop].append([opendate, shops[shop][i]])
- closed[shop].append([shops[shop][i] + datetime.timedelta(days=1), shops[shop][i+1] - datetime.timedelta(days=1)])
- opendate = shops[shop][i+1]
- i += 1
- # Check if the difference between last date of transaction record and 2022-12-31 is more than 30 days
- # which means the shop closed after the last entry, then we keep the upper_range as NULL
- if((datetime.datetime(2022,12,31) - shops[shop][i]).days > 30):
- if(shop not in closed):
- opened[shop] = [[opendate, shops[shop][i]]]
- closed[shop] = [[shops[shop][i] + datetime.timedelta(days=1), 'NULL']]
- else:
- opened[shop].append([opendate, shops[shop][i]])
- closed[shop].append([shops[shop][i] + datetime.timedelta(days=1), 'NULL'])
- # Check if shop never closed which means it was open from 2021-01-01 with upper_range as NULL
- for shop in shops:
- if(shop not in closed):
- opened[shop] =[[datetime.datetime(2021,1,1),'NULL']]
- # For all the scenarios in which upper_range of close is not NULL we open the shop
- for shop in closed:
- if(closed[shop][-1][1]!="NULL"):
- opened[shop].append([closed[shop][-1][1] + datetime.timedelta(days=1),'NULL'])
- # Now we simply write the data to SqlDataset.csv file
- with open('SqlDataset.csv','w',newline='') as f:
- writer = csv.writer(f)
- # Write the header
- writer.writerow(['shop_id','status','lower_range','upper_range'])
- # Variable to store all the rows to be written
- outputdata = []
- # Now we copy all the data from opened and closed dictionary to outputdata list
- for shop in opened:
- for interval in opened[shop]:
- if(interval[1]!="NULL"):
- outputdata.append([shop, 'open', datetime.datetime.strftime(interval[0],"%Y-%m-%d"), datetime.datetime.strftime(interval[1],"%Y-%m-%d")])
- else:
- outputdata.append([shop, 'open', datetime.datetime.strftime(interval[0],"%Y-%m-%d"), 'NULL'])
- for shop in closed:
- for interval in closed[shop]:
- if(interval[1]!="NULL"):
- outputdata.append([shop, 'clsd', datetime.datetime.strftime(interval[0],"%Y-%m-%d"), datetime.datetime.strftime(interval[1],"%Y-%m-%d")])
- else:
- outputdata.append([shop, 'clsd', datetime.datetime.strftime(interval[0],"%Y-%m-%d"), 'NULL'])
- # Sort first using shop id and then using the lower_range
- outputdata.sort(key=lambda x:(x[0],x[2]))
- # Write the data
- writer.writerows(outputdata)
- main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement