Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import pandas as pd
import numpy as np


def blank_to_nan(frame):
    """Return a copy of *frame* with empty / whitespace-only strings as NaN.

    The pattern is ``^\\s*$`` (zero or more whitespace characters spanning the
    whole cell).  Without the backslash the pattern would instead match cells
    made only of the letter "s" and would miss cells containing spaces.
    """
    return frame.replace(r"^\s*$", np.nan, regex=True)


def find_invalid_date_columns(frame, columns):
    """Convert each column in *columns* to datetimes, collecting failures.

    Every value of a column is passed to ``pd.to_datetime(errors='raise')``.
    If the whole column converts, it is replaced in *frame* by the converted
    values.  If any value fails, a message is printed, the column in *frame*
    is left as it was, and the untouched column is stored in the result.

    Args:
        frame: DataFrame to validate; modified in place for valid columns.
        columns: iterable of column names to check.

    Returns:
        dict mapping the name of each failing column to that column (Series).
    """
    invalid = {}
    for col in columns:
        try:
            converted = frame[col].apply(pd.to_datetime, errors="raise")
        except (ValueError, TypeError, OverflowError):
            # Unparseable strings and out-of-bounds dates are raised as
            # ValueError subclasses; catching only parse-related errors keeps
            # KeyboardInterrupt and genuine bugs visible.
            print(f"{col} column contains invalid date")
            invalid[col] = frame[col]
        else:
            frame[col] = converted
    return invalid


# Sample data: one row per record with patient id, date of birth and twelve
# date fields; '' marks an empty cell.
data = [
    ['A001', '3/2/1950','1/1/2010','1/1/2010','','','','','','','','','',''],
    ['A001', '3/2/1950','','','','1/1/2010','','','','','','','',''],
    ['A001', '3/2/1950','','','','1/1/2010','1/1/2010','','','','','','',''],
    ['A001', '3/2/1950','','','','','','1/1/2010','','','','','',''],
    ['A001', '3/2/1950','','','','','','','1/1/2010','1/1/2010','1/1/2010','1/1/2010','',''],
    ['A001', '3/2/1950','','','','','','','','','','','1/1/2010',''],
    ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
    ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
    ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
    ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
    ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
    ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
    ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
    ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
    ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
    ['A002', '3/2/1950','0107-01-06','1/1/2010','','','','','','','','','',''],
    ['A002', '3/2/1950','','','','1/1/2010','','','','','','','',''],
    ['A002', '3/2/1950','','','','1/1/2010','1/1/2010','','','','','','',''],
    ['A002', '3/2/1950','','','','','','1/1/2010','','','','','',''],
    ['A002', '3/2/1950','','','','','','','1/1/2010','1/1/2010','1/1/2010','1/1/2010','',''],
    ['A002', '3/2/1950','','','','','','','','','','','1/1/2010',''],
    ['A002', '3/2/1950','','','','','','','','','','','','1/1/2010'],
    ['A002', '3/2/1950','','','','','','','','','','','','1/1/2010'],
    ['A002', '3/2/1950','','','','','','','','','','','','1/1/2010'],
    ['A002', '3/2/1950','','','','','','','','','','','','1/1/2010'],
    ['A002', '3/2/1950','','','','','','','','','','','','1/1/2010'],
    ['A002', '3/2/1950','','','','','','','','','','','','1/1/2010'],
    ['A002', '3/2/1950','','','','','','','','','','','','1/1/2010'],
    ['A002', '3/2/1950','','','','','','','','','','','','1/1/2010'],
    ['A002', '3/2/1950','','','','','','','','','','','','0107-01-06'],
]
df = pd.DataFrame(
    data,
    columns=[
        'patient_ID', 'DateOfBirth',
        'date_1', 'date_2', 'date_3', 'date_4', 'date_5', 'date_6',
        'date_7', 'date_8', 'date_9', 'date_10', 'date_11', 'date_12',
    ],
)

# insert empty Fail column to identify date errors
df.insert(loc=0, column='Fail', value="")

# replace all blanks with np.NaN (this also blanks the still-empty Fail column)
df = blank_to_nan(df)

# get list of date columns: everything after 'Fail' and 'patient_ID'
cols = list(df)
date_cols = cols[2:]

# identify which date columns contain invalid dates; those columns are kept
# unconverted in df and collected in dfs keyed by column name
dfs = find_invalid_date_columns(df, date_cols)
Advertisement
Add Comment
Please, Sign In to add comment