m8_

datetime_df

m8_
Aug 7th, 2019
331
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.06 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3.  
  4. data = [['A001', '3/2/1950','1/1/2010','1/1/2010','','','','','','','','','',''],
  5.         ['A001', '3/2/1950','','','','1/1/2010','','','','','','','',''],
  6.         ['A001', '3/2/1950','','','','1/1/2010','1/1/2010','','','','','','',''],
  7.         ['A001', '3/2/1950','','','','','','1/1/2010','','','','','',''],
  8.         ['A001', '3/2/1950','','','','','','','1/1/2010','1/1/2010','1/1/2010','1/1/2010','',''],
  9.         ['A001', '3/2/1950','','','','','','','','','','','1/1/2010',''],
  10.         ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
  11.         ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
  12.         ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
  13.         ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
  14.         ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
  15.         ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
  16.         ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
  17.         ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
  18.         ['A001', '3/2/1950','','','','','','','','','','','','1/1/2010'],
  19.         ['A002', '3/2/1950','0107-01-06','1/1/2010','','','','','','','','','',''],
  20.         ['A002', '3/2/1950','','','','1/1/2010','','','','','','','',''],
  21.         ['A002', '3/2/1950','','','','1/1/2010','1/1/2010','','','','','','',''],
  22.         ['A002', '3/2/1950','','','','','','1/1/2010','','','','','',''],
  23.         ['A002', '3/2/1950','','','','','','','1/1/2010','1/1/2010','1/1/2010','1/1/2010','',''],
  24.         ['A002', '3/2/1950','','','','','','','','','','','1/1/2010',''],
  25.         ['A002', '3/2/1950','','','','','','','','','','','','1/1/2010'],
  26.         ['A002', '3/2/1950','','','','','','','','','','','','1/1/2010'],
  27.         ['A002', '3/2/1950','','','','','','','','','','','','1/1/2010'],
  28.         ['A002', '3/2/1950','','','','','','','','','','','','1/1/2010'],
  29.         ['A002', '3/2/1950','','','','','','','','','','','','1/1/2010'],
  30.         ['A002', '3/2/1950','','','','','','','','','','','','1/1/2010'],
  31.         ['A002', '3/2/1950','','','','','','','','','','','','1/1/2010'],
  32.         ['A002', '3/2/1950','','','','','','','','','','','','1/1/2010'],
  33.         ['A002', '3/2/1950','','','','','','','','','','','','0107-01-06']]
  34.  
  35. df = pd.DataFrame(data,columns=['patient_ID','DateOfBirth','date_1','date_2','date_3','date_4','date_5','date_6','date_7','date_8','date_9','date_10','date_11','date_12',])
  36.  
  37. # insert empty Fail column to identify date errors
  38. df.insert(loc=0, column='Fail', value="")
  39.  
  40. # replace all blanks with np.NaN
  41. df.replace(r"^s*$", np.nan, regex=True, inplace = True)
  42.  
  43. # get list of date columns
  44. cols = list(df)
  45. date_cols = cols[2:]
  46.  
  47. # create empty dict
  48. dfs = {}
  49.  
  50. # iterate over date columns to identify which columns contain invalid dates & add to dfs
  51. for col in df[date_cols]:
  52.     try:
  53.         df[col] = df[col].apply(pd.to_datetime, errors='raise')
  54.     except:
  55.         print("%s column contains invalid date" % col)
  56.         dfs[col] = df[col]
Advertisement
Add Comment
Please, Sign In to add comment