SHARE
TWEET

Untitled

a guest Jan 24th, 2019 60 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. a=c(1,2,3,NA,5,6,NA)
  2. positions=which(is.na(a))
  3.    
  4. a=[1,2,3,np.nan,5,6,np.nan]
  5. positions=pd.isnull(a)
  6.    
  7. In [307]:
  8.  
  9. a=[1,2,3,np.nan,5,6,np.nan]
  10. np.nonzero(pd.isnull(a))
  11. Out[307]:
  12. (array([3, 6], dtype=int64),)
  13.    
  14. indexes = [index for index,element in enumerate(a) if np.isnan(element)]
  15.    
  16. def Kickstarter_Example_94():
  17.     print()
  18.     print(format('How to deal with missing values in a Pandas DataFrame','*^82'))    
  19.     import warnings
  20.     warnings.filterwarnings("ignore")
  21.     # load libraries
  22.     import pandas as pd
  23.     import numpy as np
  24.  
  25.     # Create dataframe with missing values
  26.     raw_data = {'first_name': ['Jason', np.nan, 'Tina', 'Jake', 'Amy'],
  27.                 'last_name': ['Miller', np.nan, 'Ali', 'Milner', 'Cooze'],
  28.                 'age': [42, np.nan, 36, 24, 73],
  29.                 'sex': ['m', np.nan, 'f', 'm', 'f'],
  30.                 'preTestScore': [4, np.nan, np.nan, 2, 3],
  31.                 'postTestScore': [25, np.nan, np.nan, 62, 70]}
  32.     df = pd.DataFrame(raw_data, columns = ['first_name', 'last_name', 'age', 'sex',
  33.                                            'preTestScore', 'postTestScore'])
  34.     print(); print(df)
  35.     # Drop missing observations
  36.     df_no_missing = df.dropna()
  37.     print(); print(df_no_missing)
  38.     # Drop rows where all cells in that row is NA
  39.     df_cleaned = df.dropna(how='all')
  40.     print(); print(df_cleaned)
  41.     # Create a new column full of missing values
  42.     df['location'] = np.nan
  43.     print(); print(df)
  44.     # Drop column if they only contain missing values
  45.     print(); print(df.dropna(axis=1, how='all'))
  46.     # Drop rows that contain less than five observations
  47.     # This is really mostly useful for time series
  48.     print(); print(df.dropna(thresh=5))
  49.     # Fill in missing data with zeros
  50.     print(); print(df.fillna(0))
  51.     # Fill in missing in preTestScore with the mean value of preTestScore
  52.     # inplace=True means that the changes are saved to the df right away
  53.     df["preTestScore"].fillna(df["preTestScore"].mean(), inplace=True)
  54.     print(); print(df)
  55.     # Fill in missing in postTestScore with each sex’s mean value of postTestScore
  56.     df["postTestScore"].fillna(df.groupby("sex")["postTestScore"].transform("mean"), inplace=True)
  57.     print(); print(df)
  58.     # Select the rows of df where age is not NaN and sex is not NaN
  59.     print(); print(df[df['age'].notnull() & df['sex'].notnull()])
  60.     print(); print(df[df['age'].notnull() & df['sex'].notnull()].fillna(0))
  61. Kickstarter_Example_94()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top