Untitled

# R snippet: build a vector containing two NA entries, then ask for the
# positions of the missing values. which() reports positions counted from 1.
a=c(1,2,3,NA,5,6,NA)
positions=which(is.na(a))

# Python counterpart of the R snippet (assumes np / pd are numpy / pandas,
# imported elsewhere -- no import is visible in this file).
# NOTE: pd.isnull(a) yields an element-wise boolean mask, not the integer
# positions that R's which() returns.
a=[1,2,3,np.nan,5,6,np.nan]
positions=pd.isnull(a)

In [307]:

# np.nonzero on the boolean mask from pd.isnull gives the 0-based positions of
# the missing values, wrapped in a tuple holding one index array (see the
# recorded output that follows).
a=[1,2,3,np.nan,5,6,np.nan]
np.nonzero(pd.isnull(a))
Out[307]:
(array([3, 6], dtype=int64),)

# Alternative: collect the 0-based index of every element of `a` for which
# np.isnan is true. NOTE(review): np.isnan only accepts numeric input, so this
# presumably fails on lists that also hold strings -- confirm before reuse.
indexes = [index for index,element in enumerate(a) if np.isnan(element)]

def Kickstarter_Example_94():
    """Print a walkthrough of common ways to handle missing values in pandas.

    A small example DataFrame containing NaN entries is built, and the result
    of each cleaning step is printed in turn: dropping rows or columns,
    filling with a constant, filling with a column mean, filling with a
    per-group mean, and filtering on non-null columns.

    Returns:
        None. Every result is written to standard output.
    """
    print()
    print(format('How to deal with missing values in a Pandas DataFrame','*^82'))
    import warnings
    # NOTE: this installs a process-wide "ignore" filter that remains active
    # after the function returns.
    warnings.filterwarnings("ignore")
    # load libraries
    import pandas as pd
    import numpy as np

    def show(frame):
        # A blank line before each frame keeps consecutive outputs separated.
        print()
        print(frame)

    # Create dataframe with missing values
    raw_data = {'first_name': ['Jason', np.nan, 'Tina', 'Jake', 'Amy'],
                'last_name': ['Miller', np.nan, 'Ali', 'Milner', 'Cooze'],
                'age': [42, np.nan, 36, 24, 73],
                'sex': ['m', np.nan, 'f', 'm', 'f'],
                'preTestScore': [4, np.nan, np.nan, 2, 3],
                'postTestScore': [25, np.nan, np.nan, 62, 70]}
    df = pd.DataFrame(raw_data, columns = ['first_name', 'last_name', 'age', 'sex',
                                           'preTestScore', 'postTestScore'])
    show(df)

    # Drop every row that has at least one missing value
    df_no_missing = df.dropna()
    show(df_no_missing)

    # Drop only the rows in which all cells are NA
    df_cleaned = df.dropna(how='all')
    show(df_cleaned)

    # Create a new column full of missing values
    df['location'] = np.nan
    show(df)

    # Drop columns that contain nothing but missing values
    show(df.dropna(axis=1, how='all'))

    # Keep only the rows that have at least five non-missing values
    # (mostly useful for time series)
    show(df.dropna(thresh=5))

    # Fill in missing data with zeros
    show(df.fillna(0))

    # Fill missing preTestScore values with the column mean.
    # The result is assigned back to the column: calling
    # fillna(..., inplace=True) on df["col"] is chained assignment, which
    # does not update df when pandas Copy-on-Write is active.
    df["preTestScore"] = df["preTestScore"].fillna(df["preTestScore"].mean())
    show(df)

    # Fill missing postTestScore values with the mean postTestScore of the
    # row's sex group. Rows whose sex is missing belong to no group and
    # therefore stay NaN.
    group_means = df.groupby("sex")["postTestScore"].transform("mean")
    df["postTestScore"] = df["postTestScore"].fillna(group_means)
    show(df)

    # Select the rows of df where age is not NaN and sex is not NaN
    has_age_and_sex = df['age'].notnull() & df['sex'].notnull()
    show(df[has_age_and_sex])
    show(df[has_age_and_sex].fillna(0))
# Run the demonstration at module level: this executes whenever the file is
# run, and also whenever it is imported.
Kickstarter_Example_94()