Guest User

Untitled

a guest
Jan 24th, 2019
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.42 KB | None | 0 0
  1. a=c(1,2,3,NA,5,6,NA)
  2. positions=which(is.na(a))
  3.  
  4. a=[1,2,3,np.nan,5,6,np.nan]
  5. positions=pd.isnull(a)
  6.  
  7. In [307]:
  8.  
  9. a=[1,2,3,np.nan,5,6,np.nan]
  10. np.nonzero(pd.isnull(a))
  11. Out[307]:
  12. (array([3, 6], dtype=int64),)
  13.  
  14. indexes = [index for index,element in enumerate(a) if np.isnan(element)]
  15.  
  16. def Kickstarter_Example_94():
  17. print()
  18. print(format('How to deal with missing values in a Pandas DataFrame','*^82'))
  19. import warnings
  20. warnings.filterwarnings("ignore")
  21. # load libraries
  22. import pandas as pd
  23. import numpy as np
  24.  
  25. # Create dataframe with missing values
  26. raw_data = {'first_name': ['Jason', np.nan, 'Tina', 'Jake', 'Amy'],
  27. 'last_name': ['Miller', np.nan, 'Ali', 'Milner', 'Cooze'],
  28. 'age': [42, np.nan, 36, 24, 73],
  29. 'sex': ['m', np.nan, 'f', 'm', 'f'],
  30. 'preTestScore': [4, np.nan, np.nan, 2, 3],
  31. 'postTestScore': [25, np.nan, np.nan, 62, 70]}
  32. df = pd.DataFrame(raw_data, columns = ['first_name', 'last_name', 'age', 'sex',
  33. 'preTestScore', 'postTestScore'])
  34. print(); print(df)
  35. # Drop missing observations
  36. df_no_missing = df.dropna()
  37. print(); print(df_no_missing)
  38. # Drop rows where all cells in that row is NA
  39. df_cleaned = df.dropna(how='all')
  40. print(); print(df_cleaned)
  41. # Create a new column full of missing values
  42. df['location'] = np.nan
  43. print(); print(df)
  44. # Drop column if they only contain missing values
  45. print(); print(df.dropna(axis=1, how='all'))
  46. # Drop rows that contain less than five observations
  47. # This is really mostly useful for time series
  48. print(); print(df.dropna(thresh=5))
  49. # Fill in missing data with zeros
  50. print(); print(df.fillna(0))
  51. # Fill in missing in preTestScore with the mean value of preTestScore
  52. # inplace=True means that the changes are saved to the df right away
  53. df["preTestScore"].fillna(df["preTestScore"].mean(), inplace=True)
  54. print(); print(df)
  55. # Fill in missing in postTestScore with each sex’s mean value of postTestScore
  56. df["postTestScore"].fillna(df.groupby("sex")["postTestScore"].transform("mean"), inplace=True)
  57. print(); print(df)
  58. # Select the rows of df where age is not NaN and sex is not NaN
  59. print(); print(df[df['age'].notnull() & df['sex'].notnull()])
  60. print(); print(df[df['age'].notnull() & df['sex'].notnull()].fillna(0))
  61. Kickstarter_Example_94()
Add Comment
Please, Sign In to add comment