Aliendreamer

cleaning a df for course exam!

Jan 28th, 2018
176
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.93 KB | None | 0 0
  1. # final option for reading and cleaning files!
  2. name=os.path.splitext('coal_consumption_per_capita.xlsx')[0]
  3. g1 =(pd.read_excel("exam/coal_consumption_per_capita.xlsx"))
  4. g2=g1.rename(columns={'value':g1.columns[0],g1.columns[0]:'country'})
  5. g2=pd.melt(g2,id_vars=['country'],var_name='year', value_name=name[:len(name)])
  6. g2=g2[(g2['country'].str.contains("Turkey"))
  7.         | (g2['country'].str.contains("Serbia") == True)
  8.         | (g2['country'].str.contains("Bulgaria") == True)
  9.         | (g2['country'].str.contains("Greece") == True)
  10.         | (g2['country'].str.contains("Turkey") == True)
  11.         | (g2['country'].str.contains("Macedonia") == True)]
  12. g2 =g2.loc[~g2['country'].isin(['Serbia excluding Kosovo','Serbia and Montenegro'])]
  13. g2['year'] = g2['year'].astype('int',errors='ignore')
  14. g2[g2.columns[2]]=g2[g2.columns[2]].astype('float',errors='ignore').round(2)
  15. g2= g2.drop(g2[g2.year <1990].index)
  16. g2.reset_index(drop=True)
Advertisement
Add Comment
Please, Sign In to add comment