Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 20 15:44:48 2020
@author: User

Load a CSV data set that marks missing values with "?", split it into
input features and a target column, and fill the missing feature values
with the per-column mean.
"""
import pandas
import numpy as np
from sklearn.impute import SimpleImputer

# Path of the CSV file to read (relative to the working directory).
direktori = "bjir.csv"

# Column names assigned to the CSV; the file itself is read as header-less.
names = [
    'class', 'age', 'sex', 'steroid', 'antiviral', 'fatigue',
    'malaise', 'anorexia', 'liver-big', 'liver-firm',
    'spleen-palpable', 'spiders', 'ascites', 'varices',
    'bilirubin', 'alk-phosphate', 'sgot', 'albumin',
    'protime', 'histology',
]

# Read the data with pandas; every "?" cell becomes NaN.
datamissing = pandas.read_csv(direktori, names=names, na_values=["?"])

# Raw values of the data set as a 2-D array.
array = datamissing.values

# Split inputs and output.
# The target is column 0 ('class'); the inputs are all remaining columns
# ('age' .. 'histology').  The previous slice 0:19 put the target column
# into the inputs and dropped the last feature column.
x = array[:, 1:]
y = array[:, 0]

# Mean imputation: each NaN is replaced by the mean of its column.
# NOTE(review): the mean is applied to every feature column, including ones
# whose names suggest categorical coding (e.g. 'sex') -- confirm that this
# is intended rather than a most-frequent strategy for those columns.
imp = SimpleImputer(missing_values=np.nan, strategy="mean")

# Store the imputed feature matrix in X.
X = imp.fit_transform(x)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement