Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import pandas as pd
- import numpy as np
- from sklearn.ensemble import make_stack_layer
- from sklearn.preprocessing import FunctionTransformer
- from sklearn.experimental import make_column_transformer
- from sklearn.pipeline import make_pipeline
- from sklearn.preprocessing import StandardScaler
- from sklearn.preprocessing import CategoricalEncoder
- from sklearn.svm import LinearSVC
- from sklearn.dummy import DummyRegressor
- from sklearn.linear_model import LogisticRegression
- from nilearn.connectome import ConnectivityMeasure
- from sklearn.model_selection import cross_val_score
- from sklearn.model_selection import StratifiedShuffleSplit
# Locations of the three per-subject tables used by this script.
path_fmri = '/home/lemaitre/Documents/data/ramp_autism/fmri_filename.csv'
path_anatomy = '/home/lemaitre/Documents/data/ramp_autism/anatomy.csv'
path_participants = (
    '/home/lemaitre/Documents/data/ramp_autism/'
    'participants.csv'
)

# Every table uses its first CSV column as the index. From the fMRI table
# only the 'basc064' column is kept.
df_fmri = pd.read_csv(path_fmri, index_col=0)['basc064']
df_anatomy = pd.read_csv(path_anatomy, index_col=0)
df_participants = pd.read_csv(path_participants, index_col=0)

# Put the anatomical columns, the fMRI column and age/sex side by side;
# pandas aligns the three pieces on their index.
df_data = pd.concat(
    [df_anatomy, df_fmri, df_participants[['age', 'sex']]],
    axis='columns',
)

# Design matrix and prediction target.
X = df_data
y = df_participants['target']
def _load_fmri(fmri_filenames,
               data_path='/home/lemaitre/Documents/data/ramp_autism'):
    """Read one header-less CSV per subject and collect them in an array.

    Parameters
    ----------
    fmri_filenames : iterable of str
        File names, one per subject. Each one is joined onto ``data_path``
        with ``os.path.join`` before being read.
    data_path : str, optional
        Directory the file names are resolved against. The default is the
        directory that used to be hard-coded in this function, so existing
        single-argument callers behave as before.

    Returns
    -------
    numpy.ndarray
        When every file yields a matrix of the same shape (or the input is
        empty), the plain ``np.array`` of the matrices. Otherwise a 1-D
        array of dtype ``object`` whose i-th entry is the matrix read from
        the i-th file.
    """
    matrices = [
        pd.read_csv(os.path.join(data_path, subject_filename),
                    header=None).values
        for subject_filename in fmri_filenames
    ]

    # Same-shaped matrices stack into a regular array, as before.
    if len({matrix.shape for matrix in matrices}) <= 1:
        return np.array(matrices)

    # Recent NumPy releases refuse to build an array implicitly from
    # differently shaped matrices, so fill an object array entry by entry.
    stacked = np.empty(len(matrices), dtype=object)
    for position, matrix in enumerate(matrices):
        stacked[position] = matrix
    return stacked
# One transformer per data domain.

# fMRI: load the per-subject files, then turn them into vectorized
# tangent-space connectivity features.
transformer_fmri = make_pipeline(
    FunctionTransformer(
        func=_load_fmri,
        validate=False,
        check_inverse=False,
    ),
    ConnectivityMeasure(kind='tangent', vectorize=True),
)

# Anatomy: standardize the columns.
transformer_anat = StandardScaler()

# Sex: ordinal encoding of the categorical column.
transformer_sex = CategoricalEncoder(encoding='ordinal')

# Age: a FunctionTransformer built with no function.
# NOTE(review): presumably an identity pass-through - confirm against the
# scikit-learn version this script targets.
transformer_age = FunctionTransformer()
# Column selectors for each domain. The fMRI selector is a bare string
# while the others are lists.
# NOTE(review): presumably so the fMRI transformer receives a 1-D column of
# file names rather than a 2-D frame - confirm against the column
# transformer's documentation.
columns_fmri = 'basc064'
columns_sex = ['sex']
columns_age = ['age']

# Everything that is not fMRI, sex or age is treated as anatomical, in the
# order the columns appear in X.
columns_anat = [
    name
    for name in X.columns
    if name != columns_fmri
    and name not in columns_sex
    and name not in columns_age
]
def make_domain_estimator(column_name, transformer, estimator):
    """Build a pipeline restricted to one group of columns.

    Parameters
    ----------
    column_name : str or list of str
        Column selector handed to ``make_column_transformer``.
    transformer : estimator
        Transformer applied to the selected column(s).
    estimator : estimator
        Final step of the pipeline, fed with the transformer's output.

    Returns
    -------
    Pipeline
        ``make_pipeline`` of the column transformer followed by
        ``estimator``.
    """
    column_step = make_column_transformer((transformer, column_name))
    return make_pipeline(column_step, estimator)
# First layer of the stack: one (name, pipeline) pair per data domain, each
# pipeline seeing only its own columns.
# NOTE(review): DummyRegressor(strategy='input') and make_stack_layer are
# not part of released scikit-learn; presumably 'input' forwards the raw
# column to the next layer - confirm against the development branch this
# script targets.
stacked_estimators = [
    (label, make_domain_estimator(columns, transformer, predictor))
    for label, columns, transformer, predictor in (
        ('estimator_fmri', columns_fmri, transformer_fmri, LinearSVC()),
        ('estimator_anat', columns_anat, transformer_anat, LinearSVC()),
        ('estimator_age', columns_age, transformer_age,
         DummyRegressor(strategy='input')),
        ('estimator_sex', columns_sex, transformer_sex,
         DummyRegressor(strategy='input')),
    )
]

# Combine the per-domain pipelines into a single stacking layer and put a
# logistic regression on top of its outputs.
layer = make_stack_layer(stacked_estimators, n_jobs=-1)
full_pipeline = make_pipeline(layer, LogisticRegression())
# Score the full stacked pipeline with ROC AUC over stratified shuffle
# splits, using all available cores and verbose progress output.
results = cross_val_score(
    full_pipeline,
    X,
    y,
    scoring='roc_auc',
    cv=StratifiedShuffleSplit(),
    verbose=100,
    n_jobs=-1,
)
# Result recorded by the original author:
# AUC: 0.72 +- 0.3
print(results)
Add Comment
Please, Sign In to add comment