Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # coding: utf-8
- # In[1]:
- # This notebook is adapted from " https://github.com/Trusted-AI/AIF360 "
- import sys
- import warnings
- # Redirections of warnings to /dev/null (Linux/macOS)
- warnings.filterwarnings("ignore")
- sys.stderr = open('/dev/null', 'w')
- # Some library imports
- from matplotlib import pyplot as plt
- import seaborn as sns
- import sys
- sys.path.append("../")
- import numpy as np
- import random as rd
- import pandas as pd
- import scipy.stats as stats
- from sklearn.model_selection import train_test_split
- from sklearn.linear_model import LogisticRegression
- from sklearn.preprocessing import MinMaxScaler
- from sklearn.metrics import f1_score, accuracy_score
- from aif360.algorithms.postprocessing.reject_option_classification import RejectOptionClassification
- from aif360.sklearn.datasets import fetch_german
- from aif360.datasets import AdultDataset, BinaryLabelDataset
- from aif360.metrics import BinaryLabelDatasetMetric
- from matplotlib import rc
- from matplotlib.patches import Patch
- # Fix randomness and some parameters
- seed = 8858 #rd.randint(1000, 10000)
- np.random.seed(seed)
- rd.seed(seed)
- plotting = True
- sns.set_style(style="ticks")
- params = {
- 'axes.spines.right' : False,
- 'axes.spines.top' : False,
- 'text.usetex' : True,
- 'pdf.fonttype' : 42,
- 'legend.fontsize': 'large',
- 'figure.figsize': (10, 6),
- 'axes.labelsize': 'large',
- 'axes.titlesize':'large',
- 'xtick.labelsize':'large',
- 'ytick.labelsize':'large',
- 'font.size': 24}
- plt.rcParams.update(params)
- plt.rc('lines', linewidth=2.0)
- scaler = MinMaxScaler(copy=False)
- repetitions = 50
- budget = 1500
- # # General setting
- # Load the Adult dataset, generate the name, the proxy and learn the black-box model.
- # In[2]:
- # Importing the dataset "AdultDataset" from: https://archive.ics.uci.edu/ml/datasets/adult.
- # As in the example, we keep the following features: 'age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'
- # The protected variable is: 'sex'
- # The variable to predict is: 'income-per-year'
- ad = AdultDataset(protected_attribute_names=['sex'],
- privileged_classes=[['Male']], categorical_features=[],
- features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'])
- label_name = ad.label_names[0]
- # See appendix B, table II in Barry III and Harper (2000)
- female_names = ['Ashley', 'Jessica', 'Amanda', 'Brittany', 'Samantha', 'Sarah', 'Lauren', 'Nicole', 'Megan', 'Stephanie', 'Emily', 'Jennifer', 'Elizabeth', 'Kayla', 'Rachel', 'Amber', 'Rebecca', 'Danielle', 'Chelsea', 'Alyssa', 'Melissa', 'Heather', 'Kelly', 'Christina', 'Michelle']
- male_names = ['Michael', 'Matthew', 'Christopher', 'Joshua', 'Andrew', 'Joseph', 'John', 'Daniel', 'David', 'Robert', 'James', 'Justin', 'Nicholas', 'Anthony', 'William', 'Kyle', 'Zachary', 'Kevin', 'Tyler', 'Thomas', 'Eric', 'Brian', 'Brandon', 'Jonathan', 'Timothy']
- def propose_name(s):
- if s == 1:
- return rd.choice(male_names)
- else:
- return rd.choice(female_names)
- def proxy_sex_3letters(name):
- '''
- Predict the sex (1: Male, 0: Male) depending on first names.
- p(g^i(n) = Female)=
- 1 if the name ends by a, e or i
- 0.5 if the name ends by h or y
- 0 otherwise.
- '''
- l = name[-1]
- if l in ['a', 'e', 'i']:
- return 0
- elif l in ['y', 'h']:
- return rd.choices([0, 1], [0.5, 0.5], k=1)[0]
- else:
- return 1
- # Increase of the dataset with the following two features: 'name' and 'proxy' where 'proxy' is the reconstructed "sex" using the intermediate proxy.
- df = ad.convert_to_dataframe()[0]
- df['name'] = df['sex'].map(propose_name)
- del df['sex']
- # some characteristics of the dataset, used for "poor proxy"
- # Age: 25-86 ans (Int)
- # education-num: 1-16 (Int)
- # capital-gain: 0-99999 (Int)
- # capital-loss: 0-4356 (Int)
- # hours-per-week: 1-99 (Int)
- # len(audit_df)
- # Side of the platform: split the dataset, learn the linear regression etc
- train_and_test_df, audit_df = train_test_split(df, test_size=0.05, random_state=42)
- nbr_audit = len(audit_df)
- protected = 'proxy'
- p = [{protected: 1}]
- u = [{protected: 0}]
- ori_D = train_and_test_df.copy()
- ori_D['proxy'] = ori_D['name'].map(proxy_sex_3letters)
- hat_D = audit_df.copy()
- hat_D = hat_D[~hat_D['name'].str.endswith(('y', 'h'))]
- hat_D['proxy'] = hat_D['name'].map(proxy_sex_3letters)
- del ori_D['name']
- del hat_D['name']
- ### The complete dataset
- # Using https://towardsdatascience.com/mitigating-bias-in-ai-with-aif360-b4305d1f88a9, loading the data
- P_d = BinaryLabelDataset(
- favorable_label=1,
- unfavorable_label=0,
- df=ori_D,
- label_names = ['income-per-year'],
- protected_attribute_names=[protected])
- # Split the data
- P_test, P_train = P_d.split([16281])
- P_train.features = scaler.fit_transform(P_train.features)
- P_test.features = scaler.fit_transform(P_test.features)
- print("The dataset is split as follow:")
- print("Train set:", len(P_train.features), "Test set:", len(P_test.features), "Audit set:", nbr_audit)
- index_P = P_train.feature_names.index(protected)
- # Train the model to predict 'income-per-year'
- lmod = LogisticRegression(class_weight='balanced', solver='liblinear')
- lmod.fit(P_train.features, P_train.labels.ravel())
- # Test it
- P_test_repd_pred = P_test.copy()
- P_test_repd_pred.labels = lmod.predict(P_test_repd_pred.features).reshape(-1, 1)
- p_A = accuracy_score(P_test.labels, P_test_repd_pred.labels)
- print("Accuracy of the model", p_A)
- P_di = BinaryLabelDatasetMetric(P_test_repd_pred, privileged_groups=p, unprivileged_groups=u).disparate_impact()
- ### On the hat_D with perfect proxy
- hat_P_d = BinaryLabelDataset(
- favorable_label=1,
- unfavorable_label=0,
- df=hat_D,
- label_names = ['income-per-year'],
- protected_attribute_names=[protected])
- hat_P_d.features = scaler.fit_transform(hat_P_d.features)
- hat_P_d.labels = lmod.predict(hat_P_d.features).reshape(-1, 1)
- hat_P_di = BinaryLabelDatasetMetric(hat_P_d, privileged_groups=p, unprivileged_groups=u).disparate_impact()
- print("Disparate Impact before the production", P_di, hat_P_di)
- new_budget = len(hat_D)
- # # Fairwashing method
- # In[3]:
- def ROC_mitigation(lmod, test_set, theta, index_protected):
- '''
- Implementation of the Reject Option based Classification method (Kamiran et al., (2012)).
- '''
- test_p = lmod.predict_proba(test_set)
- y_pred = lmod.predict(test_set)
- max_values = np.maximum(test_p[:, 0], 1 - test_p[:, 0])
- critical_region = np.where(max_values <= theta)[0]
- for i in critical_region:
- if not test_set[i][index_protected]:
- y_pred[i] = 1
- else:
- y_pred[i] = 0
- return y_pred, 100*len(critical_region)/len(test_set)
- def ROC_mitigation_by_prop_lies(lmod, test_set, prop, index_protected):
- test_p = lmod.predict_proba(test_set)
- y_pred = lmod.predict(test_set)
- max_values = np.maximum(test_p[:, 0], 1 - test_p[:, 0])
- critical_region = sorted(range(len(max_values)), key=lambda k: max_values[k])[:int(prop*len(test_set))]
- for i in critical_region:
- if not test_set[i][index_protected]:
- y_pred[i] = 1
- else:
- y_pred[i] = 0
- return y_pred.reshape(-1, 1), 100*len(critical_region)/len(test_set)
- def ROC_until_fair(lmod, audit_set, index_protected, first_theta = 0, theta_pas = 0.01):
- theta = first_theta
- current_answers = audit_set.copy()
- current_answers.labels = lmod.predict(current_answers.features).reshape(-1, 1)
- cm = BinaryLabelDatasetMetric(current_answers, privileged_groups=p, unprivileged_groups=u).disparate_impact()
- fair = (cm >= 0.8)
- if fair:
- return current_answers, theta, True
- while ((theta <= 1) and (not fair)):
- theta += theta_pas
- current_answers.labels, fairwash_rate = ROC_mitigation_by_prop_lies(lmod, current_answers.features, theta, index_protected)
- cm = BinaryLabelDatasetMetric(current_answers, privileged_groups=p, unprivileged_groups=u).disparate_impact()
- fair = (cm >= 0.8)
- return current_answers, theta, False
- # # Definition of the three proxies
- # In[4]:
- def poor_df(budget = budget):
- '''
- Implementation of \psi^{\emptyset}, the poor proxy
- '''
- age = np.random.randint(25, 87, size=budget)
- education_num = np.random.randint(1, 17, size=budget)
- capital_gain = np.random.randint(0, 100000, size=budget)
- capital_loss = np.random.randint(0, 4357, size=budget)
- hours_per_week = np.random.randint(1, 100, size=budget)
- proxy = np.random.randint(0, 2, size = budget)
- income_per_year = np.zeros(budget)
- data = {
- 'age': age,
- 'education-num': education_num,
- 'capital-gain': capital_gain,
- 'capital-loss': capital_loss,
- 'hours-per-week': hours_per_week,
- 'proxy' : proxy,
- 'income-per-year':income_per_year
- }
- R_df = pd.DataFrame(data)
- R_d = BinaryLabelDataset(
- favorable_label=1,
- unfavorable_label=0,
- df=R_df,
- label_names = ['income-per-year'],
- protected_attribute_names=[protected])
- R_d.features = scaler.fit_transform(R_d.features)
- return R_d
- def intermediate_df(budget = budget):
- '''
- Implementation of \psi^i, the intermediate proxy
- '''
- R_df = audit_df.copy()
- R_df['proxy'] = R_df['name'].map(proxy_sex_3letters)
- del R_df['name']
- # assert budget <= len(audit_df)
- R_df = R_df.sample(budget, random_state = seed)
- R_d = BinaryLabelDataset(
- favorable_label=1,
- unfavorable_label=0,
- df=R_df,
- label_names = ['income-per-year'],
- protected_attribute_names=[protected])
- R_d.features = scaler.fit_transform(R_d.features)
- return R_d
- def perfect_df(budget = budget):
- '''
- Implementation of \psi^p, the perfect proxy.
- It corresponds to the intermediate proxy on names not ending by 'y' or 'h'.
- '''
- def select_determinist_names(name):
- l = name[-1]
- return l not in ['y', 'h']
- R_df = audit_df.copy()
- R_df = R_df[~R_df['name'].str.endswith(('y', 'h'))]
- R_df['proxy'] = R_df['name'].map(proxy_sex_3letters)
- del R_df['name']
- R_df = R_df.sample(budget)
- R_d = BinaryLabelDataset(
- favorable_label=1,
- unfavorable_label=0,
- df=R_df,
- label_names = ['income-per-year'],
- protected_attribute_names=[protected])
- R_d.features = scaler.fit_transform(R_d.features)
- return R_d
- # In[5]:
- thresholds= np.arange(0, 0.30, 0.001)
- res = []
- for _ in range(repetitions):
- for sample_func in [poor_df, intermediate_df, perfect_df]:
- R_d = sample_func(budget)
- R_d.labels = lmod.predict(R_d.features).reshape(-1, 1)
- cm = BinaryLabelDatasetMetric(R_d, privileged_groups=p, unprivileged_groups=u).disparate_impact()
- index_protected = R_d.feature_names.index(protected)
- fairwash_rate = 0
- res.append([sample_func.__name__, None, cm, fairwash_rate])
- for th in thresholds:
- R_d.labels, fairwash_rate = ROC_mitigation_by_prop_lies(lmod, R_d.features, th, index_protected)
- cm = BinaryLabelDatasetMetric(R_d, privileged_groups=p, unprivileged_groups=u).disparate_impact()
- res.append([sample_func.__name__, th, cm, np.round(th,3)*100])
- df = pd.DataFrame(res, columns = ['Proxy', 'th', 'Disparate Impact', 'Fairwashing rate'])
- poor_di = df[df['Proxy'] == 'poor_df']
- intermediate_di = df[df['Proxy'] == 'intermediate_df']
- perfect_di = df[df['Proxy'] == 'perfect_df']
- res_by_proxies ={
- "poor": poor_di,
- "intermediate": intermediate_di,
- "perfect": perfect_di
- }
- intermediate_di
- # # Section 4.1, Table 1
- # In[6]:
- print("\n on D:")
- print("The original disparate impact is {}".format(np.round(P_di,2)))
- intermediate_DI = np.mean(intermediate_di[intermediate_di['th'] == 0]['Disparate Impact'])
- print("The measured disparate impact is {} (with the intermediate proxy)".format(np.round(intermediate_DI,2)))
- poor_DI = np.mean(poor_di[poor_di['th'] == 0]['Disparate Impact'])
- print("The measured disparate impact is {} (with the poor proxy)".format(np.round(poor_DI,2)))
- print("\n on hat_D:")
- print("The original disparate impact is {}".format(np.round(hat_P_di,2)))
- perfect_DI = np.mean(perfect_di[perfect_di['th'] == 0]['Disparate Impact'])
- print("The measured disparate impact is {} (with the perfect proxy)".format(np.round(perfect_DI,2)), flush = True)
- # # Section 4.4, Figure 4
- # In[7]:
- for name, res_di in res_by_proxies.items():
- grouped = res_di.groupby('Fairwashing rate')['Disparate Impact'].agg(['mean', 'std']).reset_index()
- gamma_min = grouped[grouped['mean'] >= 0.8].iloc[0]['Fairwashing rate']
- print(f"Gamma min for {name} is {gamma_min}")
- left, bottom, width, height = (0, 0.8, max(grouped['Fairwashing rate']), 0.2)
- rect = plt.Rectangle((left, bottom), width, height,
- facecolor='black', alpha=0.1)
- plt.gca().add_patch(rect)
- real_value = res_di[res_di['th'] == 0]['Disparate Impact'].mean()
- real_std = res_di[res_di['th'] == 0]['Disparate Impact'].std()
- plt.axhline(real_value, label = r'$\hat{\mu}_\mathbf{B}$', color=sns.color_palette("magma", 6)[3]) #color=sns.color_palette("hls", 3)[0])
- plt.axvline(gamma_min,
- color=sns.color_palette("magma", 6)[2],
- linestyle='dashed')
- plt.fill_between(
- grouped['Fairwashing rate'],
- real_value - real_std,
- real_value + real_std,
- color=sns.color_palette("magma", 6)[3],
- alpha=0.2
- )
- plt.plot(grouped['Fairwashing rate'], grouped['mean'], label=r'$\hat{\mu}_\mathbf{A}$', color=sns.color_palette("magma", 6)[0]) #'#9c179e')
- plt.fill_between(
- grouped['Fairwashing rate'],
- grouped['mean'] - grouped['std'],
- grouped['mean'] + grouped['std'],
- color=sns.color_palette("magma", 6)[0],
- alpha=0.2
- )
- plt.ylim(bottom = 0.4, top = 1)
- plt.xlim(left = -0.3, right = 27)
- plt.ylabel("Disparate Impact: "+r'$\mu$')
- plt.xlabel("Percentage of manipulated API's answers: "+r'$\gamma$')
- legend = plt.legend(loc='upper right')
- handles, labels = plt.gca().get_legend_handles_labels()
- handles.append(Patch(facecolor='black', edgecolor='black', alpha = 0.2))
- labels.append(r'$Z = 1$')
- handles.append(Patch(facecolor='white', edgecolor='black'))
- labels.append(r'$Z = 0$')
- legend._legend_box = None
- legend._init_legend_box(handles, labels)
- legend._set_loc(legend._loc)
- legend.set_title(legend.get_title().get_text())
- plt.savefig("fig_lies_"+name+".pdf", bbox_inches='tight')
- plt.show()
- # In[ ]:
- import numpy as np
- from aif360.metrics import BinaryLabelDatasetMetric
- def calculate_disparate_impact_confidence_interval(hat_P_test, privileged_groups, unprivileged_groups):
- metric = BinaryLabelDatasetMetric(hat_P_test,
- privileged_groups=privileged_groups,
- unprivileged_groups=unprivileged_groups)
- p_privileged = metric.base_rate(privileged=True)
- p_unprivileged = metric.base_rate(privileged=False)
- disparate_impact = metric.disparate_impact()
- def get_group_mask(dataset, group_conditions):
- mask = np.ones(len(dataset.labels), dtype=bool)
- for attr, value in group_conditions.items():
- attr_column = dataset.features[:, dataset.feature_names.index(attr)]
- mask &= (attr_column == value)
- return mask
- privileged_mask = get_group_mask(hat_P_test, privileged_groups[0])
- unprivileged_mask = get_group_mask(hat_P_test, unprivileged_groups[0])
- n_privileged = np.sum(privileged_mask)
- n_unprivileged = np.sum(unprivileged_mask)
- std_privileged = np.sqrt(p_privileged * (1 - p_privileged) / n_privileged)
- std_unprivileged = np.sqrt(p_unprivileged * (1 - p_unprivileged) / n_unprivileged)
- std_deviation = disparate_impact * np.sqrt((std_privileged / p_privileged)**2 +
- (std_unprivileged / p_unprivileged)**2)
- n_total = len(hat_P_test.labels)
- standard_error = std_deviation / np.sqrt(n_total)
- z_score = 1.96
- margin_of_error = z_score * standard_error
- return disparate_impact, margin_of_error
- # In[9]:
- def double_poor_df(budget = budget):
- R_d1 = poor_df(budget)
- R_d2 = poor_df(budget)
- return R_d1, R_d2
- def double_intermediate_df(budget = budget):
- '''
- Implementation of \psi^i, the intermediate proxy
- '''
- R_df1 = audit_df.sample(budget)
- R_df2 = R_df1.copy()
- R_df1['proxy'] = R_df1['name'].map(proxy_sex_3letters)
- del R_df1['name']
- R_df2['proxy'] = R_df2['name'].map(proxy_sex_3letters)
- del R_df2['name']
- R_d1 = BinaryLabelDataset(
- favorable_label=1,
- unfavorable_label=0,
- df=R_df1,
- label_names = ['income-per-year'],
- protected_attribute_names=[protected])
- R_d1.features = scaler.fit_transform(R_d1.features)
- R_d2 = BinaryLabelDataset(
- favorable_label=1,
- unfavorable_label=0,
- df=R_df2,
- label_names = ['income-per-year'],
- protected_attribute_names=[protected])
- R_d2.features = scaler.fit_transform(R_d2.features)
- return R_d1, R_d2
- def accuracy_intermediaire(budget):
- total_acc = 0
- for _ in range(budget):
- R_d1, R_d2 = double_intermediate_df(1)
- R_d1.labels = lmod.predict(R_d1.features).reshape(-1, 1)
- R_d2.labels = lmod.predict(R_d2.features).reshape(-1, 1)
- total_acc += len(R_d1.convert_to_dataframe()[0].compare(R_d2.convert_to_dataframe()[0]))/budget
- return 1-total_acc
- def double_perfect_df(budget = budget):
- R_d = perfect_df(budget)
- return R_d, R_d
- acc_by_proxies = {
- "poor": 1/104537,
- 'intermediate': 0.91, # accuracy_intermediaire(10000)
- "perfect": 1
- }
- # In[ ]:
- t_A = 1500
- repetitions = 50
- beta_range = np.arange(0.01, 1.01, 0.01)
- def exploitation(t, acc, res_du, beta, alpha = 0.95, ratio = 1):
- tfair = int(t*beta + (1-beta)*t/(1+ratio))
- A_queries, _ = res_du(tfair)
- ## Answers on A
- A_queries.labels = lmod.predict(A_queries.features).reshape(-1, 1)
- A_audit, _, _ = ROC_until_fair(lmod, A_queries, index_protected)
- disparate_impact, margin_of_error = calculate_disparate_impact_confidence_interval(A_audit, p, u)
- ## Answers on B + proxy
- tfraud = max(int((1-beta)*t/(1+ratio)), 1)
- A_queries, B_queries = res_du(tfraud)
- B_queries.labels = lmod.predict(B_queries.features).reshape(-1, 1)
- A_queries.labels = lmod.predict(A_queries.features).reshape(-1, 1)
- A_audit, gamma, fair = ROC_until_fair(lmod, A_queries, index_protected)
- acc_gamma = gamma * acc + (1 - gamma)*(1 - acc)
- n_max = stats.binom.ppf(0.95, tfraud, 1-acc)
- flag_proba = 1 - stats.binom.cdf(n_max, tfraud, acc_gamma)
- return margin_of_error, flag_proba, fair
- name = "intermediate"
- res_du = double_intermediate_df
- acc = acc_by_proxies[name]
- res = []
- tot_lies = 0
- for _ in range(repetitions):
- for beta in beta_range:
- error, detected, fair = exploitation(t_A, acc, res_du, beta)
- res.append([beta, error, detected])
- # In[11]:
- def find_last_exceeding_threshold(data, threshold=0.995):
- for i in range(len(data)-1, -1, -1):
- if data[i][0] >= threshold:
- return i, data[i]
- return None
- def aux_pareto(Xs, Ys, Betas, maxX=True, maxY=True):
- '''Pareto frontier selection process'''
- sorted_list = sorted([[Xs[i], Ys[i], Betas[i]] for i in range(len(Xs))], reverse=maxY)
- pareto_front = [sorted_list[0]]
- for pair in sorted_list[1:]:
- if maxY:
- if pair[1] >= pareto_front[-1][1]:
- pareto_front.append(pair)
- else:
- if pair[1] <= pareto_front[-1][1]:
- pareto_front.append(pair)
- return pareto_front
- def aux_pareto_and_last_fig(res, scatter_all = True):
- res_df = pd.DataFrame(res, columns=['beta', 'margin of error', 'proba detectability'])
- grouped = res_df.groupby('beta')['proba detectability', 'margin of error'].agg(['mean']).reset_index()
- if scatter_all:
- plt.scatter(1-grouped['margin of error'],grouped['proba detectability'], c=sns.color_palette("magma", 6)[4], alpha=.5)
- Xs = (1-grouped['margin of error']['mean']).to_list()
- Ys = grouped['proba detectability']['mean'].to_list()
- Betas = grouped['beta'].to_list()
- pareto_front = aux_pareto(Xs, Ys, Betas)
- i, [x, y, b]=find_last_exceeding_threshold(pareto_front)
- return i, x, y, b, pareto_front
- i, x, y, b, pareto_front = aux_pareto_and_last_fig(res)
- pf_X = [pair[0] for pair in pareto_front]
- pf_Y = [pair[1] for pair in pareto_front]
- plt.plot(pf_X, pf_Y,
- c = sns.color_palette("magma", 6)[1])
- print(i, [x, y, b])
- plt.scatter(x,y, c=sns.color_palette("magma", 6)[1], alpha=1)
- plt.text(0.995, 0.4+0.15, r'$\beta$ = '+str(np.round(b, 2)), ha='center', va='center', color=sns.color_palette("magma", 6)[1])
- plt.text(0.995, 0.4+0.1, r'$\epsilon$ = '+str(np.round(1-x, 3)), ha='center', va='center', color=sns.color_palette("magma", 6)[1])
- plt.text(0.995, 0.4+0.05, r'$p(\bar{\phi})$ = '+str(np.round(y, 2)), ha='center', va='center', color=sns.color_palette("magma", 6)[1])
- plt.annotate('', xy=(x, y), xytext=(0.995, 0.6),
- arrowprops=dict(arrowstyle='->', color=sns.color_palette("magma", 6)[1],# lw=2
- ))
- plt.xlabel("Accuracy : "+r'$1-\epsilon$')
- plt.ylabel("Manipulation's probability: "+r'$p_{manip}$')
- plt.savefig("pareto43.pdf", bbox_inches='tight')
Advertisement
Add Comment
Please, Sign In to add comment