Untitled

#!/usr/bin/env python
# coding: utf-8

# In[1]:


# This notebook is adapted from " https://github.com/Trusted-AI/AIF360 "

import sys
import warnings

# Redirections of warnings to /dev/null (Linux/macOS)
warnings.filterwarnings("ignore")
sys.stderr = open('/dev/null', 'w')

# Some library imports
from matplotlib import pyplot as plt
import seaborn as sns

import sys
sys.path.append("../")

import numpy as np
import random as rd
import pandas as pd
import scipy.stats as stats

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score, accuracy_score

from aif360.algorithms.postprocessing.reject_option_classification import RejectOptionClassification
from aif360.sklearn.datasets import fetch_german
from aif360.datasets import AdultDataset, BinaryLabelDataset

from aif360.metrics import BinaryLabelDatasetMetric

from matplotlib import rc
from matplotlib.patches import Patch

# Fix randomness and some parameters
seed = 8858 #rd.randint(1000, 10000)

np.random.seed(seed)

rd.seed(seed)

plotting = True

sns.set_style(style="ticks")

params = {
        'axes.spines.right' : False,
        'axes.spines.top' : False,
        'text.usetex' : True,
        'pdf.fonttype' : 42,
        'legend.fontsize': 'large',
        'figure.figsize': (10, 6),
        'axes.labelsize': 'large',
        'axes.titlesize':'large',
        'xtick.labelsize':'large',
        'ytick.labelsize':'large',
        'font.size': 24}
plt.rcParams.update(params)
plt.rc('lines', linewidth=2.0)

scaler = MinMaxScaler(copy=False)

repetitions = 50
budget = 1500


# # General setting
# Load the Adult dataset, generate the name, the proxy and learn the black-box model.

# In[2]:


# Importing the dataset "AdultDataset" from: https://archive.ics.uci.edu/ml/datasets/adult.
# As in the example, we keep the following features: 'age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'
# The protected variable is: 'sex'
# The variable to predict is: 'income-per-year'

ad = AdultDataset(protected_attribute_names=['sex'],
    privileged_classes=[['Male']], categorical_features=[],
    features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'])
label_name = ad.label_names[0]

# See appendix B, table II in Barry III and Harper (2000)
female_names = ['Ashley', 'Jessica', 'Amanda', 'Brittany', 'Samantha', 'Sarah', 'Lauren', 'Nicole', 'Megan', 'Stephanie', 'Emily', 'Jennifer', 'Elizabeth', 'Kayla', 'Rachel', 'Amber', 'Rebecca', 'Danielle', 'Chelsea', 'Alyssa', 'Melissa', 'Heather', 'Kelly', 'Christina', 'Michelle']
male_names = ['Michael', 'Matthew', 'Christopher', 'Joshua', 'Andrew', 'Joseph', 'John', 'Daniel', 'David', 'Robert', 'James', 'Justin', 'Nicholas', 'Anthony', 'William', 'Kyle', 'Zachary', 'Kevin', 'Tyler', 'Thomas', 'Eric', 'Brian', 'Brandon', 'Jonathan', 'Timothy']

def propose_name(s):
    if s == 1:
        return rd.choice(male_names)
    else:
        return rd.choice(female_names)

def proxy_sex_3letters(name):
    '''
    Predict the sex (1: Male, 0: Male) depending on first names.
    p(g^i(n) = Female)=
                        1 if the name ends by a, e or i
                        0.5 if the name ends by h or y
                        0 otherwise.
    '''
    l = name[-1]
    if l in ['a', 'e', 'i']:
        return 0
    elif l in ['y', 'h']:
        return rd.choices([0, 1], [0.5, 0.5], k=1)[0]
    else:
        return 1

# Increase of the dataset with the following two features: 'name' and 'proxy' where 'proxy' is the reconstructed "sex" using the intermediate proxy.
df = ad.convert_to_dataframe()[0]
df['name'] = df['sex'].map(propose_name)

del df['sex']

# some characteristics of the dataset, used for "poor proxy"
# Age: 25-86 ans (Int)
# education-num: 1-16 (Int)
# capital-gain: 0-99999 (Int)
# capital-loss: 0-4356 (Int)
# hours-per-week: 1-99 (Int)
# len(audit_df)

# Side of the platform: split the dataset, learn the linear regression etc
train_and_test_df, audit_df = train_test_split(df, test_size=0.05, random_state=42)
nbr_audit = len(audit_df)

protected = 'proxy'
p = [{protected: 1}]
u = [{protected: 0}]


ori_D = train_and_test_df.copy()
ori_D['proxy'] = ori_D['name'].map(proxy_sex_3letters)

hat_D = audit_df.copy()
hat_D = hat_D[~hat_D['name'].str.endswith(('y', 'h'))]
hat_D['proxy'] = hat_D['name'].map(proxy_sex_3letters)

del ori_D['name']
del hat_D['name']

### The complete dataset
# Using https://towardsdatascience.com/mitigating-bias-in-ai-with-aif360-b4305d1f88a9, loading the data
P_d = BinaryLabelDataset(
    favorable_label=1,
    unfavorable_label=0,
    df=ori_D,
    label_names = ['income-per-year'],
    protected_attribute_names=[protected])

# Split the data
P_test, P_train = P_d.split([16281])
P_train.features = scaler.fit_transform(P_train.features)
P_test.features = scaler.fit_transform(P_test.features)

print("The dataset is split as follow:")
print("Train set:", len(P_train.features), "Test set:", len(P_test.features), "Audit set:", nbr_audit)

index_P = P_train.feature_names.index(protected)

# Train the model to predict 'income-per-year'
lmod = LogisticRegression(class_weight='balanced', solver='liblinear')
lmod.fit(P_train.features, P_train.labels.ravel())


# Test it
P_test_repd_pred = P_test.copy()
P_test_repd_pred.labels = lmod.predict(P_test_repd_pred.features).reshape(-1, 1)

p_A = accuracy_score(P_test.labels, P_test_repd_pred.labels)
print("Accuracy of the model", p_A)

P_di = BinaryLabelDatasetMetric(P_test_repd_pred, privileged_groups=p, unprivileged_groups=u).disparate_impact()

### On the hat_D with perfect proxy
hat_P_d = BinaryLabelDataset(
    favorable_label=1,
    unfavorable_label=0,
    df=hat_D,
    label_names = ['income-per-year'],
    protected_attribute_names=[protected])

hat_P_d.features = scaler.fit_transform(hat_P_d.features)

hat_P_d.labels = lmod.predict(hat_P_d.features).reshape(-1, 1)

hat_P_di = BinaryLabelDatasetMetric(hat_P_d, privileged_groups=p, unprivileged_groups=u).disparate_impact()

print("Disparate Impact before the production", P_di, hat_P_di)
new_budget = len(hat_D)


# # Fairwashing method

# In[3]:


def ROC_mitigation(lmod, test_set, theta, index_protected):
    '''
    Implementation of the Reject Option based Classification method (Kamiran et al., (2012)).
    '''
    test_p = lmod.predict_proba(test_set)
    y_pred = lmod.predict(test_set)
    max_values = np.maximum(test_p[:, 0], 1 - test_p[:, 0])
    critical_region = np.where(max_values <= theta)[0]
    for i in critical_region:
        if not test_set[i][index_protected]:
            y_pred[i] = 1
        else:
            y_pred[i] = 0
    return y_pred, 100*len(critical_region)/len(test_set)

def ROC_mitigation_by_prop_lies(lmod, test_set, prop, index_protected):
    test_p = lmod.predict_proba(test_set)
    y_pred = lmod.predict(test_set)
    max_values = np.maximum(test_p[:, 0], 1 - test_p[:, 0])
    critical_region = sorted(range(len(max_values)), key=lambda k: max_values[k])[:int(prop*len(test_set))]
    for i in critical_region:
        if not test_set[i][index_protected]:
            y_pred[i] = 1
        else:
            y_pred[i] = 0
    return y_pred.reshape(-1, 1), 100*len(critical_region)/len(test_set)

def ROC_until_fair(lmod, audit_set, index_protected, first_theta = 0, theta_pas = 0.01):
    theta = first_theta
    current_answers = audit_set.copy()
    current_answers.labels = lmod.predict(current_answers.features).reshape(-1, 1)
    cm = BinaryLabelDatasetMetric(current_answers, privileged_groups=p, unprivileged_groups=u).disparate_impact()
    fair = (cm >= 0.8)
    if fair:
        return current_answers, theta, True
    while ((theta <= 1) and (not fair)):
        theta += theta_pas
        current_answers.labels, fairwash_rate = ROC_mitigation_by_prop_lies(lmod, current_answers.features, theta, index_protected)
        cm = BinaryLabelDatasetMetric(current_answers, privileged_groups=p, unprivileged_groups=u).disparate_impact()
        fair = (cm >= 0.8)
    return current_answers, theta, False


# # Definition of the three proxies

# In[4]:


def poor_df(budget = budget):
    '''
    Implementation of \psi^{\emptyset}, the poor proxy
    '''
    age = np.random.randint(25, 87, size=budget)
    education_num = np.random.randint(1, 17, size=budget)
    capital_gain = np.random.randint(0, 100000, size=budget)
    capital_loss = np.random.randint(0, 4357, size=budget)
    hours_per_week = np.random.randint(1, 100, size=budget)
    proxy = np.random.randint(0, 2, size = budget)
    income_per_year = np.zeros(budget)

    data = {
    'age': age,
    'education-num': education_num,
    'capital-gain': capital_gain,
    'capital-loss': capital_loss,
    'hours-per-week': hours_per_week,
    'proxy' : proxy,
    'income-per-year':income_per_year
    }
    R_df = pd.DataFrame(data)
    R_d = BinaryLabelDataset(
                            favorable_label=1,
                            unfavorable_label=0,
                            df=R_df,
                            label_names = ['income-per-year'],
                            protected_attribute_names=[protected])
    R_d.features = scaler.fit_transform(R_d.features)
    return R_d

def intermediate_df(budget = budget):
    '''
    Implementation of \psi^i, the intermediate proxy
    '''
    R_df = audit_df.copy()
    R_df['proxy'] = R_df['name'].map(proxy_sex_3letters)
    del R_df['name']
    # assert budget <= len(audit_df)
    R_df = R_df.sample(budget, random_state = seed)

    R_d = BinaryLabelDataset(
                            favorable_label=1,
                            unfavorable_label=0,
                            df=R_df,
                            label_names = ['income-per-year'],
                            protected_attribute_names=[protected])
    R_d.features = scaler.fit_transform(R_d.features)

    return R_d

def perfect_df(budget = budget):
    '''
    Implementation of \psi^p, the perfect proxy.
    It corresponds to the intermediate proxy on names not ending by 'y' or 'h'.
    '''
    def select_determinist_names(name):
        l = name[-1]
        return l not in ['y', 'h']

    R_df = audit_df.copy()
    R_df = R_df[~R_df['name'].str.endswith(('y', 'h'))]

    R_df['proxy'] = R_df['name'].map(proxy_sex_3letters)
    del R_df['name']
    R_df = R_df.sample(budget)
    R_d = BinaryLabelDataset(
                            favorable_label=1,
                            unfavorable_label=0,
                            df=R_df,
                            label_names = ['income-per-year'],
                            protected_attribute_names=[protected])
    R_d.features = scaler.fit_transform(R_d.features)
    return R_d


# In[5]:


thresholds= np.arange(0, 0.30, 0.001)
res = []
for _ in range(repetitions):
    for sample_func in [poor_df, intermediate_df, perfect_df]:
        R_d = sample_func(budget)
        R_d.labels = lmod.predict(R_d.features).reshape(-1, 1)
        cm = BinaryLabelDatasetMetric(R_d, privileged_groups=p, unprivileged_groups=u).disparate_impact()
        index_protected = R_d.feature_names.index(protected)
        fairwash_rate = 0
        res.append([sample_func.__name__, None, cm, fairwash_rate])
        for th in thresholds:
            R_d.labels, fairwash_rate = ROC_mitigation_by_prop_lies(lmod, R_d.features, th, index_protected)
            cm = BinaryLabelDatasetMetric(R_d, privileged_groups=p, unprivileged_groups=u).disparate_impact()
            res.append([sample_func.__name__, th, cm, np.round(th,3)*100])
df = pd.DataFrame(res, columns = ['Proxy', 'th', 'Disparate Impact', 'Fairwashing rate'])

poor_di = df[df['Proxy'] == 'poor_df']
intermediate_di = df[df['Proxy'] == 'intermediate_df']
perfect_di = df[df['Proxy'] == 'perfect_df']

res_by_proxies ={
    "poor": poor_di,
  "intermediate": intermediate_di,
  "perfect": perfect_di
}

intermediate_di


# # Section 4.1, Table 1

# In[6]:


print("\n on D:")
print("The original disparate impact is {}".format(np.round(P_di,2)))

intermediate_DI = np.mean(intermediate_di[intermediate_di['th'] == 0]['Disparate Impact'])
print("The measured disparate impact is {} (with the intermediate proxy)".format(np.round(intermediate_DI,2)))

poor_DI = np.mean(poor_di[poor_di['th'] == 0]['Disparate Impact'])
print("The measured disparate impact is {} (with the poor proxy)".format(np.round(poor_DI,2)))

print("\n on hat_D:")
print("The original disparate impact is {}".format(np.round(hat_P_di,2)))

perfect_DI = np.mean(perfect_di[perfect_di['th'] == 0]['Disparate Impact'])
print("The measured disparate impact is {} (with the perfect proxy)".format(np.round(perfect_DI,2)), flush = True)


# # Section 4.4, Figure 4

# In[7]:


for name, res_di in res_by_proxies.items():

    grouped = res_di.groupby('Fairwashing rate')['Disparate Impact'].agg(['mean', 'std']).reset_index()
    gamma_min = grouped[grouped['mean'] >= 0.8].iloc[0]['Fairwashing rate']

    print(f"Gamma min for {name} is {gamma_min}")

    left, bottom, width, height = (0, 0.8, max(grouped['Fairwashing rate']), 0.2)
    rect = plt.Rectangle((left, bottom), width, height,
                            facecolor='black', alpha=0.1)
    plt.gca().add_patch(rect)

    real_value = res_di[res_di['th'] == 0]['Disparate Impact'].mean()
    real_std = res_di[res_di['th'] == 0]['Disparate Impact'].std()

    plt.axhline(real_value, label = r'$\hat{\mu}_\mathbf{B}$', color=sns.color_palette("magma", 6)[3]) #color=sns.color_palette("hls", 3)[0])

    plt.axvline(gamma_min,
                color=sns.color_palette("magma", 6)[2],
                  linestyle='dashed')


    plt.fill_between(
        grouped['Fairwashing rate'],
        real_value - real_std,
        real_value + real_std,
        color=sns.color_palette("magma", 6)[3],
        alpha=0.2
    )

    plt.plot(grouped['Fairwashing rate'], grouped['mean'], label=r'$\hat{\mu}_\mathbf{A}$', color=sns.color_palette("magma", 6)[0]) #'#9c179e')

    plt.fill_between(
        grouped['Fairwashing rate'],
        grouped['mean'] - grouped['std'],
        grouped['mean'] + grouped['std'],
        color=sns.color_palette("magma", 6)[0],
        alpha=0.2
    )

    plt.ylim(bottom = 0.4, top = 1)
    plt.xlim(left = -0.3, right = 27)
    plt.ylabel("Disparate Impact: "+r'$\mu$')
    plt.xlabel("Percentage of manipulated API's answers: "+r'$\gamma$')
    legend = plt.legend(loc='upper right')
    handles, labels = plt.gca().get_legend_handles_labels()
    handles.append(Patch(facecolor='black', edgecolor='black', alpha = 0.2))
    labels.append(r'$Z = 1$')
    handles.append(Patch(facecolor='white', edgecolor='black'))
    labels.append(r'$Z = 0$')

    legend._legend_box = None
    legend._init_legend_box(handles, labels)
    legend._set_loc(legend._loc)
    legend.set_title(legend.get_title().get_text())
    plt.savefig("fig_lies_"+name+".pdf", bbox_inches='tight')
    plt.show()


# In[ ]:


import numpy as np
from aif360.metrics import BinaryLabelDatasetMetric

def calculate_disparate_impact_confidence_interval(hat_P_test, privileged_groups, unprivileged_groups):
    metric = BinaryLabelDatasetMetric(hat_P_test,
                                      privileged_groups=privileged_groups,
                                      unprivileged_groups=unprivileged_groups)

    p_privileged = metric.base_rate(privileged=True)
    p_unprivileged = metric.base_rate(privileged=False)

    disparate_impact = metric.disparate_impact()

    def get_group_mask(dataset, group_conditions):
        mask = np.ones(len(dataset.labels), dtype=bool)
        for attr, value in group_conditions.items():
            attr_column = dataset.features[:, dataset.feature_names.index(attr)]
            mask &= (attr_column == value)
        return mask

    privileged_mask = get_group_mask(hat_P_test, privileged_groups[0])
    unprivileged_mask = get_group_mask(hat_P_test, unprivileged_groups[0])

    n_privileged = np.sum(privileged_mask)
    n_unprivileged = np.sum(unprivileged_mask)

    std_privileged = np.sqrt(p_privileged * (1 - p_privileged) / n_privileged)
    std_unprivileged = np.sqrt(p_unprivileged * (1 - p_unprivileged) / n_unprivileged)

    std_deviation = disparate_impact * np.sqrt((std_privileged / p_privileged)**2 +
                                               (std_unprivileged / p_unprivileged)**2)

    n_total = len(hat_P_test.labels)

    standard_error = std_deviation / np.sqrt(n_total)

    z_score = 1.96
    margin_of_error = z_score * standard_error

    return disparate_impact, margin_of_error


# In[9]:


def double_poor_df(budget = budget):
    R_d1 = poor_df(budget)
    R_d2 = poor_df(budget)
    return R_d1, R_d2

def double_intermediate_df(budget = budget):
    '''
    Implementation of \psi^i, the intermediate proxy
    '''
    R_df1 = audit_df.sample(budget)
    R_df2 = R_df1.copy()
    R_df1['proxy'] = R_df1['name'].map(proxy_sex_3letters)
    del R_df1['name']

    R_df2['proxy'] = R_df2['name'].map(proxy_sex_3letters)
    del R_df2['name']

    R_d1 = BinaryLabelDataset(
                            favorable_label=1,
                            unfavorable_label=0,
                            df=R_df1,
                            label_names = ['income-per-year'],
                            protected_attribute_names=[protected])
    R_d1.features = scaler.fit_transform(R_d1.features)

    R_d2 = BinaryLabelDataset(
                            favorable_label=1,
                            unfavorable_label=0,
                            df=R_df2,
                            label_names = ['income-per-year'],
                            protected_attribute_names=[protected])
    R_d2.features = scaler.fit_transform(R_d2.features)
    return R_d1, R_d2

def accuracy_intermediaire(budget):
    total_acc = 0
    for _ in range(budget):
        R_d1, R_d2 = double_intermediate_df(1)
        R_d1.labels = lmod.predict(R_d1.features).reshape(-1, 1)
        R_d2.labels = lmod.predict(R_d2.features).reshape(-1, 1)

        total_acc += len(R_d1.convert_to_dataframe()[0].compare(R_d2.convert_to_dataframe()[0]))/budget
    return 1-total_acc

def double_perfect_df(budget = budget):
    R_d = perfect_df(budget)
    return R_d, R_d


acc_by_proxies = {
    "poor": 1/104537,
    'intermediate': 0.91, # accuracy_intermediaire(10000)
    "perfect": 1
}


# In[ ]:


t_A = 1500
repetitions = 50
beta_range = np.arange(0.01, 1.01, 0.01)
def exploitation(t, acc, res_du, beta, alpha = 0.95, ratio = 1):
    tfair = int(t*beta + (1-beta)*t/(1+ratio))
    A_queries, _ = res_du(tfair)

    ## Answers on A
    A_queries.labels = lmod.predict(A_queries.features).reshape(-1, 1)
    A_audit, _, _ = ROC_until_fair(lmod, A_queries, index_protected)
    disparate_impact, margin_of_error = calculate_disparate_impact_confidence_interval(A_audit, p, u)

    ## Answers on B + proxy
    tfraud = max(int((1-beta)*t/(1+ratio)), 1)
    A_queries, B_queries = res_du(tfraud)
    B_queries.labels = lmod.predict(B_queries.features).reshape(-1, 1)
    A_queries.labels = lmod.predict(A_queries.features).reshape(-1, 1)
    A_audit, gamma, fair = ROC_until_fair(lmod, A_queries, index_protected)

    acc_gamma = gamma * acc + (1 - gamma)*(1 - acc)
    n_max = stats.binom.ppf(0.95, tfraud, 1-acc)
    flag_proba = 1 - stats.binom.cdf(n_max, tfraud, acc_gamma)
    return margin_of_error, flag_proba, fair

name = "intermediate"
res_du = double_intermediate_df
acc = acc_by_proxies[name]
res = []
tot_lies = 0
for _ in range(repetitions):
    for beta in beta_range:
        error, detected, fair = exploitation(t_A, acc, res_du, beta)
        res.append([beta, error, detected])


# In[11]:


def find_last_exceeding_threshold(data, threshold=0.995):
    for i in range(len(data)-1, -1, -1):
        if data[i][0] >= threshold:
            return i, data[i]
    return None

def aux_pareto(Xs, Ys, Betas, maxX=True, maxY=True):
    '''Pareto frontier selection process'''
    sorted_list = sorted([[Xs[i], Ys[i], Betas[i]] for i in range(len(Xs))], reverse=maxY)

    pareto_front = [sorted_list[0]]
    for pair in sorted_list[1:]:
        if maxY:
            if pair[1] >= pareto_front[-1][1]:
                pareto_front.append(pair)
        else:
            if pair[1] <= pareto_front[-1][1]:
                pareto_front.append(pair)
    return pareto_front

def aux_pareto_and_last_fig(res, scatter_all = True):
    res_df = pd.DataFrame(res, columns=['beta', 'margin of error', 'proba detectability'])
    grouped = res_df.groupby('beta')['proba detectability', 'margin of error'].agg(['mean']).reset_index()

    if scatter_all:
        plt.scatter(1-grouped['margin of error'],grouped['proba detectability'], c=sns.color_palette("magma", 6)[4], alpha=.5)


    Xs = (1-grouped['margin of error']['mean']).to_list()
    Ys = grouped['proba detectability']['mean'].to_list()
    Betas = grouped['beta'].to_list()

    pareto_front = aux_pareto(Xs, Ys, Betas)
    i, [x, y, b]=find_last_exceeding_threshold(pareto_front)
    return i, x, y, b, pareto_front

i, x, y, b, pareto_front = aux_pareto_and_last_fig(res)
pf_X = [pair[0] for pair in pareto_front]
pf_Y = [pair[1] for pair in pareto_front]
plt.plot(pf_X, pf_Y,
          c = sns.color_palette("magma", 6)[1])
print(i, [x, y, b])
plt.scatter(x,y, c=sns.color_palette("magma", 6)[1], alpha=1)

plt.text(0.995, 0.4+0.15, r'$\beta$ = '+str(np.round(b, 2)), ha='center', va='center', color=sns.color_palette("magma", 6)[1])
plt.text(0.995, 0.4+0.1, r'$\epsilon$ = '+str(np.round(1-x, 3)), ha='center', va='center', color=sns.color_palette("magma", 6)[1])
plt.text(0.995, 0.4+0.05, r'$p(\bar{\phi})$ = '+str(np.round(y, 2)), ha='center', va='center', color=sns.color_palette("magma", 6)[1])
plt.annotate('', xy=(x, y), xytext=(0.995, 0.6),
         arrowprops=dict(arrowstyle='->', color=sns.color_palette("magma", 6)[1],# lw=2
                         ))

plt.xlabel("Accuracy : "+r'$1-\epsilon$')
plt.ylabel("Manipulation's probability: "+r'$p_{manip}$')
plt.savefig("pareto43.pdf", bbox_inches='tight')