Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
- import random as rd
- import matplotlib.pyplot as plt
- import itertools as it
- import sys
- import copy
- from math import ceil
- import pickle
# One shared seed for both random sources used in this file: NumPy's global
# generator and the standard-library `random` module (imported as `rd`).
seed = 42
np.random.seed(seed)
rd.seed(seed)
def aux_passe(l):
    """Run a single bubble-sort pass over a copy of ``l``.

    Adjacent elements are compared from left to right and swapped whenever
    the right one is strictly smaller than the left one.

    Args:
        l: Sequence of mutually comparable items. It is not modified.

    Returns:
        tuple: ``(new_list, changed)`` where ``new_list`` is the list after
        one pass and ``changed`` is ``True`` if at least one swap happened.
    """
    # A shallow copy is enough: the pass only reorders elements, it never
    # mutates them, so a deep copy of every element is wasted work.
    aux_l = list(l)
    changed = False
    for i in range(len(aux_l) - 1):
        if aux_l[i + 1] < aux_l[i]:
            aux_l[i], aux_l[i + 1] = aux_l[i + 1], aux_l[i]
            changed = True
    return aux_l, changed
def check_begin(l):
    """Prepend to ``l`` a copy of the smaller of its first two elements.

    Lists with fewer than two elements are returned as-is (same object,
    nothing prepended). Otherwise a new list is returned and ``l`` itself is
    left untouched.
    """
    if len(l) >= 2:
        first, second = l[0], l[1]
        lead = second if first > second else first
        return [lead] + l
    return l
# --- Synthetic platform parameters ---
Nc = 1000          # Number of content creators
Nv_max = 100       # Maximal number of videos per creator
M = 1000           # Maximal number of views per video
avantage = 10      # r_1/r_0
e_per_view = 0.01  # r_0
p0 = 0.33          # Proportion of privileged content creators
q0 = 20/100        # Proportion of privileged content creators manipulated in Section 6
alpha = 1.96       # z-score for a 95% confidence level
prop_T = 0.0025    # Proportion of the budget the regulator can request

# --- Set-up for the figures ---
nbr_run = 50       # Number of runs per point

# Grid of manipulation proportions swept on the x-axis.
step_lies = 0.01
qqt_lies = np.arange(0, 1 + step_lies, step_lies)

max_passe = 0      # Arbitrary constant for the swapping manipulations

# Grid of epsilon values.
step_eps = 0.01
epsilon_range = np.arange(step_eps, 0.06+step_eps, step_eps)

# Grid of beta values.
step_beta = 0.001
beta_range = np.arange(step_beta, 1+step_beta, step_beta)
#### Experiment of Section 5 on VideoSharingPlatform
def exp_2():
    """Run one instance of the Section 5 experiment and return its curves.

    Steps, all driven by the module-level constants:

    1. Draw a number of videos per creator (``Nc`` creators, power-law draw
       scaled by ``Nv_max``, at least 1, sorted ascending) and a random
       0/1 advantage flag per creator (probability ``p0`` of being 1).
    2. For every proportion ``q`` in ``qqt_lies``, pick a random subset of
       advantaged creators whose advantage is removed ("earnings_-N"
       columns) and a random subset of non-advantaged creators who are given
       the advantage ("earnings_+N" columns).
    3. Starting from the true earnings sorted by popularity, apply
       bubble-sort passes (``aux_passe``) until no swap occurs and store the
       state after each pass in a "bulle_<k>" column.
    4. Score every manipulated column with ``estimate``: the fraction of
       creator pairs whose popularity difference and column difference do
       not have opposite signs.

    Returns:
        tuple: ``(ym, yp, ypasse)``. Each is a list containing one inner
        list for this run: scores of the "earnings_-" columns, of the
        "earnings_+" columns and of the "bulle_" columns respectively.
        (The original code built these lists but dropped them.)
    """
    ym = []
    yp = []
    ypasse = []

    # Videos per creator; a draw truncated to 0 is bumped to 1.
    Va = np.random.power(3., Nc) * Nv_max
    Va = [x if x != 0 else 1 for x in map(int, Va)]
    Va.sort()

    avantaged = np.random.choice([1, 0], Nc, p=[p0, 1 - p0])
    whosa = np.where(avantaged)[0]
    whosna = np.where(avantaged == 0)[0]
    # Sets give O(1) membership tests in the nested loops below; testing
    # `v in ndarray` scans the whole array every time.
    whosa_set = set(whosa.tolist())

    features = ["videomaker", "popularity", "adv", "earnings"]
    minus_cols = []
    plus_cols = []
    to_del = []
    to_add = []
    for q in qqt_lies:
        n_del = int(q * len(whosa))
        n_add = int(q * len(whosna))
        # Keep this draw order (removed first, then added): it determines
        # the random stream and therefore the reproducibility of the run.
        to_del.append(set(np.random.choice(whosa, n_del, replace=False).tolist()))
        minus_cols.append("earnings_-" + str(n_del))
        features.append(minus_cols[-1])
        to_add.append(set(np.random.choice(whosna, n_add, replace=False).tolist()))
        plus_cols.append("earnings_+" + str(n_add))
        features.append(plus_cols[-1])

    # One row per creator: [id, total views (popularity), advantage flag].
    data = [
        [v, np.sum([rd.randrange(M) for _ in range(Va[v])]), avantaged[v]]
        for v in range(Nc)
    ]
    for v in range(Nc):
        _, p, a = data[v]
        # True earnings: r_0 per view, multiplied by `avantage` if advantaged.
        data[v].append(p * e_per_view * (1 - a + avantage * a))
        is_adv = v in whosa_set
        for removed, added in zip(to_del, to_add):
            # Advantage kept only if the creator was not picked for removal.
            al = is_adv and v not in removed
            data[v].append(p * e_per_view * (1 - al + avantage * al))
            # Advantage granted to the picked non-advantaged creators.
            al = (v in added) or is_adv
            data[v].append(p * e_per_view * (1 - al + avantage * al))

    df_hat = pd.DataFrame(data, columns=features)
    df_hat = df_hat.sort_values(by=['popularity'])

    # Swapping manipulation: record the earnings list after each bubble pass.
    # `check_begin` prepends one element, which is stripped again by `[1:]`.
    earnings_l = check_begin(df_hat["earnings"].tolist())
    swappe_e = []
    bulled_features = []
    changed = True
    while changed:
        bulled_features.append("bulle_" + str(len(bulled_features)))
        earnings_l, changed = aux_passe(earnings_l)
        swappe_e.append(earnings_l[1:])
    # Transpose from (pass, creator) to (creator, pass) to get one column
    # per pass, aligned on the popularity-sorted index.
    swapped = np.array(swappe_e).transpose().tolist()
    df_hat = df_hat.join(pd.DataFrame(
        swapped,
        index=df_hat.index,
        columns=bulled_features,
    ))

    # All unordered pairs (i, j) with i < j, computed once and shared by
    # every call to `estimate`, together with the popularity differences.
    pair_i, pair_j = np.triu_indices(Nc, k=1)
    popularity = df_hat["popularity"].to_numpy(dtype=float)
    pop_delta = popularity[pair_i] - popularity[pair_j]

    def estimate(q_col):
        """Return the fraction of pairs where popularity and ``q_col`` agree.

        A pair agrees when the product of its popularity difference and its
        ``q_col`` difference is >= 0 (ties count as agreement).
        """
        values = df_hat[q_col].to_numpy(dtype=float)
        col_delta = values[pair_i] - values[pair_j]
        order = int(np.count_nonzero(pop_delta * col_delta >= 0))
        return order / len(pop_delta)

    def estimate_limited_budget(q_col, T_part=0.1):
        """Same score as ``estimate`` on ``T`` randomly drawn index pairs.

        ``T`` is ``T_part`` times the number of unordered pairs. Both indices
        are drawn independently, so a pair may contain the same creator
        twice. Not called by ``exp_2`` itself.
        """
        T = int(T_part * Nc*(Nc-1)/2)
        order = 0
        not_order = 0
        pop_list = df_hat["popularity"].tolist()
        q_col_list = df_hat[q_col].tolist()
        for _ in range(T):
            i, j = rd.randrange(Nc), rd.randrange(Nc)
            delta_e = float(pop_list[i]) - float(pop_list[j])
            delta_r = float(q_col_list[i]) - float(q_col_list[j])
            if delta_e * delta_r >= 0:
                order += 1
            else:
                not_order += 1
        return order / (order + not_order)

    cur_ym = [estimate(col) for col in minus_cols]
    cur_yp = [estimate(col) for col in plus_cols]
    cur_ypasse = [estimate(col) for col in bulled_features]

    ym.append(cur_ym)
    yp.append(cur_yp)
    ypasse.append(cur_ypasse)
    return ym, yp, ypasse
#### Experiment of Section 6 on VideoSharingPlatform
def exp_3():
    """Run the Section 6 experiment and return its results.

    A per-video table is generated once with ``create_df(q0)``. Then, for
    every ``beta`` in ``beta_range``, the request budget ``tmax`` is split
    into a creator budget ``tA`` and a video budget ``tB`` (``tAtBx``) and
    the sampling procedure ``lesstA`` is repeated ``nbr_run`` times.

    Returns:
        tuple: ``(x_tot, y_tot)``. ``x_tot[i]`` is
        ``alpha * sqrt(0.25 / tA)`` for the i-th beta; ``y_tot[i]`` is the
        list of ``nbr_run`` inconsistency counts obtained for that beta.
        (The original code built these lists but dropped them.)
    """
    y_tot = []
    x_tot = []

    def create_df(q0):
        """Build the per-video table.

        Returns ``(df_hat, Va, the_feature)``: the DataFrame (one row per
        video), the sorted list of video counts per creator, and the name
        of the "earnings_+N" column used as declared earnings.
        """
        Va = np.random.power(3., Nc) * Nv_max
        Va = [x if x != 0 else 1 for x in map(int, Va)]
        Va.sort()
        avantaged = np.random.choice([1, 0], Nc, p=[p0, 1 - p0])
        whosa = np.where(avantaged)[0]
        whosna = np.where(avantaged == 0)[0]
        features = ["videomaker", "nbr videos", "video", "views", "popularity", "adv", "earnings"]
        # Draw order (removed first, then added) fixes the random stream.
        to_del = np.random.choice(whosa, int(q0 * len(whosa)), replace=False)
        features.append("earnings_-" + str(int(q0 * len(whosa))))
        to_add = np.random.choice(whosna, int(q0 * len(whosna)), replace=False)
        the_feature = "earnings_+" + str(int(q0 * len(whosna)))
        features.append(the_feature)

        # Sets make the per-creator membership tests O(1).
        whosa_set = set(whosa.tolist())
        to_del_set = set(to_del.tolist())
        to_add_set = set(to_add.tolist())

        data = []
        for v in range(Nc):
            views = [rd.randrange(M) for _ in range(Va[v])]
            sum_views = np.sum(views)
            a = avantaged[v]
            is_adv = v in whosa_set
            kept = is_adv and v not in to_del_set
            granted = (v in to_add_set) or is_adv
            # Earnings are per creator and repeated on each video row:
            # true, with advantage removed, with advantage granted.
            earnings = [
                sum_views * e_per_view * (1 - a + avantage * a),
                sum_views * e_per_view * (1 - kept + avantage * kept),
                sum_views * e_per_view * (1 - granted + avantage * granted),
            ]
            for r in range(Va[v]):
                data.append([v, Va[v], r, views[r], sum_views, a] + earnings)
        df_hat = pd.DataFrame(data, columns=features)
        return df_hat, Va, the_feature

    df_hat, Va, the_feature = create_df(q0)

    # (creator, video) -> (views, advantage flag, declared earnings).
    # Built once so that each request is a dictionary lookup, not a
    # boolean-mask scan of the whole table followed by int() on a one-row
    # Series.
    # NOTE(review): int() truncates the declared earnings, exactly as the
    # original code did; confirm that this truncation is intended.
    records = {
        (c, r): (int(m), int(a), int(e))
        for c, r, m, a, e in zip(
            df_hat["videomaker"].tolist(),
            df_hat["video"].tolist(),
            df_hat["views"].tolist(),
            df_hat["adv"].tolist(),
            df_hat[the_feature].tolist(),
        )
    }

    # Both depend only on Va, so they are computed once, not per beta.
    tmax = ceil((sum(Va) + Nc) * prop_T)
    k = np.mean(Va)

    def calculate_tA(alpha, epsilon, Nc):
        """Return ceil(alpha^2 * 0.25 / epsilon^2), corrected when >= Nc.

        When the raw value TA is at least ``Nc`` it is replaced by
        ``ceil(TA / (1 + (TA - 1) / Nc))``. Not called by ``exp_3`` itself.
        """
        TA = ceil(alpha**2 * 0.25 / epsilon**2)
        if TA < Nc:
            return TA
        return ceil(TA / (1 + (TA - 1) / Nc))

    def calculate_tB(tA):
        """Return the total video count of ``Va[tA:]``. Not called here."""
        return sum(Va[tA:])

    def lesstA(init_tA, init_tB):
        """Sample creators and videos within the budget; count inconsistencies.

        Creators are drawn without replacement; for each one, videos are
        drawn without replacement while video budget remains. The running
        estimate of the creator's earnings is accumulated video by video,
        and the creator counts as inconsistent as soon as that estimate
        exceeds the declared earnings.
        """
        tA = init_tA
        tB = init_tB
        if tA == 0:
            # Move one unit of budget from videos to creators.
            tA += 1
            tB -= 1
        inconsistency = 0
        remainings_videaste = list(range(Nc))
        # NOTE(review): the loop runs while the *combined* budget is
        # positive, as in the original; confirm this is the intended stop.
        while (tA + tB > 0) and remainings_videaste:
            c = rd.choice(remainings_videaste)
            remainings_videaste.remove(c)
            tA -= 1
            cur_vid = list(range(Va[c]))
            estimated_e = 0
            while (tB > 0) and cur_vid:
                v = rd.choice(cur_vid)
                cur_vid.remove(v)
                tB -= 1
                m, a, e = records[(c, v)]
                estimated_e += m * e_per_view * (1 - a + avantage * a)
                if estimated_e > e:
                    inconsistency += 1
                    break
        return inconsistency

    def tAtBx(beta, k):
        """Split ``tmax`` into (tA, tB) for ``beta`` and return the x value.

        The x value ``alpha * sqrt(0.25 / tA)`` uses tA before truncation.
        """
        tA = (beta + (1 - beta) / (k + 1)) * tmax
        tB = k * (1 - beta) / (k + 1) * tmax
        return int(tA), int(tB), alpha * np.sqrt(0.25 / tA)

    for beta in beta_range:
        tA, tB, x = tAtBx(beta, k)
        x_tot.append(x)
        # Internal sanity check: the split must never exceed the budget.
        assert tA + tB <= tmax
        y_tot.append([lesstA(tA, tB) for _ in range(nbr_run)])
    return x_tot, y_tot
- ## Experience of Section 5 on IncomePredictor
- '''
The code for this experiment is not provided, as it directly reuses the notebook available at https://github.com/Trusted-AI/AIF360
As in that example, we keep the following features: 'age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'
The protected variable is: 'sex'
The variable to predict is: 'income-per-year'
The only additions made for the paper are the following lists, dictionary and functions, which are based on Barry III, H., and Harper, A. S. Three last letters identify most female first
names. Psychological Reports 87, 1 (2000), 48–54.
- '''
# Pool of first names handed out by `propose_name` when s != 1.
female_names = [
    'Ashley', 'Jessica', 'Amanda', 'Brittany', 'Samantha',
    'Sarah', 'Lauren', 'Nicole', 'Megan', 'Stephanie',
    'Emily', 'Jennifer', 'Elizabeth', 'Kayla', 'Rachel',
    'Amber', 'Rebecca', 'Danielle', 'Chelsea', 'Alyssa',
    'Melissa', 'Heather', 'Kelly', 'Christina', 'Michelle',
]

# Pool of first names handed out by `propose_name` when s == 1.
male_names = [
    'Michael', 'Matthew', 'Christopher', 'Joshua', 'Andrew',
    'Joseph', 'John', 'Daniel', 'David', 'Robert',
    'James', 'Justin', 'Nicholas', 'Anthony', 'William',
    'Kyle', 'Zachary', 'Kevin', 'Tyler', 'Thomas',
    'Eric', 'Brian', 'Brandon', 'Jonathan', 'Timothy',
]

# Last letter of a first name -> [weight of label 0, weight of label 1],
# used by `proxy_sex` as sampling weights.
# NOTE(review): presumably percentages for female (0) and male (1) names
# taken from the cited paper -- confirm against the source table.
dic = {
    "a": [38.1, 0.8],
    "e": [24.0, 10.4],
    "i": [4.4, 0.6],
    "y": [12.3, 11.6],
    "h": [3.5, 2.9],
    "n": [12.8, 24.8],
    "l": [1.0, 8.3],
    "r": [1.0, 7.5],
    "s": [0.4, 7.1],
    "d": [0.4, 5.7],
    "o": [0, 4.1],
}
def propose_name(s):
    """Return a random first name: from ``male_names`` if ``s == 1``,
    otherwise from ``female_names``."""
    pool = male_names if s == 1 else female_names
    return rd.choice(pool)
def proxy_sex(name):
    """Draw a 0/1 label for ``name`` from the weights of its last letter.

    The last character of ``name`` selects a weight pair in ``dic``; letters
    missing from the table fall back to ``[2.1, 16.2]``. The label is then
    sampled from ``[0, 1]`` with those weights.
    """
    last_letter = name[-1]
    weights = dic.get(last_letter, [2.1, 16.2])
    return rd.choices([0, 1], weights, k=1)[0]
Advertisement
Add Comment
Please, Sign In to add comment