Guest User

Untitled

a guest
Mar 15th, 2023
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.13 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. import random as rd
  4. import matplotlib.pyplot as plt
  5. import itertools as it
  6. import sys
  7. import copy
  8. from math import ceil
  9. import pickle
  10.  
  # Seed both random streams used by this script (the stdlib `random` module,
  # imported as `rd`, and NumPy's global generator) so runs are repeatable.
  seed = 42
  rd.seed(seed)
  np.random.seed(seed)
  14.  
  def aux_passe(l):
      """Perform a single left-to-right bubble-sort pass on a copy of *l*.

      Each adjacent pair that is out of ascending order is swapped. The input
      list is left untouched; the pass works on a deep copy.

      Args:
          l: list of mutually comparable values.

      Returns:
          tuple: ``(new_list, changed)`` where ``changed`` is True when at
          least one swap happened during the pass.
      """
      aux_l = copy.deepcopy(l)
      changed = False
      for i in range(len(aux_l) - 1):
          if aux_l[i + 1] < aux_l[i]:
              aux_l[i], aux_l[i + 1] = aux_l[i + 1], aux_l[i]
              changed = True
      return aux_l, changed
  23.  
  def check_begin(l):
      """Prepend the smaller of the first two elements of *l*.

      Lists with fewer than two elements are returned as-is (the very same
      object). Otherwise a new list is returned whose first element duplicates
      ``min(l[0], l[1])`` and whose remainder is *l* unchanged.

      Args:
          l: list of mutually comparable values.

      Returns:
          list: *l* itself when ``len(l) < 2``, else a new list one element
          longer than *l*.
      """
      if len(l) < 2:
          return l
      # min() keeps l[0] on ties, matching a strict `l[0] > l[1]` comparison.
      return [min(l[0], l[1])] + l
  32.  
  33.  
  # ---- Simulation parameters -------------------------------------------------
  Nc = 1000  # Number of content creators
  Nv_max = 100  # Maximal number of videos per creator
  M = 1000  # Exclusive upper bound on the number of views drawn per video
  avantage = 10  # r_1/r_0: earning multiplier of privileged creators
  e_per_view = 0.01  # r_0: base earning per view

  p0 = 0.33  # Proportion of privileged content creators
  q0 = 20/100  # Proportion of creators that are manipulated in Section 6
  alpha = 1.96  # z-score for confidence level 95%
  prop_T = 0.0025  # Proportion of the budget the regulator can request

  # ---- Set-up for the figures ------------------------------------------------
  nbr_run = 50  # Number of runs per point
  step_lies = 0.01
  qqt_lies = np.arange(0, 1 + step_lies, step_lies)  # manipulated fractions
  max_passe = 0  # arbitrary constant for the swapping manipulations

  step_eps = 0.01
  epsilon_range = np.arange(step_eps, 0.06+step_eps, step_eps)

  step_beta = 0.001
  beta_range = np.arange(step_beta, 1+step_beta, step_beta)
  56.  
  57. #### Experiment of Section 5 on VideoSharingPlatform
  58.  
  def exp_2():
      """Run the Section 5 experiment on the simulated video-sharing platform.

      ``Nc`` creators are drawn. Each gets a number of videos, a total view
      count ("popularity") and a privileged flag ("adv"). True earnings are
      ``popularity * e_per_view``, multiplied by ``avantage`` for privileged
      creators. Three families of manipulated earnings columns are built:

      * ``earnings_-k``: ``k`` randomly chosen privileged creators are paid
        as if they were not privileged;
      * ``earnings_+k``: ``k`` randomly chosen non-privileged creators are
        paid as if they were privileged;
      * ``bulle_i``: the popularity-sorted earnings after ``i + 1`` successive
        ``aux_passe`` passes.

      For every such column, the fraction of creator pairs whose popularity
      difference and earnings difference do not have opposite signs is
      computed.

      Returns:
          tuple: ``(ym, yp, ypasse)``. Each is a list containing one list of
          scores for this run: the ``earnings_-k`` columns, the
          ``earnings_+k`` columns and the ``bulle_i`` columns respectively.
          (The original computed these lists but discarded them.)
      """
      ym = []
      yp = []
      ypasse = []

      # Videos per creator: power-distributed draw scaled by Nv_max, truncated
      # to int, floored at 1, then sorted ascending.
      Va = np.random.power(3., Nc) * Nv_max
      Va = [int(x) or 1 for x in Va]
      Va.sort()

      avantaged = np.random.choice([1, 0], Nc, p=[p0, 1 - p0])
      whosa = np.where(avantaged)[0]
      whosna = np.where(avantaged == 0)[0]
      # Sets give O(1) membership tests in the per-creator loop below.
      whosa_set = set(whosa.tolist())

      features = ["videomaker", "popularity", "adv", "earnings"]
      minus_cols = []
      plus_cols = []
      to_del = []
      to_add = []
      # The order of the random draws (removals, then additions, for each q)
      # is kept so that seeded runs consume the generator identically.
      for q in qqt_lies:
          n_del = int(q * len(whosa))
          to_del.append(set(np.random.choice(whosa, n_del, replace=False).tolist()))
          minus_cols.append("earnings_-" + str(n_del))
          features.append(minus_cols[-1])

          n_add = int(q * len(whosna))
          to_add.append(set(np.random.choice(whosna, n_add, replace=False).tolist()))
          plus_cols.append("earnings_+" + str(n_add))
          features.append(plus_cols[-1])

      data = []
      for v in range(Nc):
          p = np.sum([rd.randrange(M) for _ in range(Va[v])])
          a = avantaged[v]
          base = p * e_per_view
          row = [v, p, a, base * (1 - a + avantage * a)]
          is_privileged = v in whosa_set
          for removed, added in zip(to_del, to_add):
              # Privileged creators in `removed` lose their multiplier.
              al = is_privileged and v not in removed
              row.append(base * (1 - al + avantage * al))
              # Non-privileged creators in `added` gain the multiplier.
              al = is_privileged or v in added
              row.append(base * (1 - al + avantage * al))
          data.append(row)

      df_hat = pd.DataFrame(data, columns=features)
      df_hat = df_hat.sort_values(by=['popularity'])

      # Swapping manipulation: repeat bubble passes over the earnings until a
      # pass makes no swap, recording the list after every pass. check_begin
      # puts one extra value in front; the first element is dropped again
      # each time a pass is recorded.
      earnings_l = check_begin(df_hat["earnings"].tolist())
      swappe_e = []
      bulled_features = []
      changed = True
      max_passe = 0
      while changed:
          bulled_features.append("bulle_" + str(max_passe))
          max_passe += 1
          earnings_l, changed = aux_passe(earnings_l)
          swappe_e.append(earnings_l[1:])
      swapped = np.array(swappe_e).transpose().tolist()
      df_hat = df_hat.join(pd.DataFrame(
          swapped,
          index=df_hat.index,
          columns=bulled_features
      ))

      # All index pairs i < j over the Nc rows. The popularity differences do
      # not depend on the column being scored, so they are computed once.
      pair_i, pair_j = np.triu_indices(Nc, k=1)
      pop = df_hat["popularity"].to_numpy(dtype=float)
      delta_pop = pop[pair_i] - pop[pair_j]

      def estimate(q_col):
          """Return the share of pairs i < j ordered consistently in *q_col*.

          A pair counts as ordered when the product of its popularity
          difference and its *q_col* difference is >= 0 (ties included).
          """
          col = df_hat[q_col].to_numpy(dtype=float)
          delta_r = col[pair_i] - col[pair_j]
          order = int(np.count_nonzero(delta_pop * delta_r >= 0))
          not_order = len(delta_pop) - order
          return order / (order + not_order)

      def estimate_limited_budget(q_col, T_part=0.1):
          """Same score as ``estimate`` but on randomly sampled pairs.

          ``T_part`` is the fraction of the ``Nc*(Nc-1)/2`` pairs to sample.
          Pairs are drawn with replacement and may have ``i == j``. Not
          called anywhere in this function as it stands.
          """
          T = int(T_part * Nc * (Nc - 1) / 2)
          order = 0
          not_order = 0
          pop_list = df_hat["popularity"].tolist()
          q_col_list = df_hat[q_col].tolist()
          for _ in range(T):
              i, j = rd.randrange(Nc), rd.randrange(Nc)
              delta_e = float(pop_list[i]) - float(pop_list[j])
              delta_r = float(q_col_list[i]) - float(q_col_list[j])
              if delta_e * delta_r >= 0:
                  order += 1
              else:
                  not_order += 1
          return order / (order + not_order)

      cur_ym = [estimate(col) for col in minus_cols]
      cur_yp = [estimate(col) for col in plus_cols]
      cur_ypasse = [estimate("bulle_" + str(x)) for x in range(max_passe)]
      ym.append(cur_ym)
      yp.append(cur_yp)
      ypasse.append(cur_ypasse)
      return ym, yp, ypasse
  153.  
  154. #### Experiment of Section 6 on VideoSharingPlatform
  155.  
  def exp_3():
      """Run the Section 6 experiment on the simulated video-sharing platform.

      A per-video table is generated (one row per creator/video pair). Then,
      for every ``beta`` in ``beta_range``, a request budget ``tmax`` is split
      into a creator budget ``tA`` and a video budget ``tB``, and ``nbr_run``
      random audits are run with that split. Each audit returns how many
      sampled creators had a running per-video earnings estimate exceeding
      their declared earnings.

      Returns:
          tuple: ``(x_tot, y_tot)``. ``x_tot[i]`` is
          ``alpha * sqrt(0.25 / tA)`` for the i-th beta, and ``y_tot[i]`` is
          the list of ``nbr_run`` inconsistency counts for that beta.
          (The original computed these lists but discarded them.)
      """
      y_tot = []
      x_tot = []

      def create_df(q0):
          """Build the per-video table; ``q0`` is the manipulated fraction."""
          # Videos per creator: power-distributed draw scaled by Nv_max,
          # truncated to int, floored at 1, then sorted ascending.
          Va = np.random.power(3., Nc) * Nv_max
          Va = [int(x) or 1 for x in Va]
          Va.sort()
          avantaged = np.random.choice([1, 0], Nc, p=[p0, 1 - p0])
          whosa = np.where(avantaged)[0]
          whosna = np.where(avantaged == 0)[0]

          features = ["videomaker", "nbr videos", "video", "views", "popularity", "adv", "earnings"]

          to_del = np.random.choice(whosa, int(q0 * len(whosa)), replace=False)
          features.append("earnings_-" + str(int(q0 * len(whosa))))
          to_add = np.random.choice(whosna, int(q0 * len(whosna)), replace=False)
          features.append("earnings_+" + str(int(q0 * len(whosna))))
          # Column audited by lesstA below.
          # NOTE(review): this is the "+" (over-declared) column, while the
          # comment on the global q0 speaks of manipulated *privileged*
          # creators, which would be the "-" column. Confirm which is meant.
          the_feature = "earnings_+" + str(int(q0 * len(whosna)))

          # Sets give O(1) membership tests in the row loop.
          whosa_set = set(whosa.tolist())
          to_del_set = set(to_del.tolist())
          to_add_set = set(to_add.tolist())

          data = []
          for v in range(Nc):
              views = [rd.randrange(M) for _ in range(Va[v])]
              sum_views = np.sum(views)
              a = avantaged[v]
              base = sum_views * e_per_view
              # Every row of a creator carries that creator's total earnings.
              v_earning = base * (1 - a + avantage * a)
              al = (v in whosa_set) and (v not in to_del_set)
              earning_minus = base * (1 - al + avantage * al)
              al = (v in to_add_set) or (v in whosa_set)
              earning_plus = base * (1 - al + avantage * al)
              for r in range(Va[v]):
                  data.append([v, Va[v], r, views[r], sum_views, a,
                               v_earning, earning_minus, earning_plus])
          df_hat = pd.DataFrame(data, columns=features)

          return df_hat, Va, the_feature

      df_hat, Va, the_feature = create_df(q0)

      # (creator, video) -> (views, adv flag, declared earnings). Built once
      # so an audit does not scan the whole table for every sampled video.
      records = dict(zip(
          zip(df_hat["videomaker"].tolist(), df_hat["video"].tolist()),
          zip(df_hat["views"].tolist(), df_hat["adv"].tolist(),
              df_hat[the_feature].tolist()),
      ))

      def calculate_tA(alpha, epsilon, Nc):
          """Creator sample size for z-score ``alpha`` and margin ``epsilon``.

          Not called anywhere in this function as it stands.
          """
          TA = ceil(alpha**2 * 0.25 / epsilon**2)
          if TA < Nc:
              return TA
          return ceil(TA / (1 + (TA - 1) / Nc))

      def calculate_tB(tA):
          """Total video count of creators from index ``tA`` on (unused here)."""
          return sum(Va[tA:])

      def lesstA(init_tA, init_tB):
          """Run one random audit and return the number of inconsistencies.

          Creators are drawn without replacement while ``tA + tB > 0``. For
          each, videos are drawn without replacement while ``tB > 0``, adding
          the earnings implied by each video's views to a running estimate.
          As soon as the estimate exceeds the declared earnings, the creator
          counts as inconsistent and the audit moves on to the next creator.
          """
          tA = init_tA
          tB = init_tB
          if tA == 0:
              # Move one request from the video budget to the creator budget.
              tA += 1
              tB -= 1
          inconsistency = 0
          remainings_videaste = list(range(Nc))
          # rd.choice + remove is kept (rather than a shuffle) so that seeded
          # runs consume the random stream exactly as before.
          while (tA + tB > 0) and remainings_videaste:
              c = rd.choice(remainings_videaste)
              remainings_videaste.remove(c)
              tA -= 1
              cur_vid = list(range(Va[c]))
              estimated_e = 0
              while (tB > 0) and cur_vid:
                  v = rd.choice(cur_vid)
                  cur_vid.remove(v)
                  tB -= 1
                  views, adv, declared = records[(c, v)]
                  # NOTE(review): int() truncates the declared earnings, as
                  # the original did. A fully sampled, truthfully declared
                  # creator with fractional earnings can therefore be
                  # flagged. Confirm this is intended.
                  m, a, e = int(views), int(adv), int(declared)
                  estimated_e += m * e_per_view * (1 - a + avantage * a)
                  if estimated_e > e:
                      inconsistency += 1
                      break
          return inconsistency

      def tAtBx(beta, k):
          """Split ``tmax`` into ``(tA, tB)`` for *beta*; also return the x value."""
          tA = (beta + (1 - beta) / (k + 1)) * tmax
          tB = k * (1 - beta) / (k + 1) * tmax
          return int(tA), int(tB), alpha * np.sqrt(0.25 / tA)

      # Both values are the same for every beta, so compute them once.
      tmax = ceil((sum(Va) + Nc) * prop_T)
      k = np.mean(Va)
      for beta in beta_range:
          tA, tB, x = tAtBx(beta, k)
          x_tot.append(x)
          assert tA + tB <= tmax
          y = []
          for _ in range(nbr_run):
              res = lesstA(tA, tB)
              y.append(res)
          y_tot.append(y)
      return x_tot, y_tot
  254.  
  255.  
  256. ## Experiment of Section 5 on IncomePredictor
  257. '''
  258. The code is not provided as it is a direct use of the notebook available at https://github.com/Trusted-AI/AIF360
  259.  
  260. As in the example, we keep the following features: 'age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'
  261. The protected variable is: 'sex'
  262. The variable to predict is: 'income-per-year'
  263.  
  264. The only additions from the paper are the following dictionaries and functions according to Barry III, H., and Harper, A. S. Three last letters identify most female first
  265. names. Psychological reports 87, 1 (2000), 48–54.
  266. '''
  267.  
  # First-name pools that propose_name draws from.
  female_names = ['Ashley', 'Jessica', 'Amanda', 'Brittany', 'Samantha', 'Sarah', 'Lauren', 'Nicole', 'Megan', 'Stephanie', 'Emily', 'Jennifer', 'Elizabeth', 'Kayla', 'Rachel', 'Amber', 'Rebecca', 'Danielle', 'Chelsea', 'Alyssa', 'Melissa', 'Heather', 'Kelly', 'Christina', 'Michelle']
  male_names = ['Michael', 'Matthew', 'Christopher', 'Joshua', 'Andrew', 'Joseph', 'John', 'Daniel', 'David', 'Robert', 'James', 'Justin', 'Nicholas', 'Anthony', 'William', 'Kyle', 'Zachary', 'Kevin', 'Tyler', 'Thomas', 'Eric', 'Brian', 'Brandon', 'Jonathan', 'Timothy']

  # Last letter of a first name -> [weight of label 0, weight of label 1],
  # used by proxy_sex as sampling weights. Label 1 is the value for which
  # propose_name draws from male_names.
  dic = {
      "a": [38.1, 0.8],
      "e": [24.0, 10.4],
      "i": [4.4, 0.6],
      "y": [12.3, 11.6],
      "h": [3.5, 2.9],
      "n": [12.8, 24.8],
      "l": [1.0, 8.3],
      "r": [1.0, 7.5],
      "s": [0.4, 7.1],
      "d": [0.4, 5.7],
      "o": [0, 4.1],
  }
  281.  
  def propose_name(s):
      """Return a random first name for the encoded sex *s*.

      Args:
          s: ``1`` selects from ``male_names``; any other value selects from
              ``female_names``.

      Returns:
          str: one name drawn uniformly with ``rd.choice``.
      """
      pool = male_names if s == 1 else female_names
      return rd.choice(pool)
  287.  
  def proxy_sex(name):
      """Draw a 0/1 sex label for *name* from its last letter.

      The last letter is looked up in ``dic`` to get the sampling weights
      ``[weight of 0, weight of 1]``. Letters missing from the table use the
      fallback weights ``[2.1, 16.2]``.

      The lookup is case-insensitive, and an empty name uses the fallback
      weights instead of raising ``IndexError``.

      Args:
          name: first name as a string.

      Returns:
          int: ``0`` or ``1``, sampled with ``rd.choices``.
      """
      # name[-1:] is "" for an empty string, which is never a key of dic.
      last_letter = name[-1:].lower()
      weights = dic.get(last_letter, [2.1, 16.2])
      return rd.choices([0, 1], weights, k=1)[0]
  295.  
Advertisement
Add Comment
Please, Sign In to add comment