Guest User

quick_mock_up_benfords_2020

a guest
Nov 5th, 2020
5,648
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.91 KB | None | 0 0
  1. #### BEGIN IMPORT SECTION ####
  2. import requests
  3. from bs4 import BeautifulSoup as bs
  4. from scipy.stats import chisquare
  5. from math import log10
  6. #### END IMPORT SECTION ####
  7.  
  8.  
  9. #### BEGIN FUNCTION/CLASS DEFINITION ####
  10. def getsource(url='https://county.milwaukee.gov/EN/County-Clerk/Off-Nav/Election-Results/Election-Results-Fall-2020'):
  11. headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}
  12. r = requests.get(url,headers=headers)
  13. data = r.text
  14. soup=bs(data,"html.parser")
  15. return soup
  16.  
  17. def getBenfords():
  18. #This is a function I'm making to create a list with the expected values so I can do a chi squared test later
  19. expected = [log10(1+1/d) for d in range(1,10)]
  20. return expected
  21.  
  22. def chiTest(actual,expected):
  23. return chisquare(f_obs=actual,f_exp=expected)
  24.  
  25. def calcfreqs(inlist,num):
  26. if (num==1):
  27. inlist[0] = inlist[0]+1
  28. elif (num==2):
  29. inlist[1] = inlist[1]+1
  30. elif (num==3):
  31. inlist[2] = inlist[2]+1
  32. elif (num==4):
  33. inlist[3] = inlist[3]+1
  34. elif (num==5):
  35. inlist[4] = inlist[4]+1
  36. elif (num==6):
  37. inlist[5] = inlist[5]+1
  38. elif (num==7):
  39. inlist[6] = inlist[6]+1
  40. elif (num==8):
  41. inlist[7] = inlist[7]+1
  42. elif (num==9):
  43. inlist[8] = inlist[8]+1
  44. return inlist
  45. #### END FUNCTION/CLASS DEFINITION ####
  46.  
  47.  
  48. #### BEGIN SCRIPT EXECUTION ####
  49. dataDict = {'Num':[],'Ward':[],'Biden':[],'Trump':[],'Blankenship':[],'Jorgensen':[],'Carroll':[],'Writein':[]}
  50. benfords = getBenfords()
  51. biden = [0,0,0,0,0,0,0,0,0]
  52. trump = [0,0,0,0,0,0,0,0,0]
  53. blankenship = [0,0,0,0,0,0,0,0,0]
  54. jorgensen = [0,0,0,0,0,0,0,0,0]
  55. carroll = [0,0,0,0,0,0,0,0,0]
  56. writein = [0,0,0,0,0,0,0,0,0]
  57. datalist = getsource().findAll('table',{'class':'precinctTable'})
  58. count = 1
  59. header = True
  60. for val in datalist[1].findAll('td'):
  61. if (header and count != 8):
  62. count+=1
  63. continue
  64. elif (header and count == 8):
  65. count = 1
  66. header = False
  67. continue
  68. if count == 1:
  69. dataDict['Num'].append(val.text)
  70. elif count == 2:
  71. dataDict['Ward'].append(val.text)
  72. elif count == 3:
  73. dataDict['Biden'].append(val.text)
  74. biden = calcfreqs(biden,int(val.text[0]))
  75. elif count == 4:
  76. dataDict['Trump'].append(val.text)
  77. trump = calcfreqs(trump,int(val.text[0]))
  78. elif count == 5:
  79. dataDict['Blankenship'].append(val.text)
  80. blankenship = calcfreqs(blankenship,int(val.text[0]))
  81. elif count == 6:
  82. dataDict['Jorgensen'].append(val.text)
  83. jorgensen = calcfreqs(jorgensen,int(val.text[0]))
  84. elif count == 7:
  85. dataDict['Carroll'].append(val.text)
  86. carroll = calcfreqs(carroll,int(val.text[0]))
  87. elif count == 8:
  88. dataDict['Writein'].append(val.text)
  89. writein = calcfreqs(writein,int(val.text[0]))
  90. count = 0
  91. count+=1
  92.  
  93. bidenexpected = [sum(biden)*a for a in benfords]
  94. trumpexpected = [sum(trump)*a for a in benfords]
  95. blankenshipexpected = [sum(blankenship)*a for a in benfords]
  96. jorgensenexpected = [sum(jorgensen)*a for a in benfords]
  97. carrollexpected = [sum(carroll)*a for a in benfords]
  98. writeinexpected = [sum(writein)*a for a in benfords]
  99. bidenchival,bidenpval = chiTest(biden,bidenexpected)
  100. trumpchival,trumppval = chiTest(trump,trumpexpected)
  101. blankenshipchival,blankenshippval = chiTest(blankenship,blankenshipexpected)
  102. jorgensenchival,jorgensenpval = chiTest(jorgensen,jorgensenexpected)
  103. carrollchival,carrollpval = chiTest(carroll,carrollexpected)
  104. writeinchival,writeinpval = chiTest(writein,writeinexpected)
  105. print(bidenchival,trumpchival)
  106. print(bidenpval,trumppval) #the p-value for Biden and Trump first digit frequency goodness-of-fit test against Benford's
  107. print(biden,trump,bidenexpected,trumpexpected) #the raw counts of Biden and Trump first digit frequencies
Add Comment
Please, Sign In to add comment