Guest User

Untitled

a guest
Apr 22nd, 2018
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.90 KB | None | 0 0
  1. bj.collect():
  2.  
  3. [{'name': 'ab',
  4. 'abc': 0,
  5. 'def': 0,
  6. 'ghi': 0,
  7. 'jkl': 0,
  8. ....},
  9. {'name': 'ak',
  10. 'abc': 1,
  11. 'def': 1,
  12. 'ghi': 0,
  13. 'jkl': 0,
  14. ....},...]
  15.  
  16.  
  17.  
  18. from pyspark.context import SparkContext
  19. from pyspark.sql.session import SparkSession
  20. from pyspark.sql import Row
  21. import pyspark
  22. import sys
  23. from sys import argv
  # Start Spark with the 'local' master and wrap the context in a SQL session.
  sc = SparkContext(master='local')
  spark = SparkSession(sparkContext=sc)
  26.  
  27.  
  28.  
  # Names of the rows to cluster; each is matched against a row's 'name' field.
  # The order is kept exactly as listed because the assignment loop walks this
  # list in order.
  all_states = [
      "ab", "ak", "ar", "az", "ca", "co", "ct", "de",
      "dc", "fl", "ga", "hi", "id", "il", "in", "ia",
      "ks", "ky", "la", "me", "md", "ma", "mi", "mn",
      "ms", "mo", "mt", "ne", "nv", "nh", "nj", "nm",
      "ny", "nc", "nd", "oh", "ok", "or", "pa", "pr",
      "ri", "sc", "sd", "tn", "tx", "ut", "vt", "va",
      "vi", "wa", "wv", "wi", "wy", "al", "bc", "mb",
      "nb", "lb", "nf", "nt", "ns", "nu", "on", "qc",
      "sk", "yt", "dengl", "fraspm",
  ]
  36.  
  import math

  # k-means-style clustering of the rows of `bj`: every name in `all_states` is
  # assigned to its nearest centroid, centroids are recomputed as the mean of
  # their members, and the loop stops once two consecutive groupings are equal.
  #
  # NOTE(review): `bj` and `init_states` are not defined in the visible part of
  # this script and must exist before this point. `bj` is assumed to be an RDD
  # of dicts holding a 'name' key plus numeric feature keys (as in the sample
  # `bj.collect()` output) and `init_states` a list of row names used as the
  # initial centroids -- TODO confirm.


  def _feature_keys(row):
      """Return every key of *row* except the 'name' label."""
      # Equality test on purpose: `key not in 'name'` would be a substring
      # check and would silently drop keys such as 'a' or 'me'.
      return [key for key in row if key != 'name']


  def _euclidean(row, centroid):
      """Return the Euclidean distance between *row* and *centroid*."""
      return math.sqrt(
          sum((row[key] - centroid[key]) ** 2 for key in _feature_keys(row))
      )


  def _mean_row(members, rows):
      """Return the per-feature mean of the rows named in *members*."""
      count = len(members)
      keys = _feature_keys(rows[members[0]])
      return {
          key: sum(rows[name][key] for name in members) / count
          for key in keys
      }


  # Fetch every row once instead of running one Spark job per lookup.
  # `setdefault` keeps the first row when a name is repeated, which is what
  # `filter(...).first()` would have returned.
  rows_by_name = {}
  for row in bj.collect():
      rows_by_name.setdefault(row['name'], row)

  # `centroids` stays available as an RDD; lookups go through the dict.
  centroids = bj.filter(lambda x: x['name'] in init_states)
  centroid_by_name = {name: rows_by_name[name] for name in init_states}

  mu = None  # grouping produced by the previous iteration
  while True:
      # Assignment step: map each state to the label of its nearest centroid.
      # Ties go to the label that comes first in `init_states`.
      s = {}
      for state in all_states:
          row = rows_by_name[state]
          nearest = float('inf')
          for label in init_states:
              dist = _euclidean(row, centroid_by_name[label])
              if dist < nearest:
                  s[state] = label
                  nearest = dist

      # Canonical form of the grouping: members sorted, then clusters sorted,
      # so two iterations can be compared with `==`.
      jo = sorted(
          sorted(state for state in all_states if s[state] == label)
          for label in init_states
      )

      # Converged once the grouping repeats.
      if jo == mu:
          break
      mu = jo

      # Update step: the i-th cluster (in sorted order) becomes the centroid
      # labelled init_states[i].
      R = []
      centroid_by_name = {}
      for label, members in zip(init_states, jo):
          if not members:
              raise ValueError(
                  "cluster {!r} has no members; cannot compute its centroid"
                  .format(label)
              )
          centroid = _mean_row(members, rows_by_name)
          centroid['name'] = label
          R.append(centroid)
          centroid_by_name.setdefault(label, centroid)
      centroids = sc.parallelize(R)
  101.  
  102.  
  103.  
  # Print every cluster of `mu` as a "* Class <index>" header followed by its
  # members on one line, each member followed by a single space.
  # Members are printed without a trailing newline, so once any member has been
  # written the next header must start with "\n" to begin on a fresh line.
  needs_newline = False
  for index, members in enumerate(mu):
      if needs_newline:
          print("\n* Class", index)
      else:
          print("* Class", index)

      for state in members:
          needs_newline = True
          print(state, '', end="")

  # Terminate the last member line.
  print('')
  116.  
  117. * Class 0
  118. ab bc mb mn qc sk
  119. * Class 1
  120. ak dengl lb nt nu yt
  121. * Class 2
  122. al fl ga ms nc sc
  123. * Class 3
  124. ar ks ky la mo ok tn tx
  125. * Class 4
  126. az ia nd ne sd
  127. * Class 5
  128. ca co id mt nm nv or ut wa wy
  129. * Class 6
  130. ct dc de il in ma md me mi nh nj ny oh on pa ri va vt wi wv
  131. * Class 7
  132. fraspm nb nf ns
  133. * Class 8
  134. hi
  135. * Class 9
  136. pr vi
Add Comment
Please, Sign In to add comment