Guest User

Untitled

a guest
Dec 14th, 2018
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.30 KB | None | 0 0
  1. from pyspark.sql import SparkSession
  2. from pyspark.sql import Row
  3. from pyspark.sql.functions import udf
  4. from pyspark.sql.types import *
  5.  
  6. spark = SparkSession.builder.appName("SRDD").getOrCreate()
  7. sc = spark.sparkContext
  8.  
  9. # Some sequence of floats
  10. abc = [[0.0769,0.2982],[0.0863,0.30052],[0.0690,0.33337],[0.11975,0.2984],[0.07224,0.3467],[0.1316,0.2999]]
  11.  
  12. def build_kdtree(points,depth=0):
  13. n=points.count()
  14. if n<=0:
  15. return None
  16. axis=depth%2
  17. sorted_points=sorted(points,key=lambda point:point[axis])
  18. return{
  19. 'point': sorted_points[n/2],
  20. 'left':build_kdtree(sorted_points[:n/2],depth+1),
  21. 'right':build_kdtree(sorted_points[n/2 + 1:],depth+1)
  22. }
  23. #This is how I'm trying to specify the return type of the function
  24. kdtree_schema=StructType([StructField('point',ArrayType(FloatType()),nullable=True),StructField('left',StructType(),nullable=True),StructField('right',StructType(),nullable=True)])
  25. kdtree_schema=StructType([StructField('point',ArrayType(FloatType()),nullable=True),StructField('left',kdtree_schema,nullable=True),StructField('right',kdtree_schema,nullable=True)])
  26. #UDF registration
  27. buildkdtree_udf=udf(build_kdtree, kdtree_schema)
  28.  
  29. #Function call
  30. pointskdtree=buildkdtree_udf(abc)
Add Comment
Please, Sign In to add comment