Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
"""Minimal PySpark example: train MLlib KMeans on random uniform vectors."""
from pyspark.context import SparkContext
from pyspark.mllib.clustering import KMeans
from pyspark.mllib.random import RandomRDDs

if __name__ == "__main__":
    sc = SparkContext(appName='kmeansMinimalExample')
    try:
        # 10,000,000 random 64-dimensional vectors, uniform in [0, 1).
        # (The original comment claimed "10000 points", contradicting the code.)
        data = RandomRDDs.uniformVectorRDD(sc, 10000000, 64)
        # Train k-means with 8192 clusters; C is the fitted KMeansModel.
        C = KMeans.train(data, 8192, maxIterations=10)
    finally:
        # Always release the Spark context, even if training fails.
        sc.stop()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement