Advertisement
Guest User

Untitled

a guest
Jul 19th, 2019
143
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.98 KB | None | 0 0
  1. %spark.pyspark
  2. import tensorflow as tf
  3. from pyspark.ml import Pipeline
  4. from pyspark.ml import PipelineModel
  5. from sparkflow.graph_utils import build_graph
  6. from sparkflow.tensorflow_async import SparkAsyncDL
  7. from pyspark.ml.feature import VectorAssembler, OneHotEncoder
  8. from pyspark.ml.pipeline import Pipeline
  9. from pyspark.sql import SparkSession
  10. from sparkflow.pipeline_util import PysparkPipelineWrapper
  11.  
  12. def small_model():
  13. x = tf.placeholder(tf.float32, shape=[None, 784], name='x')
  14. y = tf.placeholder(tf.float32, shape=[None, 10], name='y')
  15. layer1 = tf.layers.dense(x, 256, activation=tf.nn.relu)
  16. layer2 = tf.layers.dense(layer1, 256, activation=tf.nn.relu)
  17. out = tf.layers.dense(layer2, 10)
  18. z = tf.argmax(out, 1, name='out')
  19. loss = tf.losses.softmax_cross_entropy(y, out)
  20. return loss
  21.  
  22. spark = SparkSession.builder \
  23. .appName("examples") \
  24. .getOrCreate()
  25.  
  26. df = spark.read.option("inferSchema", "true").csv('s3://emr-datascience-adjoe-sandbox/mnist_train.csv')
  27. df_test = spark.read.option("inferSchema", "true").csv('s3://emr-datascience-adjoe-sandbox/mnist_test.csv')
  28. mg = build_graph(small_model)
  29. #Assemble and one hot encode
  30. va = VectorAssembler(inputCols=df.columns[1:785], outputCol='features')
  31. encoded = OneHotEncoder(inputCol='_c0', outputCol='labels', dropLast=False)
  32.  
  33. spark_model = SparkAsyncDL(
  34. inputCol='features',
  35. tensorflowGraph=mg,
  36. tfInput='x:0',
  37. tfLabel='y:0',
  38. tfOutput='out:0',
  39. tfLearningRate=.001,
  40. iters=20,
  41. predictionCol='predicted',
  42. labelCol='labels',
  43. verbose=1,
  44. partitions=100
  45. )
  46.  
  47. p = Pipeline(stages=[va, encoded, spark_model]).fit(df)
  48. p.write().overwrite().save("s3://emr-datascience-adjoe-sandbox/mnist_model")
  49. p.transform(df_test).write.parquet("s3://emr-datascience-adjoe-sandbox/mnist_predictions")
  50.  
  51. p_loaded = PysparkPipelineWrapper.unwrap(PipelineModel.load("s3://emr-datascience-adjoe-sandbox/mnist_model"))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement