Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Example: load JSON data into DataFrames and query it with SQL.
- # NOTE(review): `spark` is not defined in this snippet; it is presumably an
- # existing SparkSession created earlier -- confirm against the caller.
- # Keep a handle on the underlying SparkContext; it is used further down to
- # build an RDD of JSON strings.
- sc = spark.sparkContext
- # A JSON dataset is pointed to by path.
- # The path can be either a single text file or a directory storing text files.
- # NOTE(review): the path is relative, so it resolves against the working
- # directory / default filesystem of the job -- confirm it exists where this runs.
- path = "examples/src/main/resources/people.json"
- peopleDF = spark.read.json(path)
- # The inferred schema can be visualized using the printSchema() method
- peopleDF.printSchema()
- # root
- # |-- age: long (nullable = true)
- # |-- name: string (nullable = true)
- # Register the DataFrame as a temporary view named "people" so that it can
- # be referenced by name in the SQL query below.
- peopleDF.createOrReplaceTempView("people")
- # Run a SQL statement through the sql() method of the `spark` session; the
- # result is bound to teenagerNamesDF. BETWEEN is inclusive, so ages 13 and 19
- # both match.
- teenagerNamesDF = spark.sql("SELECT name FROM people WHERE age BETWEEN 13 AND 19")
- teenagerNamesDF.show()
- # +------+
- # |  name|
- # +------+
- # |Justin|
- # +------+
- # Alternatively, a DataFrame can be created for a JSON dataset represented by
- # an RDD[String] storing one JSON object per string
- jsonStrings = ['{"name":"Yin","address":{"city":"Columbus","state":"Ohio"}}']
- # Distribute the local list of JSON strings as an RDD, then hand that RDD to
- # the same JSON reader that was used for the file path above.
- otherPeopleRDD = sc.parallelize(jsonStrings)
- otherPeople = spark.read.json(otherPeopleRDD)
- # Display the resulting DataFrame (the expected output is not included in this paste).
- otherPeople.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement