Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Spark SQL demo: load a space-delimited log file into a DataFrame,
# inspect it, and query it both via the DataFrame API and plain SQL.
# http://spark.apache.org/docs/latest/sql-programming-guide.html
import sys

try:
    from pyspark import SparkConf
    from pyspark import SparkContext
    from pyspark.sql import SparkSession
    from pyspark.sql import SQLContext
    print("Successfully imported Spark Modules")
    sc = SparkContext('local')
except ImportError as e:
    print("Can not import Spark Modules", e)
    sys.exit(1)

# Input log file; each line is space-delimited. With header='false' the
# columns receive the default names _c0 .. _c10 (see schema dump below).
# fname="I:/temp/data/f1.txt"
logFile = "C:/temp/2013-09-15.log"

sqlContext = SQLContext(sc)
# df = sqlContext.read.format('com.databricks.spark.csv').options(header='false').load(logFile)
df = sqlContext.read.format('com.databricks.spark.csv') \
                .options(header='false', delimiter=' ') \
                .load(logFile)
df.show()
df.printSchema()
'''
root
 |-- _c0: string (nullable = true)
 |-- _c1: string (nullable = true)
 |-- _c2: string (nullable = true)
 |-- _c3: string (nullable = true)
 |-- _c4: string (nullable = true)
 |-- _c5: string (nullable = true)
 |-- _c6: string (nullable = true)
 |-- _c7: string (nullable = true)
 |-- _c8: string (nullable = true)
 |-- _c9: string (nullable = true)
 |-- _c10: string (nullable = true)
'''

# DataFrame API examples.
# df.select("GoogleKnowlege_Occupation").show()
# Select everybody, but increment the age by 1
df.select(df['_c0'], df['_c2']).show()
# Select people older than 21
# df.filter(df['age'] > 21).show()
# Count people by age
# df.groupBy("age").count().show()

# Running SQL Queries Programmatically.
# The sql function on a SparkSession enables applications to run SQL
# queries programmatically and returns the result as a DataFrame.
# Register the DataFrame as a SQL temporary view.
df.createOrReplaceTempView("people")
# NOTE: the original imported `spark` from pyspark.shell, which tries to
# create a second SparkContext and conflicts with the one built above.
# Reuse the existing context by asking the builder for the current session.
spark = SparkSession.builder.getOrCreate()
sqlDF = spark.sql("SELECT * FROM people")
sqlDF.show()
sys.exit(0)
'''
Welcome to
 ____ __
 / __/__ ___ _____/ /__
 _\ \/ _ \/ _ `/ __/ '_/
 /__ / .__/\_,_/_/ /_/\_\ version 2.0.2
 /_/
Using Python version 3.5.1 (v3.5.1:37a07cee5969, Dec 6 2015 01:38:48)
SparkSession available as 'spark'.
'''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement