moh_hassan

Read a text file using Spark and Python

Nov 20th, 2016
# http://spark.apache.org/docs/latest/sql-programming-guide.html

import sys

try:
    from pyspark import SparkContext
    from pyspark import SparkConf
    print("Successfully imported Spark modules")
    # Run Spark locally with the default configuration.
    sc = SparkContext('local')
except ImportError as e:
    print("Cannot import Spark modules:", e)
    sys.exit(1)
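# SparkConf is imported above but never used. A minimal sketch of how it
# could be passed to the context (the app name and master string below are
# illustrative, not taken from the original paste); only one SparkContext
# can exist at a time, so this would replace the line above rather than
# adding to it:
#
#     conf = SparkConf().setAppName("read-log-file").setMaster("local[*]")
#     sc = SparkContext(conf=conf)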
# fname = "I:/temp/data/f1.txt"
logFile = "C:/temp/2013-09-15.log"

from pyspark.sql import SQLContext
sqlContext = SQLContext(sc)

# Read the space-delimited log file into a DataFrame; there is no header
# row, so columns are auto-named _c0, _c1, ...
# df = sqlContext.read.format('com.databricks.spark.csv').options(header='false').load(logFile)
df = sqlContext.read.format('com.databricks.spark.csv').options(header='false', delimiter=' ').load(logFile)
df.show()
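# Spark 2.0+ also ships a built-in CSV data source, so the same load can be
# written with the short format name; a sketch of the equivalent call (not
# part of the original paste):
#
#     df = sqlContext.read.format('csv') \
#         .options(header='false', delimiter=' ') \
#         .load(logFile)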
# Inspect the inferred schema; with no header row every column is an
# auto-named string column (_c0 ... _c10).
df.printSchema()
'''
root
 |-- _c0: string (nullable = true)
 |-- _c1: string (nullable = true)
 |-- _c2: string (nullable = true)
 |-- _c3: string (nullable = true)
 |-- _c4: string (nullable = true)
 |-- _c5: string (nullable = true)
 |-- _c6: string (nullable = true)
 |-- _c7: string (nullable = true)
 |-- _c8: string (nullable = true)
 |-- _c9: string (nullable = true)
 |-- _c10: string (nullable = true)
'''
# df.select("GoogleKnowlege_Occupation").show()   # column from another dataset, kept for reference

# Project two of the log columns.
df.select(df['_c0'], df['_c2']).show()

# The commented examples below come from the Spark SQL programming guide and
# assume an 'age' column that this log file does not have; equivalents for
# this file's columns follow right after them.
# Select people older than 21
# df.filter(df['age'] > 21).show()

# Count people by age
# df.groupBy("age").count().show()
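# The same ideas expressed against the columns this file actually has.
# Grouping by the first field works for any data:
df.groupBy('_c0').count().show()
# A filter needs a concrete value from the data; the 'GET' below is only a
# guess at a typical log field, not taken from the original file:
# df.filter(df['_c2'] == 'GET').show()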
# Running SQL queries programmatically:
# the sql function on a SparkSession runs a SQL query and returns the result
# as a DataFrame.

# Register the DataFrame as a SQL temporary view.
df.createOrReplaceTempView("people")

# Importing pyspark.shell builds a SparkSession (exposed as 'spark') on top
# of the existing SparkContext and prints the banner quoted at the bottom of
# this file; SparkSession.builder.getOrCreate() is the usual way to obtain
# one in a standalone script.
from pyspark.shell import spark

sqlDF = spark.sql("SELECT * FROM people")
sqlDF.show()
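# Any SQL statement over the registered view returns a DataFrame; for
# example, an aggregate equivalent to the groupBy above ('_c0' is simply the
# auto-generated name of the first column):
spark.sql("SELECT _c0, COUNT(*) AS n FROM people GROUP BY _c0").show()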

sys.exit(0)
'''
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /__ / .__/\_,_/_/ /_/\_\   version 2.0.2
      /_/

Using Python version 3.5.1 (v3.5.1:37a07cee5969, Dec  6 2015 01:38:48)
SparkSession available as 'spark'.
'''
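# For reference, the whole read can be done in Spark 2.x style with a
# SparkSession and the built-in CSV reader; a minimal standalone sketch
# (the app name is illustrative, the path is the one used above):
#
#     from pyspark.sql import SparkSession
#
#     spark = SparkSession.builder.appName("read-log-file").getOrCreate()
#     df = spark.read.option("header", "false") \
#                    .option("delimiter", " ") \
#                    .csv("C:/temp/2013-09-15.log")
#     df.show()
#     df.printSchema()
#     spark.stop()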