from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StringType

# Start a local SparkSession
SparkS = SparkSession.builder \
    .appName("Test") \
    .master("local[*]") \
    .getOrCreate()

# Read the raw CSV as an RDD of plain strings, one element per line
raw_data = SparkS.sparkContext \
    .textFile(r"C:\Users\...\RawData\nasdaq.csv")
print(raw_data.take(3))

# Schema for the NASDAQ file; every column kept as a string for now
schema = StructType() \
    .add("date", StringType()) \
    .add("open", StringType()) \
    .add("high", StringType()) \
    .add("low", StringType()) \
    .add("close", StringType()) \
    .add("adj_close", StringType()) \
    .add("volume", StringType())

# Try to build a DataFrame directly from the RDD of unsplit lines
geioIP = SparkS.createDataFrame(raw_data, schema)
print(geioIP)
DataFrame[date: string, open: string, high: string, low: string, close: string, adj_close: string, volume: string]
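The print above only shows the DataFrame's schema: createDataFrame does not read or verify any rows until an action runs. The action that triggered the executor error below is not included in the paste; the call shown here is a hypothetical stand-in for it.

# Any action forces each RDD element to be checked against the schema,
# which is where the TypeError below is raised (hypothetical trigger, not from the paste)
geioIP.show(3)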
18/01/23 12:58:48 ERROR Executor: Exception in task 0.0 in stage 1.0 (TID 1)
org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "C:\spark-2.2.1-bin-hadoop2.7\python\lib\pyspark.zip\pyspark\worker.py", line 177, in main
  File "C:\spark-2.2.1-bin-hadoop2.7\python\lib\pyspark.zip\pyspark\worker.py", line 172, in process
  File "C:\spark-2.2.1-bin-hadoop2.7\python\lib\pyspark.zip\pyspark\serializers.py", line 268, in dump_stream
    vs = list(itertools.islice(iterator, batch))
  File "C:\Users\rajnish.kumar\AppData\Local\Programs\Python\Python36\lib\site-packages\pyspark\sql\session.py", line 520, in prepare
    verify_func(obj, schema)
  File "C:\spark-2.2.1-bin-hadoop2.7\python\lib\pyspark.zip\pyspark\sql\types.py", line 1371, in _verify_type
    raise TypeError("StructType can not accept object %r in type %s" % (obj, type(obj)))
TypeError: StructType can not accept object '43084,6871.549805,6945.819824,6871.450195,6936.580078,6936.580078,3510420000' in type <class 'str'>
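The TypeError points at the real issue: textFile yields an RDD of whole lines (plain str), while the StructType schema expects each element to be a row or 7-field tuple, so _verify_type rejects the unsplit string. A minimal sketch of two possible fixes, reusing the names from the paste (the truncated path is left as-is; rows and df are hypothetical names):

# Option 1: split every line into a 7-element tuple before building the DataFrame
rows = raw_data.map(lambda line: tuple(line.split(",")))
df = SparkS.createDataFrame(rows, schema)
df.show(3)

# Option 2: skip the RDD and let Spark's CSV reader apply the schema directly
df = SparkS.read.csv(r"C:\Users\...\RawData\nasdaq.csv", schema=schema)
df.show(3)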