Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- from pyspark.sql.types import TimestampType
- import pandas as pd
- from pyspark.sql import SparkSession
# Spark session and input configuration for the drone sensor CSV exports.
spark = SparkSession.builder.appName('Basics').getOrCreate()

# Directory holding the exported sensor CSVs. os.path.join handles the
# separator portably instead of hand-concatenating "/"; the trailing ""
# keeps the trailing separator so `path + files[i]` still works unchanged.
path = os.path.join(os.getcwd(), "Dronedata", "export_csv", "")

# Export file names; downstream code indexes this list by position
# (0 = Temperature, 1 = Pressure, 2 = Salinity, 3 = GpsFix).
files = ["Temperature.csv", "Pressure.csv", "Salinity.csv", "GpsFix.csv"]
def datefromparts(timestamp):
    """Parse *timestamp* and round it to the nearest whole minute.

    Accepts anything ``pd.Timestamp`` can parse (strings, datetimes)
    and returns a ``pd.Timestamp`` snapped to minute resolution, so
    raw sensor timestamps can be bucketed per minute for aggregation.
    """
    parsed = pd.Timestamp(timestamp)
    return parsed.round(freq='min')
- spark.udf.register('datefromparts', datefromparts, TimestampType())
def csvParser():
    """Load the sensor CSV exports and compute per-minute averages.

    Reads the Temperature and Pressure exports, joins them on their raw
    ``timestamp`` column, registers the result as the SQL view ``odp``,
    and aggregates both readings per rounded minute using the
    ``datefromparts`` UDF.

    Returns:
        A Spark DataFrame with columns ``time``, ``AVG_pressure`` and
        ``AVG_temperature``.
    """
    # BUG FIX (review): the original read Salinity.csv yet the query
    # averages `pressure`, and it queried a table `odp` that was never
    # registered, so spark.sql() failed to resolve it. Read the Pressure
    # export instead and register the joined data as `odp`.
    df_Temperature = spark.read.csv(path + files[0], header=True)
    df_Pressure = spark.read.csv(path + files[1], header=True)

    # NOTE(review): assumes both exports share a `timestamp` column —
    # TODO confirm against the CSV headers.
    df_Temperature.join(df_Pressure, on='timestamp').createOrReplaceTempView('odp')

    table = spark.sql(
        "SELECT datefromparts(timestamp) AS time,"
        "ROUND(AVG(pressure),1) AS AVG_pressure,"
        "ROUND(AVG(temperature),1) AS AVG_temperature "
        "FROM odp GROUP BY time")
    # Return the aggregate instead of silently discarding it, so callers
    # can show()/collect() the result; existing callers that ignored the
    # (previously None) return value are unaffected.
    return table
- csvParser()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement