Advertisement
Guest User

Untitled

a guest
Feb 21st, 2020
105
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.82 KB | None | 0 0
  1. import os
  2. from pyspark.sql.types import TimestampType
  3. import pandas as pd
  4. from pyspark.sql import SparkSession
  5.  
  6. spark = SparkSession.builder.appName('Basics').getOrCreate()
  7.  
  8. path = os.getcwd()+ "/Dronedata/export_csv/"
  9. files = ["Temperature.csv","Pressure.csv","Salinity.csv","GpsFix.csv"]
  10.  
  11.  
  12. def datefromparts(timestamp):
  13.     return pd.Timestamp(timestamp).round('min')
  14. spark.udf.register('datefromparts', datefromparts, TimestampType())
  15.  
  16.  
  17. def csvParser():
  18.     df_Salinity = spark.read.csv(path + files[2], header=True)
  19.     df_Temperature = spark.read.csv(path + files[0], header=True)
  20.     table = spark.sql(
  21.         "SELECT datefromparts(timestamp) AS time,ROUND(AVG(pressure),1) AS AVG_pressure,ROUND(AVG(temperature),1) AS AVG_temperature FROM odp GROUP BY time")
  22.  
  23.     #print(tempData_filteredLists)
  24.  
  25. csvParser()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement