Advertisement
Guest User

Untitled

a guest
Mar 2nd, 2016
114
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.18 KB | None | 0 0
  1. [Stage 56:====================================================> (193 + 1) / 200]
  2.  
  # Create the Kinesis input DStream. `ssc`, `KinesisUtils` and every
  # upper-case name are defined outside this snippet.
  kinesisStream = KinesisUtils.createStream(
      ssc,
      APPLICATION_NAME,
      STREAM_NAME,
      ENDPOINT,
      REGION_NAME,
      INITIAL_POS,
      CHECKPOINT_INTERVAL,
      awsAccessKeyId=AWSACCESSID,
      awsSecretKey=AWSSECRETKEY,
      storageLevel=STORAGE_LEVEL)

  # NOTE(review): this assignment runs after the call above has already read
  # CHECKPOINT_INTERVAL - presumably it documents the value in use; confirm
  # it is really defined before createStream() in the full script.
  CHECKPOINT_INTERVAL = 60

  # NOTE(review): `memory` is not defined in the visible code, and the call
  # above passes STORAGE_LEVEL rather than `storageLevel` - confirm intent.
  storageLevel = memory

  # Register the per-batch callback.
  # NOTE(review): the callback is spelled `writeTotable`, while the function
  # shown further down is `WriteToTable(df, type)` - confirm which is meant.
  kinesisStream.foreachRDD(writeTotable)
  def WriteToTable(df, type):
      """Aggregate page-view rows and append them to the Redshift table.

      When ``type`` is contained in ``REDSHIFT_PAGEVIEW_TBL`` the frame is
      grouped by the start/end time, customer, project, font type, domain
      name and user agent columns, the resulting ``count`` column is renamed
      to ``COL_PAGEVIEWCOUNT``, and the result is appended to
      ``REDSHIFT_PAGEVIEW_TBL`` through the ``com.databricks.spark.redshift``
      data source, using an S3 folder as the temporary directory.

      Args:
          df: DataFrame holding the rows to aggregate and write.
          type: Table selector, tested with ``in`` against
              ``REDSHIFT_PAGEVIEW_TBL``. The name shadows the builtin but is
              kept so existing keyword callers continue to work.

      Returns:
          None.
      """
      # NOTE(review): the original paste had lost its indentation; placing the
      # write inside this branch is an assumption (the target table is
      # hard-coded to the page-view table) - confirm against the real script.
      if type in REDSHIFT_PAGEVIEW_TBL:
          group_cols = [
              COL_STARTTIME,
              COL_ENDTIME,
              COL_CUSTOMERID,
              COL_PROJECTID,
              COL_FONTTYPE,
              COL_DOMAINNAME,
              COL_USERAGENT,
          ]
          df = df.groupby(group_cols).count()
          df = df.withColumnRenamed('count', COL_PAGEVIEWCOUNT)

          # Write back to a table

          # str() keeps the concatenation valid when the port is configured
          # as an int rather than a string.
          # NOTE(review): credentials are embedded verbatim; characters such
          # as '&', '?' or '/' in them may break these URLs - confirm.
          url = ("jdbc:redshift://" + REDSHIFT_HOSTNAME
                 + ":" + str(REDSHIFT_PORT)
                 + "/" + REDSHIFT_DATABASE
                 + "?user=" + REDSHIFT_USERNAME
                 + "&password=" + REDSHIFT_PASSWORD)

          s3Dir = ('s3n://' + AWSACCESSID + ':' + AWSSECRETKEY
                   + '@' + BUCKET + '/' + FOLDER)

          # Parenthesised single-argument print emits the same text on
          # Python 2 and Python 3.
          print('Start writing to redshift')
          writer = df.write.format("com.databricks.spark.redshift")
          writer = writer.option("url", url)
          writer = writer.option("dbtable", REDSHIFT_PAGEVIEW_TBL)
          writer = writer.option('tempdir', s3Dir)
          writer.mode('Append').save()

          print('Finished writing to redshift')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement