- [Stage 56:====================================================> (193 + 1) / 200]
- kinesisStream = KinesisUtils.createStream(ssc, APPLICATION_NAME, STREAM_NAME, ENDPOINT, REGION_NAME, INITIAL_POS, CHECKPOINT_INTERVAL, awsAccessKeyId=AWSACCESSID, awsSecretKey=AWSSECRETKEY, storageLevel=STORAGE_LEVEL)
- CHECKPOINT_INTERVAL = 60
- STORAGE_LEVEL = StorageLevel.MEMORY_ONLY
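
For context, the fragments above only run once the Kinesis pieces are imported and the storage level is an actual pyspark.StorageLevel (a bare "memory" is not valid Python). A minimal sketch of the setup, assuming placeholder app/stream names, a us-east-1 endpoint, and a 1-second batch interval, none of which appear in the original paste:

from pyspark import SparkContext, StorageLevel
from pyspark.streaming import StreamingContext
from pyspark.streaming.kinesis import KinesisUtils, InitialPositionInStream

APPLICATION_NAME = 'pageview-app'                # assumed name
STREAM_NAME = 'pageview-stream'                  # assumed name
ENDPOINT = 'kinesis.us-east-1.amazonaws.com'     # assumed endpoint/region
REGION_NAME = 'us-east-1'
INITIAL_POS = InitialPositionInStream.TRIM_HORIZON
CHECKPOINT_INTERVAL = 60                         # Kinesis checkpoint interval, in seconds
STORAGE_LEVEL = StorageLevel.MEMORY_ONLY         # closest StorageLevel to "memory"
AWSACCESSID = '<aws-access-key-id>'              # placeholder credentials
AWSSECRETKEY = '<aws-secret-key>'

sc = SparkContext(appName=APPLICATION_NAME)
ssc = StreamingContext(sc, 1)                    # 1-second batches (assumed)

kinesisStream = KinesisUtils.createStream(
    ssc, APPLICATION_NAME, STREAM_NAME, ENDPOINT, REGION_NAME,
    INITIAL_POS, CHECKPOINT_INTERVAL,
    storageLevel=STORAGE_LEVEL,
    awsAccessKeyId=AWSACCESSID, awsSecretKey=AWSSECRETKEY)
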
- kinesisStream.foreachRDD(writeToTable)
- def writeToTable(df, tableName):
- if tableName == REDSHIFT_PAGEVIEW_TBL:
- df = df.groupBy([COL_STARTTIME, COL_ENDTIME, COL_CUSTOMERID, COL_PROJECTID, COL_FONTTYPE, COL_DOMAINNAME, COL_USERAGENT]).count()
- df = df.withColumnRenamed('count', COL_PAGEVIEWCOUNT)
- # Write back to a table
- url = ("jdbc:redshift://" + REDSHIFT_HOSTNAME + ":" + REDSHIFT_PORT + "/" + REDSHIFT_DATABASE + "?user=" + REDSHIFT_USERNAME + "&password="+ REDSHIFT_PASSWORD)
- s3Dir = 's3n://' + AWSACCESSID + ':' + AWSSECRETKEY + '@' + BUCKET + '/' + FOLDER
- print('Start writing to redshift')
- df.write.format("com.databricks.spark.redshift").option("url", url).option("dbtable", REDSHIFT_PAGEVIEW_TBL).option("tempdir", s3Dir).mode("append").save()
- print('Finished writing to redshift')
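
One catch: foreachRDD hands its callback an RDD (or a (time, rdd) pair), not a DataFrame, so the two-argument writeToTable above cannot be passed to it directly. Below is a runnable sketch of the whole write path, assuming the Kinesis records are JSON strings (the original paste does not show the record format) and reusing the poster's constants (REDSHIFT_*, COL_*, BUCKET, FOLDER) plus the placeholder names from the sketch above:

from pyspark.sql import SQLContext

sqlContext = SQLContext(sc)

def writeToTable(df, tableName):
    if tableName == REDSHIFT_PAGEVIEW_TBL:
        # Collapse raw events into one row per key combination,
        # with the count renamed to the page-view column.
        df = df.groupBy([COL_STARTTIME, COL_ENDTIME, COL_CUSTOMERID,
                         COL_PROJECTID, COL_FONTTYPE, COL_DOMAINNAME,
                         COL_USERAGENT]).count()
        df = df.withColumnRenamed('count', COL_PAGEVIEWCOUNT)

    # spark-redshift stages the rows in the S3 tempdir, then issues a COPY.
    url = ('jdbc:redshift://' + REDSHIFT_HOSTNAME + ':' + REDSHIFT_PORT +
           '/' + REDSHIFT_DATABASE + '?user=' + REDSHIFT_USERNAME +
           '&password=' + REDSHIFT_PASSWORD)
    s3Dir = 's3n://' + AWSACCESSID + ':' + AWSSECRETKEY + '@' + BUCKET + '/' + FOLDER
    print('Start writing to redshift')
    (df.write
       .format('com.databricks.spark.redshift')
       .option('url', url)
       .option('dbtable', tableName)
       .option('tempdir', s3Dir)
       .mode('append')
       .save())
    print('Finished writing to redshift')

def processBatch(rdd):
    # Convert each non-empty micro-batch to a DataFrame before writing.
    if not rdd.isEmpty():
        writeToTable(sqlContext.read.json(rdd), REDSHIFT_PAGEVIEW_TBL)

kinesisStream.foreachRDD(processBatch)
ssc.start()
ssc.awaitTermination()

A caveat on the tempdir URL: embedding the access key and secret in the s3n:// path breaks if the secret contains a '/'; setting fs.s3n.awsAccessKeyId and fs.s3n.awsSecretAccessKey in the Hadoop configuration avoids that.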