Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Target sink used for storing the test data.
// NOTE(review): this is a plain string literal (no `s` interpolator), so the text
// "${mongo.server.host}" is passed to MongoSink verbatim — presumably it is a
// placeholder resolved by the framework/configuration; confirm.
val mongoDB = MongoSink("mongodb://${mongo.server.host}")

// Test documents, given as raw JSON strings; they are inserted by the scenario below.
val testData = Seq(
  """{"_id":"1000","articleId":"a001","comment":{"content":"Nice article","author":"a001"}}""",
  """{"_id":"1001","articleId":"a002","comment":{"content":"Good One","author":"a002"}}""",
  """{"_id":"1002","articleId":"a003","comment":{"content":"That's way too long","author":"a003"}}"""
)

// Command line handed to the `.ssh` step at the end of the scenario.
val sparkCmd =
  "spark-submit --master yarn --deploy-mode cluster --name MongoJob --class SparkApp /path/to/app.jar"

// Remote server ssh config.
// NOTE(review): user/password/host are hard-coded placeholder values; real credentials
// should come from configuration or the environment rather than source — confirm.
val sshConfig = SSHConfig(user = "admin", password = "admin", hostName = "xx.xx.xx.xx")

// Scenario definition: create the collection (indexed on comment.author), insert the
// test documents, then issue the Spark command through the ssh configuration.
val setup = scenario("Mongo Spark Test Suite")
  .exec(
    mongoDB.createCollection(
      db = "articles",
      collection = "comments",
      indexFields = Seq("comment.author")
    )
  )
  .exec(
    // The rows are the JSON documents declared in `testData` above.
    mongoDB.insertDocuments(db = "articles", collection = "comments", rows = testData)
  )
  .ssh(sshConfig, cmd = sparkCmd)
Add Comment
Please, Sign In to add comment