Guest User

Untitled

a guest
Nov 25th, 2017
181
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.84 KB | None | 0 0
  1. applications = [
  2. {'Name': 'Spark'},
  3. {'Name': 'Hive'},
  4. {'Name': 'Tez'},
  5. {'Name': 'Hadoop'},
  6. {'Name': 'Ganglia'},
  7. {'Name': 'Presto'},
  8. {'Name': 'Zeppelin'}
  9. ]
  10. release_label = "emr-5.9.0"
  11. log_uri = "s3n://SOME-LOGS-BUCKET/"
  12. configurations = [
  13. {
  14. "Classification": "spark-env",
  15. "Properties": {},
  16. "Configurations":
  17. [
  18. {
  19. "Classification": "export",
  20. "Properties":
  21. {
  22. "PYSPARK_PYTHON": "/mnt/pyspark/miniconda/envs/spark/bin/python3.6",
  23. "JAVA_HOME": "/usr/lib/jvm/java-1.8.0",
  24. "PYSPARK_DRIVER_PYTHON": "/mnt/pyspark/miniconda/envs/spark/bin/python3.6"
  25. },
  26. "Configurations": []
  27. }
  28. ]
  29. },
  30. {
  31. "Classification": "hadoop-env",
  32. "Properties": {},
  33. "Configurations":
  34. [
  35. {
  36. "Classification": "export",
  37. "Properties":
  38. {
  39. "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
  40. },
  41. "Configurations": []
  42. }
  43. ]
  44. },
  45. {
  46. "Classification": "hive-site",
  47. "Properties":
  48. {
  49. "javax.jdo.option.ConnectionUserName": "SOME-USER",
  50. "javax.jdo.option.ConnectionDriverName": "org.mariadb.jdbc.Driver",
  51. "javax.jdo.option.ConnectionPassword": "SOME-PASSWORD",
  52. "javax.jdo.option.ConnectionURL": "jdbc:mysql://HOST:3306/SOME-HIVEDB?createDatabaseIfNotExist=true"
  53. },
  54. "Configurations": []
  55. },
  56. {
  57. "Classification": "presto-connector-hive",
  58. "Properties":
  59. {
  60. "hive.parquet-optimized-reader.enabled": "true",
  61. "hive.parquet-predicate-pushdown.enabled": "true",
  62. "hive.parquet.use-column-names": "true",
  63. "hive.orc.use-column-names": "true"
  64. }
  65. },
  66. {
  67. "Classification": "hue-ini",
  68. "Properties": {},
  69. "Configurations":
  70. [
  71. {
  72. "Classification": "desktop",
  73. "Properties": {},
  74. "Configurations":
  75. [
  76. {
  77. "Classification": "database",
  78. "Properties":
  79. {
  80. "password": "SOME-PWD",
  81. "engine": "mysql",
  82. "port": "3306",
  83. "host": "HUE-HOST",
  84. "name": "HUE-DB",
  85. "user": "HUE-USER"
  86. },
  87. "Configurations": []
  88. }
  89. ]
  90. }
  91. ]
  92. }
  93. ]
  94.  
  95. steps = [
  96. {
  97. "ActionOnFailure": "TERMINATE_JOB_FLOW",
  98. "Name": "Test S3FS script",
  99. "HadoopJarStep":
  100. {
  101. "Jar": "command-runner.jar",
  102. "Args": [
  103. "echo",
  104. "1"
  105. ]
  106. }
  107. }
  108. ]
  109.  
  110. # instance group configuration
  111. instance_groups = [
  112. {
  113. "InstanceCount": core_instance_count,
  114. "InstanceRole": "CORE",
  115. "InstanceType": core_instance_type,
  116. "Name": "CORE"
  117. },
  118. {
  119. "InstanceCount": task_instance_count,
  120. "BidPrice": task_spot_bid_price,
  121. "InstanceRole": "TASK",
  122. "InstanceType": task_instance_type,
  123. "Market": "SPOT",
  124. "Name": "TASK"
  125. },
  126. {
  127. "InstanceCount": 1
  128. , "EbsConfiguration":
  129. {
  130. "EbsBlockDeviceConfigs": [
  131. {
  132. "VolumeSpecification":
  133. {
  134. "SizeInGB": 32,
  135. "VolumeType": "gp2"
  136. },
  137. "VolumesPerInstance": 1
  138. }
  139. ]
  140. },
  141. "InstanceRole": "MASTER",
  142. "InstanceType": master_instance_type,
  143. "Name": "MASTER"
  144. }
  145. ]
  146.  
  147. instances = {
  148. "KeepJobFlowAliveWhenNoSteps": True,
  149. "TerminationProtected": False,
  150. "Ec2KeyName": "SOME-SSH-KEY",
  151. "Ec2SubnetId": "SOME-SUBNET-ID",
  152. "EmrManagedSlaveSecurityGroup": "SOME-SG-ID",
  153. "EmrManagedMasterSecurityGroup": "SOME-SG-ID",
  154. "ServiceAccessSecurityGroup": "SOME-SERVICE-SG-ID",
  155. "InstanceGroups": instance_groups
  156. }
  157.  
  158. job_flow_role = "SOME-INSTANCE-ROLE"
  159. service_role = "SOME-SERVICE-ROLE"
  160.  
  161. tags = [
  162. {
  163. 'Key': 'Name',
  164. 'Value': global_cluster_name
  165. },
  166. {
  167. 'Key': 'Team',
  168. 'Value': 'YOUR-TEAM'
  169. },
  170. {
  171. 'Key': 'Managed',
  172. 'Value': 'BY-SOME-SCHEDULER'
  173. },
  174. {
  175. 'Key': 'Environment',
  176. 'Value': 'dev'
  177. },
  178. ]
  179.  
  180. bootstrap_actions = [{
  181. "Name": "presto_configuration",
  182. "ScriptBootstrapAction": {
  183. "Path": "s3://{0}/{1}".format(s3_bucket_presto, s3_key_presto)
  184.  
  185. }
  186. }
  187. ]
  188.  
  189. response = client.run_job_flow(Applications=applications,
  190. Name=global_cluster_name,
  191. BootstrapActions=bootstrap_actions,
  192. ReleaseLabel=release_label,
  193. LogUri=log_uri,
  194. Steps=steps,
  195. Tags=tags,
  196. Configurations=configurations,
  197. Instances=instances,
  198. JobFlowRole=job_flow_role,
  199. VisibleToAllUsers=True,
  200. ServiceRole=service_role)
  201. print("got response {0}".format(response))
  202. job_flow_id = response['JobFlowId']
  203. logging.info("Got job_flow_id {0}".format(job_flow_id))
  204. cluster_id = job_flow_id
  205. time.sleep(300)
  206. cluster_description = client.describe_cluster(
  207. ClusterId=cluster_id
  208. )
  209. print(cluster_description)
Add Comment
Please, Sign In to add comment