Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from airflow import DAG
# NOTE(review): `airflow.contrib.*` import paths are deprecated/removed in
# Airflow 2.x; the replacement is
# `airflow.providers.apache.spark.operators.spark_submit.SparkSubmitOperator`.
# Confirm the installed Airflow version before migrating the import.
from airflow.contrib.operators.spark_submit_operator import SparkSubmitOperator
from datetime import datetime, timedelta

# Default arguments applied to every task in this DAG.
args = {
    'owner': 'airflow',
    'start_date': datetime(2019, 5, 22),
}

# Runs every 10 minutes (cron syntax).
# Fix: `catchup=False` is required here — without it, enabling this DAG makes
# the scheduler backfill one run per 10-minute interval since the 2019
# start_date (hundreds of thousands of runs) before running "now".
dag = DAG(
    'spark_example_new',
    default_args=args,
    schedule_interval="*/10 * * * *",
    catchup=False,
)

# Submits the bundled Spark JavaWordCount example via the `spark_default`
# connection; the application jar is resolved on the worker's local
# filesystem (`local://` scheme).
operator = SparkSubmitOperator(
    task_id='spark_submit_job_from_airflow',
    conn_id='spark_default',
    java_class='org.apache.spark.examples.JavaWordCount',
    application='local:///opt/spark/examples/jars/spark-examples_2.12-2.4.1.jar',
    # NOTE(review): total_executor_cores=1 with executor_cores=2 is
    # contradictory — the total-cores cap is below one executor's core count,
    # so the job cannot get the executor it asks for. Confirm the intended
    # sizing against the cluster config before changing either value.
    total_executor_cores='1',
    executor_cores='2',
    executor_memory='2g',
    num_executors='1',
    name='airflow-spark-example-coming-from-aws-k8s',
    verbose=True,
    driver_memory='1g',
    application_args=["/opt/spark/data/graphx/users.txt"],
    dag=dag,
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement