Guest User

Untitled

a guest
Apr 1st, 2018
96
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.61 KB | None | 0 0
  1. # ====================================================================
  2. # PullCsvFromS3
  3. # Pull CSV data from an S3 directory to our local system
  4. # ====================================================================
  5.  
  6. job.name=PullCsvFromS3
  7. job.description=Pull CSV data from a directory S3 to our local system
  8. fs.uri=file:///
  9.  
  10. # Set working directory
  11. work.dir=/Users/tilak/gobblin/mopar-demo
  12. writer.staging.dir=${work.dir}/taskStaging
  13. writer.output.dir=${work.dir}/taskOutput
  14. mr.job.root.dir=${work.dir}/working
  15.  
  16. # Set state store
  17. state.store.enabled=true
  18. state.store.type=mysql
  19. state.store.db.jdbc.driver=com.mysql.jdbc.Driver
  20. state.store.db.url=jdbc:mysql://localhost/mopar_demo
  21. state.store.db.user=gobblin
  22. state.store.db.password=gobblin
  23.  
  24. # Set writer and publisher
  25. writer.fs.uri=file:///
  26. data.publisher.final.dir=${work.dir}/output
  27. writer.builder.class=org.apache.gobblin.data.management.copy.writer.FileAwareInputStreamDataWriterBuilder
  28. data.publisher.type=org.apache.gobblin.data.management.copy.publisher.CopyDataPublisher
  29.  
  30. writer.destination.type=HDFS
  31. data.publisher.fs.uri=${fs.uri}
  32. data.publisher.metadata.output.dir=${work.dir}/metadata_out
  33.  
  34. # Source Configuration
  35. source.class=org.apache.gobblin.data.management.copy.CopySource
  36. gobblin.dataset.profile.class=org.apache.gobblin.data.management.copy.CopyableGlobDatasetFinder
  37. gobblin.dataset.pattern=pricing.products_*.csv
  38. # To copy from a particular directory, use e.g. gobblin.dataset.pattern=some_folder/*.csv
  39. gobblin.copy.recursive.update=true
  40. fork.record.queue.capacity=1
  41.  
  42.  
  43. # Source S3 Configuration
  44. source.filebased.fs.uri=s3a://<bucket_name>
  45. source.filebased.preserve.file.name=true
  46. source.filebased.encrypted.fs.s3a.access.key=<s3-access-key>
  47. source.filebased.encrypted.fs.s3a.secret.key=<s3-secret-key>
  48. fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem
  49. fs.s3a.buffer.dir=${work.dir}/buffer-dir
  50. fs.s3a.connection.ssl.enabled=false
  51.  
  52. # Converters
  53. converter.classes=org.apache.gobblin.converter.IdentityConverter
  54.  
  55. # ====================================================================
  56. # Distcp configurations (do not change)
  57. # ====================================================================
  58.  
  59. job.class=org.apache.gobblin.azkaban.AzkabanJobLauncher
  60.  
  61. extract.namespace=org.apache.gobblin.copy
  62. distcp.persist.dir=/tmp/distcp-persist-dir
  63. task.maxretries=0
  64. workunit.retry.enabled=false
  65.  
  66. # Job History server
  67. job.history.store.enabled=true
  68. job.history.store.url=jdbc:mysql://localhost/mopar_demo
  69. job.history.store.jdbc.driver=com.mysql.jdbc.Driver
  70. job.history.store.user=gobblin
  71. job.history.store.password=gobblin
  72.  
  73. # Other s3a settings
  74. # Multipart size in bytes (67108864 = 64 MB); must be greater than 5 MB, otherwise distcp won't work
  75. fs.s3a.multipart.size=67108864
Add Comment
Please, Sign In to add comment