Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Pull the "orders" table from MySQL into HDFS at /user/cloudera/problem7/prework
# as Avro data files (single mapper, so one output file).
sqoop import \
  --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
  --username retail_dba \
  --password cloudera \
  --table orders \
  -m 1 \
  --target-dir /user/cloudera/problem7/prework \
  --as-avrodatafile
# Copy the imported Avro file(s) from HDFS into a local working directory.
mkdir -p flume-avro            # -p: don't fail if the directory already exists
cd flume-avro || exit 1        # guard: never run the next commands in the wrong dir
hadoop fs -get /user/cloudera/problem7/prework/* .
# Open an editor to create the Flume agent configuration (contents below).
gedit f.config
# f.config — Flume agent "step1": Avro RPC source -> JDBC channel -> HDFS sink.
# Save this file in the problem7 working directory (flume-avro/f.config).

# Name the source, channel and sink
step1.sources = avro-source
step1.channels = jdbc-channel
step1.sinks = file-sink

# Source: listen for Avro RPC events on localhost:11112
step1.sources.avro-source.type = avro
step1.sources.avro-source.port = 11112
step1.sources.avro-source.bind = localhost

# Sink: write events to HDFS as snappy-compressed Avro container files
step1.sinks.file-sink.type = hdfs
step1.sinks.file-sink.hdfs.path = /user/cloudera/problem7/sink
step1.sinks.file-sink.hdfs.fileType = DataStream
step1.sinks.file-sink.hdfs.fileSuffix = .avro
step1.sinks.file-sink.serializer = avro_event
step1.sinks.file-sink.serializer.compressionCodec = snappy

# Channel: JDBC (durable). Switch to "memory" if the JDBC channel does not work.
step1.channels.jdbc-channel.type = jdbc

# Bind the source and sink to the channel
step1.sources.avro-source.channels = jdbc-channel
step1.sinks.file-sink.channel = jdbc-channel
# Start the Flume agent defined in f.config (runs in the foreground).
flume-ng agent --name step1 --conf . --conf-file f.config

# In a second terminal, replay an Avro file into the agent's Avro source.
# Set AVRO_FILE to one of the files fetched earlier. NOTE: the original
# placeholder "<<...>>" would be parsed by the shell as a here-document —
# use a plain quoted path instead.
flume-ng avro-client -H localhost -p 11112 -F "$AVRO_FILE"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement