Advertisement
Not a member of Pastebin yet?
Sign up — it unlocks many cool features!
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.ToolRunner;
- public class CopyDirToSequenceFile extends InputOutputDriver {
- public static final Log LOG = LogFactory.getLog(CopyDirToSequenceFile.class);
- public static void main(String[] args) throws Exception {
- int rc = ToolRunner.run(new CopyDirToSequenceFile(), args);
- System.exit(rc);
- }
- @Override
- public int process(String[] args) throws Exception {
- Configuration conf = getConf();
- FileSystem fs = FileSystem.get(conf);
- SequenceFile.Writer writer = null;
- try {
- Text key = null;
- BytesWritable value = null;
- File inputDir = new File(input);
- File[] inputFiles = inputDir.listFiles();
- if(inputFiles != null && inputFiles.length > 0) {
- writer = SequenceFile.createWriter(fs, conf,
- new Path(output), Text.class, BytesWritable.class);
- ByteArrayOutputStream out = new ByteArrayOutputStream(4096);
- for (int i = 0; i < inputFiles.length; i++) {
- out.reset();
- FileInputStream in = new FileInputStream(inputFiles[i]);
- try {
- IOUtils.copyBytes(in, out, conf, false);
- } finally {
- IOUtils.cleanup(LOG, in, out);
- }
- String fileName = inputFiles[i].getName();
- key = new Text(fileName);
- value = new BytesWritable(out.toByteArray());
- writer.append(key, value);
- }
- }
- } finally {
- IOUtils.cleanup(LOG, writer);
- }
- return 0;
- }
- }
- import java.util.ArrayList;
- import java.util.List;
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.conf.Configured;
- import org.apache.hadoop.fs.FileSystem;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.util.Tool;
- public abstract class InputOutputDriver extends Configured implements Tool {
- protected String input;
- protected String output;
- protected abstract int process(String[] args) throws Exception;
- public int run(String[] args) throws Exception {
- List<String> remaining = new ArrayList<String>();
- boolean deleteOutput = false;
- for (int i = 0; i < args.length; i++) {
- boolean last = i + 1 == args.length;
- String compareArg = args[i].trim().toLowerCase();
- if(compareArg.equals("-input")) {
- if(last) {
- throw new IllegalArgumentException("Argument " + args[i] + " requires value");
- }
- input = args[++i];
- } else if (compareArg.equals("-output")) {
- if(last) {
- throw new IllegalArgumentException("Argument " + args[i] + " requires value");
- }
- output = args[++i];
- } else if (compareArg.equals("-delete")) {
- deleteOutput = true;
- } else {
- remaining.add(args[i]);
- }
- }
- if(input == null) {
- throw new IllegalArgumentException("-input required");
- }
- if(output == null) {
- throw new IllegalArgumentException("-output required");
- }
- if(deleteOutput) {
- Configuration conf = getConf();
- FileSystem fs = FileSystem.get(conf);
- Path outputPath = new Path(output);
- if(fs.exists(outputPath)) {
- fs.delete(outputPath, true);
- }
- }
- return process(remaining.toArray(new String[]{}));
- }
- public String getInput() {
- return input;
- }
- public void setInput(String input) {
- this.input = input;
- }
- public String getOutput() {
- return output;
- }
- public void setOutput(String output) {
- this.output = output;
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement