Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package org.myorg;
- import java.io.IOException;
- import java.util.*;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.conf.*;
- import org.apache.hadoop.io.*;
- import org.apache.hadoop.mapred.*;
- import org.apache.hadoop.util.*;
- public class FilterData {
- public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {
- private Text ipadr = new Text();
- private Text monthFile = new Text();
- public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
- String line = value.toString();
- String[] temp = line.split("\"", 0);
- if (temp.length == 3){
- String[] temp2 = temp[1].split(" ", 0);
- String[] temp3 = temp[0].split(" ", 0);
- String[] temp4 = temp3[3].split("/", 0);
- monthFile.set(temp4[1] + temp2[1]);
- ipadr.set(temp3[0]);
- output.collect(monthFile, ipadr);
- }
- }
- }
- public static class Reduce extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
- Text result = new Text();
- public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
- String temp = "[";
- while (values.hasNext()) {
- if (temp.equals("["))
- temp = temp + values.next().toString();
- else
- temp = temp + " " + values.next().toString();
- }
- temp = temp + "]";
- result.set(temp);
- output.collect(key, result);
- }
- }
- public static void main(String[] args) throws Exception {
- JobConf conf = new JobConf(FilterData.class);
- conf.setJobName("filterdata");
- conf.setOutputKeyClass(Text.class);
- conf.setOutputValueClass(Text.class);
- conf.setMapperClass(Map.class);
- conf.setCombinerClass(Reduce.class);
- conf.setReducerClass(Reduce.class);
- conf.setInputFormat(TextInputFormat.class);
- conf.setOutputFormat(TextOutputFormat.class);
- FileInputFormat.setInputPaths(conf, new Path(args[0]));
- FileOutputFormat.setOutputPath(conf, new Path(args[1]));
- JobClient.runJob(conf);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement