Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package peoplemail;
- import java.io.IOException;
- import java.util.Iterator;
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.conf.Configured;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.io.IntWritable;
- import org.apache.hadoop.io.LongWritable;
- import org.apache.hadoop.io.Text;
- import org.apache.hadoop.mapred.FileInputFormat;
- import org.apache.hadoop.mapred.FileOutputFormat;
- import org.apache.hadoop.mapred.JobClient;
- import org.apache.hadoop.mapred.JobConf;
- import org.apache.hadoop.mapred.MapReduceBase;
- import org.apache.hadoop.mapred.Mapper;
- import org.apache.hadoop.mapred.OutputCollector;
- import org.apache.hadoop.mapred.Reducer;
- import org.apache.hadoop.mapred.Reporter;
- import org.apache.hadoop.util.Tool;
- import org.apache.hadoop.util.ToolRunner;
- public class DomainGenderCount extends Configured implements Tool {
- public static class MapClass
- extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text>{
- @Override
- public void map(LongWritable key,Text value,
- OutputCollector<Text,Text> output, Reporter r)throws IOException{
- String fields[] = value.toString().split(",");
- String gender = fields[5];
- String domain = fields[3].split("@")[1];
- output.collect(new Text(domain), new Text(gender));
- }
- }
- public static class ReduceClass
- extends MapReduceBase implements Reducer<Text, Text, Text, IntWritable>{
- @Override
- public void reduce(Text key, Iterator<Text> value,
- OutputCollector<Text,IntWritable> output, Reporter r)throws IOException{
- int count=0;
- while(value.hasNext()){
- value.next();
- count++;
- }
- output.collect(key, new IntWritable(count));
- }
- }
- public static void main(String[] args) throws Exception{
- System.exit(ToolRunner.run(new Configuration(), new DomainGenderCount(), args));
- }
- public int run(String[] paths) throws Exception {
- JobConf jobConf = new JobConf(getConf(), DomainGenderCount.class);
- jobConf.setMapOutputKeyClass(Text.class);
- jobConf.setMapOutputValueClass(Text.class);
- jobConf.setJobName("Number of Users in each domain:");
- jobConf.setOutputKeyClass(Text.class);
- jobConf.setOutputValueClass(IntWritable.class);
- jobConf.setMapperClass(MapClass.class);
- jobConf.setReducerClass(ReduceClass.class);
- jobConf.setCombinerClass(ReduceClass.class);
- FileInputFormat.setInputPaths(jobConf, new Path(paths[0]));
- FileOutputFormat.setOutputPath(jobConf, new Path(paths[1]));
- JobClient.runJob(jobConf);
- return 0;
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement