Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /* here is my control class */
- package control;
import java.io.IOException;

import mapreduce.MapNumMapper;
import mapreduce.MapNumReducer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
- public class Main extends Configured implements Tool {
- public static void main(String[] args) throws Exception {
- int exitCode = ToolRunner.run(new Main(), args);
- System.exit(exitCode);
- }
- public int run(String[] arg0) throws Exception {
- boolean result = false;
- Job job = getJob();
- if (job.waitForCompletion(true) && (job.waitForCompletion(true))) {
- result = true;
- }
- return result ? 0 : 1;
- }
- protected Job getJob() throws IOException {
- Configuration configuration = getConf();
- configuration.setInt("mapred.tasktracker.map.tasks.maximum", 4);
- Job job = new Job(configuration, "mapnumtest");
- job.setJarByClass(getClass());
- FileInputFormat.addInputPath(job, new Path(configuration.get("mapnumtest.inputpath")));
- FileOutputFormat.setOutputPath(job, new Path(configuration.get("mapnumtest.outputpath")));
- job.setMapperClass(MapNumMapper.class);
- job.setReducerClass(MapNumReducer.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(Text.class);
- return job;
- }
- }
- /* here is my mapper class */
- package mapreduce;
- import java.io.IOException;
- import org.apache.hadoop.io.LongWritable;
- import org.apache.hadoop.io.Text;
- import org.apache.hadoop.mapreduce.Mapper;
- public class MapNumMapper extends Mapper<LongWritable, Text, Text, Text> {
- @Override
- public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
- String[] input = value.toString().split(" ");
- context.write(new Text(input[0]), new Text(input[1]));
- }
- }
- /* here is my reducer class */
- package mapreduce;
- import java.io.IOException;
- import org.apache.hadoop.io.IntWritable;
- import org.apache.hadoop.io.Text;
- import org.apache.hadoop.mapreduce.Reducer;
- public class MapNumReducer extends Reducer<Text, Text, Text, IntWritable> {
- public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
- int sum=0;
- for(Text current : values) {
- sum+=Integer.parseInt(current.toString());
- }
- context.write(key, new IntWritable(sum));
- }
- }
- /*
- Here is my run script :
- hadoop fs -rmr /data/mapnumtest/out
- hadoop jar mapnumtest-1.0-SNAPSHOT.jar control.Main -Dmapred.reduce.tasks=4 -conf mapnumtest.xml
- */
- /* Here is my mapnumtest.xml :
- <?xml version="1.0"?>
- <?xml-stylesheet type="text/xsl" href="file:///etc/hadoop/conf/configuration.xsl"?>
- <configuration>
- <property>
- <name>mapnumtest.inputpath</name>
- <value>/data/mapnumtest/in/</value>
- </property>
- <property>
- <name>mapnumtest.outputpath</name>
- <value>/data/mapnumtest/out/</value>
- </property>
- </configuration>
- */
- /* Here is my .pom (probably not relevant)
- <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <groupId>com.mycompany</groupId>
- <artifactId>mapnumtest</artifactId>
- <version>1.0-SNAPSHOT</version>
- <name>Mapper number test</name>
- <dependencies>
- <!--Hadoop dependencies -->
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <version>0.20.2</version>
- <scope>provided</scope>
- </dependency>
- </dependencies>
- </project>
- */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement