Advertisement
sangfroid

Mapper number test

Mar 13th, 2013
131
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 4.01 KB | None | 0 0
  1. /* here is my control class */
  2.  
  3. package control;
  4.  
  5. import java.io.IOException;
  6.  
  7. import mapreduce.MapNumMapper;
  8. import mapreduce.MapNumReducer;
  9.  
  10. import org.apache.hadoop.conf.Configuration;
  11. import org.apache.hadoop.conf.Configured;
  12. import org.apache.hadoop.fs.Path;
  13. import org.apache.hadoop.io.Text;
  14. import org.apache.hadoop.mapreduce.Job;
  15. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  16. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  17. import org.apache.hadoop.util.Tool;
  18. import org.apache.hadoop.util.ToolRunner;
  19.  
  20. public class Main extends Configured implements Tool {
  21.  
  22.     public static void main(String[] args) throws Exception {
  23.         int exitCode = ToolRunner.run(new Main(), args);
  24.         System.exit(exitCode);
  25.     }  
  26.    
  27.     public int run(String[] arg0) throws Exception {
  28.        
  29.         boolean result = false;
  30.  
  31.         Job job = getJob();
  32.  
  33.         if (job.waitForCompletion(true) && (job.waitForCompletion(true))) {
  34.             result = true;
  35.         }
  36.  
  37.         return result ? 0 : 1;
  38.     }
  39.    
  40.     protected Job getJob() throws IOException {
  41.        
  42.         Configuration configuration = getConf();
  43.         configuration.setInt("mapred.tasktracker.map.tasks.maximum", 4);       
  44.        
  45.         Job job = new Job(configuration, "mapnumtest");
  46.         job.setJarByClass(getClass());
  47.        
  48.         FileInputFormat.addInputPath(job, new Path(configuration.get("mapnumtest.inputpath")));
  49.         FileOutputFormat.setOutputPath(job, new Path(configuration.get("mapnumtest.outputpath")));
  50.  
  51.         job.setMapperClass(MapNumMapper.class);
  52.         job.setReducerClass(MapNumReducer.class);
  53.  
  54.         job.setOutputKeyClass(Text.class);
  55.         job.setOutputValueClass(Text.class);
  56.  
  57.         return job;
  58.     }
  59. }
  60.  
  61. /* here is my mapper class */
  62.  
  63. package mapreduce;
  64.  
  65. import java.io.IOException;
  66.  
  67. import org.apache.hadoop.io.LongWritable;
  68. import org.apache.hadoop.io.Text;
  69. import org.apache.hadoop.mapreduce.Mapper;
  70.  
  71. public class MapNumMapper extends Mapper<LongWritable, Text, Text, Text> {
  72.  
  73.     @Override
  74.     public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {      
  75.         String[] input = value.toString().split(" ");
  76.         context.write(new Text(input[0]), new Text(input[1]));     
  77.     }  
  78. }
  79.  
  80.  
  81. /* here is my reducer class */
  82.  
  83. package mapreduce;
  84.  
  85. import java.io.IOException;
  86.  
  87. import org.apache.hadoop.io.IntWritable;
  88. import org.apache.hadoop.io.Text;
  89. import org.apache.hadoop.mapreduce.Reducer;
  90.  
  91. public class MapNumReducer extends Reducer<Text, Text, Text, IntWritable> {
  92.  
  93.     public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
  94.        
  95.         int sum=0;
  96.        
  97.         for(Text current : values) {           
  98.             sum+=Integer.parseInt(current.toString());         
  99.         }
  100.        
  101.         context.write(key, new IntWritable(sum));      
  102.     }  
  103. }
  104.  
  105.  
  106. /*
  107.     Here is my run script :
  108.    
  109.     hadoop fs -rmr /data/mapnumtest/out
  110.     hadoop jar mapnumtest-1.0-SNAPSHOT.jar control.Main -Dmapred.reduce.tasks=4 -conf mapnumtest.xml
  111. */
  112.  
  113. /*  Here is my mapnumtest.xml :
  114.  
  115. <?xml version="1.0"?>
  116. <?xml-stylesheet type="text/xsl" href="file:///etc/hadoop/conf/configuration.xsl"?>
  117. <configuration>
  118.     <property>
  119.         <name>mapnumtest.inputpath</name>
  120.         <value>/data/mapnumtest/in/</value>
  121.     </property>
  122.     <property>
  123.         <name>mapnumtest.outputpath</name>
  124.         <value>/data/mapnumtest/out/</value>
  125.     </property>
  126. </configuration>
  127. */
  128.  
  129. /*  Here is my .pom (probably not relevant)
  130.  
  131. <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  132.   <modelVersion>4.0.0</modelVersion>
  133.   <groupId>com.mycompany</groupId>
  134.   <artifactId>mapnumtest</artifactId>
  135.   <version>1.0-SNAPSHOT</version>
  136.   <name>Mapper number test</name>
  137.  
  138.  
  139.   <dependencies>
  140.         <!--Hadoop dependencies -->
  141.       <dependency>
  142.           <groupId>org.apache.hadoop</groupId>
  143.           <artifactId>hadoop-core</artifactId>
  144.           <version>0.20.2</version>
  145.           <scope>provided</scope>
  146.       </dependency>
  147.   </dependencies>
  148.  
  149.    
  150. </project>
  151.  
  152. */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement