当前位置：首页 >

Hadoop MapReduce程序的模板框架

发布时间：2025/3/21 30 豆豆

生活随笔收集整理的这篇文章主要介绍了 Hadoop MapReduce 程序的模板框架，小编觉得挺不错的，现在分享给大家，供大家参考。

这里放了两个Hadoop MapReduce程序的模板框架，包括一些基本的包import语句、Mapper基类、Reducer基类、map()方法、reduce()方法，后面还有一些作业job的驱动程序，具体说是配置作业名、配置Mapper类、Reducer类、Combiner类的类名等等。

写MR程序时，程序员需要实现相应的map()函数和reduce()函数。

一、

/** MapReduce程序模板，一些必要的语句* 写MR程序时，复制该文件，修改类名，实现相应的map、reduce函数等 */import java.io.IOException; import java.util.StringTokenizer; // 分隔字符串 import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; // 相当于int类型 import org.apache.hadoop.io.LongWritable; // 相当于long类型 import org.apache.hadoop.io.Text; // 相当于String类型 import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class HadoopMRTemplate extends Configured implements Tool{public static class MapTemplate extends Mapper<LongWritable, Text, Text, IntWritable> { // TODO: some preprocessing operations before map() functionpublic void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {// map函数中参数key是偏移量，value是每一行的内容// TODO: implements map() function} // map( )} // class MapTemplatepublic static class ReduceTemplate extends Reducer<Text, IntWritable, Text, IntWritable> { //实现reduce函数// TODO: some preprocessing operations before reduce() functionpublic void reduce(Text key, Iterable<IntWritable> values, Context context)throws IOException, InterruptedException {// TODO: implements reduce() function} // reduce( ) } // class ReduceTemplatepublic int run(String[] args) throws Exception {Job job = new Job(getConf()); job.setJobName("HadoopMRTemplate"); // 作业名job.setOutputKeyClass(Text.class); // 类名.class生成class对象job.setOutputValueClass(IntWritable.class);job.setMapperClass(MapTemplate.class); job.setReducerClass(ReduceTemplate.class); job.setInputFormatClass(TextInputFormat.class); 
job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.setInputPaths(job, new Path(args[0])); // 作业的输入路径FileOutputFormat.setOutputPath(job, new Path(args[1])); // 作业的输出路径return (job.waitForCompletion(true)? 0 : 1); } //run()public static void main(String[] args) throws Exception { // 调用ToolRunner.run( )int exitCode = ToolRunner.run(new HadoopMRTemplate(), args); System.exit(exitCode); } //main() } // class HadoopMRTemplate

二、

/** MapReduce程序模板，一些必要的语句* 写MR程序时，复制该文件，修改类名，实现相应的map、reduce函数等 */import java.io.IOException; import java.util.StringTokenizer; // 分隔字符串import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; // 相当于int类型 import org.apache.hadoop.io.Text; // 相当于String类型 import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser;public class HadoopMRTemplate2 {public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable>{// TODO: some preprocessing operations before map() functionpublic void map(Object key, Text value, Context context) throws IOException, InterruptedException {// map函数中参数key是偏移量，value是每一行的内容// TODO: implements map() function} //map()} // class TokenizerMapperpublic static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> {// TODO: some preprocessing operations before map() functionpublic void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {// TODO: implements reduce() function} // reduce()} // class IntSumReducerpublic static void main(String[] args) throws Exception {Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();if (otherArgs.length != 2) {System.err.println("Usage: wordcount <in> <out>");System.exit(2);}Job job = new Job(conf, "HadoopMRTemplate2"); // 作业名job.setJarByClass(HadoopMRTemplate2.class);job.setMapperClass(TokenizerMapper.class);job.setCombinerClass(IntSumReducer.class);job.setReducerClass(IntSumReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);FileInputFormat.addInputPath(job, new Path(otherArgs[0])); // 
作业的输入路径FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); // 作业的输出路径System.exit(job.waitForCompletion(true) ? 0 : 1);} //main() } // class HadoopMRTempalte2

总结

以上是生活随笔为你收集整理的Hadoop MapReduce程序的模板框架的全部内容，希望文章能够帮你解决所遇到的问题。

如果觉得生活随笔网站内容还不错，欢迎将生活随笔推荐给好友。

上一篇： Python变量和对象类型速记手册
下一篇：怎样在 Markdown 中使程序代码带