2018年安徽省大数据比赛MapReduce题目解答第二题

mac2022-06-30  40

num.txt在此博客中:https://blog.csdn.net/qq_41479464/article/details/101922339

使用MR的处理方式,去除num.txt中以2开头的数字,并且统计每个数字出现的次数,将结果保存为num2.txt(10分)

 主函数:

import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class MapReduceNum { public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf,MapReduceNum.class.getSimpleName()); job.setJarByClass(MapReduceNum.class); FileInputFormat.addInputPath(job, new Path(args[0])); job.setMapperClass(MyMap.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(MyRed.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); } }

Map函数:

import java.io.IOException; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; public class MyMap extends Mapper<LongWritable, Text, Text, IntWritable>{ @Override protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { String line[] = value.toString().split(","); for (String string : line) { if(!string.startsWith("2")); context.write(new Text(string),new IntWritable(1)); } } }

Reduce函数:

import java.io.IOException; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; public class MyRed extends Reducer<Text, IntWritable, Text, LongWritable> { @Override protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException { long count = 0l; for (IntWritable val : values) { count = count +val.get(); } //将结果输出,输出到hdfs上 context.write(new Text(key), new LongWritable(count)); } }

 

最新回复(0)