- 主页 > 生活百科 > Hadoop数据去重
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input value (one line of text) is emitted unchanged as the output key, paired with
 * a {@link NullWritable} value. The incoming key is not used.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    // Example input pairs: <0,2018-3-3 c> <11,2018-3-4 d>

    /**
     * Emits the input line as the output key with an empty value.
     *
     * @param key     input key; ignored by this mapper
     * @param value   the input line, written out as the output key
     * @param context job context used to emit the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Write the incoming Text directly: no shared static holder is needed, and the
        // previous one was only ever reassigned to alias this same object.
        context.write(value, NullWritable.get());
    }

    // Example output pairs: <2018-3-3 c,null> <2018-3-4 d,null>
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
星了个星座|贪图享受,却十分有福气的4个生肖,好吃懒做
-
行业互联网|2020线上智博会重庆开幕 山东19家企业组团参展
-
糖果妈妈|胎儿能尝出味道吗?医生:不仅能,还吃的很香,孕妈吃饭
-
-
|一女子,对一位忙着钓鱼的男子贴身热舞,男子却无动于衷
-
育儿房间|李天一人生重头再来,被祸害的女孩却还在接受治疗,7年过去了
-
-
-
-
电动汽车|云度电动车发生自燃 官方回应称此前发生过碰撞事故
-
上海市闵行区人民政府网站|区医保中心进行火灾消防实景演练
-
围巾|冬天羽绒服+围巾,这么搭原来可以这么好看,时髦保暖又显瘦!
-
易车▲2020款福特途睿欧正式上市,售价18.69万—23.39万元
-
国际足球|国米高兴!头号杀星因梅西转会改变心意 计划长留蓝黑军团
-
-
「英超」24年前,曼联逆转12分夺得英超,当年到底发生了什么?
-
米家互联网迷你波轮洗衣机Pro 3kg 1099元开售
-
-
-
明星指南针当镜头拉近双腿时,38岁身材很真实,ella穿短裤给儿子剪发