不知道是《Hadoop 实战》还是虾皮工作室给出的解法，把这个问题称为“单表联接”。个人觉得其实没有那么深奥，思路非常简单。map 的输入：如上所说，key 是 child 的名字，value 是 parent 的名字。map 的输出：key 是人名，value 是“标识+名字”，其中标识 '0' 后面接的是该 key 的 child 的名字，标识 '1' 后面接的是该 key 的 parent 的名字。比如第一条记录 Tom Lucy，会输出两条：key 是 "Tom"、value 是 "1Lucy"，以及 key 是 "Lucy"、value 是 "0Tom"。reducer 的输入：对于每一个人名（key），它的一系列 values 是他的 children 或 parents 的名字（通过标识区分），这样就得到了该 key 的 children 集合和 parents 集合。然后求这两个集合的乘积（即笛卡尔积）即可：乘积中的每一对（child, parent）就是一组“孙辈—祖辈”关系。

具体代码如下：

import java.util.*;
import java.io.*;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;

/**
 * MapReduce job that self-joins a single "child parent" table to derive
 * (grandchild, grandparent) pairs.
 *
 * <p>Each input line holds a child name and a parent name separated by a single
 * space. The mapper emits every record twice, once keyed by each name, tagging
 * the value so the reducer can tell whether it names a child or a parent of the
 * key. The reducer then writes the cross product of the key's children and the
 * key's parents.
 */
public class oneTableConnect {

    /** Prefix of a value that names a child of the key. */
    private static final char CHILD_TAG = '0';

    /** Prefix of a value that names a parent of the key. */
    private static final char PARENT_TAG = '1';

    /**
     * Emits each (child, parent) record under both names, tagging the value with
     * the relationship it has to the output key.
     */
    public static class tableMapper extends Mapper<Text, Text, Text, Text> {

        /**
         * @param key     the child's name
         * @param value   the parent's name
         * @param context sink for the tagged records
         */
        @Override
        public void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            // A record with a missing name would yield a tag-only value and,
            // after the join, rows with an empty name; skip it.
            if (key.getLength() == 0 || value.getLength() == 0) {
                return;
            }
            // Keyed by the child: the value is one of the key's parents.
            context.write(key, new Text(PARENT_TAG + value.toString()));
            // Keyed by the parent: the value is one of the key's children.
            context.write(value, new Text(CHILD_TAG + key.toString()));
        }
    }

    /**
     * For one person, splits the tagged values into that person's children and
     * parents and writes every (child, parent) combination.
     */
    public static class tableReducer extends Reducer<Text, Text, Text, Text> {

        /**
         * @param key     the person both relationships are joined through
         * @param values  tagged names: {@code '0' + child} or {@code '1' + parent}
         * @param context sink for the (child-of-key, parent-of-key) pairs
         */
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Growable lists: a person may have any number of children or parents
            // in the input, so fixed-size arrays would overflow.
            List<String> children = new ArrayList<String>();
            List<String> parents = new ArrayList<String>();

            for (Text val : values) {
                String tagged = val.toString();
                String name = tagged.substring(1);
                if (tagged.charAt(0) == CHILD_TAG) {
                    children.add(name);
                } else {
                    // Any other tag is treated as a parent.
                    parents.add(name);
                }
            }

            // Cross product; writes nothing when either side is empty.
            for (String child : children) {
                Text grandchild = new Text(child);
                for (String parent : parents) {
                    context.write(grandchild, new Text(parent));
                }
            }
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: oneTableConnect <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        // Split each input line into key and value at the first space.
        conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", " ");

        // NOTE(review): this constructor is presumably deprecated on newer Hadoop
        // releases in favour of Job.getInstance - confirm against the target version.
        Job job = new Job(conf, "tableConnect");
        job.setJarByClass(oneTableConnect.class);

        job.setMapperClass(tableMapper.class);
        job.setReducerClass(tableReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(KeyValueTextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

更多Hadoop相关信息见Hadoop 专题页面 http://www.linuxidc.com/topicnews.aspx?tid=13

知识点

相关文章

最近更新

Hadoop HelloWorld Examples - 单表连接

相关问答

Hadoop示例排序验证(Hadoop Examples Sort Validation)[2022-05-31]

hadoop示例没有在亚马逊ec2上运行(hadoop examples not running on amazon ec2)[2022-02-23]

Hadoop在我的JAR上提供SCDynamicStore，但在hadoop-examples.jar上没有(Hadoop giving SCDynamicStore on my JAR but not on hadoop-examples.jar)[2023-10-12]

无法运行hadoop wordcount示例？(unable to run hadoop wordcount example?)[2023-03-16]

Hadoop性能(Hadoop performance)[2022-07-13]

Hadoop for MySQL用例(Hadoop for MySQL use cases)[2022-12-14]

在单节点群集上运行Hadoop示例时出错(Error in running Hadoop example on single node cluster)[2022-02-26]

执行hadoop-mapreduce-examples-2.2.0.jar时出错(Error while executing hadoop-mapreduce-examples-2.2.0.jar)[2023-09-29]

MAC：Mono Helloworld的例子(MAC: Mono Helloworld example)[2022-05-21]

Jasmin HelloWorld示例失败(Jasmin HelloWorld Example Failing)[2022-02-04]