commit
This commit is contained in:
@@ -0,0 +1,45 @@
|
||||
package com.aisi.wordcount;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class WordCountDriver {
|
||||
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
|
||||
// 创建配置文件对象
|
||||
Configuration conf = new Configuration();
|
||||
// conf.set("fs.defaultFS", "hdfs://localhost:9000/");
|
||||
// 创建任务对象
|
||||
Job job = Job.getInstance(conf, "wordcount");
|
||||
// 设置入口类
|
||||
job.setJarByClass(WordCountDriver.class);
|
||||
// 设置mapper类
|
||||
job.setMapperClass(WordCountMapper.class);
|
||||
// 设置reducer类
|
||||
job.setReducerClass(WordCountReducer.class);
|
||||
// 设置reducer输出类型
|
||||
job.setOutputKeyClass(Text.class);
|
||||
job.setOutputValueClass(IntWritable.class);
|
||||
// 设置mapper输出类型
|
||||
job.setMapOutputKeyClass(Text.class);
|
||||
job.setMapOutputValueClass(IntWritable.class);
|
||||
// 设置mapreduce要处理的文件路径(hdfs路径)
|
||||
FileInputFormat.addInputPath(job, new Path(args[0]));//"hdfs://ns1/word/words.txt"
|
||||
// 设置mapreduce处理完成保存的文件路径
|
||||
FileOutputFormat.setOutputPath(job, new Path(args[1])); //"hdfs://ns1/word/result"
|
||||
boolean completion = job.waitForCompletion(true);
|
||||
// 判断是否运行成功
|
||||
if (completion) {
|
||||
System.exit(0);
|
||||
}else {
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.aisi.wordcount;
|
||||
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.LongWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
|
||||
@Override
|
||||
/**
|
||||
* key : 行首偏移量
|
||||
* value:一行的数据
|
||||
*/
|
||||
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
|
||||
System.out.println("map invoke...");
|
||||
String[] words = value.toString().split(" ");
|
||||
for (String word : words) {
|
||||
context.write(new Text(word), new IntWritable(1));
|
||||
// (hello,1) (pooo,1) (shenjianz,1)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,25 @@
|
||||
package com.aisi.wordcount;
|
||||
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Reducer;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
|
||||
|
||||
// (hello,1) (pooo,1) (shenjianz,1) (hello,1)
|
||||
// (hello,[1,1])
|
||||
@Override
|
||||
protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
|
||||
System.out.println("reduce invoke...");
|
||||
// 记录每个单词的总数
|
||||
int sum = 0;
|
||||
for (IntWritable value : values) {
|
||||
sum += value.get();
|
||||
}
|
||||
context.write(key, new IntWritable(sum));
|
||||
// (hello,2)
|
||||
}
|
||||
}
|
||||
36
MapReduceTest/src/main/resources/core-site.xml
Normal file
36
MapReduceTest/src/main/resources/core-site.xml
Normal file
@@ -0,0 +1,36 @@
|
||||
<configuration>
|
||||
<property>
|
||||
<name>fs.defaultFS</name>
|
||||
<value>hdfs://ns1</value>
|
||||
<description>默认文件服务的协议和NS逻辑名称,和hdfs-site.xml里的对应此配置替代了1.0里的fs.default.name</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.tmp.dir</name>
|
||||
<value>/data/tmp</value>
|
||||
<description>数据存储目录</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.proxyuser.root.groups</name>
|
||||
<value>hadoop</value>
|
||||
<description>
|
||||
hdfs dfsadmin -refreshSuperUserGroupsConfiguration,
|
||||
yarn rmadmin -refreshSuperUserGroupsConfiguration
|
||||
使用这两个命令不用重启就能刷新
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.proxyuser.root.hosts</name>
|
||||
<value>localhost</value>
|
||||
<description>本地代理</description>
|
||||
</property>
|
||||
|
||||
<!-- zkfc的配置 -->
|
||||
<property>
|
||||
<name>ha.zookeeper.quorum</name>
|
||||
<value>nn1:2181,nn2:2181,nn3:2181</value>
|
||||
<description>HA使用的zookeeper地址</description>
|
||||
</property>
|
||||
</configuration>
|
||||
140
MapReduceTest/src/main/resources/hdfs-site.xml
Normal file
140
MapReduceTest/src/main/resources/hdfs-site.xml
Normal file
@@ -0,0 +1,140 @@
|
||||
<configuration>
|
||||
<property>
|
||||
<name>dfs.namenode.name.dir</name>
|
||||
<value>/data/namenode</value>
|
||||
<description>namenode本地文件存放地址</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.nameservices</name>
|
||||
<value>ns1</value>
|
||||
<description>提供服务的NS逻辑名称,与core-site.xml里的对应</description>
|
||||
</property>
|
||||
|
||||
<!-- namenode的配置 -->
|
||||
<!-- 主要的 -->
|
||||
<property>
|
||||
<name>dfs.ha.namenodes.ns1</name>
|
||||
<value>nn1,nn2,nn3</value>
|
||||
<description>列出该逻辑名称下的NameNode逻辑名称</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.rpc-address.ns1.nn1</name>
|
||||
<value>nn1:9000</value>
|
||||
<description>指定NameNode的RPC位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.http-address.ns1.nn1</name>
|
||||
<value>nn1:50070</value>
|
||||
<description>指定NameNode的Web Server位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.rpc-address.ns1.nn2</name>
|
||||
<value>nn2:9000</value>
|
||||
<description>指定NameNode的RPC位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.http-address.ns1.nn2</name>
|
||||
<value>nn2:50070</value>
|
||||
<description>指定NameNode的Web Server位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.rpc-address.ns1.nn3</name>
|
||||
<value>nn3:9000</value>
|
||||
<description>指定NameNode的RPC位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.http-address.ns1.nn3</name>
|
||||
<value>nn3:50070</value>
|
||||
<description>指定NameNode的Web Server位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.handler.count</name>
|
||||
<value>77</value>
|
||||
<description>namenode的工作线程数</description>
|
||||
</property>
|
||||
|
||||
<!-- journaldata配置,使得其他两个namenode同步第一个namenode数据 -->
|
||||
<property>
|
||||
<name>dfs.namenode.shared.edits.dir</name>
|
||||
<value>qjournal://nn1:8485;nn2:8485;nn3:8485/ns1</value>
|
||||
<description>指定用于HA存放edits的共享存储,通常是namenode的所在机器</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.journalnode.edits.dir</name>
|
||||
<value>/data/journaldata/</value>
|
||||
<description>journaldata服务存放文件的地址</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.connect.max.retries</name>
|
||||
<value>10</value>
|
||||
<description>namenode和journalnode的链接重试次数10次</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.connect.retry.interval</name>
|
||||
<value>10000</value>
|
||||
<description>重试的间隔时间10s</description>
|
||||
</property>
|
||||
|
||||
<!-- zkfc的配置 -->
|
||||
<property>
|
||||
<name>dfs.ha.fencing.methods</name>
|
||||
<value>sshfence</value>
|
||||
<description>指定HA做隔离的方法,缺省是ssh,可设为shell,稍后详述</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.ha.fencing.ssh.private-key-files</name>
|
||||
<value>/home/hadoop/.ssh/id_rsa</value>
|
||||
<description>杀死命令脚本的免密配置秘钥</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.client.failover.proxy.provider.ns1</name>
|
||||
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
|
||||
<description>指定客户端用于HA切换的代理类,不同的NS可以用不同的代理类以上示例为Hadoop 2.0自带的缺省代理类</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.client.failover.proxy.provider.auto-ha</name>
|
||||
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.ha.automatic-failover.enabled</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<!-- datanode配置 -->
|
||||
<property>
|
||||
<name>dfs.datanode.data.dir</name>
|
||||
<value>/data/datanode</value>
|
||||
<description>datanode本地文件存放地址</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.replication</name>
|
||||
<value>3</value>
|
||||
<description>文件复本数</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
|
||||
<value>false</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.client.use.datanode.hostname</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.datanode.use.datanode.hostname</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</configuration>
|
||||
Reference in New Issue
Block a user