This commit is contained in:
2024-10-11 11:12:32 +08:00
commit 8b4a30b940
30 changed files with 1005115 additions and 0 deletions

View File

@@ -0,0 +1,39 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<parent>
<artifactId>HaiNiuProjects</artifactId>
<groupId>com.aisi</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>MapReduceTest</artifactId>
<build>
<plugins>
<plugin>
<artifactId>maven-shade-plugin</artifactId>
<version>3.2.4</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<mainClass>com.aisi.wordcount.WordCountDriver</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
</project>

95
MapReduceTest/pom.xml Normal file
View File

@@ -0,0 +1,95 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.aisi</groupId>
<artifactId>HaiNiuProjects</artifactId>
<version>1.0-SNAPSHOT</version>
</parent>
<artifactId>MapReduceTest</artifactId>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.1.4</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.32</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.2.4</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<createDependencyReducedPom>false</createDependencyReducedPom>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<shadedArtifactAttached>true</shadedArtifactAttached>
<shadedArtifactPrimary>true</shadedArtifactPrimary>
<outputFile>${project.build.directory}/${project.build.finalName}-shaded.jar</outputFile>
<relocators>
<relocator>
<pattern>org.apache.commons</pattern>
<shadedPattern>shade.org.apache.commons</shadedPattern>
</relocator>
</relocators>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.aisi.wordcount.WordCountDriver</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,45 @@
package com.aisi.wordcount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class WordCountDriver {
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
// 创建配置文件对象
Configuration conf = new Configuration();
// conf.set("fs.defaultFS", "hdfs://localhost:9000/");
// 创建任务对象
Job job = Job.getInstance(conf, "wordcount");
// 设置入口类
job.setJarByClass(WordCountDriver.class);
// 设置mapper类
job.setMapperClass(WordCountMapper.class);
// 设置reducer类
job.setReducerClass(WordCountReducer.class);
// 设置reducer输出类型
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// 设置mapper输出类型
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
// 设置mapreduce要处理的文件路径(hdfs路径)
FileInputFormat.addInputPath(job, new Path(args[0]));//"hdfs://ns1/word/words.txt"
// 设置mapreduce处理完成保存的文件路径
FileOutputFormat.setOutputPath(job, new Path(args[1])); //"hdfs://ns1/word/result"
boolean completion = job.waitForCompletion(true);
// 判断是否运行成功
if (completion) {
System.exit(0);
}else {
System.exit(1);
}
}
}

View File

@@ -0,0 +1,27 @@
package com.aisi.wordcount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
@Override
/**
* key : 行首偏移量
* value一行的数据
*/
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
System.out.println("map invoke...");
String[] words = value.toString().split(" ");
for (String word : words) {
context.write(new Text(word), new IntWritable(1));
// (hello,1 pooo1 shenjianz1
}
}
}

View File

@@ -0,0 +1,25 @@
package com.aisi.wordcount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
// (hello,1 pooo1 shenjianz1 (hello,1)
// (hello,[1,1])
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
System.out.println("reduce invoke...");
// 记录每个单词的总数
int sum = 0;
for (IntWritable value : values) {
sum += value.get();
}
context.write(key, new IntWritable(sum));
// (hello,2)
}
}

View File

@@ -0,0 +1,36 @@
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns1</value>
<description>默认文件服务的协议和NS逻辑名称和hdfs-site.xml里的对应此配置替代了1.0里的fs.default.name</description>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/data/tmp</value>
<description>数据存储目录</description>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>hadoop</value>
<description>
hdfs dfsadmin refreshSuperUserGroupsConfiguration,
yarn rmadmin refreshSuperUserGroupsConfiguration
使用这两个命令不用重启就能刷新
</description>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>localhost</value>
<description>本地代理</description>
</property>
<!-- zkfc的配置 -->
<property>
<name>ha.zookeeper.quorum</name>
<value>nn1:2181,nn2:2181,nn3:2181</value>
<description>HA使用的zookeeper地址</description>
</property>
</configuration>

View File

@@ -0,0 +1,140 @@
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>/data/namenode</value>
<description>namenode本地文件存放地址</description>
</property>
<property>
<name>dfs.nameservices</name>
<value>ns1</value>
<description>提供服务的NS逻辑名称与core-site.xml里的对应</description>
</property>
<!-- namenode的配置 -->
<!-- 主要的 -->
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2,nn3</value>
<description>列出该逻辑名称下的NameNode逻辑名称</description>
</property>
<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>nn1:9000</value>
<description>指定NameNode的RPC位置</description>
</property>
<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>nn1:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>
<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>nn2:9000</value>
<description>指定NameNode的RPC位置</description>
</property>
<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>nn2:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>
<property>
<name>dfs.namenode.rpc-address.ns1.nn3</name>
<value>nn3:9000</value>
<description>指定NameNode的RPC位置</description>
</property>
<property>
<name>dfs.namenode.http-address.ns1.nn3</name>
<value>nn3:50070</value>
<description>指定NameNode的Web Server位置</description>
</property>
<property>
<name>dfs.namenode.handler.count</name>
<value>77</value>
<description>namenode的工作线程数</description>
</property>
<!-- journaldata配置,使得其他两个namenode同步第一个namenode数据 -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://nn1:8485;nn2:8485;nn3:8485/ns1</value>
<description>指定用于HA存放edits的共享存储通常是namenode的所在机器</description>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data/journaldata/</value>
<description>journaldata服务存放文件的地址</description>
</property>
<property>
<name>ipc.client.connect.max.retries</name>
<value>10</value>
<description>namenode和journalnode的链接重试次数10次</description>
</property>
<property>
<name>ipc.client.connect.retry.interval</name>
<value>10000</value>
<description>重试的间隔时间10s</description>
</property>
<!-- zkfc的配置 -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
<description>指定HA做隔离的方法缺省是ssh可设为shell稍后详述</description>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
<description>杀死命令脚本的免密配置秘钥</description>
</property>
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
<description>指定客户端用于HA切换的代理类不同的NS可以用不同的代理类以上示例为Hadoop 2.0自带的缺省代理类</description>
</property>
<property>
<name>dfs.client.failover.proxy.provider.auto-ha</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- datanode配置 -->
<property>
<name>dfs.datanode.data.dir</name>
<value>/data/datanode</value>
<description>datanode本地文件存放地址</description>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
<description>文件复本数</description>
</property>
<property>
<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
<value>false</value>
</property>
<property>
<name>dfs.client.use.datanode.hostname</name>
<value>true</value>
</property>
<property>
<name>dfs.datanode.use.datanode.hostname</name>
<value>true</value>
</property>
</configuration>