use git in obsidian

## HaiNiuHadoop搭建/1.环境安装.md
### Disable the firewall

```bash
# Run on all 6 hosts
sudo systemctl stop firewalld
sudo systemctl disable firewalld
```
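To confirm the firewall is both stopped and disabled, a quick check (a sketch; runnable on any of the hosts):

```bash
# Expected answers: "inactive" and "disabled"
sudo systemctl is-active firewalld
sudo systemctl is-enabled firewalld
```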
### Configure the yum repository

- Download the repo file:

[Centos-7.repo](http://mirrors.aliyun.com/repo/Centos-7.repo)

Upload it to `/tmp` and `cd` into `/tmp`.

- Back up and replace the system repo file:

```bash
cp Centos-7.repo /etc/yum.repos.d/
cd /etc/yum.repos.d/
mv CentOS-Base.repo CentOS-Base.repo.bak
mv Centos-7.repo CentOS-Base.repo
```

- Copy `CentOS-Base.repo` from `nn1` to the other hosts:

```bash
scp /etc/yum.repos.d/CentOS-Base.repo root@nn2:/etc/yum.repos.d
scp /etc/yum.repos.d/CentOS-Base.repo root@nn3:/etc/yum.repos.d
scp /etc/yum.repos.d/CentOS-Base.repo root@s1:/etc/yum.repos.d
scp /etc/yum.repos.d/CentOS-Base.repo root@s2:/etc/yum.repos.d
scp /etc/yum.repos.d/CentOS-Base.repo root@s3:/etc/yum.repos.d
```

- Refresh the yum cache and update:

```bash
yum clean all
yum makecache
yum update -y
```

- Install commonly used packages:

```bash
yum install -y openssh-server vim gcc gcc-c++ glibc-headers bzip2-devel lzo-devel curl wget openssh-clients zlib-devel autoconf automake cmake libtool openssl-devel fuse-devel snappy-devel telnet unzip zip net-tools.x86_64 firewalld systemd ntp unrar bzip2
```
### Install the JDK

> Note: run these steps on all six machines in turn.

- Upload the RPM to `/tmp` and install it:

```bash
cd /tmp
rpm -ivh jdk-8u144-linux-x64.rpm
```

- Configure environment variables:

```bash
ln -s /usr/java/jdk1.8.0_144/ /usr/java/jdk1.8
echo 'export JAVA_HOME=/usr/java/jdk1.8' >> /etc/profile.d/myEnv.sh
echo 'export PATH=$PATH:$JAVA_HOME/bin' >> /etc/profile.d/myEnv.sh
source /etc/profile
java -version
```
### Set the hostname and hostname mappings

```bash
vim /etc/hostname
```

The six machines are named nn1, nn2, nn3, s1, s2, and s3 respectively.

```bash
vim /etc/hosts
```

Change it to:

```text
192.168.1.30 nn1
192.168.1.31 nn2
192.168.1.32 nn3
192.168.1.33 s1
192.168.1.34 s2
192.168.1.35 s3
```
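To confirm the mappings work, a quick reachability check can be run from any host (a minimal sketch, assuming the six hostnames above):

```bash
# Ping every host once by name; a failure points at a bad /etc/hosts entry
for h in nn1 nn2 nn3 s1 s2 s3; do
  ping -c 1 -W 1 "$h" > /dev/null && echo "$h OK" || echo "$h UNREACHABLE"
done
```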
### Create the hadoop user

```bash
# Create the hadoop user
useradd hadoop
# Set the hadoop user's password: 12345678
passwd hadoop
```
### Restrict `su` to the wheel group and allow passwordless switching to root

- Edit the /etc/pam.d/su configuration:

```bash
sed -i 's/#auth\t\trequired\tpam_wheel.so/auth\t\trequired\tpam_wheel.so/g' '/etc/pam.d/su'
sed -i 's/#auth\t\tsufficient\tpam_wheel.so/auth\t\tsufficient\tpam_wheel.so/g' '/etc/pam.d/su'
```

- Edit the /etc/login.defs file:

```bash
echo "SU_WHEEL_ONLY yes" >> /etc/login.defs
```

- Add the user to the administrators group; ordinary users can then no longer `su` to root:

```bash
# Add the hadoop user to the wheel group
gpasswd -a hadoop wheel
# Check whether the wheel group now contains the hadoop user
cat /etc/group | grep wheel
```
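A quick way to confirm the PAM change took effect (a sketch; assumes the lines above were successfully uncommented and hadoop is now in wheel):

```bash
# As hadoop (a wheel member): should reach a root shell without a password prompt
su - hadoop -c 'su - root -c whoami'   # expected output: root
# The same command run as a user outside wheel should be denied
```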
### Configure SSH keys for the hadoop user

#### Passwordless SSH from hadoop to hadoop

- Run this block of commands on `nn1` only.

However, `su - hadoop` and `mkdir ~/.ssh` must first be run on the other hosts.

```bash
# Switch to the hadoop user
su - hadoop
# Generate an SSH key pair
ssh-keygen -t rsa -f ~/.ssh/id_rsa -P ''
ssh-copy-id nn1
ssh-copy-id nn2
ssh-copy-id nn3
ssh-copy-id s1
ssh-copy-id s2
ssh-copy-id s3
scp /home/hadoop/.ssh/id_rsa hadoop@nn2:/home/hadoop/.ssh
scp /home/hadoop/.ssh/id_rsa hadoop@nn3:/home/hadoop/.ssh
scp /home/hadoop/.ssh/id_rsa hadoop@s1:/home/hadoop/.ssh
scp /home/hadoop/.ssh/id_rsa hadoop@s2:/home/hadoop/.ssh
scp /home/hadoop/.ssh/id_rsa hadoop@s3:/home/hadoop/.ssh
```
#### Passwordless SSH from hadoop to root

```bash
ssh-copy-id root@nn1
ssh-copy-id root@nn2
ssh-copy-id root@nn3
ssh-copy-id root@s1
ssh-copy-id root@s2
ssh-copy-id root@s3
scp /home/hadoop/.ssh/id_rsa root@nn2:/root/.ssh
scp /home/hadoop/.ssh/id_rsa root@nn3:/root/.ssh
scp /home/hadoop/.ssh/id_rsa root@s1:/root/.ssh
scp /home/hadoop/.ssh/id_rsa root@s2:/root/.ssh
scp /home/hadoop/.ssh/id_rsa root@s3:/root/.ssh
```
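To verify both sets of keys at once, a sketch run as hadoop on nn1 (`-o BatchMode=yes` makes a missing key fail instead of prompting for a password):

```bash
for h in nn1 nn2 nn3 s1 s2 s3; do
  ssh -o BatchMode=yes hadoop@"$h" hostname || echo "hadoop@$h FAILED"
  ssh -o BatchMode=yes root@"$h" hostname   || echo "root@$h FAILED"
done
```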
### Script setup

- **ips**

```bash
vim /home/hadoop/bin/ips
```

```bash
nn1
nn2
nn3
s1
s2
s3
```
- **ssh_all.sh**

```bash
vim /home/hadoop/bin/ssh_all.sh
```

```bash
#! /bin/bash
# cd into the directory this script lives in
cd `dirname $0`
# Record that directory
dir_path=`pwd`
#echo $dir_path
# Read the ips file into an array (a list of hostnames)
ip_arr=(`cat $dir_path/ips`)
# Loop over the hostnames
for ip in ${ip_arr[*]}
do
    # Assemble the ssh command, e.g.: ssh hadoop@nn1 "ls"
    cmd_="ssh hadoop@${ip} \"$*\" "
    echo $cmd_
    # Run the assembled ssh command via eval
    if eval ${cmd_} ; then
        echo "OK"
    else
        echo "FAIL"
    fi
done
```
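Once the scripts are made executable and distributed (steps below), usage looks like this (a sketch; quote commands containing spaces or shell metacharacters so they survive the eval):

```bash
./ssh_all.sh hostname
./ssh_all.sh "df -h /data; date"
```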
- **ssh_root.sh**

```bash
#! /bin/bash
# cd into the directory this script lives in
cd `dirname $0`
# Record that directory
dir_path=`pwd`
#echo $dir_path
# Read the ips file into an array (a list of hostnames)
ip_arr=(`cat $dir_path/ips`)
# Loop over the hostnames
for ip in ${ip_arr[*]}
do
    # Assemble the ssh command; exe.sh re-runs the arguments as root, e.g.: ssh hadoop@nn1 ~/bin/exe.sh "ls"
    cmd_="ssh hadoop@${ip} ~/bin/exe.sh \"$*\""
    echo $cmd_
    # Run the assembled ssh command via eval
    if eval ${cmd_} ; then
        echo "OK"
    else
        echo "FAIL"
    fi
done
```
- **scp_all.sh**

```bash
#! /bin/bash
# cd into the directory this script lives in
cd `dirname $0`
# Record that directory
dir_path=`pwd`
#echo $dir_path
# Read the ips file into an array (a list of hostnames)
ip_arr=(`cat $dir_path/ips`)
# Source path
source_=$1
# Target path
target=$2
# Loop over the hostnames
for ip in ${ip_arr[*]}
do
    # Assemble the scp command: scp <source> hadoop@<host>:<target>
    cmd_="scp -r ${source_} hadoop@${ip}:${target}"
    echo $cmd_
    # Run the assembled scp command via eval
    if eval ${cmd_} ; then
        echo "OK"
    else
        echo "FAIL"
    fi
done
```
- **exe.sh**

```bash
#! /bin/bash
# Run the given command as root via su
cmd=$*
su - << EOF
$cmd
EOF
```
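With exe.sh in place in `~/bin` on every host (see the distribution step below), ssh_root.sh runs a command as root everywhere; the passwordless `su` for the wheel group configured earlier is what lets the embedded `su -` succeed without a prompt. A usage sketch:

```bash
./ssh_root.sh whoami            # should print "root" once per host
./ssh_root.sh mkdir -p /data    # example of a privileged command on all hosts
```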
- Make the scripts executable:

```bash
chmod +x ssh_all.sh
chmod +x scp_all.sh
chmod +x ssh_root.sh
chmod +x exe.sh
```
- Distribute the scripts to the other hosts:

```bash
./ssh_all.sh mkdir /home/hadoop/bin
./scp_all.sh /home/hadoop/bin/ips /home/hadoop/bin/
./scp_all.sh /home/hadoop/bin/exe.sh /home/hadoop/bin/
./scp_all.sh /home/hadoop/bin/ssh_all.sh /home/hadoop/bin/
./scp_all.sh /home/hadoop/bin/scp_all.sh /home/hadoop/bin/
./scp_all.sh /home/hadoop/bin/ssh_root.sh /home/hadoop/bin/
```
- Add `/home/hadoop/bin` to the hadoop user's PATH (switch to the `hadoop` user first):

```bash
echo 'export PATH=$PATH:/home/hadoop/bin' >> ~/.bashrc && source ~/.bashrc
scp_all.sh /home/hadoop/.bashrc /home/hadoop/
source ~/.bashrc # run on every host
```
## HaiNiuHadoop搭建/2.ZooKeeper配置.md
### ZooKeeper script setup

- Copy ips:

```bash
cp ips ips_zookeeper
```

Change it to:

```bash
nn1
nn2
nn3
```

- Copy the three scripts:

```bash
cp scp_all.sh scp_all_zookeeper.sh
cp ssh_all.sh ssh_all_zookeeper.sh
cp ssh_root.sh ssh_root_zookeeper.sh
```

- Edit the scripts:

```shell
vim scp_all_zookeeper.sh
vim ssh_all_zookeeper.sh
vim ssh_root_zookeeper.sh
```

In all three scripts, change `ips` to `ips_zookeeper` (or use the one-liner below).
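The same edit can be applied non-interactively; a sketch that rewrites the `$dir_path/ips` reference in each copied script:

```bash
# Point the three zookeeper variants at ips_zookeeper in one go
sed -i 's#\$dir_path/ips#\$dir_path/ips_zookeeper#' scp_all_zookeeper.sh ssh_all_zookeeper.sh ssh_root_zookeeper.sh
```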
### Install ZooKeeper

- Upload the tarball to `/tmp` and extract it:

```bash
sudo tar -zxvf /tmp/zookeeper-3.4.8.tar.gz -C /usr/local/
scp -r /usr/local/zookeeper-3.4.8/ root@nn2:/usr/local/
scp -r /usr/local/zookeeper-3.4.8/ root@nn3:/usr/local/
ssh_root_zookeeper.sh chown -R hadoop:hadoop /usr/local/zookeeper-3.4.8
ssh_root_zookeeper.sh ln -s /usr/local/zookeeper-3.4.8/ /usr/local/zookeeper
```
### Configure ZooKeeper

- zoo.cfg:

```bash
cd /usr/local/zookeeper/conf/
cp zoo_sample.cfg zoo.cfg
```

Then `vim zoo.cfg` and change the following:

```properties
# Change dataDir
dataDir=/data/zookeeper
# Add the following entries
server.1=nn1:2888:3888
server.2=nn2:2888:3888
server.3=nn3:2888:3888
```

Distribute it to nn2 and nn3:

`scp_all_zookeeper.sh /usr/local/zookeeper/conf/zoo.cfg /usr/local/zookeeper/conf/`

- `zkEnv.sh`:

`vim /usr/local/zookeeper/bin/zkEnv.sh`

![[images/000001.png]]

Distribute it to nn2 and nn3:

```bash
scp_all_zookeeper.sh /usr/local/zookeeper/bin/zkEnv.sh /usr/local/zookeeper/bin/
```
- Create the ZooKeeper data directory:

```bash
ssh_root_zookeeper.sh mkdir -p /data/zookeeper
ssh_root_zookeeper.sh chown -R hadoop:hadoop /data
```

- Create the myid files:

```bash
ssh nn1 'echo "1" > /data/zookeeper/myid'
ssh nn2 'echo "2" > /data/zookeeper/myid'
ssh nn3 'echo "3" > /data/zookeeper/myid'
```
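Each server's myid must match its `server.N` entry in zoo.cfg; a quick check across the three hosts:

```bash
# Expect 1, 2, 3 from nn1, nn2, nn3 respectively
ssh_all_zookeeper.sh cat /data/zookeeper/myid
```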
- Configure the ZooKeeper environment variables:

```bash
# Also run on all the other hosts
sudo chown -R hadoop:hadoop /etc/profile.d/myEnv.sh
```

```bash
echo 'export ZOOKEEPER_HOME=/usr/local/zookeeper' >> /etc/profile.d/myEnv.sh
echo 'export PATH=$PATH:$ZOOKEEPER_HOME/bin' >> /etc/profile.d/myEnv.sh
```

```bash
# Distribute to nn2 and nn3
scp_all_zookeeper.sh /etc/profile.d/myEnv.sh /etc/profile.d/
# source the environment variables
ssh_all_zookeeper.sh source /etc/profile
```
### ZooKeeper commands

```bash
# Start the zk service
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh start
# Check the ZK status on each machine
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh status
# Stop the whole service
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh stop
# Restart the zk service
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh restart
```

```shell
# Start the zookeeper client and connect to the zookeeper cluster
/usr/local/zookeeper/bin/zkCli.sh -server nn1:2181
# Can be shortened to:
zkCli.sh
```
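A short smoke test inside the client (a sketch; the `/smoke` znode name is arbitrary):

```shell
# Inside the zkCli.sh prompt:
ls /                      # list root znodes, e.g. [zookeeper]
create /smoke "hello"     # create a test znode
get /smoke                # should print hello (plus stat info)
delete /smoke             # clean up
quit
```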
## HaiNiuHadoop搭建/3.HDFS搭建.md
### Upload `hadoop-3.1.4.tar.gz` to `/tmp` and extract it

> Note: upload it to `/tmp` on all six machines.

```bash
# Run on all 6 machines
sudo tar -zxvf /tmp/hadoop-3.1.4.tar.gz -C /usr/local/
# Fix ownership and create the symlink on every host
ssh_root.sh chown -R hadoop:hadoop /usr/local/hadoop-3.1.4
ssh_root.sh ln -s /usr/local/hadoop-3.1.4/ /usr/local/hadoop
```
### Configure environment variables

```bash
echo 'export HADOOP_HOME=/usr/local/hadoop' >> /etc/profile.d/myEnv.sh
echo 'export PATH=$PATH:$HADOOP_HOME/bin' >> /etc/profile.d/myEnv.sh
echo 'export PATH=$PATH:$HADOOP_HOME/sbin' >> /etc/profile.d/myEnv.sh
```

```bash
# Distribute to nn2, nn3, s1, s2, s3
scp_all.sh /etc/profile.d/myEnv.sh /etc/profile.d/
# source the environment variables
ssh_root.sh source /etc/profile
```

> The `/data` directory is also required. nn1, nn2 and nn3 already have it; create it on the remaining three hosts:

```bash
# Run on s1, s2, s3
sudo mkdir /data
sudo chown -R hadoop:hadoop /data
```
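A quick sanity check that the binaries and PATH are in place everywhere (a sketch; the profile is sourced explicitly because non-interactive ssh shells may not read it):

```bash
ssh_all.sh "source /etc/profile && hadoop version"
```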
### Edit core-site.xml

```bash
vim /usr/local/hadoop/etc/hadoop/core-site.xml
```

```xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://ns1</value>
        <description>Protocol and logical nameservice name of the default file service; must match hdfs-site.xml. This setting replaces fs.default.name from 1.0.</description>
    </property>

    <property>
        <name>hadoop.tmp.dir</name>
        <value>/data/tmp</value>
        <description>Data storage directory</description>
    </property>

    <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>hadoop</value>
        <description>
            hdfs dfsadmin -refreshSuperUserGroupsConfiguration,
            yarn rmadmin -refreshSuperUserGroupsConfiguration
            These two commands reload this setting without a restart.
        </description>
    </property>

    <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>localhost</value>
        <description>Local proxy</description>
    </property>

    <!-- zkfc settings -->
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>nn1:2181,nn2:2181,nn3:2181</value>
        <description>ZooKeeper quorum used for HA</description>
    </property>
</configuration>
```
### Edit `hdfs-site.xml`

```bash
vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml
```

```xml
<configuration>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/data/namenode</value>
        <description>Local storage path for namenode files</description>
    </property>

    <property>
        <name>dfs.nameservices</name>
        <value>ns1</value>
        <description>Logical name of the nameservice being served; must match core-site.xml</description>
    </property>

    <!-- namenode settings -->
    <!-- the main part -->
    <property>
        <name>dfs.ha.namenodes.ns1</name>
        <value>nn1,nn2,nn3</value>
        <description>Logical names of the NameNodes under this nameservice</description>
    </property>

    <property>
        <name>dfs.namenode.rpc-address.ns1.nn1</name>
        <value>nn1:9000</value>
        <description>RPC address of this NameNode</description>
    </property>

    <property>
        <name>dfs.namenode.http-address.ns1.nn1</name>
        <value>nn1:50070</value>
        <description>Web server address of this NameNode</description>
    </property>

    <property>
        <name>dfs.namenode.rpc-address.ns1.nn2</name>
        <value>nn2:9000</value>
        <description>RPC address of this NameNode</description>
    </property>

    <property>
        <name>dfs.namenode.http-address.ns1.nn2</name>
        <value>nn2:50070</value>
        <description>Web server address of this NameNode</description>
    </property>

    <property>
        <name>dfs.namenode.rpc-address.ns1.nn3</name>
        <value>nn3:9000</value>
        <description>RPC address of this NameNode</description>
    </property>

    <property>
        <name>dfs.namenode.http-address.ns1.nn3</name>
        <value>nn3:50070</value>
        <description>Web server address of this NameNode</description>
    </property>

    <property>
        <name>dfs.namenode.handler.count</name>
        <value>77</value>
        <description>Number of namenode worker threads</description>
    </property>

    <!-- journalnode settings, so the other two namenodes can sync the first namenode's data -->
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://nn1:8485;nn2:8485;nn3:8485/ns1</value>
        <description>Shared storage for HA edits, usually on the namenode machines</description>
    </property>

    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/data/journaldata/</value>
        <description>Directory where the journalnode service stores its files</description>
    </property>

    <property>
        <name>ipc.client.connect.max.retries</name>
        <value>10</value>
        <description>Namenode-to-journalnode connection retries: 10 attempts</description>
    </property>

    <property>
        <name>ipc.client.connect.retry.interval</name>
        <value>10000</value>
        <description>Retry interval: 10 s</description>
    </property>

    <!-- zkfc settings -->
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
        <description>HA fencing method; the default is ssh, can be set to shell</description>
    </property>

    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/home/hadoop/.ssh/id_rsa</value>
        <description>Private key the fencing (kill) script uses for passwordless ssh</description>
    </property>

    <property>
        <name>dfs.client.failover.proxy.provider.ns1</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
        <description>Proxy class clients use for HA failover; different nameservices may use different classes. The value above is the default class shipped since Hadoop 2.0.</description>
    </property>

    <property>
        <name>dfs.client.failover.proxy.provider.auto-ha</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>

    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>

    <!-- datanode settings -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/data/datanode</value>
        <description>Local storage path for datanode files</description>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>3</value>
        <description>File replication factor</description>
    </property>
    <property>
        <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.client.use.datanode.hostname</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.datanode.use.datanode.hostname</name>
        <value>true</value>
    </property>
</configuration>
```
### Edit `hadoop-env.sh`

```bash
vim /usr/local/hadoop/etc/hadoop/hadoop-env.sh
```

```bash
# Add these two lines
source /etc/profile
export HADOOP_HEAPSIZE_MAX=512
```
### Distribute the configuration files

```bash
scp_all.sh /usr/local/hadoop/etc/hadoop/core-site.xml /usr/local/hadoop/etc/hadoop/
scp_all.sh /usr/local/hadoop/etc/hadoop/hdfs-site.xml /usr/local/hadoop/etc/hadoop/
scp_all.sh /usr/local/hadoop/etc/hadoop/hadoop-env.sh /usr/local/hadoop/etc/hadoop/
```
### Initialize the cluster

- The ZooKeeper cluster must be started first:

```bash
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh start
```

```bash
# For the first startup, start the journalnodes first so the 3 namenodes can sync their metadata
ssh_all_zookeeper.sh hadoop-daemon.sh start journalnode
```

- Set up `zkfc`:

```bash
# Run on the nn1 node
hdfs zkfc -formatZK
# Start zkfc on nn1, nn2, nn3
hadoop-daemon.sh start zkfc
```

- Initialize the namenode on nn1; run on nn1:

```bash
hdfs namenode -format
hadoop-daemon.sh start namenode
```

- Bootstrap the second and third namenodes and start them; run on nn2 and nn3:

```bash
hdfs namenode -bootstrapStandby
hadoop-daemon.sh start namenode
```

- Edit **workers**:

```bash
vim /usr/local/hadoop/etc/hadoop/workers
```

Change it to:

```text
s1
s2
s3
```

Distribute it to the other machines:

```bash
scp_all.sh /usr/local/hadoop/etc/hadoop/workers /usr/local/hadoop/etc/hadoop
```

- Start the datanodes:

```bash
# Starts the datanode on every worker (s1, s2, s3); run on a single machine only (nn1 or any other)
hadoop-daemons.sh start datanode
```

### Start the HDFS cluster

```bash
start-dfs.sh
```
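Once start-dfs.sh returns, a few checks confirm the HA setup (a sketch; the logical ids follow the nn1/nn2/nn3 names configured above):

```bash
# Daemons on this host (on nn1 expect NameNode, DFSZKFailoverController, JournalNode)
jps
# Exactly one namenode should report active, the others standby
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2
hdfs haadmin -getServiceState nn3
# Datanode count and capacity
hdfs dfsadmin -report
```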
## HaiNiuHadoop搭建/4.Yarn配置.md
- `yarn-env.sh`

```bash
# Add the following:
source /etc/profile
JAVA=$JAVA_HOME/bin/java
JAVA_HEAP_MAX=-Xmx256m
YARN_HEAPSIZE=256
export YARN_RESOURCEMANAGER_HEAPSIZE=256
```

```bash
# Distribute yarn-env.sh to the other hosts
scp_all.sh /usr/local/hadoop/etc/hadoop/yarn-env.sh /usr/local/hadoop/etc/hadoop/
```
- `yarn-site.xml`

```xml
<configuration>
    <!-- RM1 configs start -->
    <property>
        <name>yarn.resourcemanager.address.rm1</name>
        <value>nn1:8032</value>
        <description>Address the ResourceManager exposes to clients, used to submit and kill applications</description>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>nn1</value>
        <description>ResourceManager hostname</description>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm1</name>
        <value>nn1:8030</value>
        <description>Address the ResourceManager exposes to ApplicationMasters, used to request and release resources</description>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.https.address.rm1</name>
        <value>nn1:8089</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>nn1:8088</value>
        <description>ResourceManager web UI address; browse it to inspect cluster information</description>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
        <value>nn1:8031</value>
        <description>Address the ResourceManager exposes to NodeManagers, used for heartbeats and task assignment</description>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address.rm1</name>
        <value>nn1:8033</value>
        <description>Address the ResourceManager exposes to administrators for management commands</description>
    </property>
    <!-- RM1 configs end -->

    <!-- RM2 configs start -->
    <property>
        <name>yarn.resourcemanager.address.rm2</name>
        <value>nn2:8032</value>
        <description>Address the ResourceManager exposes to clients, used to submit and kill applications</description>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>nn2</value>
        <description>ResourceManager hostname</description>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm2</name>
        <value>nn2:8030</value>
        <description>Address the ResourceManager exposes to ApplicationMasters, used to request and release resources</description>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.https.address.rm2</name>
        <value>nn2:8089</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>nn2:8088</value>
        <description>ResourceManager web UI address; browse it to inspect cluster information</description>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
        <value>nn2:8031</value>
        <description>Address the ResourceManager exposes to NodeManagers, used for heartbeats and task assignment</description>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address.rm2</name>
        <value>nn2:8033</value>
        <description>Address the ResourceManager exposes to administrators for management commands</description>
    </property>
    <!-- RM2 configs end -->

    <!-- RM3 configs start -->
    <property>
        <name>yarn.resourcemanager.address.rm3</name>
        <value>nn3:8032</value>
        <description>Address the ResourceManager exposes to clients, used to submit and kill applications</description>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm3</name>
        <value>nn3</value>
        <description>ResourceManager hostname</description>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm3</name>
        <value>nn3:8030</value>
        <description>Address the ResourceManager exposes to ApplicationMasters, used to request and release resources</description>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.https.address.rm3</name>
        <value>nn3:8089</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm3</name>
        <value>nn3:8088</value>
        <description>ResourceManager web UI address; browse it to inspect cluster information</description>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm3</name>
        <value>nn3:8031</value>
        <description>Address the ResourceManager exposes to NodeManagers, used for heartbeats and task assignment</description>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address.rm3</name>
        <value>nn3:8033</value>
        <description>Address the ResourceManager exposes to administrators for management commands</description>
    </property>
    <!-- RM3 configs end -->

    <!-- yarn ha start -->
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
        <description>Whether YARN HA is enabled</description>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
        <value>true</value>
        <description>HA state switching is automatic</description>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2,rm3</value>
        <description>List of logical RM ids</description>
    </property>
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>nn1:2181,nn2:2181,nn3:2181</value>
        <description>Where the HA state is stored</description>
    </property>
    <!-- yarn ha end -->

    <!-- shared metadata store start -->
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>pseudo-yarn-rm-cluster</value>
        <description>Cluster id</description>
    </property>
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
        <description>Defaults to false, meaning tasks that were running when the resourcemanager crashed cannot be restarted after the RM recovers</description>
    </property>
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
        <description>RM state store implementation; FileSystemRMStateStore and MemoryRMStateStore also exist, but the ZK store is currently the mainstream choice</description>
    </property>
    <property>
        <name>yarn.resourcemanager.zk.state-store.address</name>
        <value>nn1:2181,nn2:2181,nn3:2181</value>
        <description>ZK address for the state store when ZK storage is used</description>
    </property>
    <!-- shared metadata store end -->

    <!-- basic NodeManager settings start -->
    <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>/data/yarn/local</value>
        <description>Where intermediate results are kept: data needed to run a Container, such as executables or jars and config files, plus temporary data produced while running</description>
    </property>
    <property>
        <name>yarn.nodemanager.log-dirs</name>
        <value>/data/yarn/logs</value>
        <description>Container run-log location (multiple directories may be configured)</description>
    </property>
    <property>
        <name>yarn.nodemanager.address</name>
        <value>0.0.0.0:9103</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
        <description>Auxiliary service run on the NodeManager; must be mapreduce_shuffle for MapReduce programs to run</description>
    </property>
    <property>
        <name>yarn.nodemanager.webapp.address</name>
        <value>0.0.0.0:8042</value>
    </property>
    <property>
        <name>yarn.nodemanager.localizer.address</name>
        <value>0.0.0.0:8040</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>mapreduce.shuffle.port</name>
        <value>23080</value>
    </property>
    <!-- basic NodeManager settings end -->

    <!-- NodeManager resource limits start -->
    <property>
        <name>yarn.scheduler.minimum-allocation-vcores</name>
        <value>1</value>
        <description>Minimum number of virtual CPUs a single task may request</description>
    </property>
    <property>
        <name>yarn.scheduler.maximum-allocation-vcores</name>
        <value>3</value>
        <description>Maximum number of virtual CPUs a single task may request; corresponds to yarn.nodemanager.resource.cpu-vcores and should be at most one physical CPU's worth</description>
    </property>
    <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>1536</value>
    </property>
    <property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>1024</value>
        <description>Maximum physical memory a single task may request</description>
    </property>
    <property>
        <name>yarn.nodemanager.resource.cpu-vcores</name>
        <value>3</value>
        <description>Number of virtual CPUs YARN may use on this node; one physical CPU maps to 3 virtual CPUs</description>
    </property>
    <!-- disable memory checks start -->
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
        <description>Virtual memory check, defaults to true</description>
    </property>
    <property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>false</value>
        <description>Physical memory check, defaults to true</description>
    </property>
    <!-- disable memory checks end -->
    <property>
        <name>yarn.application.classpath</name>
        <value>$HADOOP_CONF_DIR,
            $HADOOP_COMMON_HOME/share/hadoop/common/*,
            $HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
            $HADOOP_COMMON_HOME/share/hadoop/hdfs/*,
            $HADOOP_COMMON_HOME/share/hadoop/hdfs/lib/*,
            $HADOOP_COMMON_HOME/share/hadoop/mapreduce/*,
            $HADOOP_COMMON_HOME/share/hadoop/mapreduce/lib/*,
            $HADOOP_COMMON_HOME/share/hadoop/yarn/*,
            $HADOOP_COMMON_HOME/share/hadoop/yarn/lib/*</value>
    </property>
    <!-- NodeManager resource limits end -->

    <!-- capacity scheduler priority: 0 - 5, 0 is the default, 5 is the highest. start -->
    <property>
        <name>yarn.cluster.max-application-priority</name>
        <value>5</value>
    </property>
    <!-- capacity scheduler priority end -->

    <!-- task files should be uploaded under the user directories below /user. start -->
    <property>
        <name>yarn.app.mapreduce.am.staging-dir</name>
        <value>/user</value>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
        <value>5000</value>
    </property>
    <!-- end -->

    <!-- enable log aggregation start -->
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
        <description>Whether log aggregation is enabled</description>
    </property>
    <property>
        <name>yarn.nodemanager.remote-app-log-dir</name>
        <value>/tmp/app-logs</value>
        <description>HDFS directory logs are moved to when an application finishes (effective with log aggregation enabled)</description>
    </property>
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>1209600</value>
        <description>How long Container run logs from the nodemanagers are kept in HDFS: half a month</description>
    </property>
    <!-- enable log aggregation end -->
</configuration>
```

```bash
# Distribute yarn-site.xml to the other hosts
scp_all.sh /usr/local/hadoop/etc/hadoop/yarn-site.xml /usr/local/hadoop/etc/hadoop/
```
- `mapred-site.xml`

```xml
<configuration>
    <!-- execution framework -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
        <description>Execution framework</description>
    </property>
    <!-- resource limits -->
    <property>
        <name>yarn.app.mapreduce.am.resource.mb</name>
        <value>1024</value>
        <description>Memory the MR ApplicationMaster requests from YARN</description>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.command-opts</name>
        <value>-Xmx768m</value>
        <description>Memory used by the JVM</description>
    </property>
    <property>
        <name>mapreduce.map.memory.mb</name>
        <value>1024</value>
        <description>Memory each Map Task requests from YARN</description>
    </property>
    <property>
        <name>mapreduce.reduce.memory.mb</name>
        <value>1024</value>
        <description>Memory each Reduce Task requests from YARN</description>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.resource.cpu-vcores</name>
        <value>1</value>
        <description>Virtual CPUs used by the MR ApplicationMaster; corresponds to yarn.nodemanager.resource.cpu-vcores and should be at most one physical CPU's worth</description>
    </property>
    <property>
        <name>mapreduce.reduce.java.opts</name>
        <value>-Xmx768m</value>
        <description>Actual reduce JVM memory</description>
    </property>
    <property>
        <name>mapreduce.map.java.opts</name>
        <value>-Xmx768m</value>
        <description>Actual map JVM memory</description>
    </property>
    <property>
        <name>mapreduce.map.cpu.vcores</name>
        <value>1</value>
        <description>Virtual CPUs each Map Task needs</description>
    </property>
    <property>
        <name>mapreduce.reduce.cpu.vcores</name>
        <value>1</value>
        <description>Virtual CPUs each Reduce Task needs</description>
    </property>
    <property>
        <name>mapreduce.application.classpath</name>
        <value>/usr/local/hadoop/etc/hadoop,/usr/local/hadoop/share/hadoop/common/*,/usr/local/hadoop/share/hadoop/common/lib/*,/usr/local/hadoop/share/hadoop/hdfs/*,/usr/local/hadoop/share/hadoop/hdfs/lib/*,/usr/local/hadoop/share/hadoop/mapreduce/*,/usr/local/hadoop/share/hadoop/mapreduce/lib/*,/usr/local/hadoop/share/hadoop/yarn/*,/usr/local/hadoop/share/hadoop/yarn/lib/*,/usr/local/hadoop/lib/*,/usr/local/hbase/lib/*</value>
        <description>JVM runtime classpath used when running MR programs</description>
    </property>

    <!-- run the history server on nn1. start -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>nn1:10020</value>
        <description>MapReduce JobHistory Server address</description>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>nn1:19888</value>
        <description>MapReduce JobHistory Server web UI address</description>
    </property>
    <property>
        <name>mapreduce.jobhistory.intermediate-done-dir</name>
        <value>/data/mapred/tmp</value>
        <description>Where logs produced by MapReduce jobs are placed</description>
    </property>
    <property>
        <name>mapreduce.jobhistory.done-dir</name>
        <value>/data/mapred/done</value>
        <description>Where logs managed by the MR JobHistory Server are kept</description>
    </property>
    <property>
        <name>mapreduce.job.userlog.retain.hours</name>
        <value>48</value>
    </property>
    <!-- run the history server on nn1. end -->

    <!-- compress map output so the reduce phase pulls less data from disk. start -->
    <property>
        <name>mapreduce.map.output.compress</name>
        <value>true</value>
        <description>Whether map output compression is enabled</description>
    </property>
    <property>
        <name>mapreduce.map.output.compress.codec</name>
        <value>org.apache.hadoop.io.compress.BZip2Codec</value>
        <description>Default codec for map output</description>
    </property>
    <!-- compress map output end -->

    <!--
    The block below enables reduce output compression; with it enabled, the output
    cannot be read directly and must be decompressed first. Uncomment if needed.
    <property>
        <name>mapreduce.output.fileoutputformat.compress</name>
        <value>true</value>
        <description>Whether reduce output compression is enabled</description>
    </property>
    <property>
        <name>mapreduce.output.fileoutputformat.compress.codec</name>
        <value>org.apache.hadoop.io.compress.BZip2Codec</value>
        <description>Default codec for reduce output</description>
    </property>
    -->
</configuration>
```

```bash
# Distribute mapred-site.xml to the other hosts
scp_all.sh /usr/local/hadoop/etc/hadoop/mapred-site.xml /usr/local/hadoop/etc/hadoop/
```
**Configure capacity-scheduler.xml**

```xml
<!-- YARN uses the capacity scheduler to manage cluster resources:
     two child queues under root, hainiu at 80% and default at 20%.
     start
-->
<configuration>
    <property>
        <name>yarn.scheduler.capacity.root.queues</name>
        <value>hainiu,default</value>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.hainiu.capacity</name>
        <value>80</value>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.default.capacity</name>
        <value>20</value>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.hainiu.maximum-capacity</name>
        <value>100</value>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
        <value>100</value>
    </property>
    <!-- capacity scheduler queues end -->
</configuration>
```

```shell
# Distribute capacity-scheduler.xml to the other hosts
scp_all.sh /usr/local/hadoop/etc/hadoop/capacity-scheduler.xml /usr/local/hadoop/etc/hadoop/
```
```shell
# Start yarn; if it is already running, stop it first: stop-yarn.sh
start-yarn.sh
# Start the history server, on nn1
mapred --daemon start historyserver
```
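A couple of checks after startup, plus a test job submitted to the hainiu queue (a sketch; the example-jar path assumes the hadoop-3.1.4 layout used above):

```bash
# Exactly one RM should report active, the others standby
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2
yarn rmadmin -getServiceState rm3
# Registered NodeManagers (expect s1, s2, s3)
yarn node -list
# Run the bundled pi example on the hainiu queue
yarn jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.1.4.jar pi -Dmapreduce.job.queuename=hainiu 2 10
```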
## HaiNiuHadoop搭建/images/000001.png

Binary image (152 KiB), embedded from 2.ZooKeeper配置.md via `![[images/000001.png]]`; not shown here.
## HaiNiuHadoop搭建/未命名.md
```xml
<configuration>
    <!-- execution framework -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- jobhistory properties -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>master:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>master:19888</value>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.env</name>
        <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
    </property>
    <property>
        <name>mapreduce.map.env</name>
        <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
    </property>
    <property>
        <name>mapreduce.reduce.env</name>
        <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
    </property>
</configuration>
```