feat hadoop hainiu (-)zookeeper、hdfs
This commit is contained in:
@@ -2,8 +2,8 @@
|
||||
title: Hadoop集群搭建基础环境
|
||||
top_img: /img/site01.jpg
|
||||
top_img_height: 800px
|
||||
date: 2024-08-01 09:10:40
|
||||
tags: hadoop
|
||||
abbrlink: 61253
|
||||
date: 2024-09-11 22:45:40
|
||||
---
|
||||
|
||||
### 防火墙关闭
|
||||
|
||||
294
source/_posts/bigdata/hadoop/hdfs.md
Normal file
294
source/_posts/bigdata/hadoop/hdfs.md
Normal file
@@ -0,0 +1,294 @@
|
||||
---
|
||||
title: Hadoop集群HDFS配置
|
||||
top_img: /img/site01.jpg
|
||||
top_img_height: 800px
|
||||
abbrlink: 61252
|
||||
date: 2024-09-11 22:45:40
|
||||
---
|
||||
|
||||
### 上传`hadoop-3.1.4.tar.gz`到`/tmp`,解压
|
||||
>注意在六台机器均上传到`/tmp`
|
||||
```bash
|
||||
# 在6台机器执行
|
||||
sudo tar -zxvf /tmp/hadoop-3.1.4.tar.gz -C /usr/local/
|
||||
# 通过 ssh_root.sh 在各主机上批量修改属主并创建软链接
|
||||
ssh_root.sh chown -R hadoop:hadoop /usr/local/hadoop-3.1.4
|
||||
ssh_root.sh ln -s /usr/local/hadoop-3.1.4/ /usr/local/hadoop
|
||||
```
|
||||
### 配置环境变量
|
||||
```bash
|
||||
echo 'export HADOOP_HOME=/usr/local/hadoop' >> /etc/profile.d/myEnv.sh
|
||||
echo 'export PATH=$PATH:$HADOOP_HOME/bin' >> /etc/profile.d/myEnv.sh
|
||||
echo 'export PATH=$PATH:$HADOOP_HOME/sbin' >> /etc/profile.d/myEnv.sh
|
||||
```
|
||||
|
||||
```bash
|
||||
# 分发到nn2、nn3、s1、s2、s3
|
||||
scp_all.sh /etc/profile.d/myEnv.sh /etc/profile.d/
|
||||
# source 环境变量
|
||||
ssh_root.sh source /etc/profile
|
||||
```
|
||||
>还需要创建 `/data`这个目录,由于nn1、nn2、nn3已经创建`/data`,其他三台需要创建一下
|
||||
```bash
|
||||
### 在s1、s2、s3执行
|
||||
sudo mkdir /data
|
||||
sudo chown -R hadoop:hadoop /data
|
||||
```
|
||||
|
||||
### 修改core-site.xml
|
||||
```bash
|
||||
vim /usr/local/hadoop/etc/hadoop/core-site.xml
|
||||
```
|
||||
|
||||
```xml
|
||||
<configuration>
|
||||
<property>
|
||||
<name>fs.defaultFS</name>
|
||||
<value>hdfs://ns1</value>
|
||||
<description>默认文件服务的协议和NS逻辑名称,和hdfs-site.xml里的对应;此配置替代了1.0里的fs.default.name</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.tmp.dir</name>
|
||||
<value>/data/tmp</value>
|
||||
<description>数据存储目录</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.proxyuser.root.groups</name>
|
||||
<value>hadoop</value>
|
||||
<description>
|
||||
hdfs dfsadmin -refreshSuperUserGroupsConfiguration,
|
||||
yarn rmadmin -refreshSuperUserGroupsConfiguration
|
||||
使用这两个命令不用重启就能刷新
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.proxyuser.root.hosts</name>
|
||||
<value>localhost</value>
|
||||
<description>本地代理</description>
|
||||
</property>
|
||||
|
||||
<!-- zkfc的配置 -->
|
||||
<property>
|
||||
<name>ha.zookeeper.quorum</name>
|
||||
<value>nn1:2181,nn2:2181,nn3:2181</value>
|
||||
<description>HA使用的zookeeper地址</description>
|
||||
</property>
|
||||
</configuration>
|
||||
```
|
||||
### 修改`hdfs-site.xml`
|
||||
```bash
|
||||
vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml
|
||||
```
|
||||
|
||||
```xml
|
||||
<configuration>
|
||||
<property>
|
||||
<name>dfs.namenode.name.dir</name>
|
||||
<value>/data/namenode</value>
|
||||
<description>namenode本地文件存放地址</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.nameservices</name>
|
||||
<value>ns1</value>
|
||||
<description>提供服务的NS逻辑名称,与core-site.xml里的对应</description>
|
||||
</property>
|
||||
|
||||
<!-- namenode的配置 -->
|
||||
<!-- 主要的 -->
|
||||
<property>
|
||||
<name>dfs.ha.namenodes.ns1</name>
|
||||
<value>nn1,nn2,nn3</value>
|
||||
<description>列出该逻辑名称下的NameNode逻辑名称</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.rpc-address.ns1.nn1</name>
|
||||
<value>nn1:9000</value>
|
||||
<description>指定NameNode的RPC位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.http-address.ns1.nn1</name>
|
||||
<value>nn1:50070</value>
|
||||
<description>指定NameNode的Web Server位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.rpc-address.ns1.nn2</name>
|
||||
<value>nn2:9000</value>
|
||||
<description>指定NameNode的RPC位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.http-address.ns1.nn2</name>
|
||||
<value>nn2:50070</value>
|
||||
<description>指定NameNode的Web Server位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.rpc-address.ns1.nn3</name>
|
||||
<value>nn3:9000</value>
|
||||
<description>指定NameNode的RPC位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.http-address.ns1.nn3</name>
|
||||
<value>nn3:50070</value>
|
||||
<description>指定NameNode的Web Server位置</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.handler.count</name>
|
||||
<value>77</value>
|
||||
<description>namenode的工作线程数</description>
|
||||
</property>
|
||||
|
||||
<!-- journaldata配置,使得其他两个namenode同步第一个namenode数据 -->
|
||||
<property>
|
||||
<name>dfs.namenode.shared.edits.dir</name>
|
||||
<value>qjournal://nn1:8485;nn2:8485;nn3:8485/ns1</value>
|
||||
<description>指定用于HA存放edits的共享存储,通常是namenode的所在机器</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.journalnode.edits.dir</name>
|
||||
<value>/data/journaldata/</value>
|
||||
<description>journaldata服务存放文件的地址</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.connect.max.retries</name>
|
||||
<value>10</value>
|
||||
<description>namenode和journalnode的链接重试次数10次</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.connect.retry.interval</name>
|
||||
<value>10000</value>
|
||||
<description>重试的间隔时间10s</description>
|
||||
</property>
|
||||
|
||||
<!-- zkfc的配置 -->
|
||||
<property>
|
||||
<name>dfs.ha.fencing.methods</name>
|
||||
<value>sshfence</value>
|
||||
<description>指定HA做隔离的方法,缺省是ssh,可设为shell,稍后详述</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.ha.fencing.ssh.private-key-files</name>
|
||||
<value>/home/hadoop/.ssh/id_rsa</value>
|
||||
<description>杀死命令脚本的免密配置秘钥</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.client.failover.proxy.provider.ns1</name>
|
||||
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
|
||||
<description>指定客户端用于HA切换的代理类,不同的NS可以用不同的代理类。以上示例为Hadoop 2.0自带的缺省代理类</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.client.failover.proxy.provider.auto-ha</name>
|
||||
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.ha.automatic-failover.enabled</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<!-- datanode配置 -->
|
||||
<property>
|
||||
<name>dfs.datanode.data.dir</name>
|
||||
<value>/data/datanode</value>
|
||||
<description>datanode本地文件存放地址</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.replication</name>
|
||||
<value>3</value>
|
||||
<description>文件副本数</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
|
||||
<value>false</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.client.use.datanode.hostname</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.datanode.use.datanode.hostname</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</configuration>
|
||||
```
|
||||
### 修改`hadoop-env.sh`
|
||||
```bash
|
||||
vim /usr/local/hadoop/etc/hadoop/hadoop-env.sh
|
||||
```
|
||||
|
||||
```bash
|
||||
# 添加这两行
|
||||
source /etc/profile
|
||||
export HADOOP_HEAPSIZE_MAX=512
|
||||
```
|
||||
### 分发这些配置文件
|
||||
```bash
|
||||
scp_all.sh /usr/local/hadoop/etc/hadoop/core-site.xml /usr/local/hadoop/etc/hadoop/
|
||||
scp_all.sh /usr/local/hadoop/etc/hadoop/hdfs-site.xml /usr/local/hadoop/etc/hadoop/
|
||||
scp_all.sh /usr/local/hadoop/etc/hadoop/hadoop-env.sh /usr/local/hadoop/etc/hadoop/
|
||||
```
|
||||
### 集群初始化
|
||||
- 需要先启动zookeeper集群
|
||||
```bash
|
||||
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh start
|
||||
```
|
||||
|
||||
```bash
|
||||
# 第一次启动先启动journalnode,便于3个namenode的元数据同步
|
||||
ssh_all_zookeeper.sh hadoop-daemon.sh start journalnode
|
||||
```
|
||||
- `zkfc`搭建
|
||||
```bash
|
||||
#在nn1节点执行
|
||||
hdfs zkfc -formatZK
|
||||
#nn1 nn2 nn3启动zkfc
|
||||
hadoop-daemon.sh start zkfc
|
||||
```
|
||||
- 初始化nn1的namenode,在nn1执行
|
||||
```bash
|
||||
hdfs namenode -format
|
||||
hadoop-daemon.sh start namenode
|
||||
```
|
||||
- 格式化第二台和第三台namenode,并且启动namenode,在nn2、nn3执行
|
||||
```bash
|
||||
hdfs namenode -bootstrapStandby
|
||||
hadoop-daemon.sh start namenode
|
||||
```
|
||||
- 修改**workers**
|
||||
```bash
|
||||
vim /usr/local/hadoop/etc/hadoop/workers
|
||||
```
|
||||
修改为
|
||||
```text
|
||||
s1
|
||||
s2
|
||||
s3
|
||||
```
|
||||
分发给其他机器
|
||||
```bash
|
||||
scp_all.sh /usr/local/hadoop/etc/hadoop/workers /usr/local/hadoop/etc/hadoop
|
||||
```
|
||||
- 启动datanode节点,在s1、s2、s3执行
|
||||
|
||||
```bash
|
||||
#启动各个节点的datanode
|
||||
hadoop-daemons.sh start datanode
|
||||
```
|
||||
### 集群启动
|
||||
```bash
|
||||
start-dfs.sh
|
||||
```
|
||||
114
source/_posts/bigdata/hadoop/zookeper.md
Normal file
114
source/_posts/bigdata/hadoop/zookeper.md
Normal file
@@ -0,0 +1,114 @@
|
||||
---
|
||||
title: Hadoop集群Zookeeper配置
|
||||
top_img: /img/site01.jpg
|
||||
top_img_height: 800px
|
||||
abbrlink: 61251
|
||||
date: 2024-09-11 22:45:40
|
||||
---
|
||||
|
||||
### Zookeeper脚本配置
|
||||
- 拷贝ips
|
||||
```bash
|
||||
cp ips ips_zookeeper
|
||||
```
|
||||
修改为
|
||||
```bash
|
||||
nn1
|
||||
nn2
|
||||
nn3
|
||||
```
|
||||
- 拷贝三个脚本
|
||||
```bash
|
||||
cp scp_all.sh scp_all_zookeeper.sh
|
||||
cp ssh_all.sh ssh_all_zookeeper.sh
|
||||
cp ssh_root.sh ssh_root_zookeeper.sh
|
||||
```
|
||||
- 修改脚本
|
||||
```shell
|
||||
vim scp_all_zookeeper.sh
|
||||
vim ssh_all_zookeeper.sh
|
||||
vim ssh_root_zookeeper.sh
|
||||
```
|
||||
|
||||
将三个脚本中的ips改为ips_zookeeper
|
||||
### Zookeeper安装
|
||||
- 上传到`/tmp`目录下,解压
|
||||
```bash
|
||||
sudo tar -zxvf /tmp/zookeeper-3.4.8.tar.gz -C /usr/local/
|
||||
scp -r /usr/local/zookeeper-3.4.8/ root@nn2:/usr/local/
|
||||
scp -r /usr/local/zookeeper-3.4.8/ root@nn3:/usr/local/
|
||||
ssh_root_zookeeper.sh chown -R hadoop:hadoop /usr/local/zookeeper-3.4.8
|
||||
ssh_root_zookeeper.sh ln -s /usr/local/zookeeper-3.4.8/ /usr/local/zookeeper
|
||||
```
|
||||
### Zookeeper配置
|
||||
- zoo.cfg配置
|
||||
```bash
|
||||
cd /usr/local/zookeeper/conf/
|
||||
cp zoo_sample.cfg zoo.cfg
|
||||
```
|
||||
然后`vim zoo.cfg`,修改如下:
|
||||
```properties
|
||||
# 修改dataDir
|
||||
dataDir=/data/zookeeper
|
||||
# 添加以下内容
|
||||
server.1=nn1:2888:3888
|
||||
server.2=nn2:2888:3888
|
||||
server.3=nn3:2888:3888
|
||||
```
|
||||
分发给nn2、nn3
|
||||
`scp_all_zookeeper.sh /usr/local/zookeeper/conf/zoo.cfg /usr/local/zookeeper/conf/`
|
||||
|
||||
- `zkEnv.sh`配置
|
||||
`vim /usr/local/zookeeper/bin/zkEnv.sh`
|
||||

|
||||
分发到nn2、nn3
|
||||
```bash
|
||||
scp_all_zookeeper.sh /usr/local/zookeeper/bin/zkEnv.sh /usr/local/zookeeper/bin/
|
||||
```
|
||||
- 创建zookeeper数据目录
|
||||
```bash
|
||||
ssh_root_zookeeper.sh mkdir -p /data/zookeeper
|
||||
ssh_root_zookeeper.sh chown -R hadoop:hadoop /data
|
||||
```
|
||||
- 创建myid文件
|
||||
```bash
|
||||
ssh nn1 'echo "1" > /data/zookeeper/myid'
|
||||
ssh nn2 'echo "2" > /data/zookeeper/myid'
|
||||
ssh nn3 'echo "3" > /data/zookeeper/myid'
|
||||
```
|
||||
- 配置Zookeeper环境变量
|
||||
```bash
|
||||
# 在其他所有主机也执行
|
||||
sudo chown -R hadoop:hadoop /etc/profile.d/myEnv.sh
|
||||
```
|
||||
|
||||
```bash
|
||||
echo 'export ZOOKEEPER_HOME=/usr/local/zookeeper' >> /etc/profile.d/myEnv.sh
|
||||
echo 'export PATH=$PATH:$ZOOKEEPER_HOME/bin' >> /etc/profile.d/myEnv.sh
|
||||
```
|
||||
|
||||
```bash
|
||||
# 分发到nn2、nn3
|
||||
scp_all_zookeeper.sh /etc/profile.d/myEnv.sh /etc/profile.d/
|
||||
# source 环境变量
|
||||
ssh_all_zookeeper.sh source /etc/profile
|
||||
```
|
||||
### Zookeeper的命令
|
||||
```bash
|
||||
#启动zk服务
|
||||
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh start
|
||||
#查看每个机器ZK运行的状态
|
||||
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh status
|
||||
#整体停止服务
|
||||
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh stop
|
||||
#重启zk服务
|
||||
ssh_all_zookeeper.sh /usr/local/zookeeper/bin/zkServer.sh restart
|
||||
```
|
||||
|
||||
```shell
|
||||
#启动zookeeper客户端,并连接zookeeper集群
|
||||
/usr/local/zookeeper/bin/zkCli.sh -server nn1:2181
|
||||
# 可以简化为:
|
||||
zkCli.sh
|
||||
```
|
||||
|
||||
BIN
source/img/000001.png
Normal file
BIN
source/img/000001.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 152 KiB |
Reference in New Issue
Block a user