Hadoop 3.1.0 Installation Guide (Ubuntu 16.04)
1. System Preparation
1.1 Update the system and install required tools
sudo apt-get update && sudo apt-get install -y ssh rsync wget vim
1.2 Install Java 8 (recommended for Hadoop 3.1.0)
sudo apt-get install -y openjdk-8-jdk && java -version
2. Configure Passwordless SSH
ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa && cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys && chmod 600 ~/.ssh/authorized_keys
Verify the passwordless login:
ssh localhost
On the first connection, type yes; after that you should be able to log in without a password. Type exit to disconnect.
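If you want the setup to be fully non-interactive (e.g. for a scripted install), the host key can be pre-accepted so the first-time yes prompt never appears; a small sketch:
ssh-keyscan -H localhost >> ~/.ssh/known_hosts
ssh -o BatchMode=yes localhost exit && echo 'passwordless SSH OK'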
3. Download and Install Hadoop 3.1.0
3.1 Download Hadoop
cd ~ && wget --no-check-certificate https://archive.apache.org/dist/hadoop/common/hadoop-3.1.0/hadoop-3.1.0.tar.gz
If the download is slow, a mirror in mainland China can be used (note that non-archive mirrors usually host only current releases, so this link may no longer work; fall back to archive.apache.org if it 404s):
cd ~ && wget --no-check-certificate https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-3.1.0/hadoop-3.1.0.tar.gz
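Optionally, verify the archive before extracting. Apache publishes checksum files next to each release on archive.apache.org (the exact file format varies by release), so the simplest sketch is to hash locally and compare by eye:
sha256sum hadoop-3.1.0.tar.gz
# compare against the checksum published under
# https://archive.apache.org/dist/hadoop/common/hadoop-3.1.0/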
3.2 Extract and move to the installation directory
tar -xzf hadoop-3.1.0.tar.gz && sudo mv hadoop-3.1.0 /usr/local/hadoop && sudo chown -R $USER:$USER /usr/local/hadoop
4. Configure Environment Variables
4.1 Edit ~/.bashrc
cat >> ~/.bashrc << 'EOF'
# Java Environment
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export PATH=$PATH:$JAVA_HOME/bin
# Hadoop Environment
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
EOF
4.2 Apply the environment variables
source ~/.bashrc && hadoop version
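If the variables are in effect, the first line of the hadoop version output should read:
Hadoop 3.1.0
followed by build metadata (source repository, compile date, and checksum).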
5. Configure Hadoop
5.1 Configure hadoop-env.sh
echo 'export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64' >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh
5.2 Configure core-site.xml
cat > /usr/local/hadoop/etc/hadoop/core-site.xml << 'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop/tmp</value>
</property>
</configuration>
EOF
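To confirm the value is being picked up, Hadoop's getconf tool can query the effective configuration:
hdfs getconf -confKey fs.defaultFS
# expected output: hdfs://localhost:9000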
5.3 Configure hdfs-site.xml
cat > /usr/local/hadoop/etc/hadoop/hdfs-site.xml << 'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/usr/local/hadoop/hdfs/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/usr/local/hadoop/hdfs/datanode</value>
</property>
</configuration>
EOF
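The same kind of check applies here (dfs.replication is set to 1 because a single-node cluster has only one DataNode on which to place each block):
hdfs getconf -confKey dfs.replication
# expected output: 1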
5.4 Configure mapred-site.xml
The mapreduce.application.classpath property matters in Hadoop 3.x: without it, MapReduce jobs commonly fail with a "Could not find or load main class org.apache.hadoop.mapreduce.v2.app.MRAppMaster" error.
cat > /usr/local/hadoop/etc/hadoop/mapred-site.xml << 'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
</property>
</configuration>
EOF
5.5 Configure yarn-site.xml
cat > /usr/local/hadoop/etc/hadoop/yarn-site.xml << 'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
</configuration>
EOF
6. Create Required Directories
mkdir -p /usr/local/hadoop/tmp /usr/local/hadoop/hdfs/namenode /usr/local/hadoop/hdfs/datanode
7. Format the NameNode
hdfs namenode -format
Note: this only needs to be run once, on first installation. If asked for confirmation, type Y.
8. Start Hadoop
8.1 Start HDFS
start-dfs.sh
8.2 Start YARN
start-yarn.sh
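Once YARN is up, you can list the NodeManagers that have registered with the ResourceManager (the node may take a few seconds to appear):
yarn node -list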
8.3 Start all services with one command (alternative to 8.1 + 8.2)
In Hadoop 3.x this script warns that starting everything at once is not a recommended production configuration, but it is fine for a single-node test install.
start-all.sh
9. Verify the Installation
9.1 Check the Java processes
jps
You should see processes similar to the following (jps prefixes each with its process ID):
NameNode
DataNode
SecondaryNameNode
ResourceManager
NodeManager
Jps
9.2 Access the Web UIs
- HDFS NameNode: http://localhost:9870
- YARN ResourceManager: http://localhost:8088
If you browse from another machine, replace localhost with the server's IP address (e.g. 192.168.157.128).
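As a quick headless check that both UIs are serving (this assumes curl is installed; the ResourceManager may answer with a redirect rather than 200):
curl -sI http://localhost:9870 | head -n 1
curl -sI http://localhost:8088 | head -n 1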
10. Test HDFS
hdfs dfs -mkdir -p /user/$USER && hdfs dfs -ls /
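Beyond the directory listing above, an end-to-end check exercises both HDFS and the YARN-backed MapReduce path. A minimal sketch; the examples jar ships with the release, and the path below assumes the standard 3.1.0 tarball layout:
# round-trip a file through HDFS
echo "hello hadoop" > /tmp/hello.txt
hdfs dfs -put /tmp/hello.txt /user/$USER/
hdfs dfs -cat /user/$USER/hello.txt
# run the bundled pi estimator (2 map tasks, 10 samples each)
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.1.0.jar pi 2 10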
11. Stop Hadoop
stop-all.sh
Or stop the services individually:
stop-yarn.sh && stop-dfs.sh
12. Troubleshooting
12.1 JAVA_HOME is not set
Confirm that the Java path is correct:
which java && readlink -f $(which java)
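It is also worth confirming that hadoop-env.sh contains the export added in section 5.1:
grep '^export JAVA_HOME' /usr/local/hadoop/etc/hadoop/hadoop-env.sh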
12.2 DataNode fails to start
Delete the data directories and re-format (warning: this erases everything stored in HDFS):
rm -rf /usr/local/hadoop/hdfs/datanode/* /usr/local/hadoop/hdfs/namenode/* /usr/local/hadoop/tmp/* && hdfs namenode -format
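The usual root cause is a clusterID mismatch left behind by a repeated format. Before wiping anything, the mismatch can be confirmed by comparing the two VERSION files (paths assume the directories configured in 5.3):
grep clusterID /usr/local/hadoop/hdfs/namenode/current/VERSION /usr/local/hadoop/hdfs/datanode/current/VERSION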
12.3 Permission problems
sudo chown -R $USER:$USER /usr/local/hadoop
Quick Install Command Summary
Below is a consolidated list of all the installation commands, to be executed in order:
# 1. Update the system and install dependencies
sudo apt-get update && sudo apt-get install -y ssh rsync wget vim openjdk-8-jdk
# 2. Configure passwordless SSH
ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa && cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys && chmod 600 ~/.ssh/authorized_keys
# 3. Download and install Hadoop
cd ~ && wget --no-check-certificate https://archive.apache.org/dist/hadoop/common/hadoop-3.1.0/hadoop-3.1.0.tar.gz && tar -xzf hadoop-3.1.0.tar.gz && sudo mv hadoop-3.1.0 /usr/local/hadoop && sudo chown -R $USER:$USER /usr/local/hadoop
# 4. Configure environment variables
cat >> ~/.bashrc << 'EOF'
# Java Environment
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export PATH=$PATH:$JAVA_HOME/bin
# Hadoop Environment
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
EOF
source ~/.bashrc
# 5. Configure Hadoop files (also write the XML configurations from sections 5.2-5.5, omitted here for brevity)
echo 'export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64' >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh
# 6. Format and start
mkdir -p /usr/local/hadoop/tmp /usr/local/hadoop/hdfs/namenode /usr/local/hadoop/hdfs/datanode && hdfs namenode -format && start-all.sh && jps
Installation complete! 🎉