
Hadoop Installation Guide


Environment:

Ubuntu 16.04

Machines:

192.168.1.105 namenode

192.168.1.102 datanode1


0. Configure node information

sudo gedit /etc/hosts

# Add the following entries

192.168.1.105 namenode

192.168.1.102 datanode1


sudo gedit /etc/hostname  # change the machine's hostname

# The changes above take effect after a reboot
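
A quick sanity check is to ping the other node by hostname after editing /etc/hosts, to confirm that name resolution works (hostnames and addresses as configured above):

ping -c 2 datanode1    # run on the namenode; should resolve to 192.168.1.102
ping -c 2 namenode     # run on the datanode; should resolve to 192.168.1.105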


1. Create the hadoop group and hadoop user on Ubuntu

1.1 Create the hadoop group

If you are not logged in as root, sudo is required:

@ubuntu:~$ sudo addgroup hadoop


1.2 Create the hadoop user

@ubuntu:~$ sudo adduser --ingroup hadoop hadoop


1.3 Grant the hadoop user sudo privileges (the same privileges as root)

sudo gedit /etc/sudoers


# Under "User privilege specification", add:

root ALL=(ALL:ALL) ALL

hadoop ALL=(ALL:ALL) ALL
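
As a quick check that the new sudoers entry works, switch to the hadoop user and validate sudo access (a minimal verification sketch):

su hadoop
sudo -v    # should ask for the hadoop user's password and return without an error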


2. Log in to the system as the newly created hadoop user

su hadoop


3. Install SSH

3.1 Install: sudo apt-get install openssh-server

After the installation finishes, start the service.

3.2 Start: sudo /etc/init.d/ssh start

Check that the service started correctly: ps -e | grep ssh


Set up passwordless SSH login

# su hadoop

$ ssh-keygen -t rsa

$ ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop@namenode

$ ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop@datanode1

$ chmod 0600 ~/.ssh/authorized_keys

$ exit

On the namenode:

mkdir -p $HOME/.ssh 
chmod 700 $HOME/.ssh 
ssh-keygen -t rsa -P '' -f $HOME/.ssh/id_rsa
cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys 
chmod 644 $HOME/.ssh/authorized_keys
Copy the public key to the hadoop user's $HOME directory on the slave node:
scp $HOME/.ssh/id_rsa.pub hadoop@192.168.1.102:/home/hadoop/

On the datanode:

cd $HOME
mkdir -p $HOME/.ssh 
chmod 700 $HOME/.ssh
cat id_rsa.pub >>$HOME/.ssh/authorized_keys 
chmod 644 $HOME/.ssh/authorized_keys
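
With the keys distributed, passwordless login can be verified from the namenode (assuming the hostnames configured above):

ssh hadoop@datanode1 hostname    # should print datanode1 without prompting for a password
ssh hadoop@namenode hostname     # the namenode should also reach itself without a password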

4. Install the JDK

$ su

password

Download the 64-bit or 32-bit JDK from the Oracle website (matching your operating system's architecture).

mkdir /usr/lib/jvm

tar -zxf jdk...

# mv jdk1.8.0_101 /usr/lib/jvm

# exit

Add the following environment variables:

export JAVA_HOME=/usr/lib/jvm/jdk1.8.0_101

export PATH=$PATH:$JAVA_HOME/bin
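
To confirm the JDK is visible, reload the file the exports were added to (shown here as /etc/profile, one common choice) or open a new shell:

source /etc/profile    # assuming the exports were placed there
echo $JAVA_HOME        # should print /usr/lib/jvm/jdk1.8.0_101
java -version          # should report version 1.8.0_101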



5. Install Hadoop


mkdir -p /home/hadoop  # the hadoop user's home directory; it usually already exists


sudo tar xzf hadoop-2.7.3.tar.gz


mv hadoop-2.7.3 /home/hadoop

# Rename hadoop-2.7.3 to hadoop

cd /home/hadoop

mv hadoop-2.7.3 hadoop


chmod 777 /home/hadoop/hadoop
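
Before any configuration, a quick way to confirm the unpacked tree is usable is to run the hadoop binary directly:

/home/hadoop/hadoop/bin/hadoop version    # should print Hadoop 2.7.3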



!!!!! Hadoop configuration files


Create the dfs directories under the hadoop directory


cd /home/hadoop/hadoop

hadoop$ mkdir dfs

hadoop$ mkdir dfs/name

hadoop$ mkdir dfs/name/data
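
Equivalently, the three directories can be created with a single command; note that the data directory is nested under the name directory, matching the paths used in hdfs-site.xml below:

mkdir -p /home/hadoop/hadoop/dfs/name/data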


cd /home/hadoop/hadoop/etc/hadoop


sudo gedit core-site.xml

<configuration>



<property>

<name>fs.default.name</name>

<value>hdfs://namenode:9000</value>

</property>

<property>

<name>dfs.permissions</name>

<value>false</value>

</property>


</configuration>


sudo gedit hdfs-site.xml


<configuration>

<property>

<name>dfs.data.dir</name>

<value>file:/home/hadoop/hadoop/dfs/name/data</value>

<final>true</final>

</property>


<property>

<name>dfs.name.dir</name>

<value>file:/home/hadoop/hadoop/dfs/name</value>

<final>true</final>

</property>


<property>

<name>dfs.replication</name>

<value>2</value>

</property>

</configuration>


# mapred-site.xml does not exist by default; create it from the template, then edit it

cp mapred-site.xml.template mapred-site.xml

sudo gedit mapred-site.xml

<configuration>

<property>

<name>mapreduce.framework.name</name>

<value>yarn</value>

</property>

</configuration>

<!--

<configuration>

<property>

<name>mapred.job.tracker</name>

<value>hdfs://namenode:9001</value>

</property>

</configuration>

-->

Configure the YARN file

sudo gedit yarn-site.xml

<configuration>

<!-- Site specific YARN configuration properties -->

<property>

<name>yarn.nodemanager.aux-services</name>

<value>mapreduce_shuffle</value>

</property>

<property>

<description>The address of the applications manager interface in the RM.</description>

<name>yarn.resourcemanager.address</name>

<value>192.168.1.105:8040</value>

</property>


<property>

<description>The address of the scheduler interface.</description>

<name>yarn.resourcemanager.scheduler.address</name>

<value>192.168.1.105:8030</value>

</property>


<property>

<description>The address of the RM web application.</description>

<name>yarn.resourcemanager.webapp.address</name>

<value>192.168.1.105:8088</value>

</property>

 

<property>

<description>The address of the resource tracker interface.</description>

<name>yarn.resourcemanager.resource-tracker.address</name>

<value>192.168.1.105:8025</value>

</property>

</configuration>

sudo gedit slaves

# List the slave nodes

datanode1

sudo gedit masters

# List the namenode

namenode

!!!!! End of Hadoop configuration files



Configure the .bashrc file


sudo gedit ~/.bashrc

#HADOOP VARIABLES START


export JAVA_HOME=/usr/lib/jvm/jdk1.8.0_101


export HADOOP_HOME=/home/hadoop/hadoop


export PATH=$PATH:$HADOOP_HOME/bin


export PATH=$PATH:$HADOOP_HOME/sbin


export HADOOP_MAPRED_HOME=$HADOOP_HOME


export HADOOP_COMMON_HOME=$HADOOP_HOME


export HADOOP_HDFS_HOME=$HADOOP_HOME


export YARN_HOME=$HADOOP_HOME


export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native


export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"


#HADOOP VARIABLES END


Apply the configuration with: source ~/.bashrc
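
After sourcing .bashrc, the Hadoop commands should be on the PATH; a quick check, assuming the variables above:

echo $HADOOP_HOME    # should print /home/hadoop/hadoop
hadoop version       # should print Hadoop 2.7.3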


5.6 Configure the Java environment variable in hadoop-env.sh

sudo gedit /home/hadoop/hadoop/etc/hadoop/hadoop-env.sh

Find the JAVA_HOME variable and change it as follows:

export JAVA_HOME=/usr/lib/jvm/jdk1.8.0_101



7. Install Hadoop on the slave node (datanode1)

# su hadoop

$ cd /home/hadoop

$ scp -r hadoop datanode1:/home/hadoop
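
The slave node also needs the JDK installed and the same .bashrc variables as the namenode. A minimal check that the copy succeeded (hostnames as configured above):

ssh hadoop@datanode1 ls /home/hadoop/hadoop/etc/hadoop    # should list the copied configuration files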



8. Start Hadoop


cd /home/hadoop/hadoop/bin

hadoop namenode -format  # Note: re-formatting after the cluster has already run can leave a stale VERSION file under the datanodes' dfs data directory, so the DataNode fails to start on the slave nodes. To recover, edit VERSION so its layoutVersion matches the namenode's, or delete the VERSION file.
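
If a DataNode refuses to start after a re-format because of the stale VERSION file mentioned above, one possible recovery (assuming the dfs paths configured earlier) is to clear the old data directory on the affected datanode; note that this removes any HDFS blocks stored on that node:

# run on the affected datanode
rm -rf /home/hadoop/hadoop/dfs/name/data/*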


cd /home/hadoop/hadoop/sbin

start-all.sh
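
Once start-all.sh finishes, the cluster can be verified with jps and an HDFS report (a quick check, assuming the hostnames and addresses used above):

jps                         # on the namenode: NameNode, SecondaryNameNode, ResourceManager
ssh hadoop@datanode1 jps    # on the datanode: DataNode, NodeManager
hdfs dfsadmin -report       # should list one live datanode
# ResourceManager web UI: http://192.168.1.105:8088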
