#title Hadoop Install
[[TableOfContents]]
Work in progress..
{{{
https://dwbi.org/etl/bigdata/183-setup-hadoop-cluster
su
vi /etc/sudoers
add the following line in the middle of the file:
hadoop All=(ALL) ALL
then save with :wq! and exit.
==== Install openSSH ====
yum install openssh-server openssh-clients openssh-askpass
systemctl restart network
vi /etc/ssh/sshd_config
uncomment the Port 22 line and save
systemctl start sshd.service
firewall-cmd --zone=public --add-port=22/tcp --permanent
firewall-cmd --reload
systemctl restart firewalld.service
su
cd /root
ssh-keygen -t rsa -P ""
cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys
chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys
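# Optional sanity check: the key just added to authorized_keys should allow
# password-less ssh to this machine (it should log in without asking for a password).
ssh localhost
exit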
cd /usr/local/
wget http://mirror.apache-kr.org/hadoop/common/hadoop-2.8.3/hadoop-2.8.3.tar.gz
tar -xzvf hadoop-2.8.3.tar.gz >> /dev/null
mv hadoop-2.8.3 /usr/local/hadoop
mkdir -p /usr/local/hadoop_work/hdfs/namenode
mkdir -p /usr/local/hadoop_work/hdfs/namesecondary
cd /usr/lib/jvm/jre
cd $HOME
vi .bashrc
export JAVA_HOME=/usr/lib/jvm/jre
export PATH=$PATH:$JAVA_HOME/bin
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
export CLASSPATH=$CLASSPATH:/usr/local/hadoop/lib/*:.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.security.egd=file:/dev/../dev/urandom"
source ~/.bashrc
vi /usr/local/hadoop/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/jre
export HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_PREFIX}/lib/native
export HADOOP_OPTS="${HADOOP_OPTS} -Djava.library.path=$HADOOP_PREFIX/lib"
cd $HADOOP_HOME/etc/hadoop
hadoop version
vi core-site.xml
<property>
  <name>fs.defaultFS</name>
  <value>hdfs://rm:8020/</value>
</property>
<property>
  <name>io.file.buffer.size</name>
  <value>131072</value>
</property>
vi hdfs-site.xml
<property>
  <name>dfs.namenode.name.dir</name>
  <value>file:/usr/local/hadoop_work/hdfs/namenode</value>
</property>
<property>
  <name>dfs.datanode.data.dir</name>
  <value>file:/usr/local/hadoop_work/hdfs/datanode</value>
</property>
<property>
  <name>dfs.namenode.checkpoint.dir</name>
  <value>file:/usr/local/hadoop_work/hdfs/namesecondary</value>
</property>
<property>
  <name>dfs.replication</name>
  <value>2</value>
</property>
<property>
  <name>dfs.block.size</name>
  <value>134217728</value>
</property>
cp mapred-site.xml.template mapred-site.xml
vi mapred-site.xml
<property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
</property>
<property>
  <name>mapreduce.jobhistory.address</name>
  <value>NameNode:10020</value>
</property>
<property>
  <name>mapreduce.jobhistory.webapp.address</name>
  <value>NameNode:19888</value>
</property>
<property>
  <name>yarn.app.mapreduce.am.staging-dir</name>
  <value>/user/app</value>
</property>
<property>
  <name>mapred.child.java.opts</name>
  <value>-Djava.security.egd=file:/dev/../dev/urandom</value>
</property>
vi yarn-site.xml
<property>
  <name>yarn.resourcemanager.hostname</name>
  <value>NameNode</value>
</property>
<property>
  <name>yarn.resourcemanager.bind-host</name>
  <value>0.0.0.0</value>
</property>
<property>
  <name>yarn.nodemanager.bind-host</name>
  <value>0.0.0.0</value>
</property>
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>
<property>
  <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
  <value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
  <name>yarn.log-aggregation-enable</name>
  <value>true</value>
</property>
<property>
  <name>yarn.nodemanager.local-dirs</name>
  <value>file:/usr/local/hadoop_work/yarn/local</value>
</property>
<property>
  <name>yarn.nodemanager.log-dirs</name>
  <value>file:/usr/local/hadoop_work/yarn/log</value>
</property>
<property>
  <name>yarn.nodemanager.remote-app-log-dir</name>
  <value>hdfs://NameNode:8020/var/log/hadoop-yarn/apps</value>
</property>
Note: in this cluster the resource manager server name is rm (use rm where the settings above say NameNode).
vi masters
rm
sudo vi /etc/hosts
192.168.56.101 rm
192.168.56.102 nm1
192.168.56.103 nm2
su
cd /root
cat ~/.ssh/id_rsa.pub | ssh root@nm1 "cat >> .ssh/authorized_keys"
cat ~/.ssh/id_rsa.pub | ssh root@nm2 "cat >> .ssh/authorized_keys"
cd /usr/local
scp -r hadoop nm1:/usr/local
scp -r hadoop nm2:/usr/local
ssh nm1
mkdir -p /usr/local/hadoop_work/hdfs/datanode
mkdir -p /usr/local/hadoop_work/yarn/local
mkdir -p /usr/local/hadoop_work/yarn/log
exit
ssh nm2
mkdir -p /usr/local/hadoop_work/hdfs/datanode
mkdir -p /usr/local/hadoop_work/yarn/local
mkdir -p /usr/local/hadoop_work/yarn/log
exit
$HADOOP_HOME/sbin/start-dfs.sh
$HADOOP_HOME/sbin/start-yarn.sh
$HADOOP_HOME/sbin/start-all.sh
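# Once the daemons are up, jps on each host is a quick check (assuming a full JDK
# is installed so jps is available). For this layout you would expect NameNode,
# SecondaryNameNode and ResourceManager on rm, and DataNode and NodeManager on nm1/nm2.
jps
ssh nm1 jps
ssh nm2 jps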
WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
This warning is said to occur when 32-bit native Hadoop libraries are used on 64-bit Linux.
Adding the following to hadoop-env.sh or .bashrc (either one) resolves it.
Change the part that originally reads $HADOOP_HOME/lib to $HADOOP_HOME/lib/native.
That is, change export HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$HADOOP_PREFIX/lib" to export HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$HADOOP_PREFIX/lib/native".
Source: http://crmn.tistory.com/7
hadoop namenode -format
hadoop dfsadmin -report
hadoop dfsadmin -safemode leave
su
cd $HADOOP_HOME
mkdir hive
cd hive
wget http://ftp.daumkakao.com/apache/hive/hive-2.3.2/apache-hive-2.3.2-bin.tar.gz
tar xzf apache-hive-2.3.2-bin.tar.gz
vi $HOME/.bashrc
export HIVE_HOME=$HADOOP_HOME/hive/apache-hive-2.3.2-bin
export PATH=$HIVE_HOME/bin:$PATH
hive> show tables;
FAILED: SemanticException org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
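# A common cause of this error on Hive 2.x is an uninitialized metastore.
# One possible fix (a sketch, assuming the default embedded Derby metastore)
# is to initialize the schema before starting hive:
$HIVE_HOME/bin/schematool -dbType derby -initSchema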
wget http://apache.mirror.cdnetworks.com//db/derby/db-derby-10.14.1.0/db-derby-10.14.1.0-bin.tar.gz
mkdir /usr/local/derby
cp db-derby-10.14.1.0-bin.tar.gz /usr/local/derby
cd /usr/local/derby
tar -zxvf db-derby-10.14.1.0-bin.tar.gz
vi $HOME/.bashrc
export DERBY_HOME=/usr/local/derby/db-derby-10.14.1.0-bin
export PATH=$DERBY_HOME/bin:$PATH
su
cd $HOME
rpm -ivh https://dev.mysql.com/get/mysql57-community-release-el7-11.noarch.rpm
sudo yum install mysql-community-server
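# After installing, start the service; MySQL 5.7 writes a temporary root password
# to /var/log/mysqld.log on first start (a sketch of the usual follow-up):
systemctl start mysqld
grep 'temporary password' /var/log/mysqld.log
mysql_secure_installation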
}}}
* Assumes Ubuntu is installed
* nodes
* name node : 192.168.0.2
* secondaryNameNode: 192.168.0.3
* data node : 192.168.0.4, 192.168.0.5, 192.168.0.6, 192.168.0.7
==== Create a user ====
{{{
sudo groupadd -g 10000 hadoop
sudo useradd -g hadoop -m huser -s /bin/bash
sudo passwd huser
}}}
* Note: enabling the root account *
{{{
sudo passwd root
}}}
Set a password for root and reboot; the root account can then be used.
==== Install Java ====
Check whether Java is installed:
{{{
java -version
}}}
If Java is not installed, install it:
{{{
sudo apt-get install openjdk-6-jdk
}}}
''Check and install on both the name node and all data nodes.''
==== Install ssh ====
{{{
sudo apt-get install ssh
sudo /etc/init.d/ssh restart
}}}
''Check and install on both the name node and all data nodes.''
==== Set up the hosts file ====
Edit /etc/hosts on the name node:
{{{
192.168.0.2 nameNode
192.168.0.3 secondaryNameNode
192.168.0.4 dataNode01
192.168.0.5 dataNode02
192.168.0.6 dataNode03
192.168.0.7 dataNode04
}}}
''Caution'' - do not copy an /etc/hosts entry like 127.0.0.1 nameNode to the data nodes as-is. The data nodes must be able to resolve nameNode to its real address.
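For example, the distribution may have added a loopback alias for the hostname; a line like the first one below must not end up on the data nodes, which need the real address (a sketch using the addresses above):
{{{
# wrong on a data node:
# 127.0.0.1 nameNode
# right - nameNode resolves to its real LAN address:
192.168.0.2 nameNode
}}}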
==== Copy the ssh key ====
Copy the key generated on the name node to secondaryNameNode and dataNode01 through dataNode04.
{{{
su huser
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
cat ~/.ssh/id_rsa.pub | ssh huser@secondaryNameNode "cat >> .ssh/authorized_keys"
cat ~/.ssh/id_rsa.pub | ssh huser@dataNode01 "cat >> .ssh/authorized_keys"
cat ~/.ssh/id_rsa.pub | ssh huser@dataNode02 "cat >> .ssh/authorized_keys"
cat ~/.ssh/id_rsa.pub | ssh huser@dataNode03 "cat >> .ssh/authorized_keys"
cat ~/.ssh/id_rsa.pub | ssh huser@dataNode04 "cat >> .ssh/authorized_keys"
}}}
Verify:
{{{
su huser
ssh secondaryNameNode
exit
ssh dataNode01
exit
ssh dataNode02
exit
ssh dataNode03
exit
ssh dataNode04
exit
}}}
==== Install hadoop ====
{{{
su
cd /usr/local
mkdir hadoop
cd hadoop
wget http://ftp.daum.net/apache/hadoop/common/hadoop-1.0.1/hadoop-1.0.1.tar.gz
tar zxvf hadoop-1.0.1.tar.gz
chown -R huser:hadoop /usr/local/hadoop/hadoop-1.0.1
}}}
==== Configure the user environment ====
{{{
su huser
cd $HOME
vi .profile
}}}
Add the following:
{{{
export JAVA_HOME=/usr/lib/jvm/java-6-openjdk
export HADOOP_HOME=/usr/local/hadoop/hadoop-1.0.1
export HADOOP_CONF=$HADOOP_HOME/conf
export HADOOP_PATH=$HADOOP_HOME/bin
export HIVE_INSTALL=/usr/local/hadoop/hive/hive-0.8.1-bin
export HIVE_PATH=$HIVE_INSTALL/bin
export PIG_INSTALL=/usr/local/hadoop/pig/pig-0.9.2
export PIG_PATH=$PIG_INSTALL/bin
export PATH=$HIVE_PATH:$HADOOP_PATH:$PIG_PATH:$PATH
}}}
''On 32-bit systems this will be export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-i386.''
{{{
source $HOME/.profile
}}}
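A quick check that the variables took effect (assuming the paths above match the actual install):
{{{
echo $JAVA_HOME
echo $HADOOP_HOME
hadoop version
}}}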
==== Configure hadoop ====
* The usual approach is to edit the configuration files on the name node and then copy them to the data nodes.
* If every server in the cluster has the same environment (number of CPUs, memory size, JDK version, directories, disks, and so on), configuration and deployment are easy; otherwise the option values have to differ per server.
Edit as huser.
Edit conf/hadoop-env.sh:
{{{
cd $HADOOP_HOME/conf
vi hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/java-6-openjdk
}}}
''On 32-bit systems this will be export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-i386.''
Edit conf/core-site.xml:
{{{
<property>
  <name>fs.default.name</name>
  <value>hdfs://nameNode</value>
</property>
<property>
  <name>hadoop.tmp.dir</name>
  <value>/home/huser/dfs/temp</value>
</property>
<property>
  <name>io.file.buffer.size</name>
  <value>131072</value>
</property>
}}}
Edit conf/hdfs-site.xml:
{{{
<property>
  <name>dfs.replication</name>
  <value>2</value>
</property>
<property>
  <name>dfs.name.dir</name>
  <value>/home/huser/dfs/name</value>
</property>
<property>
  <name>dfs.data.dir</name>
  <value>/home/huser/dfs/data</value>
</property>
<property>
  <name>dfs.datanode.socket.write.timeout</name>
  <value>0</value>
</property>
}}}
Edit conf/mapred-site.xml:
{{{
<property>
  <name>mapred.job.tracker</name>
  <value>nameNode:9001</value>
</property>
<property>
  <name>mapred.local.dir</name>
  <value>/home/huser/dfs</value>
</property>
<property>
  <name>mapred.child.java.opts</name>
  <value>-Xmx200m</value>
</property>
<property>
  <name>mapred.map.tasks</name>
  <value>4</value>
</property>
<property>
  <name>mapred.reduce.tasks</name>
  <value>2</value>
</property>
}}}
Edit conf/masters:
{{{
secondaryNameNode
}}}
Edit conf/slaves:
{{{
secondaryNameNode
dataNode01
dataNode02
dataNode03
dataNode04
}}}
secondaryNameNode will be removed from this list later.
==== Deployment ====
On nameNode..
Distribute to secondaryNameNode, dataNode01, dataNode02, dataNode03, and dataNode04.
{{{
su
cd /usr/local/hadoop
sudo tar cf hadoop-1.0.1.tar hadoop-1.0.1
su huser
cd /usr/local/hadoop
scp hadoop-1.0.1.tar huser@secondaryNameNode:/home/huser/hadoop-1.0.1.tar
scp hadoop-1.0.1.tar huser@dataNode01:/home/huser/hadoop-1.0.1.tar
scp hadoop-1.0.1.tar huser@dataNode02:/home/huser/hadoop-1.0.1.tar
scp hadoop-1.0.1.tar huser@dataNode03:/home/huser/hadoop-1.0.1.tar
scp hadoop-1.0.1.tar huser@dataNode04:/home/huser/hadoop-1.0.1.tar
}}}
On secondaryNameNode and dataNode01 through dataNode04, create the /usr/local/hadoop directory and assign ownership:
{{{
su
mkdir /usr/local/hadoop
chown -R huser:hadoop /usr/local/hadoop
}}}
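If root can ssh to the other nodes, the same two commands can be run from nameNode in one pass instead of logging in to each machine (a sketch; the host names are the ones defined in /etc/hosts above):
{{{
for h in secondaryNameNode dataNode01 dataNode02 dataNode03 dataNode04; do
  ssh root@$h "mkdir -p /usr/local/hadoop && chown -R huser:hadoop /usr/local/hadoop"
done
}}}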
As huser, extract the tar and install on the data nodes (slaves):
{{{
slaves.sh tar xf /home/huser/hadoop-1.0.1.tar --directory=/usr/local/hadoop
}}}
Open /usr/local/hadoop/hadoop-1.0.1/conf/slaves and remove secondaryNameNode. (It was included only to make deployment easier.)
{{{
dataNode01
dataNode02
dataNode03
dataNode04
}}}
==== Format the name node ====
{{{
bin/hadoop namenode -format
}}}
==== Verify the installation ====
Start Hadoop.
{{{
cd /usr/local/hadoop/hadoop-1.0.1
bin/start-dfs.sh
bin/start-mapred.sh
}}}
or
{{{
bin/start-all.sh
}}}
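The Java daemons can also be checked with jps on each node (assuming a JDK is installed); the name node should show NameNode and JobTracker, the secondary name node SecondaryNameNode, and each data node DataNode and TaskTracker:
{{{
jps
}}}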
You can also verify with the hadoop dfsadmin -report command. Below is an example run. Note that if you run this command right after start-all.sh, the results may not appear immediately; it takes a little while for all the data nodes to come up.
{{{
huser@nameNode:/usr/local/hadoop/conf$ hadoop dfsadmin -report
Warning: $HADOOP_HOME is deprecated.
Safe mode is ON
Configured Capacity: 61029343232 (56.84 GB)
Present Capacity: 50454859776 (46.99 GB)
DFS Remaining: 50454802432 (46.99 GB)
DFS Used: 57344 (56 KB)
DFS Used%: 0%
Under replicated blocks: 0
Blocks with corrupt replicas: 0
Missing blocks: 0
-------------------------------------------------
Datanodes available: 2 (2 total, 0 dead)
Name: 192.168.136.101:50010
Decommission Status : Normal
Configured Capacity: 30514671616 (28.42 GB)
DFS Used: 28672 (28 KB)
Non DFS Used: 5287256064 (4.92 GB)
DFS Remaining: 25227386880(23.49 GB)
DFS Used%: 0%
DFS Remaining%: 82.67%
Last contact: Tue Aug 06 17:14:57 KST 2013
Name: 192.168.136.102:50010
Decommission Status : Normal
Configured Capacity: 30514671616 (28.42 GB)
DFS Used: 28672 (28 KB)
Non DFS Used: 5287227392 (4.92 GB)
DFS Remaining: 25227415552(23.49 GB)
DFS Used%: 0%
DFS Remaining%: 82.67%
Last contact: Tue Aug 06 17:14:57 KST 2013
huser@nameNode:/usr/local/hadoop/conf$
}}}
In a browser..
Check the namenode and datanodes:
http://namenode:50070
Check the jobtracker and tasktrackers:
http://namenode:50030
==== hive ====
{{{
su
cd /usr/local/hadoop
mkdir hive
cd hive
wget http://ftp.daum.net/apache/hive/hive-0.8.1/hive-0.8.1.tar.gz
tar xzf hive-0.8.1.tar.gz
sudo chown -R huser:hadoop /usr/local/hadoop/hive/hive-0.8.1-bin
exit
}}}
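The statements below are run from the Hive CLI as huser (assuming $HIVE_INSTALL/bin from .profile is on the PATH):
{{{
su huser
hive
}}}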
{{{
SET mapred.job.tracker=local;
SET mapred.reduce.tasks=8;

create table sample(year string, temperature int, quality int)
row format delimited
fields terminated by '\t';

load data local inpath '/home/huser/sample/input/ncdc/micro-tab/sample.txt'
overwrite into table sample;

select year, max(temperature) max_temp
from sample
group by year;
}}}
==== pig ====
{{{
su
cd /usr/local/hadoop
mkdir pig
cd pig
wget http://ftp.daum.net/apache/pig/pig-0.9.2/pig-0.9.2.tar.gz
tar xzf pig-0.9.2.tar.gz
sudo chown -R huser:hadoop /usr/local/hadoop/pig/pig-0.9.2
}}}
{{{
hadoop fs -mkdir ncdc
hadoop fs -put sample.txt ncdc
}}}
{{{
records = LOAD 'ncdc/sample.txt'
    AS (year:chararray, temperature:int, quality:int);
DUMP records;
}}}
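The two hadoop fs commands above run from the shell; the LOAD and DUMP statements run inside Pig's Grunt shell, which can be started like this (a sketch, assuming $PIG_INSTALL/bin from .profile is on the PATH):
{{{
su huser
pig
}}}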