Contents

1 사용자 추가
2 자바 설치
3 ssh 설치
4 hosts 설정
5 ssh 키 복사
6 hadoop 설치
7 환경설정
8 hadoop 환경설정
9 배포
10 name node 포맷
11 실행
12 hive
13 pig


譴..
https://dwbi.org/etl/bigdata/183-setup-hadoop-cluster


su
vi /etc/sudoers

譴螳..

hadoop ALL=(ALL)	ALL

豢螳

wq! 襯 螻 觜碁.

==== openSSH 설치 ====
yum install openssh-server openssh-clients openssh-askpass
systemctl restart network
vi /etc/ssh/sshd_config
 
Port 22 覿覿 譯殊伎 豌襴 

systemctl start sshd.service

firewall-cmd --zone=public --add-port=22/tcp --permanent
firewall-cmd --reload
systemctl restart firewalld.service

su
cd /root
ssh-keygen -t rsa -P ""
cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys
chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys



cd /usr/local/
wget http://mirror.apache-kr.org/hadoop/common/hadoop-2.8.3/hadoop-2.8.3.tar.gz
tar -xzvf hadoop-2.8.3.tar.gz >> /dev/null
mv hadoop-2.8.3 /usr/local/hadoop
mkdir -p /usr/local/hadoop_work/hdfs/namenode
mkdir -p /usr/local/hadoop_work/hdfs/namesecondary

cd /usr/lib/jvm/jre

cd $HOME
vi .bashrc


export JAVA_HOME=/usr/lib/jvm/jre
export PATH=$PATH:$JAVA_HOME/bin
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
export CLASSPATH=$CLASSPATH:/usr/local/hadoop/lib/*:.

export HADOOP_OPTS="$HADOOP_OPTS -Djava.security.egd=file:/dev/../dev/urandom"


source ~/.bashrc


vi /usr/local/hadoop/etc/hadoop/hadoop-env.sh


export JAVA_HOME=/usr/lib/jvm/jre
export HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_PREFIX}/lib/native
export HADOOP_OPTS="${HADOOP_OPTS} -Djava.library.path=$HADOOP_PREFIX/lib/native"



cd $HADOOP_HOME/etc/hadoop
hadoop version



vi core-site.xml


<property>
<name>fs.defaultFS</name>
<value>hdfs://rm:8020/</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>



vi hdfs-site.xml

<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/hadoop_work/hdfs/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/hadoop_work/hdfs/datanode</value>
</property>
<property>
<name>dfs.namenode.checkpoint.dir</name>
<value>file:/usr/local/hadoop_work/hdfs/namesecondary</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>




cp mapred-site.xml.template mapred-site.xml
vi mapred-site.xml


<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>rm:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>rm:19888</value>
</property>
<property>
<name>yarn.app.mapreduce.am.staging-dir</name>
<value>/user/app</value>
</property>
<property>
<name>mapred.child.java.opts</name>
<value>-Djava.security.egd=file:/dev/../dev/urandom</value>
</property>




vi yarn-site.xml

<property>
<name>yarn.resourcemanager.hostname</name>
<value>rm</value>
</property>
<property>
<name>yarn.resourcemanager.bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>yarn.nodemanager.bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>file:/usr/local/hadoop_work/yarn/local</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>file:/usr/local/hadoop_work/yarn/log</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>hdfs://rm:8020/var/log/hadoop-yarn/apps</value>
</property>


襴 襷る 覯覈 rm

vi masters

rm

sudo vi /etc/hosts

192.168.56.101 rm
192.168.56.102 nm1
192.168.56.103 nm2




su
cd /root
cat ~/.ssh/id_rsa.pub | ssh root@nm1 "cat >> .ssh/authorized_keys"
cat ~/.ssh/id_rsa.pub | ssh root@nm2 "cat >> .ssh/authorized_keys"


cd /usr/local 
scp -r hadoop nm1:/usr/local
scp -r hadoop nm2:/usr/local

ssh nm1
mkdir -p /usr/local/hadoop_work/hdfs/datanode
mkdir -p /usr/local/hadoop_work/yarn/local
mkdir -p /usr/local/hadoop_work/yarn/log
exit

ssh nm2
mkdir -p /usr/local/hadoop_work/hdfs/datanode
mkdir -p /usr/local/hadoop_work/yarn/local
mkdir -p /usr/local/hadoop_work/yarn/log
exit

$HADOOP_HOME/sbin/start-dfs.sh
$HADOOP_HOME/sbin/start-yarn.sh
$HADOOP_HOME/sbin/start-all.sh


WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable

  64觜 襴れ 32觜 ′ れ 蠍企り .
hadoop-env.sh  .bashrc, 企 れ 豢螳伎朱 願屋.
 $HADOOP_HOME/lib 朱   覿覿 $HADOOP_HOME/lib/native 襦 覦蠑碁 .

 export HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$HADOOP_PREFIX/lib" 伎 蟆 export HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$HADOOP_PREFIX/lib/native" 襦 覦蠑語覃 .


豢豌: http://crmn.tistory.com/7 [襦襷螳 蠍 磯 螻糾]


hdfs namenode -format
hdfs dfsadmin -report
hdfs dfsadmin -safemode leave





su
cd $HADOOP_HOME
mkdir hive
cd hive

wget http://ftp.daumkakao.com/apache/hive/hive-2.3.2/apache-hive-2.3.2-bin.tar.gz
tar xzf apache-hive-2.3.2-bin.tar.gz


vi $HOME/.bashrc

export HIVE_HOME=$HADOOP_HOME/hive/apache-hive-2.3.2-bin
export PATH=$HIVE_HOME/bin:$PATH





hive> show tables;
FAILED: SemanticException org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient


wget http://apache.mirror.cdnetworks.com//db/derby/db-derby-10.14.1.0/db-derby-10.14.1.0-bin.tar.gz
mkdir /usr/local/derby
cp db-derby-10.14.1.0-bin.tar.gz /usr/local/derby

cd /usr/local/derby
tar -zxvf db-derby-10.14.1.0-bin.tar.gz

vi $HOME/.bashrc

export DERBY_HOME=/usr/local/derby/db-derby-10.14.1.0-bin
export PATH=$DERBY_HOME/bin:$PATH





su
cd $HOME

rpm -ivh https://dev.mysql.com/get/mysql57-community-release-el7-11.noarch.rpm
sudo yum install mysql-community-server mysql-community-client

1 #

sudo groupadd -g 10000 hadoop
sudo useradd -g hadoop -m huser -s /bin/bash
sudo passwd huser

* 谿瑚: root 蠍 *
sudo passwd root
れ れ螻 覿覃 root 螳


2 자바 설치 #

覦螳 れ讌
java -version 

覦螳 れ 讌 朱 れ
sudo apt-get install openjdk-6-jdk

name node, data node 覈 れ

3 ssh 설치 #

sudo apt-get install ssh
sudo /etc/init.d/ssh restart

name node, data node 覈 れ

4 hosts 설정 #

name node /etc/hosts 殊 .
192.168.0.2     nameNode
192.168.0.3     secondaryNameNode
192.168.0.4     dataNode01
192.168.0.5     dataNode02
192.168.0.6     dataNode03
192.168.0.7     dataNode04
譯殊 - hosts殊 127.0.0.1 nameNode 螳 れ 蟆 蠏碁襦 dataNode 貅 . dataNode nameNode襯 谿場 伎 .

5 ssh 키 복사 #

name node 燕 るゼ dataNode1, dataNode2, dataNode3 覲旧.
su huser
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys

cat ~/.ssh/id_rsa.pub | ssh huser@secondaryNameNode "cat >> .ssh/authorized_keys"
cat ~/.ssh/id_rsa.pub | ssh huser@dataNode01 "cat >> .ssh/authorized_keys"
cat ~/.ssh/id_rsa.pub | ssh huser@dataNode02 "cat >> .ssh/authorized_keys"
cat ~/.ssh/id_rsa.pub | ssh huser@dataNode03 "cat >> .ssh/authorized_keys"
cat ~/.ssh/id_rsa.pub | ssh huser@dataNode04 "cat >> .ssh/authorized_keys"


su huser
ssh secondaryNameNode
exit

ssh dataNode01
exit

ssh dataNode02
exit

ssh dataNode03
exit

ssh dataNode04
exit


6 hadoop 설치 #

su
cd /usr/local
mkdir hadoop
cd hadoop
wget http://ftp.daum.net/apache/hadoop/common/hadoop-1.0.1/hadoop-1.0.1.tar.gz
tar zxvf hadoop-1.0.1.tar.gz
chown -R huser:hadoop /usr/local/hadoop/hadoop-1.0.1

7 환경설정 #

su huser
cd $HOME 
vi .profile

伎 豢螳
export JAVA_HOME=/usr/lib/jvm/java-6-openjdk
export HADOOP_HOME=/usr/local/hadoop/hadoop-1.0.1
export HADOOP_CONF=$HADOOP_HOME/conf
export HADOOP_PATH=$HADOOP_HOME/bin

export HIVE_INSTALL=/usr/local/hadoop/hive/hive-0.8.1
export HIVE_PATH=$HIVE_INSTALL/bin

export PIG_INSTALL=/usr/local/hadoop/pig/pig-0.9.2
export PIG_PATH=$PIG_INSTALL/bin

export PATH=$HIVE_PATH:$HADOOP_PATH:$PIG_PATH:$PATH
32bit 蟆曙 export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-i386 蟆企.

su
source .profile

8 hadoop 환경설정 #

  • 譯朱 name node 蟆曙れ 蟯 殊 一危 碁襦 覲旧 覦
  • 企ろ磯ゼ 蟲燕 覈 覯 蟆(cpu, 覃覈襴蠍, jdk覯, 襴, ろ ) 狩 蟆曙 蟆 れ螻 覦壱 . 覃 覯覲襦 旧螳 殊 .

huser襦 ク讌

conf/hadoop-env.sh ク讌
cd $HADOOP_HOME/conf
vi hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/java-6-openjdk
32bit 蟆曙 export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-i386 蟆企.

conf/core-site.xml ク讌
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
        <property>
                <name>fs.default.name</name>
                <value>hdfs://nameNode</value>
        </property>
        <property>
                <name>hadoop.tmp.dir</name>
                <value>/home/huser/dfs/temp</value>
        </property>
        <property>
                <name>io.file.buffer.size</name>
                <value>131072</value>
        </property>
</configuration>


conf/hdfs-site.xml ク讌
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
         <property>
                <name>dfs.replication</name>
                <value>2</value>
        </property>
        <property>
                <name>dfs.name.dir</name>
                <value>/home/huser/dfs/name</value>
        </property>
        <property>
                <name>dfs.data.dir</name>
                <value>/home/huser/dfs/data</value>
        </property>
        <property>
                <name>dfs.namenode.socket.write.timeout</name>
                <value>0</value>
        </property>
</configuration>

conf/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
	<property>
		<name>mapred.job.tracker</name>
		<value>nameNode:9001</value>
	</property>
	<property>
		<name>mapred.local.dir</name>
		<value>/home/huser/dfs</value>
	</property>
	<property>
		<name>mapred.child.java.opts</name>
		<value> -Xmx200m</value>
	</property>
	<property>
		<name>mapred.map.tasks</name>
		<value>4</value>
	</property>
	<property>
		<name>mapred.reduce.tasks</name>
		<value>2</value>
	</property>
</configuration>

/conf/masters ク讌
secondaryNameNode

/conf/slaves ク讌
secondaryNameNode
dataNode01
dataNode02
dataNode03
dataNode04
secondaryNameNode 譴 觝 蟆

9 배포 #

nameNode..

secondaryNameNode, dataNode01, dataNode02, dataNode03, dataNode04 覦壱
su
cd /usr/local/hadoop
sudo tar cf hadoop-1.0.1.tar hadoop-1.0.1

su huser
cd /usr/local/hadoop
scp hadoop-1.0.1.tar huser@secondaryNameNode:/home/huser/hadoop-1.0.1.tar
scp hadoop-1.0.1.tar huser@dataNode01:/home/huser/hadoop-1.0.1.tar
scp hadoop-1.0.1.tar huser@dataNode02:/home/huser/hadoop-1.0.1.tar
scp hadoop-1.0.1.tar huser@dataNode03:/home/huser/hadoop-1.0.1.tar
scp hadoop-1.0.1.tar huser@dataNode04:/home/huser/hadoop-1.0.1.tar

secondaryNameNode, dataNode01, dataNode02, dataNode03 /usr/local/hadoop 襴襯 燕螻 蟠
su
mkdir /usr/local/hadoop
chown -R huser:hadoop /usr/local/hadoop

huser襦 dataNode(slaves) tar螻 れ
slaves.sh tar xf /home/huser/hadoop-1.0.1.tar --directory=/usr/local/hadoop

/usr/local/hadoop/hadoop-1.0.1/conf/slaves 殊 伎 secondaryNameNode襯 觜殊. ( 覦壱襯 secondaryNameNode襯 蟆企.)
dataNode01
dataNode02
dataNode03
dataNode04

10 name node 포맷 #

bin/hadoop namenode -format

11 #

′ 蟲.
cd /usr/local/hadoop/hadoop-1.0.1
bin/start-dfs.sh
bin/start-mapred.sh



bin/start-all.sh

hadoop dfsadmin -report 覈轟朱 . れ ろ 企. 譯殊 蟆 start-all.sh 覈轟 ろ螻, 螻щ襦 覈轟企ゼ ろ 蟆郁骸螳 覦襦 讌 る 蟆企. dataNode螳 れる 螳 譟郁 .
huser@nameNode:/usr/local/hadoop/conf$ hadoop dfsadmin -report
Warning: $HADOOP_HOME is deprecated.

Safe mode is ON
Configured Capacity: 61029343232 (56.84 GB)
Present Capacity: 50454859776 (46.99 GB)
DFS Remaining: 50454802432 (46.99 GB)
DFS Used: 57344 (56 KB)
DFS Used%: 0%
Under replicated blocks: 0
Blocks with corrupt replicas: 0
Missing blocks: 0

-------------------------------------------------
Datanodes available: 2 (2 total, 0 dead)

Name: 192.168.136.101:50010
Decommission Status : Normal
Configured Capacity: 30514671616 (28.42 GB)
DFS Used: 28672 (28 KB)
Non DFS Used: 5287256064 (4.92 GB)
DFS Remaining: 25227386880(23.49 GB)
DFS Used%: 0%
DFS Remaining%: 82.67%
Last contact: Tue Aug 06 17:14:57 KST 2013


Name: 192.168.136.102:50010
Decommission Status : Normal
Configured Capacity: 30514671616 (28.42 GB)
DFS Used: 28672 (28 KB)
Non DFS Used: 5287227392 (4.92 GB)
DFS Remaining: 25227415552(23.49 GB)
DFS Used%: 0%
DFS Remaining%: 82.67%
Last contact: Tue Aug 06 17:14:57 KST 2013


huser@nameNode:/usr/local/hadoop/conf$


觚殊一..

namenode, datanode
http://namenode:50070

jobtracker, tasktracker
http://namenode:50030

12 hive #

su
cd /usr/local/hadoop
mkdir hive
cd hive
wget http://ftp.daum.net/apache/hive/hive-0.8.1/hive-0.8.1.tar.gz
tar xzf hive-0.8.1.tar.gz
sudo chown -R huser:hadoop /usr/local/hadoop/hive/hive-0.8.1
exit
SET mapred.job.tracker=local;
SET mapred.reduce.tasks=8;

create table sample(year string, temperature int, quality int)
row format delimited
fields terminated by '\t';

load data local inpath '/home/huser/sample/input/ncdc/micro-tab/sample.txt'
overwrite into table sample;

select year, max(temperature) max_temp
from sample
group by year;

13 pig #

su
cd /usr/local/hadoop
mkdir pig
cd pig
wget http://ftp.daum.net/apache/pig/pig-0.9.2/pig-0.9.2.tar.gz
tar xzf pig-0.9.2.tar.gz
sudo chown -R huser:hadoop /usr/local/hadoop/pig/pig-0.9.2



hadoop fs -mkdir ncdc
hadoop fs -put sample.txt ncdc

records = LOAD 'ncdc/sample.txt'
AS(year:chararray, temperature:int, quality:int);

DUMP records;