0x01 Versions
hadoop-2.8
hbase-1.2.5
hive-2.1.1
spark-2.1.1
scala-2.12.1
0x02 HDFS installation
1. Initialization: create the directory and user, and set up the JDK environment

- name: pro
  file: path=/home/hadoop state=directory
- name: add user
  action: user name=hadoop update_password=always shell=/bin/bash
- name: chpasswd
  shell: echo "xx" |passwd --stdin hadoop
- name: chown
  shell: chown -R hadoop.hadoop /home/hadoop
- name: copy profile
  copy: src=/opt/src/hprofile dest=/etc/profile force=yes owner=root group=root mode=0644
- name: copy jdk
  copy: src=/opt/src/jdk.tar.gz dest=/usr/java/
- name: tar
  shell: chdir=/usr/java tar xvzf jdk.tar.gz
- name: rm
2. Set up passwordless SSH as the hadoop user from the namenode to every datanode, and to the namenode itself; the secondary namenode also needs passwordless SSH to the datanodes.
The scripts below come from http://slaytanic.blog.51cto.com/2057708/1370007 (an excellent writeup).
2.1 Script to generate the SSH key
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cp ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys
2.2 Script to push the public key to a slave node
read -p "Enter the remote server IP: " ip
ssh-copy-id -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa.pub root@$ip
ssh root@$ip 'sed -i "s/^#RSAAuthentication\ yes/RSAAuthentication\ yes/g" /etc/ssh/sshd_config'
ssh root@$ip 'sed -i "s/^#PubkeyAuthentication\ yes/PubkeyAuthentication yes/g" /etc/ssh/sshd_config'
ssh root@$ip 'sed -i "s/^#PermitRootLogin\ yes/PermitRootLogin\ yes/g" /etc/ssh/sshd_config'
ssh root@$ip 'service sshd restart'
hostname=`ssh root@${ip} 'hostname'`
echo "Adding the hostname and IP to the local /etc/hosts file"
echo "$ip $hostname" >> /etc/hosts
echo "The remote hostname is $hostname; check /etc/hosts to confirm the hostname and IP were added"
echo "Public key copied to the host"
2.3 Script to read the host list and copy /etc/hosts to every host
cat /etc/hosts | while read LINE
do
ip=`echo $LINE | awk '{print $1}' | grep -v "::" | grep -v "127.0.0.1"`
echo "Copying /etc/hosts to ${ip}"
scp -o StrictHostKeyChecking=no /etc/hosts root@${ip}:/etc/
done
Or use my own exp.sh ip script.
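To confirm that passwordless access actually works from the namenode, a quick check like the following can be run as the hadoop user (this is a sketch of my own, not one of the referenced scripts; it assumes /etc/hosts already lists every cluster host):

# Test passwordless SSH as the hadoop user to every host in /etc/hosts,
# skipping localhost and IPv6 entries; BatchMode=yes fails fast instead of
# prompting for a password.
cat /etc/hosts | grep -v "::" | grep -v "127.0.0.1" | awk '{print $1}' | while read ip
do
    [ -z "${ip}" ] && continue
    if ssh -o BatchMode=yes -o ConnectTimeout=5 hadoop@${ip} hostname >/dev/null 2>&1; then
        echo "OK   ${ip}"
    else
        echo "FAIL ${ip}"
    fi
done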
3. Configuration changes
namenode HA configuration
vim hdfs-site.xml
<property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>d17:50090</value>
</property>
Test HA:
$ sbin/hadoop-daemon.sh stop namenode
Checking the namenode on CentOS7-2 again shows it has automatically switched to active.
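If the HA nameservice is configured with namenode IDs, the active/standby state can also be checked from the command line instead of the web UI (nn1 and nn2 below are placeholder IDs, not values taken from this cluster's config):

# Query the HA state of each configured namenode
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2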
vim slaves
d17
d18
See my earlier post on installing hadoop-2.3.0-cdh5.1.2: http://szgb17.blog.51cto.com/340201/1691814
4. Format the namenode and start HDFS
hadoop namenode -format    (format only once)
Startup commands, old style:
hadoop-daemon.sh start namenode
hadoop-daemons.sh start datanode
yarn-daemon.sh start resourcemanager
yarn-daemons.sh start nodemanager
New style:
start-dfs.sh
Startup prints a warning suggesting the native library be recompiled; too much trouble, so I skipped it:
17/05/15 17:10:15 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
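To see exactly which native libraries are missing before deciding whether a rebuild is worth the effort, Hadoop's checknative diagnostic can be run (shown here as an optional check, not something the original setup did):

# List which native libraries (zlib, snappy, openssl, ...) this Hadoop build
# can load; "false" entries explain the NativeCodeLoader warning above.
hadoop checknative -a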
start-yarn.sh
5. Verify the services after startup
Old-version process list found online:
[shirdrn@localhost bin]$ jps
8192 TaskTracker
7905 DataNode
7806 NameNode
8065 JobTracker
8002 SecondaryNameNode
8234 Jps
New version:
[hadoop@n16 conf]$ jps
9088 Jps
472 NameNode
2235 ResourceManager
1308 QuorumPeerMain
1901 HMaster
0x03 HBase installation
Start order: hadoop --> zookeeper --> hbase
Stop order: hbase --> zookeeper --> hadoop
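A small wrapper keeps this ordering from being fat-fingered; the ZooKeeper path below is an assumption and should match your actual install (on a multi-node quorum, zkServer.sh must be run on every quorum member):

#!/bin/bash
# Start the stack in dependency order: HDFS/YARN -> ZooKeeper -> HBase.
ZK_HOME=/home/hadoop/zookeeper    # assumed location, adjust as needed

start-dfs.sh
start-yarn.sh
"$ZK_HOME/bin/zkServer.sh" start
start-hbase.sh

# Stop in the reverse order:
# stop-hbase.sh
# "$ZK_HOME/bin/zkServer.sh" stop
# stop-yarn.sh
# stop-dfs.sh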
1. Install ZooKeeper first (also done with Ansible).
2. Error when running start-hbase.sh:
Could not start ZK with 3 ZK servers in local mode deployment
This error is usually caused by getting the hbase.cluster.distributed property wrong, so double-check it.
vim hbase-site.xml
<configuration>
<property>
    <name>hbase.rootdir</name>
    <value>hdfs://n16:9000/hbase/data</value>
</property>
<property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
</property>
<property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2182</value>
</property>
<property>
    <name>hbase.zookeeper.quorum</name>
    <value>n16,d17,d18</value>
</property>
<property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/home/hadoop/zookeeper/data</value>
</property>
</configuration>
3. start-hbase.sh
cat /etc/profile
export JAVA_HOME=/usr/java/jdk
export JRE_HOME=/usr/java/jdk/jre
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JRE_HOME/lib
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
export HADOOP_HOME=/home/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:/home/hadoop/hbase/bin:$PATH
0x04 Hive installation
The metastore uses a remote MySQL at 172.x.x.3:3306; the JDBC driver is mysql-connector-java-5.1.38.
CREATE DATABASE hive;
USE hive;
CREATE USER 'hive'@'localhost' IDENTIFIED BY 'hive';
GRANT ALL ON hive.* TO 'hive'@'localhost' IDENTIFIED BY 'hive';
GRANT ALL ON hive.* TO 'hive'@'172.x.1.%' IDENTIFIED BY 'hive';
GRANT ALL ON hive.* TO 'hive'@'172.x.2.%' IDENTIFIED BY 'hive';
FLUSH PRIVILEGES;
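Before editing the Hive config it is worth confirming that the hive account can reach the remote MySQL from a cluster node; a minimal check using the host, port and credentials from the grants above:

# Connect to the remote metastore database as the hive user
# (you will be prompted for the password set above).
mysql -h 172.x.x.3 -P 3306 -u hive -p -e "SHOW DATABASES;"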
hdfs dfs -mkdir -p /user/hive/warehouse
hdfs dfs -mkdir -p /user/hive/tmp
hdfs dfs -mkdir -p /user/hive/log
hdfs dfs -chmod g+w /user/hive/warehouse
hdfs dfs -chmod g+w /user/hive/log
hdfs dfs -chmod 777 /user/hive/tmp
Configuration files (for reference):
cp hive-env.sh.template hive-env.sh
cp hive-default.xml.template hive-site.xml
cp hive-log4j2.properties.template hive-log4j2.properties
cp hive-exec-log4j2.properties.template hive-exec-log4j2.properties
cat hive-env.sh
export JAVA_HOME=/usr/java/jdk
export HADOOP_HOME=/home/hadoop
export HIVE_HOME=/home/hadoop/hive
export HIVE_CONF_DIR=/home/hadoop/conf
vim hive-site.xml
<property>
    <name>hive.exec.scratchdir</name>
    <value>/user/hive/tmp</value>
    <description>HDFS root scratch dir for Hive jobs which gets created with write all (733) permission. For each connecting user, an HDFS scratch dir: ${hive.exec.scratchdir}/<username> is created, with ${hive.scratch.dir.permission}.</description>
</property>
<property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive/warehouse</value>
    <description>location of default database for the warehouse</description>
</property>
<property>
    <name>hive.querylog.location</name>
    <value>/user/hive/log</value>
    <description>Location of Hive run time structured log file</description>
</property>
<property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://172.x.x.3:3306/hive?createDatabaseIfNotExist=true&amp;characterEncoding=UTF-8&amp;useSSL=false</value>
</property>
<property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
</property>
<property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
</property>
<property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive</value>
</property>
The following also need to be changed:
<name>hive.exec.local.scratchdir</name>
<value>/tmp/${user.name}</value>
<name>hive.downloaded.resources.dir</name>
<value>/tmp/${hive.session.id}_resources</value>
That is, replace ${system:java.io.tmpdir} with /tmp and ${system:user.name} with ${user.name} throughout hive-site.xml.
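One way to apply the replacement in place, assuming it is run in the Hive conf directory (a .bak copy is kept since the substitution touches the whole file):

# Replace ${system:java.io.tmpdir} with /tmp and ${system:user.name} with
# ${user.name} everywhere in hive-site.xml.
sed -i.bak \
    -e 's#\${system:java.io.tmpdir}#/tmp#g' \
    -e 's#\${system:user.name}#${user.name}#g' \
    hive-site.xml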
Initialize the metastore schema:
schematool -dbType mysql -initSchema
Start:
hive --service hiveserver2 &
hive --service metastore &
hadoop job -kill jobid
kill `pgrep -f hive`
nohup hive --service metastore &
nohup hive --service hiveserver2 &
Changing "hive.metastore.schema.verification" to false in conf/hive-site.xml fixes "Caused by: MetaException(message:Version information not found in metastore. )".
What this option means:
hive.metastore.schema.verification enforces metastore schema consistency. When enabled, it verifies that the schema version stored in the metastore matches the version of the Hive jars and disables implicit schema migration, so users must upgrade Hive and migrate the schema manually; when disabled, it only warns on a version mismatch. The default is false.
Because this option had been set to true, a jar version mismatch was probably what prevented startup. After changing the value from true to false, spark-shell started without problems.
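The resulting entry in hive-site.xml looks like this (a minimal sketch showing only the relevant property):

<property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
    <description>Do not enforce metastore schema version consistency; only warn on a mismatch.</description>
</property>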
Debug mode: hive -hiveconf hive.root.logger=DEBUG,console
0x05 Spark installation
cat /etc/profile
export JAVA_HOME=/usr/java/jdk
export JRE_HOME=/usr/java/jdk/jre
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JRE_HOME/lib
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
export HADOOP_HOME=/home/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:/home/hadoop/hbase/bin:/home/hadoop/hive/bin:/home/hadoop/spark/bin:/home/hadoop/scala/bin:/home/hadoop/spark/sbin:$PATH
http://mirror.bit.edu.cn/apache/spark/spark-2.1.1/
http://spark.apache.org/downloads.html
Since we already installed Hadoop ourselves, under "Choose a package type" pick "Pre-build with user-provided Hadoop [can use with most Hadoop distributions]", then click the "spark-2.1.0-bin-without-hadoop.tgz" link next to "Download Spark". By default the browser saves the file under "/home/hadoop/下载" (the Downloads folder). Note that "Pre-build with user-provided Hadoop" is the "Hadoop free" build, so the downloaded Spark can be used with any Hadoop version.
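The same package can be fetched from the mirror listed above on the command line; the 2.1.1 filename below is an assumption (the download page above quotes the 2.1.0 name), so check the mirror's directory listing first:

# Download and unpack the "without hadoop" build into the hadoop user's home.
cd /home/hadoop
wget http://mirror.bit.edu.cn/apache/spark/spark-2.1.1/spark-2.1.1-bin-without-hadoop.tgz
tar xzf spark-2.1.1-bin-without-hadoop.tgz
mv spark-2.1.1-bin-without-hadoop spark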
Local mode
http://dblab.xmu.edu.cn/blog/1307-2/
export SCALA_HOME=/root/dev/java/scala-2.12.1
export SPARK_WORKER_MEMORY=1g
export SPARK_MASTER_IP=your_server_ip
export MASTER=spark://your_server_ip:7077
export SPARK_SSH_OPTS="-p 22000"
At this point Spark 2.1.0 is fully installed. To verify the installation, go to $SPARK_HOME and run the SparkPi example:
./bin/run-example org.apache.spark.examples.SparkPi
We can also try launching spark-shell:
./bin/spark-shell
Distributed mode
cd /home/hadoop/spark/conf
vim spark-env.sh
export JAVA_HOME=/usr/java/jdk
export SCALA_HOME=/home/hadoop/scala
export SPARK_MASTER_IP=n16
export SPARK_WORKER_MEMORY=256M
export HADOOP_CONF_DIR=/home/hadoop/etc/hadoop
"echo \"$(hadoop classpath)\""
echo $(hadoop classpath)
export SPARK_DIST_CLASSPATH="/home/hadoop/etc/hadoop:/home/hadoop/share/hadoop/common/lib/*:/home/hadoop/share/hadoop/common/*:/home/hadoop/share/hadoop/hdfs:/home/hadoop/share/hadoop/hdfs/lib/*:/home/hadoop/share/hadoop/hdfs/*:/home/hadoop/share/hadoop/yarn/lib/*:/home/hadoop/share/hadoop/yarn/*:/home/hadoop/share/hadoop/mapreduce/lib/*:/home/hadoop/share/hadoop/mapreduce/*:/home/hadoop/contrib/capacity-scheduler/*.jar"
When echo is executed remotely over ssh, single and double quotes around special characters get lost; see http://bbs.chinaunix.net/thread-3739461-1-1.html
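An alternative that avoids the quoting problem entirely is to let each node compute the classpath itself inside spark-env.sh, the approach the Spark documentation describes for Hadoop-free builds (this assumes the hadoop command is on PATH on every node):

# In spark-env.sh: derive the Hadoop classpath at startup instead of
# hard-coding it, so no quotes have to survive an ssh round trip.
export SPARK_DIST_CLASSPATH=$(hadoop classpath)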
vim slaves
d17
d18
cp -r log4j.properties.template log4j.properties
Start:
start-master.sh
Error: A JNI error has occurred, please check your installation and try again
Exception in thread "main" java.lang.NoClassDefFoundError: org/slf4j/Logger
    at java.lang.Class.getDeclaredMethods0(Native Method)
    at java.lang.Class.privateGetDeclaredMethods(Class.java:2701)
    at java.lang.Class.privateGetMethodRecursive(Class.java:3048)
    at java.lang.Class.getMethod0(Class.java:3018)
    at java.lang.Class.getMethod(Class.java:1784)
    at sun.launcher.LauncherHelper.validateMainClass(LauncherHelper.java:544)
    at sun.launcher.LauncherHelper.checkAndLoadMain(LauncherHelper.java:526)
Caused by: java.lang.ClassNotFoundException: org.slf4j.Logger
    at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    ... 7 more
This shows it is the stock pre-built "without hadoop" package: the jars directory does not include the hbase/Hadoop dependency jars, so classes such as org.slf4j.Logger are missing until SPARK_DIST_CLASSPATH supplies them.
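To confirm what the stock package actually ships, the bundled jars can be inspected directly (a quick diagnostic, assuming SPARK_HOME points at the install):

# List slf4j/hadoop-related jars bundled with Spark; on the "without hadoop"
# build this typically returns nothing, matching the NoClassDefFoundError above.
ls $SPARK_HOME/jars | grep -Ei 'slf4j|hadoop'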
start-slave.sh
0x06 Startup errors
1. hbase
Could not start ZK with 3 ZK servers in local mode deployment
2. hive
Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
source /etc/profile
vim hive-site.xml
<name>hive.vectorized.execution.enabled</name>
<value>false</value>
hive --service metastore &
hive --service hiveserver2 &
hive -hiveconf hive.root.logger=DEBUG,console
3. spark
Caused by the Hive version being too new.
0x07 References
http://slaytanic.blog.51cto.com/2057708/1397396
Web UIs:
HDFS (namenode): http://ip:50070
Hadoop cluster status: http://ip:8088
vim /home/hadoop/hadoop-2.2.0/etc/hadoop/yarn-site.xml
<property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>xuegod63.cn:8088</value>
</property>
historyserver
mr-jobhistory-daemon.sh start historyserver
Web access:
http://172.x.x.1:19888/    JobHistory
http://172.x.x.1:8088/     All Applications
0x08 Startup commands and services
start-dfs.sh
start-yarn.sh
start-hbase.sh
hive --service hiveserver2 &
mr-jobhistory-daemon.sh start historyserver
start-master.sh
start-slave.sh
(or simply start-all.sh)
stop-master.sh
stop-slave.sh
mr-jobhistory-daemon.sh stop historyserver
kill `pgrep -f hive`
stop-hbase.sh
stop-yarn.sh
stop-dfs.sh
Check the services after startup.
n16:
[hadoop@n16 conf]$ jps
14865 ResourceManager
17748 Jps
13749 RunJar
17575 Master
472 NameNode
15690 HMaster
15354 JobHistoryServer
13931 RunJar
1308 QuorumPeerMain
d17:
[hadoop@d17 conf]$ jps
28468 HRegionServer
18420 QuorumPeerMain
28151 NodeManager
18072 DataNode
18184 SecondaryNameNode
29944 Worker
30108 Jps