Hadoop安装部署--Hive on Spark
1. 准备 Spark 安装包
上传
spark-3.3.1-bin-hadoop3.tgz
到 /opt/software 目录下，然后解压：
# Extract the Spark binary distribution into the installation root.
tar -zxvf /opt/software/spark-3.3.1-bin-hadoop3.tgz -C /opt/bigdata
创建软链接
# Version-agnostic symlink: paths elsewhere (SPARK_HOME etc.) use /opt/bigdata/spark,
# so upgrading Spark later only requires re-pointing this link.
ln -s /opt/bigdata/spark-3.3.1-bin-hadoop3 /opt/bigdata/spark
2. 配置环境变量
# Edit the shared profile script and append the SPARK_HOME entries below.
vim /etc/profile.d/bigdata.sh
#SPARK_HOME
export SPARK_HOME=/opt/bigdata/spark
export PATH=$PATH:$SPARK_HOME/bin
# Reload the profile so the new variables take effect in the current shell.
source /etc/profile.d/bigdata.sh
3. 修改 spark-env.sh
# Create spark-env.sh from the shipped template.
cp /opt/bigdata/spark/conf/spark-env.sh.template /opt/bigdata/spark/conf/spark-env.sh
# Append the export below so Spark can find the Hadoop/YARN configuration files.
vim /opt/bigdata/spark/conf/spark-env.sh
export HADOOP_CONF_DIR=/opt/bigdata/hadoop/etc/hadoop/
4. 修改 hive 中的 spark-defaults.conf
# Create/modify spark-defaults.conf under Hive's conf dir with the settings below.
vim /opt/bigdata/hive/conf/spark-defaults.conf
# Run Spark jobs on YARN and record event logs to HDFS (directory created in step 5).
spark.master yarn
spark.eventLog.enabled true
spark.eventLog.dir hdfs://hadoop01:8020/spark-history
# Per-executor / driver memory; tune to the cluster's capacity.
spark.executor.memory 4g
spark.driver.memory 2g
# Add each node's local Hadoop classpath to the application classpath —
# required because step 6 uploads the "without-hadoop" Spark jars.
spark.yarn.populateHadoopClasspath true
5. 在 hdfs 中创建 spark-history 目录
# Event-log directory; must match spark.eventLog.dir in spark-defaults.conf.
hdfs dfs -mkdir -p /spark-history
6. 向 hdfs 中上传纯净版的 spark
创建路径
# HDFS directory that will hold the Spark jars referenced by spark.yarn.jars.
hdfs dfs -mkdir -p /spark-jars
解压纯净版的 spark
# Extract the "without-hadoop" build — its jars carry no bundled Hadoop classes,
# avoiding version conflicts with the cluster's own Hadoop jars on YARN.
tar -zxvf /opt/software/spark-3.3.1-bin-without-hadoop.tgz -C /opt/software
上传
# Upload the jars to HDFS so YARN containers can fetch them (see hive-site.xml, step 7).
hdfs dfs -put /opt/software/spark-3.3.1-bin-without-hadoop/jars/* /spark-jars
7. 修改 hive-site.xml
# Append the following properties inside <configuration> ... </configuration>.
vim /opt/bigdata/hive/conf/hive-site.xml
<!-- Hive execution engine: use Spark instead of the default MapReduce. -->
<property>
<name>hive.execution.engine</name>
<value>spark</value>
</property>
<!-- Spark dependency location: the jars uploaded to HDFS in step 6. -->
<property>
<name>spark.yarn.jars</name>
<value>hdfs://hadoop01:8020/spark-jars/*</value>
</property>
8. 配置 Yarn，使其能够同时运行多个 Spark 任务
# Raise the ApplicationMaster resource cap so several Spark/Hive sessions can run at once.
vim /opt/bigdata/hadoop/etc/hadoop/capacity-scheduler.xml
<!-- Maximum fraction of cluster resources usable by ApplicationMasters.
     The default (0.1) allows too few concurrent AMs; 0.5 lets multiple
     Spark sessions run simultaneously. Restart YARN after changing this. -->
<property>
<name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
<value>0.5</value>
</property>
9. 启动 Hive Server2
# Start HiveServer2 detached: the original foreground invocation blocks the
# terminal and the service dies when the SSH session closes. nohup + '&'
# keeps it running; stdout/stderr go to a log file for troubleshooting.
nohup hive --service hiveserver2 >/tmp/hiveserver2.log 2>&1 &