#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file   : dmp_interest_tag.sh
# @author : jinfeng.wang
# @time   : 2020-05-22 16:06:07
# # # # # # # # # # # # # # # # # # # # # #
#
# Daily driver for the DmpDeviceInterest Spark job:
#   1. wait for yesterday's 14-day install-list _SUCCESS marker,
#   2. run the interest-tag Spark job into today's dated output path,
#   3. sanity-check the output, mount it as a Hive partition,
#   4. unmount (and delete the path of) the 10-day-old partition.
#
# Requires from ../dmp_env.sh: DMP_INSTALL_LIST, DMP_INTEREST_PATH,
# HIVE_SITE_PATH, SPARK_HOME, JAR, check_await, mount_partition,
# unmount_partition.  Schedule time comes from $ScheduleTime or $1.

source ../dmp_env.sh

LOG_TIME=${ScheduleTime:-$1}

# "Yesterday" relative to the schedule time, in both compact (partition key)
# and slash-separated (HDFS path) forms.
date=$(date +%Y%m%d -d "-1 day ${LOG_TIME}")
date_path=$(date +"%Y/%m/%d" -d "-1 day ${LOG_TIME}")

DMP_INSTALL_LIST_PATH="${DMP_INSTALL_LIST}/${date_path}/14days"

# Block until the upstream install-list job has published its success marker.
check_await "${DMP_INSTALL_LIST_PATH}/_SUCCESS"

# Grace period so the upstream data is fully visible after _SUCCESS lands.
sleep 60

OUTPUT_PATH="${DMP_INTEREST_PATH}/${date_path}"

# Partition that falls off the 10-day retention window today.
expire_date=$(date +%Y%m%d -d "-10 day ${LOG_TIME}")
expire_date_path=$(date +"%Y/%m/%d" -d "-10 day ${LOG_TIME}")
EXPIRE_OUTPUT_PATH="${DMP_INTEREST_PATH}/${expire_date_path}"

spark-submit --class mobvista.dmp.datasource.dm.DmpDeviceInterest \
  --name "DmpDeviceInterest.${date}" \
  --conf spark.sql.shuffle.partitions=10000 \
  --conf spark.default.parallelism=1000 \
  --conf spark.kryoserializer.buffer.max=512m \
  --conf spark.kryoserializer.buffer=64m \
  --conf spark.sql.adaptive.enabled=true \
  --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \
  --files "${HIVE_SITE_PATH}" \
  --jars "${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar" \
  --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 80 \
  ../"${JAR}" \
  -date "${date}" -output "${OUTPUT_PATH}" -coalesce 4000

if [[ $? -ne 0 ]]; then
  exit 255
fi

# The job coalesces to 4000 part files; far fewer listed entries means the
# output is empty or truncated, so fail loudly instead of mounting bad data.
part_num=$(hadoop fs -ls "${OUTPUT_PATH}" | wc -l)
if [[ ${part_num} -le 1000 ]]; then
  echo "This Dir No Data, Please Check Job !!!"
  exit 255
fi

mount_partition "dmp_interest_tag" "dt='${date}'" "${OUTPUT_PATH}"

# Drop the expired partition and delete its underlying path.
unmount_partition "dmp_interest_tag" "dt='${expire_date}'" "${EXPIRE_OUTPUT_PATH}"