#!/bin/bash

# # # # # # # # # # # # # # # # # # # # # #
# @file   : dmp_install_list_merge.sh
# @author : jinfeng.wang
# @time   : 2020-04-14 14:53:39
#
# Merges the daily install list into the rolling "14days" install list.
#
# Usage:
#   dmp_install_list_merge.sh [LOG_TIME]
#   LOG_TIME is taken from $ScheduleTime when set, otherwise from $1.
#
# Inputs (waited on via check_await):
#   ${DMP_INSTALL_LIST}/<LOG_TIME - 1 day>/day/_SUCCESS
#   ${DMP_INSTALL_LIST}/<LOG_TIME - 2 days>/14days/_SUCCESS
# Output:
#   ${DMP_INSTALL_LIST}/<LOG_TIME - 1 day>/14days
#
# Requires ../dmp_env.sh to provide DMP_INSTALL_LIST, JAR, check_await,
# mount_partition and unmount_partition (none are defined in this file).
# Exits 255 when a date cannot be computed or the Spark job fails.
# # # # # # # # # # # # # # # # # # # # # #

source ../dmp_env.sh

# Abort early if the environment file did not define what the paths are built
# from; an empty DMP_INSTALL_LIST would otherwise yield root-relative paths
# that are later mounted, written to and removed.
: "${DMP_INSTALL_LIST:?DMP_INSTALL_LIST is not set (expected from ../dmp_env.sh)}"
: "${JAR:?JAR is not set (expected from ../dmp_env.sh)}"

LOG_TIME=${ScheduleTime:-$1}

# All dates are relative to LOG_TIME. If LOG_TIME cannot be parsed by date(1),
# stop instead of continuing with empty date strings.
if ! {
  date=$(date +%Y%m%d -d "-1 day ${LOG_TIME}") &&
    date_path=$(date +"%Y/%m/%d" -d "-1 day ${LOG_TIME}") &&
    before_date_path=$(date +"%Y/%m/%d" -d "-2 day ${LOG_TIME}") &&
    expire_date=$(date +%Y%m%d -d "-8 day ${LOG_TIME}") &&
    expire_date_path=$(date +"%Y/%m/%d" -d "-8 day ${LOG_TIME}")
}; then
  echo "failed to compute dates from LOG_TIME='${LOG_TIME}'" >&2
  exit 255
fi

BUSINESS="14days"

# Input 1: the daily install list for LOG_TIME - 1 day.
DAILY_INSTALL_LIST_PATH="${DMP_INSTALL_LIST}/${date_path}/day"
check_await "${DAILY_INSTALL_LIST_PATH}/_SUCCESS"

# Input 2: the previous merged list (LOG_TIME - 2 days).
ALL_INSTALL_LIST_PATH="${DMP_INSTALL_LIST}/${before_date_path}/${BUSINESS}"
check_await "${ALL_INSTALL_LIST_PATH}/_SUCCESS"

# The output partition is registered before the job runs, as in the original flow.
OUTPUT_PATH="${DMP_INSTALL_LIST}/${date_path}/${BUSINESS}"
mount_partition "dmp_install_list" "dt='${date}', business='${BUSINESS}'" "${OUTPUT_PATH}"

# Partition from LOG_TIME - 8 days, dropped once the merge has succeeded.
EXPIRE_OUTPUT_PATH="${DMP_INSTALL_LIST}/${expire_date_path}/${BUSINESS}"

# NOTE(review): spark.speculation is disabled, so the speculation.quantile and
# speculation.multiplier settings below presumably have no effect; kept as-is.
if ! spark-submit --class mobvista.dmp.common.InstallListMerge \
  --name "InstallListMerge.${date}" \
  --conf spark.sql.shuffle.partitions=12000 \
  --conf spark.default.parallelism=2000 \
  --conf spark.speculation=false \
  --conf spark.speculation.quantile=0.9 \
  --conf spark.speculation.multiplier=1.5 \
  --conf spark.kryoserializer.buffer.max=512m \
  --conf spark.kryoserializer.buffer=64m \
  --conf spark.driver.maxResultSize=8g \
  --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
  --conf spark.sql.files.maxPartitionBytes=134217728 \
  --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 5 --num-executors 100 \
  "../${JAR}" -date "${date}" -coalesce 4000 -output "${OUTPUT_PATH}"; then
  exit 255
fi

# Remove the expired partition and delete its corresponding path.
unmount_partition "dmp_install_list" "dt='${expire_date}', business='${BUSINESS}'" "${EXPIRE_OUTPUT_PATH}"