#!/bin/bash

# # # # # # # # # # # # # # # # # # # # # #
# @file    : dmp_install_list_merge.sh
# @author  : jinfeng.wang
# @time    : 2020-04-14 14:53:39
# # # # # # # # # # # # # # # # # # # # # #

# Pull in the shared environment file. The path is relative to the current
# working directory. NOTE(review): presumably this is where DMP_INSTALL_LIST,
# JAR and the check_await / mount_partition / unmount_partition helpers come
# from -- not visible in this file, confirm.
source ../dmp_env.sh

# Reference time: ScheduleTime when it is set and non-empty, otherwise the
# first positional argument.
LOG_TIME="${ScheduleTime:-$1}"

# One day before LOG_TIME, as YYYYMMDD and as a YYYY/MM/DD path fragment.
date=$(date -d "-1 day ${LOG_TIME}" +%Y%m%d)
date_path=$(date -d "-1 day ${LOG_TIME}" +%Y/%m/%d)

# Two days before LOG_TIME, as a YYYY/MM/DD path fragment.
before_date_path=$(date -d "-2 day ${LOG_TIME}" +%Y/%m/%d)

# Daily install list for the day before LOG_TIME.
DAILY_INSTALL_LIST_PATH="${DMP_INSTALL_LIST}/${date_path}/day"

# Gate on the daily input's _SUCCESS marker.
# NOTE(review): check_await is defined outside this file; presumably it blocks
# until the given path exists -- confirm.
check_await "${DAILY_INSTALL_LIST_PATH}/_SUCCESS"

# Name of the merged data set; used both as a path component and as the
# value of the "business" partition key.
BUSINESS="14days"

# Merged list from two days before LOG_TIME, i.e. the previous run's output.
ALL_INSTALL_LIST_PATH="${DMP_INSTALL_LIST}/${before_date_path}/${BUSINESS}"

# Gate on the previous merged output's _SUCCESS marker as well.
check_await "${ALL_INSTALL_LIST_PATH}/_SUCCESS"

# Destination of this run's merged list.
OUTPUT_PATH="${DMP_INSTALL_LIST}/${date_path}/${BUSINESS}"

# Attach OUTPUT_PATH to the dmp_install_list table as partition
# (dt=<date>, business=<BUSINESS>).
# NOTE(review): mount_partition is defined outside this file; the exact
# effect (and whether it must run before the data is written) should be
# confirmed against its definition.
mount_partition "dmp_install_list" "dt='${date}', business='${BUSINESS}'" "$OUTPUT_PATH"

# Partition value and path for the day 8 days before LOG_TIME; these are
# passed to unmount_partition at the end of the script.
expire_date=$(date +%Y%m%d -d "-8 day $LOG_TIME")
expire_date_path=$(date +"%Y/%m/%d" -d "-8 day ${LOG_TIME}")
EXPIRE_OUTPUT_PATH="${DMP_INSTALL_LIST}/${expire_date_path}/${BUSINESS}"

# Run the InstallListMerge Spark job on YARN in cluster mode. The job receives
# the target date, a coalesce count of 4000 and the output path; the jar is
# resolved relative to the parent of the current working directory.
# Expansions passed as arguments are quoted so that an unexpected space or
# glob character in JAR / OUTPUT_PATH cannot split or expand an argument.
spark-submit --class mobvista.dmp.common.InstallListMerge \
     --name "InstallListMerge.${date}" \
     --conf spark.sql.shuffle.partitions=12000 \
     --conf spark.default.parallelism=2000 \
     --conf spark.speculation=false \
     --conf spark.speculation.quantile=0.9 \
     --conf spark.speculation.multiplier=1.5 \
     --conf spark.kryoserializer.buffer.max=512m \
     --conf spark.kryoserializer.buffer=64m \
     --conf spark.driver.maxResultSize=8g \
     --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
     --conf spark.sql.files.maxPartitionBytes=134217728 \
     --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g  --executor-cores 5 --num-executors 100 \
     "../${JAR}" -date "${date}" -coalesce 4000 -output "${OUTPUT_PATH}"

# Stop with status 255 when spark-submit failed, so the cleanup of the
# expired partition that follows is skipped after a failed merge.
if [[ $? -ne 0 ]]; then
    exit 255
fi

# Drop the expired partition and delete its corresponding path.
unmount_partition \
    "dmp_install_list" \
    "dt='${expire_date}', business='${BUSINESS}'" \
    "${EXPIRE_OUTPUT_PATH}"