#!/bin/bash

# # # # # # # # # # # # # # # # # # # # # #
# @file        : calc_device_age.sh
# @author      : wangjf
# @revision    : 2018-09-28 20:51:57
# @description : Refactored.
#
# Steps, all dated relative to ${ScheduleTime}:
#   1. Submit the CalcDeviceAge Spark job for the previous day.
#   2. Mount that day's output as partition dt=<day> of dmp_device_age and
#      unmount the partition dated 10 days before ${ScheduleTime}.
#   3. Best-effort removal of the intermediate HDFS directories dated
#      4 days before ${ScheduleTime}.
#
# Paths (../dmp_env.sh, ../${JAR}) are relative to the current working
# directory, so the script has to be started from its own directory.
#
# Inputs:
#   ScheduleTime - reference date handed to `date -d`; when it is empty
#                  `date` resolves the offsets against the current time.
#   AGE_MERGE_INSTALL, AGE_CALC_PACKAGE_DICT, DMP_AGE_CALC_DEVICE, JAR,
#   AGE_GET_DSP_PATH, AGE_GET_GA_PATH, mount_partition, unmount_partition
#                - presumably all provided by ../dmp_env.sh (not visible
#                  from this file; TODO confirm).
#
# Exit status:
#   0   - Spark job succeeded (partition and cleanup steps are not checked).
#   255 - environment file missing, a required variable empty, a date could
#         not be computed, or spark-submit failed.
# # # # # # # # # # # # # # # # # # # # # #

# Print a message on stderr and stop with this script's failure code (255).
calc_device_age_die() {
  printf 'calc_device_age: %s\n' "$*" >&2
  exit 255
}

# Only readability is checked: the return status of `source` is that of the
# last command inside the file, which says nothing about whether it loaded.
[[ -r ../dmp_env.sh ]] || calc_device_age_die "cannot read ../dmp_env.sh"
source ../dmp_env.sh

# Fail before submitting anything if a variable used to build the job's
# paths is empty; otherwise the job would run against root-level paths.
for required_var in AGE_MERGE_INSTALL AGE_CALC_PACKAGE_DICT DMP_AGE_CALC_DEVICE JAR; do
  [[ -n "${!required_var:-}" ]] \
    || calc_device_age_die "required variable ${required_var} is empty or unset"
done

# Day being processed: the day before ScheduleTime, as YYYYMMDD.
LOG_TIME=$(date +%Y%m%d -d "-1 day ${ScheduleTime}") \
  || calc_device_age_die "cannot compute LOG_TIME from ScheduleTime='${ScheduleTime}'"
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}

# Partition to retire: 10 days before ScheduleTime, once as a path fragment
# (YYYY/MM/DD) and once as a partition value (YYYYMMDD).
RM_LOG_TIME=$(date +%Y/%m/%d -d "-10 day ${ScheduleTime}") \
  || calc_device_age_die "cannot compute RM_LOG_TIME from ScheduleTime='${ScheduleTime}'"
RM_DATE=$(date +%Y%m%d -d "-10 day ${ScheduleTime}") \
  || calc_device_age_die "cannot compute RM_DATE from ScheduleTime='${ScheduleTime}'"

INPUT_PATH="${AGE_MERGE_INSTALL}/${year}/${month}/${day}/"
OUTPUT_PATH="${DMP_AGE_CALC_DEVICE}/${year}/${month}/${day}/"
RM_OUTPUT_PATH="${DMP_AGE_CALC_DEVICE}/${RM_LOG_TIME}"
FILEDICT="${AGE_CALC_PACKAGE_DICT}/${year}/${month}/${day}/part-00000"

# Any spark-submit failure aborts the script before partitions are touched.
if ! spark-submit --class mobvista.dmp.datasource.age_gender.CalcDeviceAge \
  --name "CalcDeviceAge.${LOG_TIME}" \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.shuffle.file.buffer.kb=128 \
  --conf spark.default.parallelism=2000 \
  --conf spark.sql.shuffle.partitions=10000 \
  --conf spark.sql.autoBroadcastJoinThreshold=31457280 \
  --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
  --conf spark.storage.memoryFraction=0.4 \
  --conf spark.shuffle.memoryFraction=0.4 \
  --conf spark.sql.files.maxPartitionBytes=134217728 \
  --master yarn --deploy-mode cluster \
  --executor-memory 8g --driver-memory 4g \
  --executor-cores 5 --num-executors 100 \
  "../${JAR}" \
  -date "${LOG_TIME}" \
  -merge_input "${INPUT_PATH}" \
  -dict_input "${FILEDICT}" \
  -output "${OUTPUT_PATH}" \
  -parallelism 500; then
  exit 255
fi

# Publish the new partition, then retire the one dated 10 days back.
# Their statuses are intentionally not checked, as in the original flow.
mount_partition "dmp_device_age" "dt='${LOG_TIME}'" "${OUTPUT_PATH}"
unmount_partition "dmp_device_age" "dt='${RM_DATE}'" "${RM_OUTPUT_PATH}"

# Best-effort cleanup of intermediate data dated 4 days before ScheduleTime.
# Failures of `hadoop fs -rm` do not change the exit status.
before_date_path=$(date +%Y/%m/%d -d "-4 day ${ScheduleTime}")
if [[ -z "${before_date_path}" ]]; then
  # With an empty date component the delete would hit each base directory.
  printf 'calc_device_age: cleanup skipped, cannot compute the 4-day-old date\n' >&2
else
  for stale_base in \
    "${AGE_GET_DSP_PATH}" \
    "${AGE_GET_GA_PATH}" \
    "${AGE_MERGE_INSTALL}" \
    "${AGE_CALC_PACKAGE_DICT}"; do
    if [[ -z "${stale_base}" ]]; then
      # With an empty base the delete would target /YYYY/MM/DD at the root.
      printf 'calc_device_age: cleanup skipped for an empty base path\n' >&2
      continue
    fi
    hadoop fs -rm -r "${stale_base}/${before_date_path}"
  done
fi

exit 0