#!/bin/bash

# # # # # # # # # # # # # # # # # # # # # #
# @file : dmp_interest_tag.sh
# @author : jinfeng.wang
# @time : 2020-05-22 16:06:07
#
# Runs the Spark job mobvista.dmp.datasource.dm.DmpDeviceIdMd5 for the day
# before the schedule time, then registers the job output as partition
# dt='<yyyymmdd>' of "device_id_md5_match" via mount_partition.
#
# Usage: <this script> [schedule_time]
#   schedule_time  Timestamp understood by `date -d`. Only consulted when the
#                  ScheduleTime environment variable is unset or empty.
#
# Not defined in this script (presumably provided by ../dmp_env.sh - confirm):
#   check_await, mount_partition,
#   ODS_DMP_USER_INFO, DEVICE_ID_MD5_MATCH_PATH, JAR
#
# Exit status:
#   255 if the target day cannot be computed or spark-submit fails;
#   otherwise the status of the final mount_partition call.
#
# NOTE(review): the @file name above looks copied from another script (this
# one submits DmpDeviceIdMd5) - confirm and correct.
# # # # # # # # # # # # # # # # # # # # # #

# Relative path: resolved against the current working directory, as is the
# "../${JAR}" path passed to spark-submit below.
source ../dmp_env.sh

# The scheduler-provided time wins; the first CLI argument is the fallback.
LOG_TIME=${ScheduleTime:-$1}

# Target day = one day before LOG_TIME, in compact form (job name, -date
# argument, partition value) and in yyyy/mm/dd form (directory layout).
# Abort when `date` rejects LOG_TIME: continuing with empty values would
# await and write to paths that have no day component at all.
date=$(date +%Y%m%d -d "-1 day $LOG_TIME") || exit 255
date_path=$(date +"%Y/%m/%d" -d "-1 day ${LOG_TIME}") || exit 255

# The three upstream _SUCCESS markers for that day are passed to check_await
# before the job is submitted.
check_await "${ODS_DMP_USER_INFO}/${date_path}/3s/_SUCCESS"
check_await "${ODS_DMP_USER_INFO}/${date_path}/dsp_req/_SUCCESS"
check_await "${ODS_DMP_USER_INFO}/${date_path}/adn_request/_SUCCESS"

OUTPUT_PATH="${DEVICE_ID_MD5_MATCH_PATH}/$date_path"

# Test spark-submit directly instead of inspecting $? afterwards, and quote
# every expansion so each value reaches spark-submit as exactly one argument.
if ! spark-submit --class mobvista.dmp.datasource.dm.DmpDeviceIdMd5 \
  --name "DmpDeviceIdMd5.${date}" \
  --conf spark.sql.shuffle.partitions=10000 \
  --conf spark.default.parallelism=1000 \
  --conf spark.kryoserializer.buffer.max=512m \
  --conf spark.kryoserializer.buffer=64m \
  --conf spark.sql.adaptive.enabled=true \
  --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \
  --master yarn --deploy-mode cluster \
  --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 60 \
  "../${JAR}" \
  -date "${date}" -coalesce 1000 -output "$OUTPUT_PATH"; then
  exit 255
fi

# Reached only when the Spark job succeeded.
mount_partition "device_id_md5_match" "dt='$date'" "$OUTPUT_PATH"