#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file   : etl_ruid_mapping.sh
# @author : jinfeng.wang
# @time   : 2020-05-22 16:06:07
# # # # # # # # # # # # # # # # # # # # # #
#
# Ensures the previous day's hourly mapping-log input dirs exist on HDFS,
# then runs the EtlRuidMapping Spark job for that day.
# Exits 255 if spark-submit fails.

# Shared environment: defines MAPPING_LOG_DIR, RUID_MAPPING, JAR, ...
# (presumably — confirm against dmp_env.sh).
source ../dmp_env.sh

# Scheduler-provided time wins; fall back to the first CLI argument.
LOG_TIME=${ScheduleTime:-$1}

# Target date = one day before LOG_TIME.
date=$(date +%Y%m%d -d "-1 day ${LOG_TIME}")
date_path=$(date +"%Y/%m/%d" -d "-1 day ${LOG_TIME}")

hours="00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23"

MAPPING_INPUT="${MAPPING_LOG_DIR}/${date_path}"

# Create any missing hourly partitions so the Spark job does not fail on
# a non-existent input path.
for hour in ${hours}; do
  # BUGFIX: the original wrote `if ! $(hadoop fs -ls ...)`, which tests the
  # *output* of the command (empty, since it is redirected) rather than its
  # exit status — the mkdir guard never reacted to the directory's existence.
  # Test the exit status directly instead (ShellCheck SC2091).
  if ! hadoop fs -ls "${MAPPING_INPUT}/${hour}" > /dev/null 2>&1; then
    # -p: also create the day-level parent on the first run of a new day.
    hadoop fs -mkdir -p "${MAPPING_INPUT}/${hour}"
  fi
done

OUTPUT_PATH="${RUID_MAPPING}/${date_path}/"

spark-submit --class mobvista.dmp.datasource.dm.EtlRuidMapping \
  --name "EtlRuidMapping.${date}" \
  --conf spark.sql.shuffle.partitions=1000 \
  --conf spark.default.parallelism=1000 \
  --conf spark.kryoserializer.buffer.max=512m \
  --conf spark.kryoserializer.buffer=64m \
  --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
  --conf spark.sql.adaptive.enabled=true \
  --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \
  --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 100 \
  ../"${JAR}" \
  -date "${date}" -output "${OUTPUT_PATH}"

# Propagate job failure to the scheduler with the conventional 255 code.
if [[ $? -ne 0 ]]; then
  exit 255
fi