#!/bin/bash
#
# Hourly ADN SDK request ETL launcher: submits the Spark job
# mobvista.dmp.datasource.adn.AdnRequestSdkHour for one region and one hour.
#
# Usage: <script> <region> [<time>]
#   $1            region name; "singapore" gets a larger resource profile
#   $2            reference time, used only when ScheduleTime is unset or empty
#   ScheduleTime  (env) reference time; takes precedence over $2
#
# The hour processed is the one immediately before the reference time.
# Exits 255 when spark-submit fails.
#
# NOTE(review): check_await, ADN_REQUEST_PATH, APP_ID_MAPPING,
# DIM_MANUAL_MAPPING and JAR are not defined in this script; presumably they
# come from ../dmp_env.sh -- confirm. The relative paths (../dmp_env.sh,
# ../${JAR}) are resolved against the current working directory.

source ../dmp_env.sh

region=$1
today=${ScheduleTime:-$2}

# Step back one hour from the reference time ("-d" relative dates as accepted
# by GNU date), then slice the "YYYY/MM/DD HH" result into its date and hour.
datetime=$(date +"%Y/%m/%d %H" -d "1 hour ago $today")
date=${datetime:0:10}
hour=${datetime:11:2}

date_path="${date}/${region}/${hour}"
INPUT_ADN_PATH="${ADN_REQUEST_PATH}/${date_path}"

# The mapping input is read from the previous day's partition.
before_date_path=$(date +"%Y/%m/%d" -d "1 days ago $today")
INPUT_MAPPING_PATH="${APP_ID_MAPPING}/${before_date_path}"

# check_await is called on each input's _SUCCESS marker before the job is
# submitted (presumably it blocks until the marker exists -- verify in
# dmp_env.sh).
check_await "${INPUT_MAPPING_PATH}/_SUCCESS"

APPID_PACKAGE="${DIM_MANUAL_MAPPING}"

check_await "${INPUT_ADN_PATH}/_SUCCESS"

# NOTE(review): fixed 60 s pause after both markers were awaited; the reason
# is not visible in this script -- confirm before changing or removing it.
sleep 60

OUTPUT_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/etl_adn_sdk_request_hour/${date_path}"

# Resource profile: "singapore" runs with 4x the partitions and twice the
# executors of every other region.
if [[ "${region}" == "singapore" ]]; then
  parallelism=400
  partition=400
  cores=2
  executors=10
  coalesce=400
else
  parallelism=100
  partition=100
  cores=2
  executors=5
  coalesce=100
fi

# All expansions are quoted so a value containing whitespace or glob
# characters is passed to spark-submit as a single, literal argument.
# The exit status is tested directly on the command; any failure maps to 255.
if ! spark-submit --class mobvista.dmp.datasource.adn.AdnRequestSdkHour \
  --name "AdnRequestSdkHour_${date}_${hour}_${region}" \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.network.timeout=720s \
  --conf "spark.default.parallelism=${parallelism}" \
  --conf "spark.sql.shuffle.partitions=${partition}" \
  --conf spark.kryoserializer.buffer.max=256m \
  --conf spark.sql.files.maxPartitionBytes=134217728 \
  --master yarn --deploy-mode cluster \
  --executor-memory 4g --driver-memory 4g \
  --executor-cores "${cores}" --num-executors "${executors}" \
  "../${JAR}" \
  -input "${INPUT_ADN_PATH}" \
  -output "${OUTPUT_PATH}" \
  -coalesce "${coalesce}" \
  -input_dict1 "${INPUT_MAPPING_PATH}" \
  -input_dict2 "${APPID_PACKAGE}"; then
  exit 255
fi