#!/bin/bash
#
# Submits the hourly DSP ETL Spark job (mobvista.dmp.datasource.dsp.DspEtlHour)
# for the hour that precedes the schedule time, for a single region.
#
# Usage:
#   <this script> <region> [<today>]
#
# Arguments:
#   $1  region  - region name; "cn" and "tokyo" get dedicated resource
#                 settings, any other value uses the default settings.
#   $2  today   - reference time handed to `date -d`; only used when the
#                 ScheduleTime environment variable is unset or empty.
#
# Environment:
#   ScheduleTime  - optional; takes precedence over $2 when non-empty.
#   ADN_DSP_PATH, JAR, check_await - referenced below but not defined here;
#                 presumably provided by ../dmp_env.sh (confirm there).
#
# Exit status:
#   255 if the environment file cannot be sourced, the region is missing,
#   the reference time cannot be parsed, or spark-submit fails.

# Resolved relative to the current working directory, not the script location.
source ../dmp_env.sh || {
  echo "ERROR: failed to source ../dmp_env.sh" >&2
  exit 255
}

region=$1
today=${ScheduleTime:-$2}

# The region is a component of both the input and the output paths, so an
# empty value would silently yield a malformed path.
if [[ -z "${region}" ]]; then
  echo "Usage: $0 <region> [<today>]" >&2
  exit 255
fi

# Process the hour before the reference time; fail early if `date` cannot
# parse it rather than continuing with empty path components.
datetime=$(date +"%Y/%m/%d %H" -d "1 hour ago $today") || {
  echo "ERROR: cannot derive the target hour from '${today}'" >&2
  exit 255
}
date=${datetime:0:10}  # "YYYY/MM/DD"
hour=${datetime:11:2}  # "HH"

# date_path=$(date +"%Y/%m/%d/%H" -d "$today")
date_path="${date}/${region}/${hour}"

INPUT_DSP_PATH="$ADN_DSP_PATH/${date_path}"
# TMP_OUTPUT_PATH="${TMP_DSP_REQUEST_DAILY_PATH}/$date_path"
# OUTPUT_PATH="$ETL_DSP_REQ_DAILY/$date_path"
# MDS_REQUEST_OUTPUT_PATH="${MDS_DSP_REQ_DAILY}/$date_path"
# RM_MDS_REQUEST_OUTPUT_PATH="${MDS_DSP_REQ_DAILY}/$rm_dt_path"

OUTPUT_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/etl_dsp_request_hour/${date_path}"
detailOutPath="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/mds_dsp_request_hour/${date_path}"

# NOTE(review): check_await is defined outside this file; presumably it blocks
# until the _SUCCESS marker of the input partition exists - confirm in
# dmp_env.sh. Its exit status is intentionally left unchecked, as before.
check_await "${INPUT_DSP_PATH}/_SUCCESS"

# NOTE(review): fixed 60 s pause after the marker check; the reason is not
# visible in this file.
sleep 60

# Per-region Spark sizing.
if [[ "${region}" == "cn" ]]; then
  parallelism=20
  partition=5
  cores=2
  executors=5
  coalesce=5
elif [[ "${region}" == "tokyo" ]]; then
  parallelism=4000
  partition=200
  cores=2
  executors=100
  coalesce=200
else
  parallelism=5000
  partition=200
  cores=2
  executors=100
  coalesce=400
fi

# Every expansion is quoted so that paths are passed as single arguments.
if ! spark-submit --class mobvista.dmp.datasource.dsp.DspEtlHour \
  --name "DspEtlHour_${date}_${hour}_${region}" \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.network.timeout=720s \
  --conf spark.default.parallelism="${parallelism}" \
  --conf spark.sql.shuffle.partitions="${partition}" \
  --conf spark.kryoserializer.buffer.max=512m \
  --conf spark.kryoserializer.buffer=64m \
  --conf spark.sql.files.maxPartitionBytes=134217728 \
  --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \
  --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
  --master yarn --deploy-mode cluster \
  --executor-memory 6g --driver-memory 6g \
  --executor-cores "${cores}" --num-executors "${executors}" \
  "../${JAR}" \
  -input "${INPUT_DSP_PATH}" \
  -output "${OUTPUT_PATH}" \
  -detailOutPath "${detailOutPath}" \
  -coalesce "${coalesce}"; then
  exit 255
fi