dsp_etl_hour.sh 1.94 KB
Newer Older
wang-jinfeng committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
#!/bin/bash

source ../dmp_env.sh

# Inputs:
#   $1            region name; it becomes part of the partition path and
#                 selects the Spark sizing further down in this script.
#   ScheduleTime  reference timestamp taken from the environment; when it is
#                 unset or empty, $2 is used instead.
region=$1
today=${ScheduleTime:-$2}

# The job processes the hour *before* the reference time. With GNU date an
# empty reference time resolves relative to "now".
# Abort if the reference time cannot be parsed: otherwise date/hour would be
# empty and every path derived from them would be malformed.
if ! datetime=$(date +"%Y/%m/%d %H" -d "1 hour ago $today"); then
    echo "${0##*/}: cannot parse reference time '${today}'" >&2
    exit 255
fi
date=${datetime:0:10}   # YYYY/MM/DD
hour=${datetime:11:2}   # HH

# Partition suffix shared by the input and both outputs: <date>/<region>/<hour>.
# Earlier variant, disabled:
#   date_path=$(date +"%Y/%m/%d/%H" -d "$today")
date_path=${date}/${region}/${hour}

# Disabled output locations, kept for reference:
#   TMP_OUTPUT_PATH="${TMP_DSP_REQUEST_DAILY_PATH}/$date_path"
#   OUTPUT_PATH="$ETL_DSP_REQ_DAILY/$date_path"
#   MDS_REQUEST_OUTPUT_PATH="${MDS_DSP_REQ_DAILY}/$date_path"
#   RM_MDS_REQUEST_OUTPUT_PATH="${MDS_DSP_REQ_DAILY}/$rm_dt_path"

# Active outputs: fixed prefixes under the mob-emr-test bucket's dev tree.
OUTPUT_PATH=s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/etl_dsp_request_hour/${date_path}
detailOutPath=s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/mds_dsp_request_hour/${date_path}

# Input: the same partition below ADN_DSP_PATH.
# NOTE(review): ADN_DSP_PATH is not assigned in this file - presumably it is
# exported by the sourced dmp_env.sh; confirm.
INPUT_DSP_PATH=${ADN_DSP_PATH}/${date_path}

# Gate on the input partition's _SUCCESS marker before doing any work.
# NOTE(review): check_await is not defined in this file - presumably it comes
# from the sourced dmp_env.sh and waits until the given path exists; confirm.
check_await "${INPUT_DSP_PATH}/_SUCCESS"

# Fixed 60-second pause before the job is submitted.
# NOTE(review): the reason is not stated here - presumably a grace period
# after the marker appears; confirm before removing or shortening.
sleep 60

# Spark sizing by region. Executor cores are identical everywhere; the other
# knobs are small for "cn", large for "tokyo", and largest for any other
# (or empty) region value.
cores=2
case "${region}" in
    cn)
        parallelism=20
        partition=5
        executors=5
        coalesce=5
        ;;
    tokyo)
        parallelism=4000
        partition=200
        executors=100
        coalesce=200
        ;;
    *)
        parallelism=5000
        partition=200
        executors=100
        coalesce=400
        ;;
esac

# Submit the DspEtlHour Spark job in cluster mode for the selected
# date/hour/region partition.
#   - parallelism / partition / cores / executors / coalesce are the
#     per-region sizing values chosen earlier in this script.
#   - The application jar is resolved as ../${JAR}.
#     NOTE(review): JAR is not assigned in this file - presumably it is
#     exported by the sourced dmp_env.sh; confirm.
#   - Application arguments: -input, -output, -detailOutPath, -coalesce.
# The command must remain one uninterrupted backslash-continued invocation:
# any stray line inside it ends the command early, so the status tested below
# would no longer be that of the real submission.
# Every expansion is quoted so that each value reaches spark-submit as exactly
# one argument.
# A failed submission is reported to the caller with exit status 255.
if ! spark-submit --class mobvista.dmp.datasource.dsp.DspEtlHour \
    --name "DspEtlHour_${date}_${hour}_${region}" \
    --conf spark.yarn.executor.memoryOverhead=2048 \
    --conf spark.network.timeout=720s \
    --conf "spark.default.parallelism=${parallelism}" \
    --conf "spark.sql.shuffle.partitions=${partition}" \
    --conf spark.kryoserializer.buffer.max=512m \
    --conf spark.kryoserializer.buffer=64m \
    --conf spark.sql.files.maxPartitionBytes=134217728 \
    --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \
    --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
    --deploy-mode cluster --executor-memory 6g --driver-memory 6g --executor-cores "${cores}" --num-executors "${executors}" \
    "../${JAR}" -input "${INPUT_DSP_PATH}" -output "${OUTPUT_PATH}" -detailOutPath "${detailOutPath}" -coalesce "${coalesce}"; then
    exit 255
fi