# adn_org_etl_hours_frankfurt.sh (1.46 KB)
#!/bin/bash
# Hourly ETL for ADN org request logs, "frankfurt" region.
#
# Usage:  adn_org_etl_hours_frankfurt.sh [ScheduleTime]
#   ScheduleTime may be passed as $1 or via the environment; the hour
#   actually processed is two hours before ScheduleTime.
#
# Requires dmp_env.sh to define: ADN_REQUEST_PATH, HB_REQUEST_PATH,
# ETL_ADN_ORG_REQ_HOURS, JAR, and the check_await / mount_partition helpers.
#
# Exit status: 255 if the Spark job fails, 0 otherwise.

source ../../dmp_env.sh

ScheduleTime=${ScheduleTime:-$1}

# Derive every date component from one reference expression so that a run
# straddling an hour/day boundary cannot mix components from two instants.
ref_time="2 hours ago $ScheduleTime"
output_date_path=$(date -d "$ref_time" "+%Y/%m/%d/%H")   # NOTE(review): currently unused below
input_date_path=$(date -d "$ref_time" "+%Y/%m/%d")
yt=$(date -d "$ref_time" "+%Y")
mt=$(date -d "$ref_time" "+%m")
dt=$(date -d "$ref_time" "+%d")
hhpath=$(date -d "$ref_time" "+%H")

# Block until both upstream hourly partitions have published their markers.
check_await "$ADN_REQUEST_PATH/$input_date_path/frankfurt/$hhpath/_SUCCESS"
check_await "$HB_REQUEST_PATH/$input_date_path/frankfurt/$hhpath/_SUCCESS"

INPUT_ADN_PATH="$ADN_REQUEST_PATH/$input_date_path/frankfurt/$hhpath/*"
INPUT_HB_PATH="$HB_REQUEST_PATH/$input_date_path/frankfurt/$hhpath/*"

ETL_ADN_REQ_ORG_HOURS_PATH="${ETL_ADN_ORG_REQ_HOURS}/${input_date_path}/frankfurt/${hhpath}"

# Clear any partial output from a previous attempt; a "path does not exist"
# failure here is expected on a first run and intentionally not fatal.
hadoop fs -rm -r "$ETL_ADN_REQ_ORG_HOURS_PATH"

spark-submit --class mobvista.dmp.datasource.adn.AdnOrgLogEtlHours \
  --conf spark.yarn.executor.memoryOverhead=3072 \
  --conf spark.sql.shuffle.partitions=2000 \
  --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 4 --num-executors 20 \
  "../../${JAR}" -datetime "$yt$mt$dt$hhpath" -output "$ETL_ADN_REQ_ORG_HOURS_PATH" -coalesce 200 -region frankfurt

# BUG FIX: the original chained `|| exit 1` on spark-submit AND then tested
# `$? -ne 0` — by that point $? was always 0 (the || branch had already
# exited), so the exit-255 path was dead code. A single explicit check now
# handles failure.
if [[ $? -ne 0 ]]; then
    exit 255
fi

# Register this hour's output directory as a Hive partition.
mount_partition "etl_adn_org_request_daily_hours" "yt='${yt}',mt='${mt}',dt='${dt}',rg='frankfurt',ht='${hhpath}'" "$ETL_ADN_REQ_ORG_HOURS_PATH"

# Publish the success marker so downstream consumers can proceed.
hadoop fs -touchz "$ETL_ADN_REQ_ORG_HOURS_PATH/_SUCCESS"