#!/bin/bash
# Hourly ETL driver for mobvista.dmp.datasource.adn.AdnOrgLogEtlHours
# (frankfurt region). Derives all date path components from a single
# reference instant: 2 hours before ScheduleTime.
#
# Usage: script.sh [ScheduleTime]
#   ScheduleTime may also be supplied via the environment; env wins.

source ../../dmp_env.sh

# Schedule time: environment variable takes precedence, else first CLI arg.
ScheduleTime=${ScheduleTime:-$1}

# Single source of truth for the reference instant (GNU date -d syntax).
# Previously this string was duplicated in six separate date calls; a typo
# in any one copy would silently skew one of the derived paths.
ref_time="2 hours ago $ScheduleTime"

output_date_path=$(date -d "$ref_time" "+%Y/%m/%d/%H")  # NOTE(review): appears unused below — confirm before removing
input_date_path=$(date -d "$ref_time" "+%Y/%m/%d")
yt=$(date -d "$ref_time" "+%Y")      # year,  e.g. 2024
mt=$(date -d "$ref_time" "+%m")      # month, zero-padded
dt=$(date -d "$ref_time" "+%d")      # day,   zero-padded
hhpath=$(date -d "$ref_time" "+%H")  # hour,  zero-padded
# Block until both upstream request logs for this hour have landed
# (check_await is provided by the sourced dmp_env.sh).
check_await "$ADN_REQUEST_PATH/$input_date_path/frankfurt/$hhpath/_SUCCESS"
check_await "$HB_REQUEST_PATH/$input_date_path/frankfurt/$hhpath/_SUCCESS"

# Glob patterns over the hourly input data.
# NOTE(review): neither variable is referenced later in this script —
# confirm they are not consumed by a sourced helper before removing.
INPUT_ADN_PATH="$ADN_REQUEST_PATH/$input_date_path/frankfurt/$hhpath/*"
INPUT_HB_PATH="$HB_REQUEST_PATH/$input_date_path/frankfurt/$hhpath/*"

# Output location for this hour's ETL result.
ETL_ADN_REQ_ORG_HOURS_PATH="${ETL_ADN_ORG_REQ_HOURS}/${input_date_path}/frankfurt/${hhpath}"

# Idempotency: clear any previous output for this hour. Quoted to avoid
# word-splitting/globbing; a "path does not exist" failure is expected on
# the first run and is deliberately ignored.
hadoop fs -rm -r "$ETL_ADN_REQ_ORG_HOURS_PATH" || true
# Run the hourly Spark ETL job on YARN; abort the whole script on failure.
# The original code followed `|| exit 1` with `if [[ $? -ne 0 ]]; then
# exit 255; fi`, which was dead code: the `||` branch already exits on
# failure, so $? is always 0 when the if is reached. The effective failure
# exit code (1) is preserved here.
spark-submit --class mobvista.dmp.datasource.adn.AdnOrgLogEtlHours \
  --conf spark.yarn.executor.memoryOverhead=3072 \
  --conf spark.sql.shuffle.partitions=2000 \
  --master yarn --deploy-mode cluster \
  --executor-memory 8g --driver-memory 6g --executor-cores 4 --num-executors 20 \
  ../../"${JAR}" -datetime "$yt$mt$dt$hhpath" \
  -output "$ETL_ADN_REQ_ORG_HOURS_PATH" -coalesce 200 -region frankfurt || exit 1

# Register the hour partition in the warehouse table
# (mount_partition is provided by the sourced dmp_env.sh),
# then publish the _SUCCESS marker so downstream consumers can proceed.
mount_partition "etl_adn_org_request_daily_hours" \
  "yt='${yt}',mt='${mt}',dt='${dt}',rg='frankfurt',ht='${hhpath}'" \
  "$ETL_ADN_REQ_ORG_HOURS_PATH"
hadoop fs -touchz "$ETL_ADN_REQ_ORG_HOURS_PATH/_SUCCESS"