#!/bin/bash
# Hourly ETL driver for ADN/HB request logs (Seoul region).
#
# Waits for the upstream ADN and HB request datasets for the hour that is
# two hours before ScheduleTime, runs the AdnOrgLogEtlHours Spark job over
# them, mounts the resulting Hive partition, and drops a _SUCCESS marker.
#
# Usage:   script.sh [ScheduleTime]
# Env:     ScheduleTime may also be supplied via the environment; the
#          positional argument is used as a fallback.
# Globals: ADN_REQUEST_PATH, HB_REQUEST_PATH, ETL_ADN_ORG_REQ_HOURS, JAR
#          and helpers check_await / mount_partition come from dmp_env.sh.

source ../../dmp_env.sh

ScheduleTime=${ScheduleTime:-$1}

# All date parts are keyed to the hour two hours before ScheduleTime.
# Compute the base expression once instead of repeating it per field.
base_time="2 hours ago $ScheduleTime"
output_date_path=$(date -d "$base_time" "+%Y/%m/%d/%H")
input_date_path=$(date -d "$base_time" "+%Y/%m/%d")
yt=$(date -d "$base_time" "+%Y")
mt=$(date -d "$base_time" "+%m")
dt=$(date -d "$base_time" "+%d")
hhpath=$(date -d "$base_time" "+%H")

# Block until both upstream hourly datasets have landed (marked by _SUCCESS).
check_await "$ADN_REQUEST_PATH/$input_date_path/seoul/$hhpath/_SUCCESS"
check_await "$HB_REQUEST_PATH/$input_date_path/seoul/$hhpath/_SUCCESS"

INPUT_ADN_PATH="$ADN_REQUEST_PATH/$input_date_path/seoul/$hhpath/*"
INPUT_HB_PATH="$HB_REQUEST_PATH/$input_date_path/seoul/$hhpath/*"

ETL_ADN_REQ_ORG_HOURS_PATH="${ETL_ADN_ORG_REQ_HOURS}/${input_date_path}/seoul/${hhpath}"

# Clear any partial output left by a previous run; a "path does not exist"
# failure here is harmless and intentionally not checked.
hadoop fs -rm -r "$ETL_ADN_REQ_ORG_HOURS_PATH"

# Run the hourly ETL Spark job. Abort the script if it fails.
# NOTE: the original script had an unreachable `if [[ $? -ne 0 ]]; then
# exit 255; fi` after `|| exit 1` — the `|| exit 1` already handles failure,
# so the dead check has been removed and the observed exit code (1) kept.
spark-submit --class mobvista.dmp.datasource.adn.AdnOrgLogEtlHours \
  --conf spark.yarn.executor.memoryOverhead=3072 \
  --conf spark.sql.shuffle.partitions=2000 \
  --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 4 --num-executors 20 \
  ../../"${JAR}" -datetime "$yt$mt$dt$hhpath" -output "$ETL_ADN_REQ_ORG_HOURS_PATH" -coalesce 200 -region seoul || exit 1

# Register the freshly written hour as a Hive partition; fail loudly if the
# mount does not succeed instead of silently continuing.
mount_partition "etl_adn_org_request_daily_hours" "yt='${yt}',mt='${mt}',dt='${dt}',rg='seoul',ht='${hhpath}'" "$ETL_ADN_REQ_ORG_HOURS_PATH" || exit 1

# Publish the _SUCCESS marker so downstream consumers can proceed.
hadoop fs -touchz "$ETL_ADN_REQ_ORG_HOURS_PATH/_SUCCESS"