#!/bin/bash
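# adn_org_etl_hours_virginia.sh
# dmp_env.sh (sourced below) is expected to provide the environment variables
# (ADN_REQUEST_PATH, ETL_ADN_ORG_REQ_HOURS, HIVE_SITE_PATH, JAR) and the helper
# functions (check_await, mount_partition) used in this script.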

source ../../dmp_env.sh

ScheduleTime=${ScheduleTime:-$1}
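# ScheduleTime falls back to the first CLI argument; every date component below
# refers to the hour two hours before that time.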
output_date_path=$(date -d "2 hours ago $ScheduleTime" "+%Y/%m/%d/%H")

input_date_path=$(date -d "2 hours ago $ScheduleTime" "+%Y/%m/%d")
yt=$(date -d "2 hours ago $ScheduleTime" "+%Y")
mt=$(date -d "2 hours ago $ScheduleTime" "+%m")
dt=$(date -d "2 hours ago $ScheduleTime" "+%d")
hhpath=$(date -d "2 hours ago $ScheduleTime" "+%H")

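# Block until the upstream AdnRequest hourly input for this hour is complete
# (check_await waits for the _SUCCESS flag; presumably defined in ../../dmp_env.sh).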
check_await "$ADN_REQUEST_PATH/$input_date_path/virginia/$hhpath/_SUCCESS"

INPUT_ADN_PATH="$ADN_REQUEST_PATH/$input_date_path/virginia/$hhpath/*"

ETL_ADN_REQ_ORG_HOURS_PATH="${ETL_ADN_ORG_REQ_HOURS}/${input_date_path}/virginia/${hhpath}"

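# Clear any previous output for this hour so reruns start from a clean directory.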
hadoop fs -rm -r "$ETL_ADN_REQ_ORG_HOURS_PATH"

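# Run the hourly org-log ETL Spark job for the virginia region on YARN in cluster mode.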
spark-submit --class mobvista.dmp.datasource.adn.AdnOrgLogEtlHours \
  --conf spark.yarn.executor.memoryOverhead=3072 \
  --conf spark.sql.shuffle.partitions=2000 \
  --files ${HIVE_SITE_PATH} \
  --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 4 --num-executors 20 \
  ../../${JAR} -datetime "$yt$mt$dt$hhpath" -output "$ETL_ADN_REQ_ORG_HOURS_PATH" -coalesce 200 -region virginia

if [[ $? -ne 0 ]]; then
    exit 255
fi

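# Register the output path as a partition of the etl_adn_org_request_daily_hours Hive table
# (mount_partition is presumably provided by ../../dmp_env.sh).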
mount_partition "etl_adn_org_request_daily_hours" "yt='${yt}',mt='${mt}',dt='${dt}',rg='virginia',ht='${hhpath}'" "$ETL_ADN_REQ_ORG_HOURS_PATH"

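# Publish a _SUCCESS flag so downstream jobs know this hour's output is ready.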
hadoop fs -touchz "$ETL_ADN_REQ_ORG_HOURS_PATH/_SUCCESS"