adn_org_etl_hours_virginia.sh 1.44 KB
Newer Older
wang-jinfeng committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14
#!/bin/bash
#
# Hourly ETL of the ADN request logs for the "virginia" region.
#
# Usage: adn_org_etl_hours_virginia.sh [ScheduleTime]
#   ScheduleTime  read from the environment when set, otherwise from $1.
#                 The job processes the hour that lies two hours before it.
#
# Steps:
#   1. wait for the upstream _SUCCESS markers of that hour,
#   2. remove any previous output of the same hour,
#   3. run the Spark job AdnOrgLogEtlHours,
#   4. mount the resulting partition and write a _SUCCESS marker.
#
# check_await, mount_partition and the *_PATH / JAR variables are expected to
# come from dmp_env.sh, which is resolved relative to the current working
# directory (as is the jar path further down).

source ../../dmp_env.sh

ScheduleTime=${ScheduleTime:-$1}

# Resolve the target hour with a single `date` call so that every path
# component refers to the same instant, and stop early when the time cannot be
# parsed instead of continuing with empty path components.
if ! target_hour=$(date -d "2 hours ago $ScheduleTime" "+%Y %m %d %H"); then
  echo "cannot compute target hour from ScheduleTime='$ScheduleTime'" >&2
  exit 1
fi
read -r yt mt dt hhpath <<<"$target_hour"

output_date_path="$yt/$mt/$dt/$hhpath"
input_date_path="$yt/$mt/$dt"

# Block until both upstream inputs of the hour are flagged complete.
check_await "$ADN_REQUEST_PATH/$input_date_path/virginia/$hhpath/_SUCCESS"
check_await "$HB_REQUEST_PATH/$input_date_path/virginia/$hhpath/_SUCCESS"

# NOTE(review): INPUT_ADN_PATH is assigned twice (the HB value wins) and is not
# read anywhere in this script; the second one was presumably meant to be a
# separate HB variable - confirm before renaming or removing.
INPUT_ADN_PATH="$ADN_REQUEST_PATH/$input_date_path/virginia/$hhpath/*"
INPUT_ADN_PATH="$HB_REQUEST_PATH/$input_date_path/virginia/$hhpath/*"

# ":?" aborts the script when the output root is unset or empty, so the
# recursive delete below can never be aimed at a path without its prefix.
ETL_ADN_REQ_ORG_HOURS_PATH="${ETL_ADN_ORG_REQ_HOURS:?ETL_ADN_ORG_REQ_HOURS is not set}/${input_date_path}/virginia/${hhpath}"

# Clear the output of a previous run of the same hour; a failure here (for
# example because the path does not exist yet) is deliberately not fatal.
hadoop fs -rm -r "$ETL_ADN_REQ_ORG_HOURS_PATH"

# "|| exit 1" is the only failure handling needed: a separate "$?" test after
# it could never observe a non-zero status.
spark-submit --class mobvista.dmp.datasource.adn.AdnOrgLogEtlHours \
  --conf spark.yarn.executor.memoryOverhead=3072 \
  --conf spark.sql.shuffle.partitions=2000 \
  --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 4 --num-executors 20 \
  "../../${JAR}" -datetime "$yt$mt$dt$hhpath" -output "$ETL_ADN_REQ_ORG_HOURS_PATH" -coalesce 200 -region virginia || exit 1

mount_partition "etl_adn_org_request_daily_hours" "yt='${yt}',mt='${mt}',dt='${dt}',rg='virginia',ht='${hhpath}'" "$ETL_ADN_REQ_ORG_HOURS_PATH"

# Write the completion marker for this hour's output.
hadoop fs -touchz "$ETL_ADN_REQ_ORG_HOURS_PATH/_SUCCESS"