#!/bin/bash
# Daily ETL driver for DSP request data: waits until the previous day's
# hourly ETL output is complete, clears the temp output directory, then
# launches the Spark aggregation job.
# Usage: dsp_org_etl_daily.sh [ScheduleTime]
#   ScheduleTime may also be provided via the environment (scheduler-driven).

source ../dmp_env.sh

# Environment variable wins; fall back to the first CLI argument.
ScheduleTime=${ScheduleTime:-$1}

# Date stamps for the previous day — the partition being processed.
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y-%m-%d")
dt=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
date_path=$(date -d "$ScheduleTime 1 days ago" "+%Y/%m/%d")
old_path=$(date -d "$ScheduleTime 2 days ago" "+%Y/%m/%d")

# 180-day retention cutoff — presumably consumed by a cleanup step; TODO confirm.
rm_dt=$(date -d "$ScheduleTime 180 days ago" "+%Y%m%d")
rm_dt_path=$(date -d "$ScheduleTime 180 days ago" "+%Y/%m/%d")

# Input: every hourly partition of the day (HDFS glob passed to the Spark job).
ETL_DSP_REQ_ETL_HOURS_INPUT_PATH="${ETL_DSP_REQ_ETL_HOURS}/${date_path}/*/*"
TMP_OUTPUT_PATH="${TMP_DSP_REQUEST_DAILY_PATH}/${date_path}"

# Block until the last hourly partition (hour 23) has its _SUCCESS marker.
check_await "${ETL_DSP_REQ_ETL_HOURS}/${date_path}/23/_SUCCESS"

# Remove stale output; -f keeps this quiet when the path does not exist yet.
hadoop fs -rm -r -f "${TMP_OUTPUT_PATH}"

# Run the daily aggregation Spark job over all hourly input for the day.
# Any failure of spark-submit aborts the script with exit code 1.
spark-submit --class mobvista.dmp.datasource.dsp.DspOrgEtlDailys \
  --conf spark.yarn.executor.memoryOverhead=3072 \
  --conf spark.sql.shuffle.partitions=10000 \
  --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 4 --num-executors 100 \
  ../"${JAR}" -input "${ETL_DSP_REQ_ETL_HOURS_INPUT_PATH}" \
  -output "${TMP_OUTPUT_PATH}" \
  -parallelism 2000 -coalesce 2000 || exit 1