#!/bin/bash
# Daily DSP request ETL driver.
#
# Waits for the last hourly partition of "yesterday" (relative to
# ScheduleTime) to be published, then runs the DspOrgEtlDailys Spark job
# to merge the 24 hourly inputs into one daily output under
# ${TMP_DSP_REQUEST_DAILY_PATH}.
#
# Usage: <script> [ScheduleTime]
#   ScheduleTime may be given as $1 or via the environment; it is the
#   reference date passed to `date -d`.
#
# Requires: ../dmp_env.sh providing ETL_DSP_REQ_ETL_HOURS,
#   TMP_DSP_REQUEST_DAILY_PATH, JAR and the check_await helper.

set -o pipefail

# Abort immediately if the environment file is missing — otherwise the
# path variables below would be empty and `hadoop fs -rm -r` could hit
# the wrong (truncated) path.
source ../dmp_env.sh || exit 1

ScheduleTime=${ScheduleTime:-$1}

# All dates are derived from ScheduleTime; the job processes "yesterday".
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y-%m-%d")
dt=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
date_path=$(date -d "$ScheduleTime 1 days ago" "+%Y/%m/%d")
# NOTE(review): the variables below (and LOG_TIME/dt above) are computed
# but never used in this script — presumably leftovers of a 180-day
# retention/cleanup step; confirm before removing.
old_path=$(date -d "$ScheduleTime 2 days ago" "+%Y/%m/%d")
rm_dt=$(date -d "$ScheduleTime 180 days ago" "+%Y%m%d")
rm_dt_path=$(date -d "$ScheduleTime 180 days ago" "+%Y/%m/%d")

# Quoted wherever expanded below: the '*' wildcards must reach
# Spark/Hadoop literally for HDFS glob resolution, not be expanded by
# the local shell.
ETL_DSP_REQ_ETL_HOURS_INPUT_PATH="${ETL_DSP_REQ_ETL_HOURS}/${date_path}/*/*"
TMP_OUTPUT_PATH="${TMP_DSP_REQUEST_DAILY_PATH}/${date_path}"

# Block until the final hourly partition (hour 23) has its _SUCCESS marker.
check_await "${ETL_DSP_REQ_ETL_HOURS}/${date_path}/23/_SUCCESS"

# Clear any previous output; -f keeps the exit status clean when the
# path does not exist yet (first run / rerun after cleanup).
hadoop fs -rm -r -f "${TMP_OUTPUT_PATH}"

spark-submit --class mobvista.dmp.datasource.dsp.DspOrgEtlDailys \
  --conf spark.yarn.executor.memoryOverhead=3072 \
  --conf spark.sql.shuffle.partitions=10000 \
  --master yarn --deploy-mode cluster \
  --executor-memory 10g --driver-memory 6g --executor-cores 4 --num-executors 100 \
  "../${JAR}" \
  -input "${ETL_DSP_REQ_ETL_HOURS_INPUT_PATH}" \
  -output "${TMP_OUTPUT_PATH}" \
  -parallelism 2000 -coalesce 2000 || exit 1