etl_toutiao_launch_daily.sh 1.09 KB
Newer Older
wang-jinfeng committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
#! /bin/bash

# # # # # # # # # # # # # # # # # # # # # #
# @author : 冯亮
# @date : 2018-04-25
# @desc : Parse Toutiao ad-launch daily data
# # # # # # # # # # # # # # # # # # # # # #

# Load the shared DMP environment. This script uses the following names that
# it does not define itself: TOUTIAO_LAUNCH_PATH, ETL_TOUTIAO_LAUNCH_DAILY, JAR,
# check_await, calculate_reduce_num and mount_partition.
# The path is relative, so the script must be started from its own directory.
source ../dmp_env.sh

# Refuse to continue with empty base settings: the output directory is removed
# recursively further down, and an empty prefix would turn that into the
# removal of a root-level path.
for required_var in TOUTIAO_LAUNCH_PATH ETL_TOUTIAO_LAUNCH_DAILY JAR; do
  if [[ -z "${!required_var}" ]]; then
    echo "ERROR: ${required_var} is not set (expected from ../dmp_env.sh or the environment)" >&2
    exit 255
  fi
done

# The processed day is the day before $ScheduleTime.
# NOTE(review): ScheduleTime is presumably provided by the scheduler; when it
# is empty GNU date resolves "-1 day" relative to the current time.
date_path=$(date +"%Y/%m/%d" -d "-1 day $ScheduleTime")
dt=$(date +"%Y%m%d" -d "-1 day $ScheduleTime")
# NOTE(review): old_path is not referenced below; kept in case a sourced helper
# reads it as a global -- confirm and remove if unused.
old_path=$(date +"%Y/%m/%d" -d "-2 day $ScheduleTime")

# A malformed ScheduleTime makes `date` fail and leaves these empty, which
# would make OUTPUT_PATH point at the whole ETL root instead of one day.
if [[ -z "$date_path" || -z "$dt" ]]; then
  echo "ERROR: cannot derive the processing date from ScheduleTime='${ScheduleTime}'" >&2
  exit 255
fi

INPUT_PATH="${TOUTIAO_LAUNCH_PATH}/${date_path}"
OUTPUT_PATH="${ETL_TOUTIAO_LAUNCH_DAILY}/${date_path}"

# Check for the hour-23 _SUCCESS marker under every sub-directory of the day.
# The pattern is quoted so it reaches check_await unchanged instead of being
# expanded against the local filesystem.
check_await "${INPUT_PATH}/*/23/_SUCCESS"

# Shuffle partition count derived from the input, with a floor of 5.
reduce_num=$(calculate_reduce_num "$INPUT_PATH")
if ! [[ "$reduce_num" =~ ^[0-9]+$ ]]; then
  # An empty or non-numeric value would otherwise yield an empty Spark setting.
  echo "WARN: calculate_reduce_num returned '${reduce_num}', falling back to 5" >&2
  reduce_num=5
elif [ "$reduce_num" -le 5 ]; then
  reduce_num=5
fi

# Clear any previous output so the job can be re-run for the same day.
# -f keeps a not-yet-existing directory from being reported as an error.
hadoop fs -rm -r -f "${OUTPUT_PATH}"

if ! spark-submit --class mobvista.dmp.datasource.toutiao.EtlToutiaoDaily \
    --conf spark.sql.shuffle.partitions="${reduce_num}" \
    --conf spark.default.parallelism=200 \
    --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 20 \
    "../${JAR}" -input "${INPUT_PATH}/*" -output "${OUTPUT_PATH}"; then
  echo "ERROR: spark-submit failed for ${INPUT_PATH}" >&2
  exit 255
fi

# Kept as the last command so its exit status becomes the script's status.
mount_partition "etl_toutiao_launch_daily" "dt='$dt'" "$OUTPUT_PATH"