#! /bin/bash

# # # # # # # # # # # # # # # # # # # # # #
# @author : Feng Liang (冯亮)
# @date   : 2018-04-25
# @desc   : Parse Toutiao ad-launch data (daily batch)
# # # # # # # # # # # # # # # # # # # # # #

# Shared environment: expected to provide TOUTIAO_LAUNCH_PATH,
# ETL_TOUTIAO_LAUNCH_DAILY, JAR and the helpers check_await,
# calculate_reduce_num and mount_partition used below.
# NOTE: this path and ../${JAR} are relative to the current working directory.
source ../dmp_env.sh

# Fail fast if the environment did not load. Without this guard an empty
# ETL_TOUTIAO_LAUNCH_DAILY would make the recursive delete below target
# "/YYYY/MM/DD" at the filesystem root.
for required_var in TOUTIAO_LAUNCH_PATH ETL_TOUTIAO_LAUNCH_DAILY JAR; do
  if [[ -z "${!required_var}" ]]; then
    echo "ERROR: required variable ${required_var} is empty or unset" >&2
    exit 255
  fi
done

# The job processes the day before ScheduleTime (GNU date relative syntax).
date_path=$(date +"%Y/%m/%d" -d "-1 day $ScheduleTime")
dt=$(date +"%Y%m%d" -d "-1 day $ScheduleTime")
# Not referenced anywhere else in this script; kept in case a sourced helper
# reads it. TODO(review): confirm and remove if unused.
old_path=$(date +"%Y/%m/%d" -d "-2 day $ScheduleTime")

INPUT_PATH="${TOUTIAO_LAUNCH_PATH}/${date_path}"
OUTPUT_PATH="${ETL_TOUTIAO_LAUNCH_DAILY}/${date_path}"

# Gate on the hour-23 _SUCCESS marker(s) under the input day. The glob is
# quoted so it reaches check_await verbatim instead of being expanded against
# the local filesystem. (Presumably check_await waits for the marker; see
# dmp_env.sh.)
check_await "${TOUTIAO_LAUNCH_PATH}/${date_path}/*/23/_SUCCESS"

# Shuffle partition count derived from the input, with a floor of 5.
# Surrounding whitespace is stripped, and an empty or non-numeric result falls
# back to the floor rather than being passed to Spark as an invalid setting.
reduce_num=$(calculate_reduce_num "$INPUT_PATH")
reduce_num=${reduce_num//[[:space:]]/}
if ! [[ "$reduce_num" =~ ^[0-9]+$ ]] || [ "$reduce_num" -le 5 ]; then
  reduce_num=5
fi

# Clear any previous output so a re-run starts clean; -f keeps a missing
# directory (first run) from being reported as an error.
hadoop fs -rm -r -f "${OUTPUT_PATH}"

if ! spark-submit --class mobvista.dmp.datasource.toutiao.EtlToutiaoDaily \
  --conf spark.sql.shuffle.partitions="${reduce_num}" \
  --conf spark.default.parallelism=200 \
  --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 20 \
  "../${JAR}" -input "$INPUT_PATH/*" -output "$OUTPUT_PATH"; then
  # Signal failure to the caller with the same status the job has always used.
  exit 255
fi

# Register the freshly written directory as partition dt=<yyyymmdd> of the
# etl_toutiao_launch_daily table.
mount_partition "etl_toutiao_launch_daily" "dt='$dt'" "$OUTPUT_PATH"