#!/bin/sh
# # # # # # # # # # # # # # # # # # # # # #
# @author : wangjf
# @date   : 2019-02-27 11:24:06
# # # # # # # # # # # # # # # # # # # # # #
# @update : wangjf
#
# Daily ETL for ByteDance logs: wait until the previous day's raw log
# partition is complete, run the ByteDanceDaily Spark job over it, then
# mount the resulting Hive partition on the job's output directory.
#
# Requires ../dmp_env.sh to define: ScheduleTime, BYTEDANCE_LOG_PATH,
# BYTEDANCE_DAILY_PATH, JAR, check_await, mount_partition.

# POSIX '.' instead of the bash-only 'source' (script runs under /bin/sh).
. ../dmp_env.sh

# NOTE(review): LOG_TIME is computed but not referenced below — possibly
# consumed by something in dmp_env.sh or kept for convention; confirm
# before removing.
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" +"%Y%m%d")
dt=$(date -d "$ScheduleTime 1 days ago" +"%Y-%m-%d")
date_path=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d")
# NOTE(review): old_date_path is only echoed, never used — verify intent.
old_date_path=$(date -d "$ScheduleTime 2 days ago" +"%Y/%m/%d")

echo "$date_path"
echo "$old_date_path"

# The trailing /*/* selects every hour/file under the day's partition.
# Keep it quoted everywhere so the local shell never expands the glob
# against the local filesystem — it must reach Spark literally.
INPUT_PATH="${BYTEDANCE_LOG_PATH}/${date_path}/*/*"
OUTPUT_PATH="${BYTEDANCE_DAILY_PATH}/${date_path}"

# Block until the last hourly partition (hour 23) of the day is written.
check_await "${BYTEDANCE_LOG_PATH}/${date_path}/23/_SUCCESS"

# Remove stale output from a previous run; a failure here (path missing
# on first run) is expected and intentionally not fatal.
hadoop fs -rm -r "${OUTPUT_PATH}/"

spark-submit --class mobvista.dmp.datasource.bytedance.ByteDanceDaily \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.network.timeout=720s \
  --conf spark.default.parallelism=100 \
  --master yarn --deploy-mode cluster --name bytedance_daily_wangjf \
  --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 30 \
  ../"${JAR}" -input "${INPUT_PATH}" -output "${OUTPUT_PATH}" -coalesce 10 \
  || exit 255

mount_partition "etl_bytedance_daily" "dt='${dt}'" "$OUTPUT_PATH"