#!/bin/bash
#
# Hourly driver for the RTDmpASV2 Spark job.
#
# Steps:
#   1. Resolve the hour preceding ${ScheduleTime} and derive input/output paths.
#   2. Size the Spark job from the line count of the input directory listing.
#   3. Run mobvista.dmp.datasource.rtdmp.RTDmpASV2 via spark-submit.
#   4. Publish _SUCCESS markers under data/ and data_v2/ for every ad type,
#      then a top-level _SUCCESS marker.
#   5. Remove the rtdmp and rtdmp_deal directories that are 168 hours (7 days)
#      older than ${ScheduleTime}.
#
# Environment:
#   ScheduleTime - reference timestamp handed to `date -d`; not set in this
#                  script (presumably injected by the scheduler - confirm).
#   JAR          - jar path relative to the parent directory; not set in this
#                  script (presumably exported by ../dmp_env.sh - confirm).
#
# Exit status: 255 on any failure before the cleanup step; otherwise the
# status of the last cleanup command.

source ../dmp_env.sh

#######################################
# Print a message to stderr and abort the job with status 255.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 255
}

#######################################
# Ensure a directory exists and drop a _SUCCESS marker into it.
# Arguments: $1 - directory URI
# Returns:   0 when the marker was written, non-zero otherwise
#######################################
mark_success() {
  local dir=$1
  if ! hadoop fs -ls "${dir}" >/dev/null 2>&1; then
    hadoop fs -mkdir -p "${dir}" || return 1
  fi
  hadoop fs -touchz "${dir}/_SUCCESS"
}

#######################################
# Recursively delete a directory if it exists.
# Arguments: $1 - directory URI (must be non-empty)
# Returns:   status of the delete, or 0 when the directory is absent
#######################################
remove_if_present() {
  local dir=${1:?remove_if_present: empty path}
  if hadoop fs -ls "${dir}" >/dev/null 2>&1; then
    hadoop fs -rm -r "${dir}"
  fi
}

today=${ScheduleTime}

# Every path below embeds one of these date strings. If `date` fails (e.g. a
# malformed ScheduleTime) the substitution would be empty and the paths would
# collapse to the dataset roots, so the recursive delete at the end would wipe
# the whole tree. Abort immediately instead.
date_time=$(date +"%Y-%m-%d.%H" -d "-1 hour $today") \
  || die "cannot compute date_time from ScheduleTime='${today}'"
date_path=$(date +%Y/%m/%d/%H -d "-1 hour $today") \
  || die "cannot compute date_path from ScheduleTime='${today}'"
expire_date_path=$(date +%Y/%m/%d/%H -d "-168 hour $today") \
  || die "cannot compute expire_date_path from ScheduleTime='${today}'"

INPUT_DATA="s3://mob-emr-test/dataplatform/rtdmp_deal/${date_path}"
OUTPUT="s3://mob-emr-test/dataplatform/rtdmp/${date_path}"

# Line count of the listing output, used only as a rough size signal.
# NOTE(review): this counts every line hadoop prints, not just entries.
part_num=$(hadoop fs -ls "${INPUT_DATA}" | wc -l)

if [[ ${part_num} -le 10 ]]; then
  # Little or no input: run with a minimal resource footprint.
  echo "This Dir No Data !!!"
  partition=10
  coalesce=10
  executor=2
  memory=4
  core=2
else
  partition=2000
  coalesce=200
  executor=50
  memory=10
  core=3
fi

spark-submit --class mobvista.dmp.datasource.rtdmp.RTDmpASV2 \
  --name "RTDmpASV2.${date_time}" \
  --conf spark.sql.shuffle.partitions="${partition}" \
  --conf spark.default.parallelism="${partition}" \
  --conf spark.kryoserializer.buffer.max=512m \
  --conf spark.kryoserializer.buffer=64m \
  --master yarn --deploy-mode cluster \
  --executor-memory "${memory}g" --driver-memory 8g \
  --executor-cores "${core}" --num-executors "${executor}" \
  "../${JAR}" \
  -input_data "${INPUT_DATA}" -output "${OUTPUT}" \
  -coalesce "${coalesce}" -time "${date_time}" \
  || die "spark-submit RTDmpASV2 failed for ${date_time}"

ads=(foractivation adx normal)

# 2020-11-20 14:45:52: the per-region output logic was removed. The disabled
# code is kept here for reference only:
#
#   res="cn virginia seoul tokyo frankfurt singapore"
#   for ad in ${ads}; do
#     for re in ${res}; do
#       if hadoop fs -ls "$OUTPUT/data/${ad}/$re" > /dev/null 2>&1; then
#         hadoop fs -touchz ${OUTPUT}/data/${ad}/${re}/_SUCCESS
#       else
#         hadoop fs -mkdir -p ${OUTPUT}/data/${ad}/${re}
#         hadoop fs -touchz ${OUTPUT}/data/${ad}/${re}/_SUCCESS
#       fi
#     done
#     hadoop fs -touchz ${OUTPUT}/data/${ad}/_SUCCESS
#   done
#   if [[ $? -ne 0 ]]; then
#     exit 255
#   fi

# Publish per-ad markers. Each one is checked so that the top-level _SUCCESS
# is never written when a per-ad marker could not be created.
for ad in "${ads[@]}"; do
  mark_success "${OUTPUT}/data_v2/${ad}" \
    || die "cannot write _SUCCESS under ${OUTPUT}/data_v2/${ad}"
  mark_success "${OUTPUT}/data/${ad}" \
    || die "cannot write _SUCCESS under ${OUTPUT}/data/${ad}"
done

hadoop fs -touchz "${OUTPUT}/_SUCCESS" \
  || die "cannot write ${OUTPUT}/_SUCCESS"

# Retention cleanup: drop the directories for the hour 168 hours (7 days)
# before ScheduleTime. The first removal is best-effort; the status of the
# last one becomes the script's exit status.
EXPIRE_RTDMP_OUTPUT_PATH="s3://mob-emr-test/dataplatform/rtdmp/${expire_date_path:?}"
remove_if_present "${EXPIRE_RTDMP_OUTPUT_PATH}"

EXPIRE_RTDMP_DEAL_OUTPUT_PATH="s3://mob-emr-test/dataplatform/rtdmp_deal/${expire_date_path:?}"
remove_if_present "${EXPIRE_RTDMP_DEAL_OUTPUT_PATH}"