#!/bin/bash
# Runs the RTDmpAS Spark job for the hour preceding ${ScheduleTime}, then
# writes a _SUCCESS marker into every per-region output directory.
#
# Environment:
#   ScheduleTime - reference timestamp; the job processes the hour before it.
#   JAR          - application jar, resolved relative to the parent directory
#                  (presumably exported by ../dmp_env.sh - TODO confirm).
#
# Exit status: 255 if spark-submit or any marker step fails, 0 otherwise.

source ../dmp_env.sh

today=${ScheduleTime}

# Both values describe the same hour (one hour before ${today}):
# date_time is passed to the job, date_path selects the S3 hour partition.
date_time=$(date +"%Y-%m-%d %H" -d "-1 hour $today")
date_path=$(date +%Y/%m/%d/%H -d "-1 hour $today")

INPUT_AUDIENCE="s3://mob-emr-test/dataplatform/rtdmp_base/${date_path}/audience"
INPUT_DATA="s3://mob-emr-test/dataplatform/rtdmp_base/${date_path}/data"
# INPUT_DATA="s3://mob-emr-test/dataplatform/rtdmp_deal/${date_path}"
OUTPUT="s3://mob-emr-test/dataplatform/rtdmp/${date_path}"

# NOTE(review): spark.speculation is false, so the quantile/multiplier
# settings below are presumably inert; they are kept unchanged.
spark-submit --class mobvista.dmp.datasource.rtdmp.RTDmpAS \
  --name "RTDmpAS.wangjf.${date_time}" \
  --conf spark.sql.shuffle.partitions=100 \
  --conf spark.default.parallelism=100 \
  --conf spark.kryoserializer.buffer.max=256m \
  --conf spark.speculation=false \
  --conf spark.speculation.quantile=0.9 \
  --conf spark.speculation.multiplier=1.3 \
  --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
  --master yarn --deploy-mode cluster \
  --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 20 \
  "../${JAR}" \
  -input_audience "${INPUT_AUDIENCE}" \
  -input_data "${INPUT_DATA}" \
  -output "${OUTPUT}" \
  -coalesce 100 \
  -time "${date_time}"

if [[ $? -ne 0 ]]; then
  exit 255
fi

# Every region gets a _SUCCESS marker, even when the job wrote no data for it;
# in that case the region directory is created first.
regions=(cn virginia seoul tokyo frankfurt singapore)

for region in "${regions[@]}"; do
  region_dir="${OUTPUT}/data/${region}"

  if ! hadoop fs -ls "${region_dir}" > /dev/null 2>&1; then
    # -p also creates a missing ${OUTPUT}/data parent directory.
    if ! hadoop fs -mkdir -p "${region_dir}"; then
      echo "failed to create ${region_dir}" >&2
      exit 255
    fi
  fi

  # Check each marker individually: a for loop's exit status only reflects
  # the last command it ran, so a single check after the loop would miss
  # failures for every region but the last.
  if ! hadoop fs -touchz "${region_dir}/_SUCCESS"; then
    echo "failed to write ${region_dir}/_SUCCESS" >&2
    exit 255
  fi
done