#!/bin/sh
#
# Hourly DSP impression job for the "cn" region.
#
# Steps:
#   1. Wait for the _SUCCESS markers of the hourly impression_org and
#      request (ORC) inputs.
#   2. Mount both input locations as Hive partitions.
#   3. Clear the output location and run the Spark job
#      mobvista.dmp.datasource.dsp.DspImpressionHourFull over the window
#      [ScheduleTime - 3h, ScheduleTime - 1h].
#   4. Mount the output as a Hive partition and write its _SUCCESS marker.
#
# Usage: <script> [ScheduleTime]
#   ScheduleTime  Taken from the environment if set and non-empty, otherwise
#                 from the first positional argument. It is appended to a
#                 relative GNU `date -d` expression; an empty value makes
#                 `date` fall back to the current time.
#
# Exit status: 255 if a timestamp cannot be computed or the Spark job fails;
# otherwise the status of the final `hadoop fs -touchz`.

# POSIX `.` instead of the bash-only `source`, so the script also works when
# /bin/sh is not bash. check_await, common_mount_partition and JAR are not
# defined in this file; they are presumably provided by dmp_env.sh.
. ../dmp_env.sh

ScheduleTime=${ScheduleTime:-$1}

# Abort on an unparsable ScheduleTime: continuing with empty timestamps would
# build malformed S3 paths and then recursively delete one of them.
starttime=$(date -d "3 hours ago $ScheduleTime" "+%Y%m%d%H") || {
  echo "cannot compute start time from ScheduleTime='${ScheduleTime}'" >&2
  exit 255
}
endtime=$(date -d "1 hours ago $ScheduleTime" "+%Y%m%d%H") || {
  echo "cannot compute end time from ScheduleTime='${ScheduleTime}'" >&2
  exit 255
}

# Slice the YYYYMMDDHH end timestamp into its components instead of calling
# `date` once per field, so all path pieces always describe the same hour.
yearpath=${endtime%??????}      # YYYY: drop the trailing MMDDHH
month_onward=${endtime#????}    # MMDDHH
mothpath=${month_onward%????}   # MM
day_onward=${endtime#??????}    # DDHH
daypath=${day_onward%??}        # DD
hhpath=${endtime#????????}      # HH
datapath="${yearpath}/${mothpath}/${daypath}"

dsp_impression_org_hour="s3://mob-ad/adn/dsp/impression_org/impression/${datapath}/cn/${hhpath}"
dsp_request_orc_hour="s3://mob-ad/adn/dsp_orc/request/${datapath}/cn/${hhpath}"
OUTPUT_PATH="s3://mob-ad/adn/dsp/impression/${datapath}/cn/${hhpath}"

# Partition spec shared by the two input tables and the output table.
partition_spec="yr='${yearpath}', mt='${mothpath}',dt='${daypath}',rg='cn',hh='${hhpath}'"

# Upstream readiness checks on both inputs' _SUCCESS markers.
# NOTE(review): the waiting/abort behaviour lives in check_await, which is
# not visible here — confirm it stops the script when an input never arrives.
check_await "${dsp_impression_org_hour}/_SUCCESS"
check_await "${dsp_request_orc_hour}/_SUCCESS"

common_mount_partition "adn_dsp" "log_adn_dsp_impression_org_hour" "${partition_spec}" "${dsp_impression_org_hour}"
common_mount_partition "adn_dsp" "log_adn_dsp_request_orc_hour" "${partition_spec}" "${dsp_request_orc_hour}"

# Start from an empty output location. `-rm -r -f` replaces the deprecated
# `-rmr`; -f makes a missing path a non-error, so the former
# "create the directory if absent, then delete it" round trip is unnecessary.
hadoop fs -rm -r -f "${OUTPUT_PATH}"

if ! spark-submit --class mobvista.dmp.datasource.dsp.DspImpressionHourFull \
  --conf spark.sql.shuffle.partitions=6 \
  --conf spark.default.parallelism=6 \
  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
  --conf spark.yarn.executor.memoryOverhead=1024 \
  --master yarn \
  --deploy-mode cluster \
  --executor-memory 2G \
  --driver-memory 1G \
  --executor-cores 2 \
  --num-executors 3 \
  "../${JAR}" \
  -starttime "${starttime}" -endtime "${endtime}" -output "${OUTPUT_PATH}"; then
  # Leave the output unmounted and without a _SUCCESS marker on failure.
  exit 255
fi

common_mount_partition "adn_dsp" "log_adn_dsp_impression_hour" "${partition_spec}" "${OUTPUT_PATH}"

# Publish the completion marker for this hour's output.
hadoop fs -touchz "${OUTPUT_PATH}/_SUCCESS"