#!/usr/bin/env bash

source ../../dmp_env.sh
source ../../ga_rawdata_analysis/common/tools.sh

echo "job begin!!!"

dt_today=$(date -d "$ScheduleTime tomorrow" +"%Y%m%d")
# The original computed "tomorrow" here as well, which made dt_three_days_ago
# identical to dt_today. "tomorrow -3 days" (dt_today minus three days,
# presumably the intended four-day window) matches the variable name; the
# value is only consumed by the commented-out deduplication step below.
dt_three_days_ago=$(date -d "$ScheduleTime tomorrow -3 days" +"%Y%m%d")
dt_slash_today=$(date -d "$ScheduleTime tomorrow" +"%Y/%m/%d")
update=$(date -d "$ScheduleTime tomorrow" +"%Y-%m-%d")  # currently unused in this script
dt_oneday_ago=$(date -d "$ScheduleTime" +"%Y/%m/%d")
hour="01"

IMEIMD5_REQUEST_INPUT_PATH="${ALIPAY_LAHUO_DAILY_PATH}/${dt_oneday_ago}/imeimd5_request_data/${hour}"
IMEIMD5_RESPONSE_INPUT_PATH="${ALIPAY_LAHUO_DAILY_TMP_PATH}/${dt_slash_today}/imeimd5/${hour}"

OUTPUT01="${ALIPAY_ACTIVATION_DAILY_PATH}/${dt_slash_today}/${hour}/alipay_activation"
OUTPUT02="${ALIPAY_ACQUISITION_DAILY_PATH}/${dt_slash_today}/${hour}/alipay_acquisition"

# Wait for the upstream response data's _SUCCESS flag before starting.
check_await "${IMEIMD5_RESPONSE_INPUT_PATH}/_SUCCESS"

# Clear any output left by a previous run so the job is rerunnable.
hadoop fs -rm -r "${OUTPUT01}"
hadoop fs -rm -r "${OUTPUT02}"

spark-submit --class mobvista.dmp.datasource.taobao.AlipayTmpDataToDmp \
 --conf spark.network.timeout=720s \
 --conf spark.default.parallelism=2000 \
 --conf spark.sql.shuffle.partitions=2000 \
 --conf spark.sql.broadcastTimeout=1200 \
 --conf spark.yarn.executor.memoryOverhead=4096 \
 --conf spark.sql.autoBroadcastJoinThreshold=31457280 \
 --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \
 ../../"${JAR}" -imeiRequestInput "${IMEIMD5_REQUEST_INPUT_PATH}" -imeiResponseInput "${IMEIMD5_RESPONSE_INPUT_PATH}" \
 -output01 "${OUTPUT01}" -output02 "${OUTPUT02}"

if [ $? -ne 0 ]; then
  exit 255
fi

# Register the new HDFS paths as Hive partitions, checking each call
# individually (the original tested $? only after the second call, so a
# failure of the first mount was silently ignored).
mount_partition "etl_alipay_activation_daily" "dt='${dt_today}', hh='${hour}', business='alipay_activation'" "${OUTPUT01}"
if [ $? -ne 0 ]; then
  exit 255
fi

mount_partition "etl_alipay_acquisition_daily" "dt='${dt_today}', hh='${hour}', business='alipay_acquisition'" "${OUTPUT02}"
if [ $? -ne 0 ]; then
  exit 255
fi

# Four-day deduplication step, currently disabled.
#ACTIVATIONOUTPUT="${ALIPAY_ACTIVATION_DAILY_PATH}/${dt_slash_today}/alipay_activation_deduplication"
#ACQUISITIONOUTPUT="${ALIPAY_ACQUISITION_DAILY_PATH}/${dt_slash_today}/alipay_acquisition_deduplication"
#
#spark-submit --class mobvista.dmp.datasource.taobao.AlipayLaHuoFourDaysDataDeduplication \
# --conf spark.network.timeout=720s \
# --conf spark.default.parallelism=2000 \
# --conf spark.sql.shuffle.partitions=2000 \
# --conf spark.sql.broadcastTimeout=1200 \
# --conf spark.yarn.executor.memoryOverhead=4096 \
# --conf spark.sql.autoBroadcastJoinThreshold=31457280 \
# --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \
# ../../${JAR} -dt_today ${dt_today} -dt_three_days_ago ${dt_three_days_ago} \
# -ActivationOutput ${ACTIVATIONOUTPUT} -AcquisitionOutput ${ACQUISITIONOUTPUT}
#
#if [ $? -ne 0 ]; then
#  exit 255
#fi
#
#mount_partition "etl_alipay_activation_daily" "dt='${dt_today}', business='alipay_activation_deduplication'" "${ACTIVATIONOUTPUT}"
#mount_partition "etl_alipay_acquisition_daily" "dt='${dt_today}', business='alipay_acquisition_deduplication'" "${ACQUISITIONOUTPUT}"
#if [ $? -ne 0 ]; then
#  exit 255
#fi
#
#HIVE_CMD=$(hive_func)
#
#$HIVE_CMD -v -hivevar dt_today ${dt_today} -f alipay_lahuo_data_to_dmp.sql
#
# The $?-check that guarded the Hive command above is commented out along
# with it; the original left it active, where it could only ever observe the
# exit status of the preceding (always-passing) if-statement, making it a
# no-op.
#if [ $? -ne 0 ]; then
#  exit 255
#fi

# Only the hourly acquisition _SUCCESS flag is written; the daily-level
# activation flag remains disabled.
#hadoop fs -touchz "${ALIPAY_ACTIVATION_DAILY_PATH}/${dt_slash_today}/_SUCCESS"
hadoop fs -touchz "${ALIPAY_ACQUISITION_DAILY_PATH}/${dt_slash_today}/${hour}/_SUCCESS"

echo "good job"
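
# --- Appendix (a sketch, not part of the original job): guarded HDFS cleanup ---
# "hadoop fs -rm -r" exits non-zero when the target path does not exist yet
# (e.g. on the first run for a new date/hour partition), and nothing above
# checks its status. If that matters on your cluster, either pass -f to
# suppress the missing-path error ("hadoop fs -rm -r -f ..."), or test for
# existence first. A minimal variant, assuming the same OUTPUT01/OUTPUT02
# variables defined above:
#
#   for out in "${OUTPUT01}" "${OUTPUT02}"; do
#     if hadoop fs -test -e "${out}"; then
#       hadoop fs -rm -r "${out}"
#     fi
#   done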