#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @author: fengliang
# @date  : 2017-10-23
# @desc  : DSP DoubleClick interest tag full table
# # # # # # # # # # # # # # # # # # # # # #

source ../dmp_env.sh

#### time init
date=${ScheduleTime:-$1}
log_time=$(date -d "$date 1 days ago" "+%Y%m%d")
date_path=$(date -d "$date 1 days ago" "+%Y/%m/%d")
old_date=$(date -d "$date 2 days ago" "+%Y%m%d")
old_path=$(date -d "$date 2 days ago" "+%Y/%m/%d")  # currently unused in this script
modify_date=$(date -d "$date 1 days ago" +"%Y-%m-%d")
expire_date=$(date +%Y%m%d -d "-4 day $date")
expire_date_path=$(date +%Y/%m/%d -d "-4 day $date")

source="dc"
business="dsp_req"

#### path init
SEGMENT_TAG_PATH="$DIM_SEGMENT_TAG_PATH"
DSP_REQ_DAILY_PATH="${ETL_DSP_REQ_DAILY}/$date_path"
OUTPUT_PATH="${DM_DEVICE_TAG_PATH}/$date_path/dc/dsp_req"
EXPIRE_PATH="${DM_DEVICE_TAG_PATH}/$expire_date_path/dc/dsp_req"

#### check path: wait until the upstream daily ETL has written its _SUCCESS marker
check_await "${DSP_REQ_DAILY_PATH}/_SUCCESS"

#### rm output path: clear any stale output left by a previous run
hadoop fs -rm -r "${OUTPUT_PATH}"

#### run command
# Note: '-yestoday' is the flag spelling expected by the job jar.
spark-submit --class mobvista.dmp.datasource.dsp.dc.interest.DmDCInterestTagV2 \
    --conf spark.yarn.executor.memoryOverhead=3072 \
    --conf spark.sql.shuffle.partitions=500 \
    --files "${HIVE_SITE_PATH}" \
    --master yarn --deploy-mode cluster \
    --executor-memory 10g --driver-memory 6g \
    --executor-cores 5 --num-executors 40 \
    "../${JAR}" -input "$DSP_REQ_DAILY_PATH" -output "$OUTPUT_PATH" -yestoday "$old_date" \
    -dictPath "$SEGMENT_TAG_PATH" -parallelism 500 -coalesce 500 -date "$modify_date"

if [ $? -ne 0 ]; then
    exit 255
fi

# register today's output as a partition of dmp_device_tag
mount_partition "dmp_device_tag" "dt='${log_time}', source='${source}', business='${business}'" "${OUTPUT_PATH}"

# drop the expired partition and remove its underlying path
unmount_partition "dmp_device_tag" "dt='${expire_date}', source='${source}', business='${business}'" "${EXPIRE_PATH}"
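
#### helper reference (illustrative sketch only)
# check_await, mount_partition, and unmount_partition are provided by
# ../dmp_env.sh; their real implementations are not shown in this file.
# The commented sketch below is an assumption of their semantics, for
# readers unfamiliar with the env helpers. Names and Hive statements
# here are hypothetical, not the actual dmp_env.sh code:
#
#   # Hypothetical: register an HDFS location as a Hive partition.
#   mount_partition() {
#       local table=$1 spec=$2 location=$3
#       hive -e "ALTER TABLE ${table} ADD IF NOT EXISTS PARTITION (${spec}) LOCATION '${location}';"
#   }
#
#   # Hypothetical: drop the Hive partition, then delete its HDFS path.
#   unmount_partition() {
#       local table=$1 spec=$2 location=$3
#       hive -e "ALTER TABLE ${table} DROP IF EXISTS PARTITION (${spec});"
#       hadoop fs -rm -r "${location}"
#   }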