# dsp_dc_interest.sh
#!/bin/bash

# # # # # # # # # # # # # # # # # # # # # #
# @author: fengliang
# @date  : 2017-10-23
# @desc  : DSP DoubleClick interest-tag full table
# # # # # # # # # # # # # # # # # # # # # #

# Provides check_await, mount_partition, unmount_partition and the
# *_PATH environment variables used below.
source ../dmp_env.sh

#### time init
# Schedule time: taken from the ScheduleTime env var, falling back to $1.
date=${ScheduleTime:-$1}
log_time=$(date -d "$date 1 days ago" "+%Y%m%d")        # partition dt (T-1)
date_path=$(date -d "$date 1 days ago" "+%Y/%m/%d")     # HDFS path segment for T-1
old_date=$(date -d "$date 2 days ago" "+%Y%m%d")        # previous snapshot day (T-2), passed as -yestoday
old_path=$(date -d "$date 2 days ago" "+%Y/%m/%d")
modify_date=$(date -d "$date 1 days ago" "+%Y-%m-%d")   # dashed date passed to the Spark job via -date
expire_date=$(date -d "$date 4 days ago" "+%Y%m%d")       # partition to drop (T-4)
expire_date_path=$(date -d "$date 4 days ago" "+%Y/%m/%d")

source="dc"
business="dsp_req"

#### path init
SEGMENT_TAG_PATH="$DIM_SEGMENT_TAG_PATH"
DSP_REQ_DAILY_PATH="${ETL_DSP_REQ_DAILY}/${date_path}"
# Reuse $source/$business so the paths always agree with the
# mount_partition/unmount_partition calls at the bottom of this script.
OUTPUT_PATH="${DM_DEVICE_TAG_PATH}/${date_path}/${source}/${business}"
EXPIRE_PATH="${DM_DEVICE_TAG_PATH}/${expire_date_path}/${source}/${business}"


#### check path
# Block until the upstream daily ETL has written its _SUCCESS marker.
check_await "${DSP_REQ_DAILY_PATH}/_SUCCESS"

### rm output path
# Clear any partial output from a previous run (a failure here is harmless
# on the first run, when the path does not exist yet).
hadoop fs -rm -r "${OUTPUT_PATH}"


#### run command
spark-submit --class mobvista.dmp.datasource.dsp.dc.interest.DmDCInterestTagV2 \
 --conf spark.yarn.executor.memoryOverhead=3072 \
 --conf spark.sql.shuffle.partitions=500 \
 --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g  --executor-cores 5  --num-executors 40 \
 "../${JAR}" -input "${DSP_REQ_DAILY_PATH}" -output "${OUTPUT_PATH}" -yestoday "${old_date}" \
  -dictPath "${SEGMENT_TAG_PATH}" -parallelism 500 -coalesce 500 -date "${modify_date}"

# Abort without registering the partition if the Spark job failed.
if [ $? -ne 0 ]; then
    exit 255
fi


# Register the freshly written output as today's Hive partition.
mount_partition "dmp_device_tag" "dt='${log_time}', source='${source}', business='${business}'" "${OUTPUT_PATH}"

# Drop the expired partition and delete its backing path.
unmount_partition "dmp_device_tag" "dt='${expire_date}', source='${source}', business='${business}'" "${EXPIRE_PATH}"