#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @author: fengliang
# @date : 2017-10-23
# @desc : dsp double click interest tag full table
# # # # # # # # # # # # # # # # # # # # # #
source ../dmp_env.sh
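# dmp_env.sh is expected to provide the path variables used below
# (DIM_SEGMENT_TAG_PATH, ETL_DSP_REQ_DAILY, DM_DEVICE_TAG_PATH, JAR)
# plus the helpers check_await / mount_partition / unmount_partition.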
#### time init
date=${ScheduleTime:-$1}
log_time=$(date -d "$date 1 days ago" "+%Y%m%d")
date_path=$(date -d "$date 1 days ago" "+%Y/%m/%d")
old_date=$(date -d "$date 2 days ago" "+%Y%m%d")
old_path=$(date -d "$date 2 days ago" "+%Y/%m/%d")
modify_date=$(date -d "$date 1 days ago" +"%Y-%m-%d")
expire_date=$(date +%Y%m%d -d "-4 day $date")
expire_date_path=$(date +%Y/%m/%d -d "-4 day $date")
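# Example resolution (assuming GNU date and ScheduleTime=2017-10-23):
#   log_time=20171022   date_path=2017/10/22   modify_date=2017-10-22
#   old_date=20171021   expire_date=20171019   expire_date_path=2017/10/19
# i.e. the job processes yesterday's data and retires the partition 4 days back.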
source="dc"
business="dsp_req"
#### path init
SEGMENT_TAG_PATH="$DIM_SEGMENT_TAG_PATH"
DSP_REQ_DAILY_PATH="${ETL_DSP_REQ_DAILY}/$date_path"
OUTPUT_PATH="${DM_DEVICE_TAG_PATH}/$date_path/dc/dsp_req"
EXPIRE_PATH="${DM_DEVICE_TAG_PATH}/$expire_date_path/dc/dsp_req"
#### check path
check_await $DSP_REQ_DAILY_PATH/_SUCCESS
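# check_await blocks until the upstream _SUCCESS marker lands on HDFS.
# A minimal sketch of the assumed behavior (poll interval is an assumption):
# check_await() {
#   until hadoop fs -test -e "$1"; do
#     sleep 60
#   done
# }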
#### rm output path so the job can write fresh data (ignore error if absent)
hadoop fs -rm -r "${OUTPUT_PATH}"
#### run command
spark-submit --class mobvista.dmp.datasource.dsp.dc.interest.DmDCInterestTagV2 \
--conf spark.yarn.executor.memoryOverhead=3072 \
--conf spark.sql.shuffle.partitions=500 \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 5 --num-executors 40 \
../${JAR} -input $DSP_REQ_DAILY_PATH -output $OUTPUT_PATH -yestoday $old_date \
-dictPath $SEGMENT_TAG_PATH -parallelism 500 -coalesce 500 -date $modify_date
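# Resource shape: 40 executors x 5 cores = 200 concurrent tasks against
# 500 shuffle partitions (~2.5 task waves); -parallelism and -coalesce
# deliberately match spark.sql.shuffle.partitions. Note: -yestoday is
# presumably the flag spelling the job's argument parser expects, so it
# is left as-is.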
if [ $? -ne 0 ]; then
exit 255
fi
mount_partition "dmp_device_tag" "dt='${log_time}', source='${source}', business='${business}'" "${OUTPUT_PATH}"
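# mount_partition comes from dmp_env.sh; a minimal sketch of the Hive DDL
# it is assumed to wrap (table and partition keys taken from the call above):
# hive -e "ALTER TABLE dmp_device_tag ADD IF NOT EXISTS
#   PARTITION (dt='${log_time}', source='${source}', business='${business}')
#   LOCATION '${OUTPUT_PATH}'"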
# drop the expired partition and delete its data path
unmount_partition "dmp_device_tag" "dt='${expire_date}', source='${source}', business='${business}'" "${EXPIRE_PATH}"
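# unmount_partition presumably wraps the inverse DDL plus an HDFS delete,
# along the lines of:
# hive -e "ALTER TABLE dmp_device_tag DROP IF EXISTS
#   PARTITION (dt='${expire_date}', source='${source}', business='${business}')"
# hadoop fs -rm -r "${EXPIRE_PATH}"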