1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : dmp_interest_tag.sh
# @author : jinfeng.wang
# @time : 2020-05-22 16:06:07
# # # # # # # # # # # # # # # # # # # # # #
# Runs the mobvista.dmp.datasource.dm.DmpDeviceIdMd5 Spark job for the day
# before the schedule time, then mounts the output as the dt partition of
# "device_id_md5_match".
#
# Inputs:
#   ScheduleTime (env) or $1 - reference time understood by `date -d`; the
#                              job processes the day before it.
# Exit status:
#   255 if the target date cannot be computed or spark-submit fails.

# Names used below that this script does not define (ODS_DMP_USER_INFO,
# DEVICE_ID_MD5_MATCH_PATH, HIVE_SITE_PATH, JAR, check_await, mount_partition)
# are expected to come from this file or the environment.
# NOTE(review): the relative path means the script must be started from its
# own directory.
source ../dmp_env.sh

LOG_TIME=${ScheduleTime:-$1}

# Fail fast when the schedule time cannot be parsed; otherwise every path and
# the partition spec below would be built from an empty date.
if ! date=$(date +%Y%m%d -d "-1 day ${LOG_TIME}"); then
  echo "cannot compute target date from '${LOG_TIME}'" >&2
  exit 255
fi
if ! date_path=$(date +"%Y/%m/%d" -d "-1 day ${LOG_TIME}"); then
  echo "cannot compute target date path from '${LOG_TIME}'" >&2
  exit 255
fi

# Upstream _SUCCESS markers for the three user-info sources; presumably
# check_await blocks until each one exists (helper is defined elsewhere).
check_await "${ODS_DMP_USER_INFO}/${date_path}/3s/_SUCCESS"
check_await "${ODS_DMP_USER_INFO}/${date_path}/dsp_req/_SUCCESS"
check_await "${ODS_DMP_USER_INFO}/${date_path}/adn_request/_SUCCESS"

OUTPUT_PATH="${DEVICE_ID_MD5_MATCH_PATH}/${date_path}"

# Every continuation backslash must be preceded by a space: without it the
# shell glues the last word of one line onto the first word of the next.
if ! spark-submit --class mobvista.dmp.datasource.dm.DmpDeviceIdMd5 \
  --name "DmpDeviceIdMd5.${date}" \
  --conf spark.sql.shuffle.partitions=10000 \
  --conf spark.default.parallelism=1000 \
  --conf spark.kryoserializer.buffer.max=512m \
  --conf spark.kryoserializer.buffer=64m \
  --conf spark.sql.adaptive.enabled=true \
  --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \
  --files "${HIVE_SITE_PATH}" \
  --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 60 \
  "../${JAR}" \
  -date "${date}" -coalesce 1000 -output "${OUTPUT_PATH}"; then
  exit 255
fi

# Register the freshly written directory as the partition for this date.
mount_partition "device_id_md5_match" "dt='$date'" "$OUTPUT_PATH"