#!/bin/bash
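# Builds the T-1 (day-before-schedule) output of ods_dmp_user_info_all_v2: waits for the
# T-2 full snapshot and the T-1 daily increment, runs the OdsDmpUserInfoAllV2 Spark job,
# mounts the result as a new Hive partition, and releases the partition from 7 days ago.
# Requires bash (the script uses `source` and `[[ ]]`).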
source ../dmp_env.sh
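# dmp_env.sh is expected to define ODS_DMP_USER_INFO_ALL, ODS_DMP_USER_INFO_DAILY, JAR,
# and the check_await / mount_partition / unmount_partition helpers used below.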
# QUEUE_NAME="dataplatform"
# Fall back to the first CLI argument when the scheduler has not exported ScheduleTime.
ScheduleTime=${ScheduleTime:-$1}
# Date variables: yes_bef1_day (T-1) is the snapshot being built, yes_bef2_day (T-2) is
# the previous snapshot, and unmount_day (T-7) is the partition to be released.
cur_day=$(date +"%Y-%m-%d" -d "$ScheduleTime")
yes_bef2_day=$(date -d "$ScheduleTime 2 days ago" +%Y%m%d)
yes_bef1_day=$(date -d "$ScheduleTime 1 days ago" +%Y%m%d)
OUTPUT_PATH="${ODS_DMP_USER_INFO_ALL}_v2/${yes_bef1_day}"
unmount_day=$(date -d "$ScheduleTime 7 days ago" +%Y%m%d)
UNMOUNT_OUTPUT_PATH="${ODS_DMP_USER_INFO_ALL}_v2/${unmount_day}"
date_path=$(date +"%Y/%m/%d" -d "-1 day $ScheduleTime")
INPUT_PUBLISH_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/realtime_service_device/${date_path}/*/*/*"
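# Block until the upstream T-2 full snapshot and the T-1 daily increment are complete.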
check_await ${ODS_DMP_USER_INFO_ALL}_v2/${yes_bef2_day}/_SUCCESS
check_await ${ODS_DMP_USER_INFO_DAILY}/${yes_bef1_day}/_SUCCESS
# Clear any partial output from a previous run (-rm -r replaces the deprecated -rmr).
hadoop fs -rm -r "${OUTPUT_PATH}"
# Optional G1GC tuning, currently disabled; if re-enabled, move these inside spark-submit:
# --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \
# --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
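# Resource and shuffle tuning: with AQE enabled, the 12000 shuffle partitions are coalesced
# toward the 256 MB advisory size. spark.storage.memoryFraction and spark.shuffle.memoryFraction
# are legacy (pre-Spark 1.6) settings and have no effect under unified memory management.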
spark-submit --class mobvista.dmp.datasource.device.OdsDmpUserInfoAllV2 \
--name "OdsDmpUserInfoAllV2.${yes_bef1_day}" \
--conf spark.sql.shuffle.partitions=12000 \
--conf spark.default.parallelism=2000 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--conf spark.storage.memoryFraction=0.4 \
--conf spark.shuffle.memoryFraction=0.4 \
--conf spark.sql.adaptive.enabled=true \
--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=268435456 \
--master yarn \
--deploy-mode cluster \
--executor-memory 12G \
--driver-memory 6G \
--executor-cores 5 \
--num-executors 100 \
../${JAR} \
-cur_day ${cur_day} -output "${OUTPUT_PATH}" -coalesce 3000 -input "${INPUT_PUBLISH_PATH}"
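# Abort the pipeline if the Spark job failed.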
if [[ $? -ne 0 ]]; then
    exit 255
fi
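# Register the new output as Hive partition dt=T-1 of ods_dmp_user_info_all_v2, then
# release the partition from 7 days ago.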
mount_partition "ods_dmp_user_info_all_v2" "dt='${yes_bef1_day}'" "${OUTPUT_PATH}"
if [[ $? -ne 0 ]]; then
    exit 255
fi
unmount_partition "ods_dmp_user_info_all_v2" "dt='${unmount_day}'" "${UNMOUNT_OUTPUT_PATH}"