1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file :get_dsp_all.sh
# @author :wangjf
# @revision:2018-09-26 10:38:23
# @description: refactoring
# # # # # # # # # # # # # # # # # # # # # #
# Load shared settings and helpers. check_await, mount_partition,
# unmount_partition, JAR, DSP_PROFILE_TOTAL, AGE_GET_DSP_PATH and
# GENDER_GET_DSP_PATH are not defined in this script, so they are presumably
# supplied by dmp_env.sh -- verify. The path is resolved against the current
# working directory.
source ../dmp_env.sh || {
  echo "get_dsp_all: failed to source ../dmp_env.sh" >&2
  exit 255
}

# Refuse to continue with empty settings: the output paths built from them are
# recursively deleted below, so an empty base would point the delete at the
# wrong directory.
for required_var in DSP_PROFILE_TOTAL AGE_GET_DSP_PATH GENDER_GET_DSP_PATH JAR; do
  if [[ -z "${!required_var}" ]]; then
    echo "get_dsp_all: required variable ${required_var} is empty or unset" >&2
    exit 255
  fi
done

# ScheduleTime is not set in this file (presumably provided by the scheduler or
# the environment -- TODO confirm). The job processes the day before it and
# unmounts the partition from three days before it. Each date call is checked:
# an empty date_path would collapse the output paths to their base directory.
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" +"%Y%m%d") || exit 255
date_path=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d") || exit 255
umount_time=$(date -d "$ScheduleTime 3 days ago" +"%Y%m%d") || exit 255
umount_date_path=$(date -d "$ScheduleTime 3 days ago" +"%Y/%m/%d") || exit 255

# Input and output locations for this run. The output paths keep their
# trailing slash because they are handed to the Spark job and to the
# partition helpers in exactly this form.
INPUT_PATH_DSP="${DSP_PROFILE_TOTAL}/${date_path}"
AGE_OUTPUT_PATH="${AGE_GET_DSP_PATH}/${date_path}/"
GENDER_OUTPUT_PATH="${GENDER_GET_DSP_PATH}/${date_path}/"
GENDER_UMOUNT_OUTPUT_PATH="${GENDER_GET_DSP_PATH}/${umount_date_path}/"

# Hand the input's _SUCCESS marker to check_await before doing any work
# (presumably blocks until the marker exists -- confirm in dmp_env.sh).
check_await "${INPUT_PATH_DSP}/_SUCCESS"

business="dsp"

# Clear output left by a previous run of the same day. -f keeps a missing
# directory (first run) from being reported as an error.
hadoop fs -rm -r -f "${AGE_OUTPUT_PATH}"
hadoop fs -rm -r -f "${GENDER_OUTPUT_PATH}"

# Run the Spark job; any failure aborts the script with status 255 so the
# partition is not mounted and no _SUCCESS marker is written.
if ! spark-submit --class mobvista.dmp.datasource.age_gender.GetAgeGender \
  --name "mobvista.dmp.datasource.age_gender.GetAgeGender.${business}.${LOG_TIME}" \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.network.timeout=720s \
  --conf spark.sql.shuffle.partitions=2000 \
  --conf spark.default.parallelism=2000 \
  --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
  --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 5 --num-executors 60 \
  "../${JAR}" -ageOutput "${AGE_OUTPUT_PATH}" -genderOutput "${GENDER_OUTPUT_PATH}" -date "${LOG_TIME}" -business "${business}"; then
  exit 255
fi

# Register the new gender partition and drop the one from three days earlier.
# NOTE(review): the helpers' exit statuses are not checked here, matching the
# previous behavior; confirm whether they abort on failure themselves.
mount_partition "ods_gender_dsp" "dt='${LOG_TIME}'" "${GENDER_OUTPUT_PATH}"
unmount_partition "ods_gender_dsp" "dt='${umount_time}'" "${GENDER_UMOUNT_OUTPUT_PATH}"

# Mark the gender output as complete. GENDER_OUTPUT_PATH already ends in "/".
hadoop fs -touchz "${GENDER_OUTPUT_PATH}_SUCCESS"