1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env bash
# Daily driver for the Lazada activation ETL: derives the date window
# relative to $ScheduleTime (set by the scheduler / dmp_env.sh), then
# launches the per-country Spark job.
source ../dmp_env.sh

# fmt_ago N FMT — render "$ScheduleTime N days ago" using date format FMT.
fmt_ago() { date -d "$ScheduleTime $1 days ago" +"$2"; }

dt_today=$(fmt_ago 2 "%Y%m%d")            # processing day (T-2), compact form
last_req_day=$(fmt_ago 31 "%Y-%m-%d")     # start of the 30-day request window
dt_six_days_ago=$(fmt_ago 8 "%Y%m%d")
dt_30days_ago=$(fmt_ago 31 "%Y%m%d")
dt_slash_today=$(fmt_ago 1 "%Y/%m/%d")    # slash-form dates used in HDFS paths
dt_slash_one_day=$(fmt_ago 2 "%Y/%m/%d")
dt_slash_two_day=$(fmt_ago 3 "%Y/%m/%d")
dt_slash_three_day=$(fmt_ago 4 "%Y/%m/%d")
# HDFS gaid input globs for the three preceding days and output directories
# for the current day, per country.  ID keeps the historical unprefixed
# variable names; TH/VN/PH/MY/SG are prefixed with their country code and
# generated in a loop via printf -v (dynamic assignment, no eval).
INPUT_ONE_DAY="${LAZADA_OUTPUT_PATH}/${dt_slash_one_day}/gaid/ID/part*"
INPUT_TWO_DAY="${LAZADA_OUTPUT_PATH}/${dt_slash_two_day}/gaid/ID/part*"
INPUT_THREE_DAY="${LAZADA_OUTPUT_PATH}/${dt_slash_three_day}/gaid/ID/part*"
GAID_OUTPUT_PATH="${LAZADA_OUTPUT_PATH}/${dt_slash_today}/gaid/ID"
for cc in TH VN PH MY SG; do
  printf -v "${cc}_INPUT_ONE_DAY"    '%s' "${LAZADA_OUTPUT_PATH}/${dt_slash_one_day}/gaid/${cc}/part*"
  printf -v "${cc}_INPUT_TWO_DAY"    '%s' "${LAZADA_OUTPUT_PATH}/${dt_slash_two_day}/gaid/${cc}/part*"
  printf -v "${cc}_INPUT_THREE_DAY"  '%s' "${LAZADA_OUTPUT_PATH}/${dt_slash_three_day}/gaid/${cc}/part*"
  printf -v "${cc}_GAID_OUTPUT_PATH" '%s' "${LAZADA_OUTPUT_PATH}/${dt_slash_today}/gaid/${cc}"
done
unset cc
# check_await "${ODS_DMP_USER_INFO_DAILY}/${dt_today}/_SUCCESS"
# Block until the upstream request partitions for the previous day exist
# (check_await is provided by dmp_env.sh — presumably polls HDFS; confirm).
check_await "${ODS_DMP_USER_INFO}/${dt_slash_one_day}/adn_request/_SUCCESS"
check_await "${ODS_DMP_USER_INFO}/${dt_slash_one_day}/dsp_req/_SUCCESS"

# Remove today's output directories so the Spark job can recreate them.
hadoop fs -rm -r "${GAID_OUTPUT_PATH}"
hadoop fs -rm -r "${TH_GAID_OUTPUT_PATH}"
hadoop fs -rm -r "${VN_GAID_OUTPUT_PATH}"
hadoop fs -rm -r "${PH_GAID_OUTPUT_PATH}"
hadoop fs -rm -r "${MY_GAID_OUTPUT_PATH}"
# BUG FIX: SG output is written by the job (-sg_gaidoutput) but was never
# cleared here, so a rerun would fail on the already-existing SG path.
hadoop fs -rm -r "${SG_GAID_OUTPUT_PATH}"
# Launch the per-country activation ETL on YARN.
# BUG FIX: the last option line previously ended with a trailing '\', which
# line-continued the following `if [ $? -ne 0 ];then` onto the spark-submit
# command line.  That produced a bash syntax error at `then` (aborting the
# script) and made the exit-status check dead code.  The stray backslash is
# removed so the job runs and its status is actually checked.
spark-submit --class mobvista.dmp.datasource.taobao.EtlLazadaActivitionDaily \
 --conf spark.network.timeout=720s \
 --conf spark.default.parallelism=2000 \
 --conf spark.sql.shuffle.partitions=2000 \
 --conf spark.sql.broadcastTimeout=1200 \
 --conf spark.yarn.executor.memoryOverhead=4096 \
 --conf spark.sql.autoBroadcastJoinThreshold=31457280 \
 --files ${HIVE_SITE_PATH} \
 --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 90 ../${JAR} \
 -gaidoutput "${GAID_OUTPUT_PATH}" \
 -today ${dt_today} -last_req_day ${last_req_day} \
 -input_one_day ${INPUT_ONE_DAY} -input_two_day ${INPUT_TWO_DAY} -input_three_day ${INPUT_THREE_DAY} \
 -th_gaidoutput "${TH_GAID_OUTPUT_PATH}" -vn_gaidoutput "${VN_GAID_OUTPUT_PATH}" -ph_gaidoutput "${PH_GAID_OUTPUT_PATH}" -my_gaidoutput "${MY_GAID_OUTPUT_PATH}" -sg_gaidoutput "${SG_GAID_OUTPUT_PATH}" \
 -th_input_one_day ${TH_INPUT_ONE_DAY} -vn_input_one_day ${VN_INPUT_ONE_DAY} -ph_input_one_day ${PH_INPUT_ONE_DAY} -my_input_one_day ${MY_INPUT_ONE_DAY} -sg_input_one_day ${SG_INPUT_ONE_DAY} \
 -th_input_two_day ${TH_INPUT_TWO_DAY} -vn_input_two_day ${VN_INPUT_TWO_DAY} -ph_input_two_day ${PH_INPUT_TWO_DAY} -my_input_two_day ${MY_INPUT_TWO_DAY} -sg_input_two_day ${SG_INPUT_TWO_DAY} \
 -th_input_three_day ${TH_INPUT_THREE_DAY} -vn_input_three_day ${VN_INPUT_THREE_DAY} -ph_input_three_day ${PH_INPUT_THREE_DAY} -my_input_three_day ${MY_INPUT_THREE_DAY} -sg_input_three_day ${SG_INPUT_THREE_DAY} \
 -dt_30days_ago ${dt_30days_ago} -dt_six_days_ago ${dt_six_days_ago}

# Propagate job failure to the scheduler.
if [ $? -ne 0 ]; then
  exit 255
fi
##!/usr/bin/env bash
#
#
#source ../dmp_env.sh
#
#dt_today=$(date -d "$ScheduleTime 2 days ago" +"%Y%m%d")
#dt_30days_ago=$(date -d "$ScheduleTime 31 days ago" +"%Y%m%d")
#dt_slash_today=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d")
#dt_yesterday_today=$(date -d "$ScheduleTime 2 days ago" +"%Y/%m/%d")
#
#
#GAID_INPUT_PATH="${LAZADA_OUTPUT_PATH}/${dt_yesterday_today}/gaid/ID"
#
#GAID_OUTPUT_PATH="${LAZADA_OUTPUT_PATH}/${dt_slash_today}/gaid/ID"
#NEW_OUTPUT_PATH="${NEW_LAZADA_OUTPUT_PATH}/${dt_slash_today}/gaid/ID"
#
#check_await "${ODS_DMP_USER_INFO_DAILY}/${dt_today}/_SUCCESS"
#
#hadoop fs -rm -r "${GAID_OUTPUT_PATH}"
#hadoop fs -rm -r "${NEW_OUTPUT_PATH}"
#
#
#spark-submit --class mobvista.dmp.datasource.taobao.EtlLazadaActivitionDaily \
# --conf spark.network.timeout=720s \
# --conf spark.default.parallelism=2000 \
# --conf spark.sql.shuffle.partitions=2000 \
# --conf spark.sql.broadcastTimeout=1200 \
# --conf spark.yarn.executor.memoryOverhead=4096 \
# --conf spark.sql.autoBroadcastJoinThreshold=31457280 \
# --files ${HIVE_SITE_PATH} \
# --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 70 ../${JAR} \
# -gaidoutput "${GAID_OUTPUT_PATH}" -gaidinput "${GAID_INPUT_PATH}" -newoutput "${NEW_OUTPUT_PATH}" \
# -today ${dt_today} -dt_30days_ago ${dt_30days_ago}
#
#if [ $? -ne 0 ];then
# exit 255
#fi