1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/bin/bash
# Hourly RTDmp aggregation driver: reads the previous hour's rtdmp_deal
# output from S3, runs the RTDmpASV2 Spark job, drops _SUCCESS markers,
# and purges data older than 168 hours.
# NOTE(review): assumes ../dmp_env.sh defines ScheduleTime and JAR — confirm.
source ../dmp_env.sh
# ScheduleTime is the workflow's scheduled timestamp; it must be a date
# string that GNU `date -d` can parse. If it is unset/empty, the dates
# below silently fall back to "now minus 1 hour".
today=${ScheduleTime}
# Process the hour BEFORE the scheduled time (late-arriving data window).
date_time=$(date +"%Y-%m-%d.%H" -d "-1 hour $today")
date_path=$(date +%Y/%m/%d/%H -d "-1 hour $today")
INPUT_DATA="s3://mob-emr-test/dataplatform/rtdmp_deal/${date_path}"
# Size Spark resources by input volume: count listing lines under the
# input dir. If the dir is missing, `hadoop fs -ls` fails and wc yields 0,
# which lands in the small branch. Path quoted to avoid word-splitting.
part_num=$(hadoop fs -ls "${INPUT_DATA}" | wc -l)
if [[ ${part_num} -le 10 ]]; then
  echo "This Dir No Data !!!"
  # Minimal resources for an (almost) empty input hour.
  partition=10
  coalesce=10
  executor=2
  memory=4
  core=2
else
  # Full-size resources for a normal input hour.
  partition=2000
  coalesce=200
  executor=50
  memory=10
  core=3
fi
OUTPUT="s3://mob-emr-test/dataplatform/rtdmp/${date_path}"
# Submit the hourly RTDmp aggregation job. The resource knobs
# (partition/coalesce/executor/memory/core) were chosen above from input
# size. Path arguments are quoted (SC2086); the numeric knobs are safe
# unquoted but quoting costs nothing.
spark-submit --class mobvista.dmp.datasource.rtdmp.RTDmpASV2 \
  --name "RTDmpASV2.${date_time}" \
  --conf spark.sql.shuffle.partitions="${partition}" \
  --conf spark.default.parallelism="${partition}" \
  --conf spark.kryoserializer.buffer.max=512m \
  --conf spark.kryoserializer.buffer=64m \
  --master yarn --deploy-mode cluster --executor-memory "${memory}g" --driver-memory 8g --executor-cores "${core}" --num-executors "${executor}" \
  ../"${JAR}" -input_data "${INPUT_DATA}" -output "${OUTPUT}" -coalesce "${coalesce}" -time "${date_time}"
# Abort the whole workflow with a distinctive exit code on job failure.
if [[ $? -ne 0 ]]; then
  exit 255
fi
# Audience categories written by the Spark job under ${OUTPUT}/data*/.
ads="foractivation adx normal"
# 2020-11-20 14:45:52 removed the per-region output logic
# The `: '...'` below is a no-op: the old per-region _SUCCESS-marker code
# is kept as an inert single-quoted string argument to the `:` builtin and
# is never executed.
: '
res="cn virginia seoul tokyo frankfurt singapore"
for ad in ${ads}
do
for re in ${res}
do
if hadoop fs -ls "$OUTPUT/data/${ad}/$re" > /dev/null 2>&1
then
hadoop fs -touchz ${OUTPUT}/data/${ad}/${re}/_SUCCESS
else
hadoop fs -mkdir -p ${OUTPUT}/data/${ad}/${re}
hadoop fs -touchz ${OUTPUT}/data/${ad}/${re}/_SUCCESS
fi
done
hadoop fs -touchz ${OUTPUT}/data/${ad}/_SUCCESS
done
if [[ $? -ne 0 ]]; then
exit 255
fi
'
#######################################
# Ensure a directory exists and drop a _SUCCESS marker in it.
# Arguments: $1 - HDFS/S3 directory path
#######################################
touch_success() {
  local dir=$1
  if ! hadoop fs -ls "${dir}" >/dev/null 2>&1; then
    hadoop fs -mkdir -p "${dir}"
  fi
  # Original touched _SUCCESS in both branches, so do it unconditionally.
  hadoop fs -touchz "${dir}/_SUCCESS"
}

# Mark every per-audience output dir (v2 and legacy layout) as complete.
# ${ads} is deliberately unquoted: it is a space-separated list.
for ad in ${ads}; do
  touch_success "${OUTPUT}/data_v2/${ad}"
  touch_success "${OUTPUT}/data/${ad}"
done

# Top-level marker signals the whole hour's output is ready; abort the
# workflow if even this final touch fails.
hadoop fs -touchz "${OUTPUT}/_SUCCESS"
if [[ $? -ne 0 ]]; then
  exit 255
fi
# Retention: purge rtdmp and rtdmp_deal output older than 168 hours (7 days).
expire_date_path=$(date +%Y/%m/%d/%H -d "-168 hour $today")
EXPIRE_RTDMP_OUTPUT_PATH="s3://mob-emr-test/dataplatform/rtdmp/${expire_date_path}"
if hadoop fs -ls "$EXPIRE_RTDMP_OUTPUT_PATH" >/dev/null 2>&1; then
  # 'hadoop dfs' is deprecated; 'hadoop fs' is the supported entry point.
  # Quoted delete target guards a recursive rm against word-splitting.
  hadoop fs -rm -r "${EXPIRE_RTDMP_OUTPUT_PATH}"
fi
EXPIRE_RTDMP_DEAL_OUTPUT_PATH="s3://mob-emr-test/dataplatform/rtdmp_deal/${expire_date_path}"
if hadoop fs -ls "$EXPIRE_RTDMP_DEAL_OUTPUT_PATH" >/dev/null 2>&1; then
  hadoop fs -rm -r "${EXPIRE_RTDMP_DEAL_OUTPUT_PATH}"
fi