1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#! /bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @author : wangjf
#
# Waits for the 24 hourly _SUCCESS markers of one day of ADN logs and then
# submits the mobvista.dmp.output.reyun.TrackingLog Spark job for that day.
#
# Usage:  <this script> <log_type>
#   log_type  one of: impression | click | install
#
# The day processed is "-1 day" relative to $ScheduleTime.
# check_await, JAR, ScheduleTime, ADN_*_PATH and OUTPUT_REYUN_*_PATH are not
# defined here; they are expected to come from ../../dmp_env.sh or the
# calling environment (NOTE(review): confirm against dmp_env.sh).
#
# Exit status: 255 on any detected failure (bad argument, missing environment
# file, date failure, Spark job failure); 0 when the Spark job succeeds.
# # # # # # # # # # # # # # # # # # # # # #

# The environment file is resolved relative to the current working directory.
# Only its presence is tested (not the status of `source`), because the status
# of `source` is that of the last command inside the file.
if [[ ! -r ../../dmp_env.sh ]]; then
  echo "ERROR: cannot read ../../dmp_env.sh (run from the script's directory)" >&2
  exit 255
fi
source ../../dmp_env.sh

# Without check_await the wait loop below would only print "command not found"
# and the job would start without waiting for its input markers.
if ! command -v check_await >/dev/null 2>&1; then
  echo "ERROR: check_await is not defined" >&2
  exit 255
fi

log_type=$1

# dt is passed to the job as -date; date_path is the same day as a directory path.
dt=$(date +"%Y%m%d" -d "-1 day $ScheduleTime") || {
  echo "ERROR: cannot compute date from ScheduleTime='${ScheduleTime}'" >&2
  exit 255
}
date_path=$(date +"%Y/%m/%d" -d "-1 day $ScheduleTime") || {
  echo "ERROR: cannot compute date path from ScheduleTime='${ScheduleTime}'" >&2
  exit 255
}

# Written out literally so the zero padding does not depend on the bash version.
hours=(00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23)

# Per-log-type settings. An unknown log_type is rejected here; otherwise the
# paths and executor count would be empty in the commands below.
case "${log_type}" in
  impression)
    EXECUTORS=80
    INPUT_PATH="${ADN_IMPRESSION_PATH}/${date_path}/beijing"
    OUTPUT_PATH="${OUTPUT_REYUN_IMPRESSION_PATH}/${date_path}"
    ;;
  click)
    EXECUTORS=10
    INPUT_PATH="${ADN_CLICK_PATH}/${date_path}/beijing"
    OUTPUT_PATH="${OUTPUT_REYUN_CLICK_PATH}/${date_path}"
    ;;
  install)
    EXECUTORS=10
    INPUT_PATH="${ADN_INSTALL_PATH}/${date_path}/beijing"
    OUTPUT_PATH="${OUTPUT_REYUN_INSTALL_PATH}/${date_path}"
    ;;
  *)
    echo "ERROR: unsupported log_type '${log_type}' (expected impression, click or install)" >&2
    exit 255
    ;;
esac

# One check_await call per hourly partition marker, in hour order.
for hour in "${hours[@]}"; do
  check_await "${INPUT_PATH}/${hour}/_SUCCESS"
done

# Any spark-submit failure is reported to the caller as exit status 255.
if ! spark-submit --class mobvista.dmp.output.reyun.TrackingLog \
  --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 4 --num-executors "${EXECUTORS}" \
  "../../${JAR}" -date "${dt}" -log_type "${log_type}" -output "${OUTPUT_PATH}" -coalesce 100; then
  exit 255
fi