1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/bin/bash
# Merge the daily GA active-device export into the cumulative "active total"
# dataset for yesterday's partition, then mount the resulting Hive partition.
#
# Requires ../dmp_env.sh to provide:
#   ScheduleTime, GA_ACTIVE_DAILY_PATH, GA_ACTIVE_TOTAL_PATH, JAR,
#   get_recently_dir(), check_await(), mount_partition()
source ../dmp_env.sh

echo "ScheduleTime=$ScheduleTime"

# Yesterday = the partition being built; the day before = the most recent
# cumulative total we merge against.
LOG_TIME=$(date +%Y%m%d -d "-1 days $ScheduleTime")
OLD_LOG_TIME=$(date +%Y%m%d -d "-2 days $ScheduleTime")

# yyyyMMdd -> yyyy/MM/dd path fragments.
date_path="${LOG_TIME:0:4}/${LOG_TIME:4:2}/${LOG_TIME:6:2}"
# NOTE(review): old_date_path is never referenced in this script; kept in case
# a sourced helper relies on it — confirm before removing.
old_date_path="${OLD_LOG_TIME:0:4}/${OLD_LOG_TIME:4:2}/${OLD_LOG_TIME:6:2}"

INPUT_DAILY_PATH="$GA_ACTIVE_DAILY_PATH/$date_path/dau-device-data-export/"
# Most recent existing total directory at or before OLD_LOG_TIME.
INPUT_TOTAL_PATH=$(get_recently_dir "$GA_ACTIVE_TOTAL_PATH" "$OLD_LOG_TIME" "")
OUTPUT_PATH="$GA_ACTIVE_TOTAL_PATH/$date_path/"

echo "INPUT_DAILY_PATH = $INPUT_DAILY_PATH"
echo "INPUT_TOTAL_PATH = $INPUT_TOTAL_PATH"
echo "OUTPUT_PATH = $OUTPUT_PATH"

# Check whether the input data exists: block until the upstream export has
# finished writing its _SUCCESS marker. (A manual poll/retry loop previously
# lived here; check_await supersedes it.)
check_await "$INPUT_DAILY_PATH/_SUCCESS"

# Clear any partial output from a previous run so the MR job can write fresh.
echo "Will rm hdfs dir : $OUTPUT_PATH"
hadoop fs -rm -r "$OUTPUT_PATH"

hadoop jar ../"${JAR}" mobvista.dmp.datasource.ga.mapreduce.GaActiveTotalMR \
    -Dmapreduce.fileoutputcommitter.algorithm.version=2 \
    -dailypath "$INPUT_DAILY_PATH" \
    -totalpath "$INPUT_TOTAL_PATH" -outputpath "$OUTPUT_PATH" -reduceNum 30
if [ $? -ne 0 ]; then
    echo "GaActiveTotalMR job failed" >&2
    exit 255
fi

# Register the freshly written output as yesterday's Hive partition.
mount_partition "ods_ga_active_total" "dt='${LOG_TIME}'" "$OUTPUT_PATH"