# ga_active_total.sh 1.55 KB
#!/bin/bash
#
# Resolves the HDFS locations used by the GA active-total job and prints them.
#
# Environment:
#   ScheduleTime          reference time handed to `date -d`; LOG_TIME is the
#                         day before it, OLD_LOG_TIME two days before it.
#   GA_ACTIVE_DAILY_PATH  root of the daily input data.
#   GA_ACTIVE_TOTAL_PATH  root of the accumulated ("total") data.
#
# The script must be started from a directory where ../dmp_env.sh resolves.
# NOTE(review): the GA_ACTIVE_* roots and get_recently_dir are not defined in
# this file; presumably they come from dmp_env.sh - confirm.

source ../dmp_env.sh

# Fail fast when a path root is missing: the derived paths would otherwise
# silently become root-level locations such as "/<yyyy>/<mm>/<dd>/".
for required_var in GA_ACTIVE_DAILY_PATH GA_ACTIVE_TOTAL_PATH; do
    if [[ -z "${!required_var}" ]]; then
        echo "ERROR: $required_var is not set (expected from ../dmp_env.sh or the environment)" >&2
        exit 255
    fi
done

echo "ScheduleTime=$ScheduleTime"
LOG_TIME=$(date +%Y%m%d -d "-1 days $ScheduleTime")
OLD_LOG_TIME=$(date +%Y%m%d -d "-2 days $ScheduleTime")

# `date` prints nothing when it cannot parse the expression. An empty LOG_TIME
# would turn date_path into "//" and OUTPUT_PATH into the total root itself,
# which this script later removes recursively - so stop right here instead.
for day in "$LOG_TIME" "$OLD_LOG_TIME"; do
    if [[ ! "$day" =~ ^[0-9]{8}$ ]]; then
        echo "ERROR: cannot derive log dates from ScheduleTime='$ScheduleTime'" >&2
        exit 255
    fi
done

# yyyymmdd -> yyyy/mm/dd
date_path=${LOG_TIME:0:4}/${LOG_TIME:4:2}/${LOG_TIME:6:2}
old_date_path=${OLD_LOG_TIME:0:4}/${OLD_LOG_TIME:4:2}/${OLD_LOG_TIME:6:2}


INPUT_DAILY_PATH="$GA_ACTIVE_DAILY_PATH/$date_path/dau-device-data-export/"
# The variable name is misspelled ("TOTAY"), but it is referenced under this
# name further down in the script, so it is kept as is.
# NOTE(review): get_recently_dir presumably returns the most recent existing
# directory under the total root, starting from OLD_LOG_TIME - confirm.
INPUT_TOTAY_PATH=$(get_recently_dir "$GA_ACTIVE_TOTAL_PATH" "$OLD_LOG_TIME" "")
OUTPUT_PATH="$GA_ACTIVE_TOTAL_PATH/$date_path/"

echo "INPUT_DAILY_PATH = $INPUT_DAILY_PATH"
echo "INPUT_TOTAL_PATH = $INPUT_TOTAY_PATH"
echo "OUTPUT_PATH = $OUTPUT_PATH"

# Check whether the input file exists (polling loop below is commented out and kept for reference only)
## count=0
## while [[ true ]];
## do
##     hadoop fs -test -e $INPUT_DAILY_PATH
##     if [ $? -eq 0 ];then
##       break
##     else
##       echo "data delay, will retry after 60s"
##     fi
##
##     if [ $count -gt 10 ];then
##       echo "Data delay..."
##       exit 255
##     fi
##
##     count=$(( count + 1 ))
##     sleep 60
## done

##
## echo "Will rm hdfs file : $INPUT_DAILY_PATH/manifest"
## hadoop fs -rm $INPUT_DAILY_PATH/manifest


# An empty total path would make the job's argument list malformed
# ("-totalpath" directly followed by "-outputpath"), so refuse to continue
# before anything is waited for or deleted.
if [[ -z "$INPUT_TOTAY_PATH" ]]; then
    echo "ERROR: INPUT_TOTAY_PATH is empty; no previous total directory resolved" >&2
    exit 255
fi

# Wait for the daily export's _SUCCESS marker.
# NOTE(review): check_await is defined outside this file; presumably it blocks
# until the path exists and aborts the script on timeout - confirm.
check_await "$INPUT_DAILY_PATH/_SUCCESS"

# Remove any previous output for this day. The exit status is intentionally
# not checked: the directory may simply not exist yet.
echo "Will rm hdfs dir : $OUTPUT_PATH"
hadoop fs -rm -r "$OUTPUT_PATH"

# Run the MapReduce job with the daily path, the previous total path and the
# output path. All expansions are quoted so that paths containing whitespace
# or glob characters reach the job as single arguments.
if ! hadoop jar "../${JAR}" mobvista.dmp.datasource.ga.mapreduce.GaActiveTotalMR \
    -Dmapreduce.fileoutputcommitter.algorithm.version=2 \
    -dailypath "$INPUT_DAILY_PATH" \
    -totalpath "$INPUT_TOTAY_PATH" -outputpath "$OUTPUT_PATH" -reduceNum 30; then
    exit 255
fi

# Register the new output as partition dt=<LOG_TIME> of ods_ga_active_total.
# NOTE(review): mount_partition is defined outside this file; its exact
# behavior should be confirmed there.
mount_partition "ods_ga_active_total" "dt='${LOG_TIME}'" "$OUTPUT_PATH"