#!/bin/bash
# Daily GA active-total merge job:
# merges yesterday's daily DAU device export into the rolling "active total"
# dataset via the GaActiveTotalMR MapReduce job, then mounts the result as a
# Hive partition (ods_ga_active_total, dt=<yesterday>).
#
# Required env (from dmp_env.sh): ScheduleTime, GA_ACTIVE_DAILY_PATH,
# GA_ACTIVE_TOTAL_PATH, JAR, plus helpers check_await / get_recently_dir /
# mount_partition.

source ../dmp_env.sh

echo "ScheduleTime=$ScheduleTime"

# Job processes data for "yesterday" relative to the schedule time; the
# previous total snapshot is looked up starting from two days back.
LOG_TIME=$(date +%Y%m%d -d "-1 days $ScheduleTime")
OLD_LOG_TIME=$(date +%Y%m%d -d "-2 days $ScheduleTime")

# yyyyMMdd -> yyyy/MM/dd path fragment
date_path=${LOG_TIME:0:4}/${LOG_TIME:4:2}/${LOG_TIME:6:2}

INPUT_DAILY_PATH="$GA_ACTIVE_DAILY_PATH/$date_path/dau-device-data-export/"
# Most recent existing total snapshot at or before OLD_LOG_TIME
# (fixed typo: was INPUT_TOTAY_PATH).
INPUT_TOTAL_PATH=$(get_recently_dir "$GA_ACTIVE_TOTAL_PATH" "$OLD_LOG_TIME" "")
OUTPUT_PATH="$GA_ACTIVE_TOTAL_PATH/$date_path/"

echo "INPUT_DAILY_PATH = $INPUT_DAILY_PATH"
echo "INPUT_TOTAL_PATH = $INPUT_TOTAL_PATH"
echo "OUTPUT_PATH = $OUTPUT_PATH"

# Wait until the upstream daily export is complete (_SUCCESS marker).
# check_await replaces the old hand-rolled hadoop fs -test retry loop.
check_await "$INPUT_DAILY_PATH/_SUCCESS"

# Remove any previous output so the MR job can write a fresh directory.
echo "Will rm hdfs dir : $OUTPUT_PATH"
hadoop fs -rm -r "$OUTPUT_PATH"

# Merge daily actives into the running total; abort with 255 on failure so
# the scheduler sees the run as failed (matches original exit contract).
hadoop jar ../"${JAR}" mobvista.dmp.datasource.ga.mapreduce.GaActiveTotalMR \
  -Dmapreduce.fileoutputcommitter.algorithm.version=2 \
  -dailypath "$INPUT_DAILY_PATH" \
  -totalpath "$INPUT_TOTAL_PATH" \
  -outputpath "$OUTPUT_PATH" \
  -reduceNum 30 || exit 255

# Register the freshly written directory as today's Hive partition.
mount_partition "ods_ga_active_total" "dt='${LOG_TIME}'" "$OUTPUT_PATH"