#! /bin/bash

# # # # # # # # # # # # # # # # # # # # # #
# @file    :ga_data_preparing.sh
# @author  :ying.hou
# @revision:2016-09-22 20:41:41
# # # # # # # # # # # # # # # # # # # # # #

source ../dmp_env.sh

echo "ScheduleTime=$ScheduleTime"
LOG_TIME=$(date +%Y%m%d -d "-1 day $ScheduleTime")
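# LOG_TIME is the day before ScheduleTime (YYYYMMDD), i.e. the data date of the daily export being processed.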

# Split LOG_TIME into the year/month/day components used in the S3 and HDFS paths
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}


# Set AWS credentials for the source S3 bucket
export AWS_ACCESS_KEY_ID=AKIAIBMYT3BZHBYDOMSQ
export AWS_SECRET_ACCESS_KEY=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

# Poll S3 until both the device-metrics and installation-list manifests for the day exist
# (aws s3 ls exits non-zero when the key is not found)
while true
do
    aws s3 ls "${GA_PREPARE_PATH_SRC}/${year}/${month}/${day}/device-metrics/manifest"
    DEVICE_IS_EXIST=$?
    aws s3 ls "${GA_PREPARE_PATH_SRC}/${year}/${month}/${day}/installation-list/manifest"
    INSTALL_IS_EXIST=$?
    if [[ ${DEVICE_IS_EXIST} -eq 0 ]] && [[ ${INSTALL_IS_EXIST} -eq 0 ]]
    then
        break
    fi
    sleep 60
done

# Download the whole day's export from S3 into a local staging directory
aws s3 sync "${GA_PREPARE_PATH_SRC}/${year}/${month}/${day}" "./results/dmp/daily_export/${day}"

# Sanity-check the download size: abort if it exceeds 10 GiB or falls below ~62 KiB
du -sb ./results/dmp/daily_export/${day} | awk -F'\t' '{print $1}' > ./results/dmp/daily_export/result.txt
SIZE_BYTES=$(cat ./results/dmp/daily_export/result.txt)
if [[ ${SIZE_BYTES} -gt 10737418240 ]] || [[ ${SIZE_BYTES} -lt 63353 ]]
then
    echo "dmp daily export size ${SIZE_BYTES} bytes is outside the expected range."
    rm -rf ./results/dmp/daily_export/${day}
    exit 1
fi
# Publish the export to the Hive warehouse location on HDFS
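# Remove any previous copy for this day so reruns are idempotent, then recreate the target directory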
hadoop dfs -rm -r ${GA_PREPARE_PATH}/${year}/${month}/${day}
hadoop dfs -mkdir -p ${GA_PREPARE_PATH}/${year}/${month}/${day}
hadoop dfs -put -p ./results/dmp/daily_export/${day} ${GA_PREPARE_PATH}/${year}/${month}/
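# Clean up the local staging copy once it has been uploaded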
rm -rf ./results/dmp/daily_export/${day}

echo "[ga data prepare End!]"