#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file    : ga_data_preparing.sh
# @author  : ying.hou
# @revision: 2016-09-22 20:41:41
#
# Waits for the GA daily export manifests on S3, downloads the day's
# export locally, sanity-checks the total size, then publishes it into
# the HDFS warehouse path under ${GA_PREPARE_PATH}.
#
# Required env (from ../dmp_env.sh or the scheduler):
#   ScheduleTime, GA_PREPARE_PATH_SRC, GA_PREPARE_PATH
# # # # # # # # # # # # # # # # # # # # # #

# Abort on unset variables so an empty ${day}/${GA_PREPARE_PATH} can never
# expand into a destructive `rm -rf` / `hadoop dfs -rm` path.
set -u

source ../dmp_env.sh

echo "ScheduleTime=$ScheduleTime"

# Process yesterday's partition relative to the schedule time.
LOG_TIME=$(date +%Y%m%d -d "-1 day $ScheduleTime")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}

# AWS credentials.
# SECURITY(review): hard-coded secrets should move to an IAM role or a
# credentials file. Kept as fallback defaults for backward compatibility,
# but now overridable from the environment.
export AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-AKIAIBMYT3BZHBYDOMSQ}
export AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-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}

SRC_PREFIX="${GA_PREPARE_PATH_SRC}/${year}/${month}/${day}"
LOCAL_DIR="./results/dmp/daily_export/${day}"
readonly MAX_BYTES=10737418240   # 10 GiB upper bound on the export
readonly MIN_BYTES=63353         # lower bound guards against a truncated/empty export

# Poll (once a minute) until BOTH export manifests exist on S3.
while true; do
  if aws s3 ls "${SRC_PREFIX}/device-metrics/manifest" \
     && aws s3 ls "${SRC_PREFIX}/installation-list/manifest"; then
    break
  fi
  sleep 60
done

# Download the day's export to the local staging directory.
if ! aws s3 sync "${SRC_PREFIX}" "${LOCAL_DIR}"; then
  echo "aws s3 sync from ${SRC_PREFIX} failed" >&2
  exit 1
fi

# Sanity-check the total size. result.txt is still written because
# downstream consumers may read it (preserved side effect).
du -sb "${LOCAL_DIR}" | awk '{print $1}' > ./results/dmp/daily_export/result.txt
size_bytes=$(cat ./results/dmp/daily_export/result.txt)
if [[ "${size_bytes}" -gt ${MAX_BYTES} ]] || [[ "${size_bytes}" -lt ${MIN_BYTES} ]]; then
  echo "dump dmp数据大小超过阈值."
  rm -rf "${LOCAL_DIR}"
  exit 1
fi

# Publish into the hive warehouse: drop any stale copy, recreate the
# partition directory, then upload. The -rm may legitimately fail on the
# first run when the target does not yet exist.
hadoop dfs -rm -r "${GA_PREPARE_PATH}/${year}/${month}/${day}"
hadoop dfs -mkdir -p "${GA_PREPARE_PATH}/${year}/${month}/${day}"
if ! hadoop dfs -put -p "${LOCAL_DIR}" "${GA_PREPARE_PATH}/${year}/${month}/"; then
  echo "hadoop dfs -put to ${GA_PREPARE_PATH}/${year}/${month}/ failed" >&2
  exit 1
fi

# Clean up the local staging copy.
rm -rf "${LOCAL_DIR}"

echo "[ga data prepare End!]"