#!/usr/bin/env bash
#
# Shared S3 locations used by the DMP jobs. Every value below is a plain
# string constant; nothing in this section touches the network.

# interest tag
DM_INTEREST_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_interest_tag"

# age
AGE_GET_DSP_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/get_dsp_age"
AGE_GET_GA_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/get_ga_age"
AGE_CALC_DEVICE="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_device_age"
REPORT_AGE_TAG_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/ageTotal"
REPORT_SAMPLE_AGE_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/ageSample"
REPORT_SAMPLE_AGE_COUNTRY_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/ageSampleCountry"
REPORT_AGE_TAG_COUNTRY_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/ageTotalCountry"

# 3s
INSTALL_DAILY_3S="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/3s_install_daily"

# gender
GENDER_GET_DSP_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/get_dsp_gender"
GENDER_GET_GA_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/get_ga_gender"
GENDER_CALC_DEVICE="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_device_gender"
REPORT_GENDER_TAG_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/genderTotal"
REPORT_SAMPLE_GENDER_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/genderSample"
REPORT_SAMPLE_GENDER_COUNTRY_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/genderSampleCountry"
REPORT_GENDER_TAG_COUNTRY_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/genderTotalCountry"

# tag
REPORT_INTEREST_TAG_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/tag"
REPORT_INTEREST_TAG_CNT_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/tagCountry"

# dsp
ETL_DSP_REQ_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_dsp_request_daily"
DMP_ADN_DSP_PROFILE_TOTAL="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_profile_total/dsp"

# adn
ETL_ADN_SDK_REQUEST_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_adn_sdk_request_daily"
# adn
ADN_TOTAL_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_adn_device_total"
ADN_REQUEST_PATH="s3://mob-ad/adn/tracking-v3/request"
DIM_ADN_CAMPAIGN="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_adn_campaign"

# ga
GA_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_ga_device_daily"

# mp
MP_REQUEST_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/mp_request_daily"

# dmp
DMP_TOTAL_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_dmp_device_total"

# report
TAG_REPORT_PATH="s3://mob-emr-test/feng.liang/dmp_report/report"

# tmp
REQUEST_TMP_PATH="s3://mob-emr-test/liushuai/dmp_report/tmp"
REQUEST_PATH="s3://mob-emr-test/liushuai/dmp_report"
TMP_ADSERVER_PACKAGE_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/adserver_packageName"

# 28 countries; each code is single-quoted and the list is comma-separated.
REPORT_COUNTRIES="'DE','HK','TW','JO','BH','FR','SA','QA','SG','UK','ID','OM','US','EG','AE','IN','KR','CN','KW','MY','ES','TH','PH','VN','PK','CA','MX','JP'"

JAR=./DMP.jar

# NOTE(review): the database password is embedded in this command string, so
# it is visible in the repository and in process listings. Consider moving it
# to a protected option file or an environment variable. Left unchanged here
# because callers expand MYSQL_ETL as-is.
MYSQL_ETL="mysql -h datameta.c5yzcdreb1xr.us-east-1.rds.amazonaws.com -udataplatform -pMobdataplatform_123 dataplatform -e"

# Use the offline Hive binary when it exists at the fixed path; otherwise fall
# back to whatever `hive` resolves to on PATH.
offline_hive="/data/hadoop-alternative/hive-offline/bin/hive"
if [[ ! -e "$offline_hive" ]]; then
  offline_hive="hive"
fi

#######################################
# Register a partition on a table in the `dev` Hive database.
# Arguments:
#   $1 - table name
#   $2 - partition spec, inserted verbatim inside PARTITION (...)
#   $3 - partition location
# Returns:
#   Exits the calling shell with status 1 when the Hive statement fails.
#######################################
mount_partition() {
  hive_cmd "
    use dev;
    ALTER TABLE $1 ADD IF NOT EXISTS PARTITION ($2) LOCATION '$3';
  " || exit 1
}

#######################################
# Run a Hive statement through `hive -e` with a fixed set of session settings.
# Globals:
#   HIVE_CONF_DIR (exported), HIVE_CMD (written)
# Arguments:
#   $1 - HiveQL text; a trailing ';' is appended by this function
# Returns:
#   Exit status of the hive command.
#######################################
hive_cmd() {
  export HIVE_CONF_DIR=/data/azkaban-hadoop/command-home/hive-offline/conf
  # The lookup of /data/hadoop-alternative/hive-offline/bin/hive is disabled;
  # the plain `hive` found on PATH is always used.
  HIVE_CMD="hive"
  ${HIVE_CMD} -e "
    set hive.cli.print.header=false;
    set hive.auto.convert.join=true;
    set mapreduce.map.speculative=true;
    set mapreduce.reduce.speculative=true;
    set hive.exec.reducers.max=5000;
    set hive.optimize.index.filter=true;
    set hive.auto.convert.join=true;
    set mapreduce.task.io.sort.mb=512;
    $1;
  "
}

#######################################
# Shared backward search used by get_recently_dir and get_recently_date.
# Starting at the expected date, steps back one day at a time until
# "<path>/<yyyy>/<MM>/<dd>/<sub partition>" exists according to
# `hadoop fs -test -e`.
# Globals:
#   GET_RECENTLY_MAX_LOOKBACK_DAYS (read) - how many days to step back before
#     giving up; defaults to 365 when unset or not a non-negative integer.
# Arguments:
#   $1 - output mode: "date" prints yyyyMMdd, anything else prints the path
#   $2 - base path
#   $3 - expected date, format yyyyMMdd
#   $4 - sub partition
# Outputs:
#   The matching date or path on stdout; diagnostics on stderr.
# Returns:
#   0 when a partition was found, 1 when the lookback limit was reached or
#   the previous day could not be computed.
#######################################
_find_recent_partition() {
  local mode="$1"
  local path="$2"
  local current_date="$3"
  local sub_partition="$4"
  local max_days="${GET_RECENTLY_MAX_LOOKBACK_DAYS:-365}"
  local stepped=0
  local year month day check_path

  [[ "$max_days" =~ ^[0-9]+$ ]] || max_days=365

  while (( stepped <= max_days )); do
    year=${current_date:0:4}
    month=${current_date:4:2}
    day=${current_date:6:2}
    check_path="$path/$year/$month/$day/$sub_partition"

    if hadoop fs -test -e "${check_path}"; then
      if [[ "$mode" == "date" ]]; then
        printf '%s\n' "${year}${month}${day}"
      else
        printf '%s\n' "$check_path"
      fi
      return 0
    fi

    # Assign separately from any declaration so a failing `date` is noticed
    # instead of silently restarting the search from an empty date.
    if ! current_date=$(date -d "$current_date 1 days ago" "+%Y%m%d"); then
      printf 'cannot compute the day before "%s"\n' "$current_date" >&2
      return 1
    fi
    stepped=$((stepped + 1))
  done

  printf 'no partition found under %s within %s days before %s\n' \
    "$path" "$max_days" "$3" >&2
  return 1
}

#######################################
# Print the most recent existing partition directory, searching backward from
# the expected date. This function only prints the directory; it does not
# raise any alert when the expected date itself is missing.
# Arguments:
#   $1 - path
#   $2 - expect date, format yyyyMMdd
#   $3 - sub partition
# Outputs:
#   "<path>/<yyyy>/<MM>/<dd>/<sub partition>" on stdout.
# Returns:
#   0 on success, 1 when nothing was found within the lookback limit.
#######################################
get_recently_dir() {
  _find_recent_partition "dir" "$1" "$2" "$3"
}

#######################################
# Block until the given file is listable, polling every 300 seconds.
# Arguments:
#   $1 - file to check (for example a _SUCCESS marker)
#######################################
check_await() {
  until hadoop fs -ls "$1" > /dev/null 2>&1; do
    sleep 300
  done
}

#######################################
# Print a reducer count derived from the size reported by `hadoop fs -du -s`:
# int(size / 1000 / 1000 / 800 + 1).
# Arguments:
#   $1 - path to measure
#######################################
calculate_reduce_num() {
  hadoop fs -du -s "$1" | awk '{print int($1/1000/1000/800 + 1);}'
}

#######################################
# Print the date (yyyyMMdd) of the most recent existing partition, searching
# backward from the expected date.
# Arguments:
#   $1 - path
#   $2 - expect date, format yyyyMMdd
#   $3 - sub partition
# Outputs:
#   The matching date on stdout.
# Returns:
#   0 on success, 1 when nothing was found within the lookback limit.
#######################################
get_recently_date() {
  _find_recent_partition "date" "$1" "$2" "$3"
}