prd_env.sh 5.33 KB
Newer Older
wang-jinfeng committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
#!/usr/bin/env bash


# S3 path constants. Every value below is an s3:// URI.
# NOTE(review): all buckets except ADN_REQUEST_PATH are "mob-emr-test", and the
# report/tmp paths sit under what look like per-user prefixes (feng.liang,
# liushuai) -- confirm these are the intended locations for this environment.

# Interest tag
DM_INTEREST_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_interest_tag"

# Age: tmp extracts (DSP / GA), dm_device_age, and report outputs under dmp_report
AGE_GET_DSP_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/get_dsp_age"
AGE_GET_GA_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/get_ga_age"
AGE_CALC_DEVICE="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_device_age"
REPORT_AGE_TAG_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/ageTotal"
REPORT_SAMPLE_AGE_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/ageSample"
REPORT_SAMPLE_AGE_COUNTRY_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/ageSampleCountry"
REPORT_AGE_TAG_COUNTRY_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/ageTotalCountry"

# 3s daily install path (under tmp)
INSTALL_DAILY_3S="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/3s_install_daily"

# Gender: tmp extracts (DSP / GA), dm_device_gender, and report outputs under dmp_report
GENDER_GET_DSP_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/get_dsp_gender"
GENDER_GET_GA_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/get_ga_gender"
GENDER_CALC_DEVICE="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_device_gender"
REPORT_GENDER_TAG_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/genderTotal"
REPORT_SAMPLE_GENDER_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/genderSample"
REPORT_SAMPLE_GENDER_COUNTRY_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/genderSampleCountry"
REPORT_GENDER_TAG_COUNTRY_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/genderTotalCountry"

# Interest-tag report outputs
REPORT_INTEREST_TAG_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/tag"
REPORT_INTEREST_TAG_CNT_RESULT_PATH="s3://mob-emr-test/feng.liang/dmp_report/tagCountry"

# DSP
ETL_DSP_REQ_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_dsp_request_daily"
DMP_ADN_DSP_PROFILE_TOTAL="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_profile_total/dsp"

# ADN (ADN_REQUEST_PATH is the only path in the mob-ad bucket)
ETL_ADN_SDK_REQUEST_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_adn_sdk_request_daily"
ADN_TOTAL_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_adn_device_total"
ADN_REQUEST_PATH="s3://mob-ad/adn/tracking-v3/request"
DIM_ADN_CAMPAIGN="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_adn_campaign"

# GA
GA_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_ga_device_daily"

# MP
MP_REQUEST_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/mp_request_daily"

# DMP
DMP_TOTAL_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_dmp_device_total"

# Report
TAG_REPORT_PATH="s3://mob-emr-test/feng.liang/dmp_report/report"

# Temporary / scratch paths
REQUEST_TMP_PATH="s3://mob-emr-test/liushuai/dmp_report/tmp"
REQUEST_PATH="s3://mob-emr-test/liushuai/dmp_report"
TMP_ADSERVER_PACKAGE_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/adserver_packageName"

# Report countries: 28 two-letter codes as a comma-separated list of
# single-quoted values (presumably for splicing into a SQL IN (...) clause --
# confirm against callers).
REPORT_COUNTRIES="'DE','HK','TW','JO','BH','FR','SA','QA','SG','UK','ID','OM','US','EG','AE','IN','KR','CN','KW','MY','ES','TH','PH','VN','PK','CA','MX','JP'"

# Path to the DMP jar, relative to the current working directory.
JAR=./DMP.jar

# mysql client command prefix ending in -e, so the SQL statement to execute is
# expected as the next argument (presumably invoked unquoted as
# $MYSQL_ETL "<sql>" -- confirm against callers).
# NOTE(review): the database password is stored here in plain text and is
# passed on the command line, where it is visible in process listings and
# shell traces. Consider a protected client option file or an environment
# variable instead.
MYSQL_ETL="mysql -h datameta.c5yzcdreb1xr.us-east-1.rds.amazonaws.com -udataplatform -pMobdataplatform_123 dataplatform -e"

# Choose the hive executable: keep the offline-hive install path when it
# exists on this host, otherwise fall back to plain `hive` (resolved via PATH).
offline_hive="/data/hadoop-alternative/hive-offline/bin/hive"
[[ -e "${offline_hive}" ]] || offline_hive="hive"

# Add a partition to a Hive table in the `dev` database via hive_cmd, using
# ALTER TABLE ... ADD IF NOT EXISTS PARTITION so an already-registered
# partition is not an error.
# $1 table name
# $2 partition spec, placed verbatim inside PARTITION (...)
# $3 location of the partition data, placed inside single quotes
# On hive_cmd failure this calls `exit 1`, which terminates the calling shell
# (not just this function).
mount_partition() {
  local table="$1"
  local partition_spec="$2"
  local location="$3"

  hive_cmd "
use dev;
ALTER TABLE ${table} ADD IF NOT EXISTS PARTITION (${partition_spec})
    LOCATION '${location}';
" || exit 1
}

# Run HiveQL through the hive CLI (`hive -e`) with a fixed list of session
# settings prepended to the caller's text.
# $1 HiveQL text; a ';' is appended after it
# Side effects: exports HIVE_CONF_DIR and sets the global variable HIVE_CMD.
# Returns: the exit status of the hive process.
hive_cmd() {
  # Session settings sent ahead of the caller's statements.
  # hive.auto.convert.join appears twice; the repeat is redundant but kept so
  # the text handed to hive stays exactly as before.
  local session_settings="
set hive.cli.print.header=false;
set hive.auto.convert.join=true;
set mapreduce.map.speculative=true;
set mapreduce.reduce.speculative=true;
set hive.exec.reducers.max=5000;
set hive.optimize.index.filter=true;
set hive.auto.convert.join=true;
set mapreduce.task.io.sort.mb=512;"

  export HIVE_CONF_DIR=/data/azkaban-hadoop/command-home/hive-offline/conf

  # The hive executable is always taken from PATH here; the lookup of
  # /data/hadoop-alternative/hive-offline/bin/hive is disabled.
  HIVE_CMD="hive"

  "${HIVE_CMD}" -e "${session_settings}
$1;
"
}

# Starting at the expect date and stepping back one day at a time, print the
# first existing directory of the form <path>/yyyy/MM/dd/<sub partition>.
# NOTE: no alert is sent when the expect date's own directory is missing; the
# function just falls back to older dates and keeps searching until a
# directory is found.
# $1 path
# $2 expect date, format yyyyMMdd
# $3 sub partition
# Outputs: the matching directory on stdout
# Returns: 0 once a directory is found; 1 if the previous day cannot be
#          computed from the current date (e.g. a malformed expect date)
get_recently_dir() {
    local path="$1"
    local expect_date="$2"
    local sub_partition="$3"
    local year month day check_path previous_date

    while :
    do
        year=${expect_date:0:4}
        month=${expect_date:4:2}
        day=${expect_date:6:2}
        check_path="$path/$year/$month/$day/$sub_partition"
        if hadoop fs -test -e "${check_path}"
        then
            echo "$check_path"
            return 0
        fi

        # Assign separately from `local` so a failing `date` is detected.
        # Otherwise the date would silently become empty and the search would
        # restart from yesterday instead of stopping.
        if ! previous_date=$(date -d "$expect_date 1 days ago" "+%Y%m%d") \
            || [[ -z "${previous_date}" ]]
        then
            echo "get_recently_dir: cannot step back from date '${expect_date}'" >&2
            return 1
        fi
        expect_date="${previous_date}"
    done
}

# Wait for a file (e.g. a _SUCCESS marker) to appear: poll with
# `hadoop fs -ls` and sleep 300 seconds between attempts until the listing
# succeeds. There is no upper bound on the wait.
# $1 check file
check_await() {
    until hadoop fs -ls "$1" > /dev/null 2>&1
    do
        sleep 300
    done
}

# Print a reducer count derived from the size reported by `hadoop fs -du -s`:
# int(size / 1000 / 1000 / 800 + 1), where size is the first field of each
# output line.
# $1 path to measure
calculate_reduce_num() {
    hadoop fs -du -s "$1" \
        | awk -F ' ' '{ print int($1 / 1000 / 1000 / 800 + 1) }'
}

# Starting at the expect date and stepping back one day at a time, print the
# date (yyyyMMdd) of the first day whose directory
# <path>/yyyy/MM/dd/<sub partition> exists. The search continues until a
# directory is found.
# $1 path
# $2 expect date, format yyyyMMdd
# $3 sub partition
# Outputs: the matching date on stdout
# Returns: 0 once a directory is found; 1 if the previous day cannot be
#          computed from the current date (e.g. a malformed expect date)
get_recently_date() {
    local path="$1"
    local expect_date="$2"
    local sub_partition="$3"
    local year month day check_path previous_date

    while :
    do
        year=${expect_date:0:4}
        month=${expect_date:4:2}
        day=${expect_date:6:2}
        check_path="$path/$year/$month/$day/$sub_partition"
        if hadoop fs -test -e "${check_path}"
        then
            echo "${year}${month}${day}"
            return 0
        fi

        # Assign separately from `local` so a failing `date` is detected.
        # Otherwise the date would silently become empty and the search would
        # restart from yesterday instead of stopping.
        if ! previous_date=$(date -d "$expect_date 1 days ago" "+%Y%m%d") \
            || [[ -z "${previous_date}" ]]
        then
            echo "get_recently_date: cannot step back from date '${expect_date}'" >&2
            return 1
        fi
        expect_date="${previous_date}"
    done
}