#!/usr/bin/env bash
# Common configuration items shared by the DMP scripts.

# GA
GA_PREPARE_PATH_SRC="s3://ga-sample-external/daily-export"
GA_PREPARE_PATH="s3://mob-emr-test/dataplatform/datawarehourse/dmp/daily_export"
GA_INSTALL_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_ga_install_daily"
GA_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_ga_device_daily"
GA_TOTAL_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_ga_device_total"
# NOTE(review): same location as GA_PREPARE_PATH apart from the trailing slash — confirm this is intended.
GA_ACTIVE_DAILY_PATH="s3://mob-emr-test/dataplatform/datawarehourse/dmp/daily_export/"
GA_ACTIVE_TOTAL_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_ga_active_total"
# Alternative raw-data location, currently disabled:
# GA_ODS_RAW_DATA_PATH="s3://live-ga-doppler/data/emr/ga_rawdata"
GA_ODS_RAW_DATA_PATH="s3://mob-emr-test/dataplatform/emr/ga_rawdata"
ODS_OTHER_DEVICE_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_other_device_daily"
ODS_OTHER_DEVICE_RT_WEEKLY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_other_device_rt_weekly"

# APP_INFO & APP_TAG: app dimension tables
APP_TAG_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_app_tag"
APP_INFO_IOS_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_app_info_ios"
APP_INFO_ADR_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_app_info_adr"
DIM_APP_INFO_IOS="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_app_info_ios_category"
DIM_APP_INFO_ADR="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_app_info_adr_category"
# install list (dm_install_list)
DM_INSTALL_LIST="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list"

# dmp_install_list
DMP_INSTALL_LIST="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dmp_install_list"

# dmp_interest_tag
DMP_INTEREST_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dmp_interest_tag"
# interest tag (dm_interest_tag)
DM_INTEREST_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_interest_tag"

# Same table name as DM_INTEREST_PATH, but under data/dev instead of data/dwh.
DM_DEV_INTEREST_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/dm_interest_tag"

DM_INTEREST_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_interest_tag_daily"
DMP_ADN_DSP_PROFILE_TOTAL="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_profile_total/dsp"

# dsp_profile_total
DSP_PROFILE_TOTAL="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dsp_profile_total"

DMP_ADN_DSP_DEVICE_IDS="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_dsp_devices_ids_daily"
DM_DC_INTEREST_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_interest_tag_dc"
# Alternative location for DM_DEVICE_TAG_PATH, currently disabled:
## DM_DEVICE_TAG_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_device_tag"
DM_DEVICE_TAG_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dmp_device_tag"

DMP_DEVICE_TAG_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dmp_device_tag_daily"
# Alternative location for DM_DEVICE_TAG_STATISTICS_PATH, currently disabled:
# DM_DEVICE_TAG_STATISTICS_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_device_tag_statistics"
DM_DEVICE_TAG_STATISTICS_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dmp_device_tag_statistics"

# dm_device_tag_daily
DM_DEVICE_TAG_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_device_tag_daily"

# ADN log and tracking paths
NGINX_LOG="s3://mob-ad/adn/nginxlog/tksetting"
NEW_NGINX_LOG="s3://mob-ad/adn/cronus/access"
ADN_CLICK_PATH="s3://mob-ad/adn/tracking-v3/click"
ADN_INSTALL_PATH="s3://mob-ad/adn/tracking-v3/install"
ADN_EVENT_PATH="s3://mob-ad/adn/tracking-v3/event"
ADN_REQUEST_PATH="s3://mob-ad/adn/tracking-v3/request"
HB_REQUEST_PATH="s3://mob-ad/adn/hb-v1/request"
ADN_PRE_CLICK_PATH="s3://mob-ad/adn/tracking-v3/pre_click"
ADN_IMPRESSION_PATH="s3://mob-ad/adn/tracking-v3/impression"
ADN_ADX_REQ_ORG="s3://mob-ad/adn/adx-v1/request"
ADN_DSP_PATH="s3://mob-ad/adn/dsp_orc/request"
DMP_ADN_REQUEST_DEVICE_IDS="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_ids_mapping"

# Output paths under s3://mob-emr-test/mv2reyun
OUTPUT_REYUN_IMPRESSION_PATH="s3://mob-emr-test/mv2reyun/tracking_impression"
OUTPUT_REYUN_CLICK_PATH="s3://mob-emr-test/mv2reyun/tracking_click"
OUTPUT_REYUN_INSTALL_PATH="s3://mob-emr-test/mv2reyun/tracking_install"

OUTPUT_REYUN_EVENT_PATH="s3://mob-emr-test/mv2reyun/dmp_event_daily"

OUTPUT_REYUN_APP_TAG_PATH="s3://mob-emr-test/mv2reyun/dim_app_tag_daily"

OUTPUT_REYUN_PACKAGE_TAG_PATH="s3://mob-emr-test/mv2reyun/dim_package_tag_daily"

OUTPUT_REYUN_TAG_INFO_PATH="s3://mob-emr-test/mv2reyun/dim_tag_info_daily"

OUTPUT_REYUN_NEW_TAG_INFO_PATH="s3://mob-emr-test/mv2reyun/dim_new_tag_info_daily"

OUTPUT_INSTALL_LIST_INFO_PATH="s3://mob-emr-test/mv2reyun/dmp_install_list_daily"

OUTPUT_REYUN_USER_INFO_PATH="s3://mob-emr-test/mv2reyun/dmp_user_info"


# ETL tables
ETL_DSP_REQ_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_dsp_request_daily"
ETL_ADN_INSTALL_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_adn_install_daily"
ETL_ADN_SDK_CLICK_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_adn_sdk_click_daily"
ETL_ADN_SDK_REQUEST_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_adn_sdk_request_daily"

# Not to be confused with ETL_ADN_SDK_REQUEST_DAILY: this one is etl_adn_request_sdk_daily.
ETL_ADN_REQUEST_SDK_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_adn_request_sdk_daily"

# The next four paths live under data/tmp (not data/dwh).
ETL_ADN_INSTALL_TMP_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/tmp/adn_install_tmp_daily"
ETL_ADN_CLICK_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/tmp/adn_click_daily"
ETL_ADN_PRE_CLICK_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/tmp/adn_pre_click_daily"
ETL_ADN_MERGE_CLICK_PRE_CLICK="s3://mob-emr-test/dataplatform/DataWareHouse/data/tmp/merge_click_pre_click"
ETL_TOUTIAO_LAUNCH_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_toutiao_launch_daily"
ETL_ADN_REQUEST_OTHER_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_adn_request_other_daily"
ETL_DSP_REQ_MDS_HOURS="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/mds_dsp_request_daily_hours"
ETL_DSP_ORG_REQ_HOURS="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/etl_dsp_org_request_daily_hours"
ETL_DSP_REQ_ETL_HOURS="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/etl_dsp_request_daily_hours"
ETL_ADN_ORG_REQ_HOURS="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/etl_adn_org_request_daily_hours"
# Unlike its neighbours, this value names a single CSV object rather than a directory prefix.
ETL_DSP_DEALERID_PKG_MAPPING="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dealerid_pkg_mapping/dealerid_pkg.csv"
ETL_DSP_DEALERID_RETARGET_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_dsp_dealer_retarget_daily"


# MDS tables
MDS_DSP_REQ_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/mds_dsp_request_daily"
MDS_ADN_SDK_REQUEST_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/mds_adn_request_daily"
MDS_DMP_ADDRESS_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/mds_dmp_address_daily"
MDS_DSP_REQ_CLUSTER_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/mds_dsp_request_cluster_daily"
MDS_ADN_SDK_REQUEST_CLUSTER_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/mds_adn_request_cluster_daily"

# Device totals
ADN_TOTAL_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_adn_device_total"
DMP_TOTAL_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_dmp_device_total"

# Campaign lists, packages and tags
CAMPAIGN_LIST_SRC="s3://mob-emr-test/dataplatform/DataWareHouse/data/export/campaign_list"
DIM_ADN_CAMPAIGN="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_adn_campaign"
DIM_CAMPAIGN_LIST="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_campaign_list"
DM_CAMPAIGN_TAG="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_campaign_tag"
DIM_CAMPAIGN_PACKAGE_3S="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_campaign_package_3s"
DIM_CAMPAIGN_PACKAGE_ADN="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_campaign_package_adn"
# NOTE(review): variable is prefixed DIM_ but the path is dm_campaign_adn_tag — confirm.
DIM_CAMPAIGN_ADN_TAG="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_campaign_adn_tag"
DIM_MANUAL_MAPPING="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_manual_appid_package/appid_package1"
## dsp (note: this path is in the s3://mob-emr bucket, not mob-emr-test)
ADN_DSP_CLICK="s3://mob-emr/mob_dsp/data_analysis/log_adn_dsp_click_day"

# Setting
NGINX_SETTING_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/Nginx/Setting"
APP_ID_MAPPING_TMP="s3://mob-emr-test/dataplatform/DataWareHouse/data/Nginx/GlobalSetting_tmp"
APP_ID_MAPPING="s3://mob-emr-test/dataplatform/DataWareHouse/data/Nginx/GlobalSetting"

REVENUE_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_ga_revenue"
# gender
GENDER_TP_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_gender_thirdparty_data_daily"
GENDER_TP_TOTAL="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_gender_thirdparty_data_total"
GENDER_EXTRACT_DEVICE_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/install_list"
GENDER_GET_DSP_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/get_dsp_gender"
GENDER_GET_GA_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/get_ga_gender"
GENDER_GET_BIG_MEDIA_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/get_bm_gender"
GENDER_GET_FACEBOOK_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/get_fb_gender"
GENDER_GET_THIRDPART_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/get_other_gender"
GENDER_MERGE_INSTALL="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/merge_install_gender"
GENDER_CALC_PACKAGE="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/calc_package_gender"
GENDER_CALC_PACKAGE_DICT="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/calc_package_dict"
GENDER_CALC_DEVICE="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_device_gender"
GENDER_MERGE_DEVICE="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dmp_device_gender"
GENDER_VALIDATE="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/validate"
GENDER_PRID_SCORE="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/gender_predict_score"
GENDER_PRID_SCORE_THRESHOLD="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/gender_predict_score_threshold"
INSTALL_GENDER_LR="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/merge_install_gender_lr"
INSTALL_GENDER_LR_BIN="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/merge_install_gender_lr_bin"


# behavior
BEHAVIOR_TP_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_behavior_thirdparty_data_daily"
BEHAVIOR_TP_TOTAL="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_behavior_thirdparty_data_total"

# age
AGE_EXTRACT_DEVICE_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/age_install_list"
AGE_GET_DSP_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/get_dsp_age"
AGE_GET_GA_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/get_ga_age"
AGE_MERGE_INSTALL="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/merge_install_age"
AGE_CALC_PACKAGE="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/calc_package_age"
AGE_CALC_PACKAGE_DICT="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/calc_package_dict_age"
AGE_CALC_DEVICE="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_device_age"
DMP_AGE_CALC_DEVICE="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dmp_device_age"
AGE_VALIDATE="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/age_validate"

# Category and segment-tag dimension tables
DIM_CATEGORY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_category"
DIM_SEGMENT_TAG_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_segment_tag"
DIM_CATEGORY_NEW_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_category_new"

# zarola
# The input path ends in a literal "/*" wildcard; it is stored quoted, so any expansion is left to the consumer.
ZAROLA_INSTALL_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/import/zarola/*"
ZAROLA_INSTALL_OUTPUT="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/zarola/output"

# 3s
DIM_CAMPAIGN_3S_LIST="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_campaign_3s_list/"
INSTALL_DAILY_3S="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/3s_install_daily"
EVENT_DEFINE_3S="s3://trackingcsv-3s/trackingcsv/event_define"
PATH_3S="s3://trackingcsv-3s/trackingcsv/install"
# NOTE(review): the variable name is spelled EVNET (sic); it is kept because other scripts may reference it.
EVNET_3S_PATH="s3://mob-ad/3s/trackinglogs/event"
# Same S3 location as ADN_EVENT_PATH.
EVENT_SS_PATH="s3://mob-ad/adn/tracking-v3/event"
POSTBACK_INSTALL_3S_PATH="s3://mob-ad/adn/tracking-v3/postback_install"
POSTBACK_EVENT_3S_PATH="s3://mob-ad/adn/tracking-v3/postback_event"
ETL_3S_POSTBACK_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_3s_postback_daily"
ALI_USER_ACTIVATION_SYS_TO3S="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_syn_to3s"
ALI_USERINFO_TO_3S_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_syn_to3s_res"
ALI_USERINFO_TO_3S_TOUTIAO_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_syn_to3s_toutiao_res"
ALI_USERINFO_TO_3S_GUANGDIANTONG_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_syn_to3s_guangdiantong_res"
ALI_USERINFO_TO_3S_GUANGDIANTONG_APPEND_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_syn_to3s_guangdiantong_append_res"
ALI_USERINFO_TO_3S_GUANGDIANTONG_DELETE_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_syn_to3s_guangdiantong_delete_res"
ALI_USERINFO_TO_3S_IQIYI_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_syn_to3s_iqiyi_res"
ALI_USERINFO_TO_3S_IQIYI_APPEND_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_syn_to3s_iqiyi_append_res"
ALI_USERINFO_TO_3S_IQIYI_DELETE_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_syn_to3s_iqiyi_delete_res"
ALI_USERINFO_TO_3S_UC_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_syn_to3s_uc_res"
ALI_USERINFO_TO_3S_UC_APPEND_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_syn_to3s_uc_append_res"
ALI_USERINFO_TO_3S_UC_DELETE_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_syn_to3s_uc_delete_res"
GDT_DATA="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/gdt_data"
ALI_USERINFO_TO_3S_TOUTIAO_PATH_TMP="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_syn_to3s_toutiao_res_tmp"
ALI_USERINFO_TO_3S_ALIPAY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_syn_to3s_alipay_res"
ALI_USERINFO_TO_3S_ALIPAY_APPEND_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_syn_to3s_alipay_append_res"
ALI_USERINFO_TO_3S_ALIPAY_DELETE_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_syn_to3s_alipay_delete_res"

# appsflyer
ETL_AF_ORG_DAILY="s3://mob-emr-test/appsflyer"
ETL_AF_ORG_TOTAL="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_af_org_total"
## appsflyers (these two paths are in the s3://mob-emr bucket)
ETL_AF_ORG_NEW_DAILY="s3://mob-emr/appsflyer_audience/latest"
ETL_MPARTICLE_ORG_DAILY="s3://mob-emr/adn/mparticle/mparticle_audience_retarget"

# s3://mob-emr-test/dataplatform/rtdmp/${year}/${month}/${day}/${hour}/data/${region} holds the data that needs to be updated
# Files under s3://mob-emr-test/dataplatform/rtdmp/${year}/${month}/${day}/${hour}/audience are the IDs of the audience packages that need to be updated

# tencent adx
ODS_ADN_ADX_REQ_TMP="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_adn_adx_req_tmp"
DIM_ADN_ADX_PKG="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_adn_adx_package"
ADN_ADX_DEVICE_TAG="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/adn_adx_device_tag"

# mp sdk
# The request path contains a literal "*" path component; it is stored quoted, so any expansion is left to the consumer.
MP_REQUEST_PATH="s3://mob-ad/mob-ad/adn/mob_scheme/*/"
MP_REQUEST_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/mp_request_daily"

# clever (preemptive sending)
CLEVER_NGINX_LOG="s3://mob-ad/adn/nginxlog/tracking"
CLEVER_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_adn_clever_daily/"

# adn_sdk (adn_sdk data)
ADN_SDK_LOG="s3://mob-ad/adn/nginxlog/analytics2"
ADN_SDK_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_adn_sdk_daily"

ADN_SDK_HOUR_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_adn_sdk_hour"

# userInfo
ODS_DMP_USER_INFO="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_dmp_user_info"
ODS_DMP_USER_INFO_ALL="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_dmp_user_info_all"
ODS_DMP_USER_INFO_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_dmp_user_info_daily"
DMP_EVENT_TAG_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dmp_event_tag_daily"

# event org log tab
ODS_DMP_EVENT_ORG="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_dmp_event_org"
# NOTE(review): the variable name is spelled CAMPAIN (sic); it is kept because other scripts may reference it.
ETL_CAMPAIN_LIST_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_adn_campaign_list"
ODS_DMP_EVENT_TAG="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_dmp_event_tag"
ODS_DMP_EVENT_DAILY_OTHERS="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_dmp_event_tag_daily_others"



# tmp path: intermediate locations, mostly under data/dwh/tmp
TMP_DSP_PROFILE_RECOVER="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/tmp_dsp_profile_total_recover"
PACKAGE_TMP_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/package_name"
INSTALL_PACKAGE_TMP_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/package_name_install"
TMP_IOS_APP_INFO_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/app_info_ios"
TMP_ADR_APP_INFO_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/app_info_adr"
TMP_IOS_APP_INFO_SPARK_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/app_info_ios_spark"
TMP_ADR_APP_INFO_SPARK_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/app_info_adr_spark"
TMP_BUNDLE_APP_INFO_SPARK_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/app_info_bundle_spark"
TMP_CRAWLER_INFO_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/app_info_crawler"
TMP_PKG_JOIN_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/package_join"
TMP_EGGPLANTS_INPUT_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/eggplants/input"
TMP_EGGPLANTS_OUTPUT_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/eggplants/output"
TMP_EGGPLANTS_RT_WEEKLY_OUTPUT_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/eggplants/rt_output_weekly"
TMP_EGGPLANTS_PROGRESS_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/eggplants/progress"
TMP_PKG_TAG_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/pkg_tag_map"
TMP_INSTALL_DAILY_3S="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/3s_install_daily_tmp"
TMP_ADSERVER_PACKAGE_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/adserver_packageName"
TMP_ADSERVER_PKG_TMP_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/adserver_packageName_tmp"
# This one is under data/tmp rather than data/dwh/tmp.
TMP_INSTALL_DAILY_ADN="s3://mob-emr-test/dataplatform/DataWareHouse/data/tmp/adn_install_tmp_daily_tmp"
TMP_AND_REQUEST_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/etl_adn_sdk_request_daily"

# This one is under data/tmp rather than data/dwh/tmp.
TMP_AND_REQUEST_SDK_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/tmp/etl_adn_request_sdk_daily"

# NOTE(review): TMP_DSP_REQUEST_DAILY_TBL_PATH and DSP_BUNDLE_OUTPUT_PATH hold the identical value — confirm this is intended.
TMP_DSP_REQUEST_DAILY_TBL_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/etl_dsp_request_daily_tbl"
DSP_BUNDLE_OUTPUT_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/etl_dsp_request_daily_tbl"
TMP_DSP_REQUEST_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/etl_dsp_request_daily"

TMP_GA_INSTALL_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/ods_ga_install_daily"
TMP_INTEREST_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/dm_interest_tag"

# Per-package temporary paths under data/dwh/tmp
TMP_COM_YOUKU_PHONE_WAX_NOBID_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/com_youku_phone_wax_nobid"
TMP_COM_BTOP_TIKTOKRV_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/com_btop_tiktokrv"
TMP_COM_BTOP_TIKTOKRV_GAID_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/com_btop_tiktokrv_gaid"
TMP_REYUN_LAHUO_LIST_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/reyun_lahuo_list"
# NOTE(review): the variable name says 1015 but the path ends in _0207 — confirm which is current.
TMP_COM_TOPON_TOPLTV_1015_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/com.topon_topltv_0207"
TMP_COM_REYUN_PRACTICALTOOL_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/com.reyun_practicaltool"
TMP_COM_3APP_XIANJINDAI_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/com.3app.xianjindai"
TMP_COM_LAZADA_NOREGISTER_70P_30P_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/com_lazada_noregister_70p_30p"

# rtdmp temporary paths
RTDMP_COM_EG_ANDROID_ALIPAYGPHONE_REYUN_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/rtdmp_com_eg_android_AlipayGphone_reyun"
RTDMP_COM_TAOBAO_LITETAO_REYUN_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/rtdmp_com_taobao_litetao_reyun"
RTDMP_COM_SS_ANDROID_UGC_AWEME_REYUN_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/rtdmp_com_ss_android_ugc_aweme_reyun"
RTDMP_COM_TAOBAO_LITETAO_BTOP_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/rtdmp_com_taobao_litetao_btop"
RTDMP_COM_SS_ANDROID_UGC_AWEME_BTOP_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/rtdmp_com_ss_android_ugc_aweme_btop"
RTDMP_TMP_PACKAGE_NAME_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/rtdmp_tmp_package_name"
RTDMP_NORMAL_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/rtdmp_normal"
RTDMP_NORMAL_COUNT_RESULT="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/rtdmp_normal_count_result"

# Package-name lists and their result paths (note: these variable names are mixed-case, unlike the rest of the file)
Three_Kingdoms_Games="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/Three_Kingdoms_Games"
Three_Kingdoms_Package_Names="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/Three_Kingdoms_Package_Names"
Age_Package_Names_Result="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/Age_Package_Names_Result"
Age_Package_Names="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/Age_Package_Names"
Canglan_Package_Names_Result="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/Canglan_Package_Names_Result"
Canglan_Package_Names="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/Canglan_Package_Names"
Shinny_Package_Names_Result="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/Shinny_Package_Names_Result"
Shinny_Package_Names="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/Shinny_Package_Names"

# for product table ods_adn_trackingnew_request_merge
ODS_ADN_TRACKINGNEW_REQUEST_TMP_HB_REQUEST="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_adn_trackingnew_request_tmp_hb_request"
# for table adn_dsp.log_adn_dsp_request_orc_hour_merge
LOG_ADN_DSP_REQUEST_ORC_HOUR_MERGE="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/log_adn_dsp_request_orc_hour_merge"
# bundle_package_mapping
BUNDLE_PACKAGE_MAPPING_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/bundle_package_mapping"

# unmatch bundle data
UNMATCH_DATA_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/unmatch_bundle"

# Same table name as UNMATCH_DATA_PATH, but under data/dev instead of data/dwh.
DEV_UNMATCH_DATA_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/unmatch_bundle"

# De-duplicated unmatched bundles from all data sources
DISTINCT_BUNDLE_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/distinct_bundle"

# Package-name blacklist
PACKAGE_BLACK_LIST="s3://mob-emr-test/dataplatform/DataWareHouse/dm_package_black_list"

DIM_PACKAGE_TAGS_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_package_tags"
DIM_CAMPAIGN_TAGS_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_campaign_tags"

# Alternative hive-site.xml locations, currently disabled:
# HIVE_SITE_PATH="/data/azkaban-hadoop/scientist-online/spark/conf/hive-site.xml"

# HIVE_SITE_PATH="/data/azkaban-hadoop/command-home/spark-offline/conf/hive-site.xml"
# Built from SPARK_HOME at the time this line runs; if SPARK_HOME is unset or empty
# the value becomes "/conf/hive-site.xml".
HIVE_SITE_PATH="${SPARK_HOME}/conf/hive-site.xml"

TOUTIAO_LAUNCH_PATH="s3://mob-ad/adn/tracking-v3/midway_backend"

# install list
DM_TOUTIAO_LAUNCH_TOTAL="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_toutiao_campaign_total"

# joypacios
JOYPAC_DAILY_LOG_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_joypc_sdk_daily"
JOYPAC_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/joypac_daily"

# ali
ALI_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_baichuan_daily"
ALI_USER_ACTIVATION_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_daily"
ALI_IOS_USER_ACTIVATION_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_ios_user_activation_daily"
ALI_OAID_USER_ACTIVATION_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_oaid_user_activation_daily"
ALI_USER_ACTIVATION_15DAYS_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_rec15days"
ALI_IOS_USER_ACTIVATION_15DAYS_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_ios_user_activation_rec15days"
ALI_OAID_USER_ACTIVATION_15DAYS_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_oaid_user_activation_rec15days"
ALI_USER_ACTIVATION_POSTBACK_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_user_activation_postback_daily"
ALI_IOS_USER_ACTIVATION_POSTBACK_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_ios_user_activation_postback_daily"
ALI_OAID_USER_ACTIVATION_POSTBACK_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ali_oaid_user_activation_postback_daily"
ETL_ALI_USERINFO_ACTIVATION_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_ali_user_activation_total"
ETL_ALI_IOS_USERINFO_ACTIVATION_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_ali_ios_user_activation_total"
ETL_ALI_OAID_USERINFO_ACTIVATION_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_ali_oaid_user_activation_total"
REQUEST_COUNT_RESULT="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/request_count_result"
IOS_REQUEST_COUNT_RESULT="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ios_request_count_result"
OAID_REQUEST_COUNT_RESULT="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/oaid_request_count_result"
TMP_DEVICEID="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list_v2_tmp_deviceid"
LAZADA_OUTPUT_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/lazada_output"
NEW_LAZADA_OUTPUT_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/new_lazada_output"
# In each lahuo/laxin triple below, the *_DAILY_PATH and *_DAILY_TMP_PATH values are under data/dev,
# while the *_TMP_DAILY_TO_S3 value is under data/dwh.
UC_LAHUO_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/uc_lahuo_daily"
UC_LAHUO_DAILY_TMP_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/uc_lahuo_tmp_daily"
UC_LAHUO_TMP_DAILY_TO_S3="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/uc_lahuo_tmp_daily_to_s3"
ALIPAY_LAHUO_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/alipay_lahuo_daily"
ALIPAY_LAHUO_DAILY_TMP_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/alipay_lahuo_tmp_daily"
ALIPAY_LAHUO_TMP_DAILY_TO_S3="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/alipay_lahuo_tmp_daily_to_s3"
YOUKU_LAXIN_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/youku_laxin_daily"
YOUKU_LAXIN_DAILY_TMP_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/youku_laxin_tmp_daily"
YOUKU_LAXIN_TMP_DAILY_TO_S3="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/youku_laxin_tmp_daily_to_s3"
TAOBAO_POSTBACK_DAILY_PATH="s3://mob-emr-test/adn/sync_srv"

# com.tencent.news_bes_7,com.tencent.news_bes_15, com.tencent.news_oppo_7,com.tencent.news_oppo_15
ETL_COM_TENCENT_NEWS_DAILY="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_com_tencent_news_daily"

# iqiyi (note: these variable names use the mixed-case prefix IQiYi)
IQiYi_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_iqiyi_daily"
# The two lahuo paths below are under data/dev; the *_TO_S3 path is under data/dwh.
IQiYi_LAHUO_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/iqiyi_lahuo_daily"
IQiYi_LAHUO_DAILY_TMP_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/iqiyi_lahuo_tmp_daily"
IQiYi_LAHUO_TMP_DAILY_TO_S3="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/iqiyi_lahuo_tmp_daily_to_s3"

# TO business tmp data
TO_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_to_daily"

# btop business tmp data
BTOP_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_btop_daily"

# reyun business tmp data
REYUN_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_reyun_daily"
REYUN_RAW_DATA="s3://mob-emr-test/reyun/pkginfo"

# reyun label data (under data/dwh/tmp)
REYUN_LABEL_TEST_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/reyun_label_test_daily"
REYUN_LABEL_BAIJIU_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/reyun_label_baijiu_daily"

# alipay_activation business tmp data
ALIPAY_ACTIVATION_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_alipay_activation_daily"

# alipay_acquisition business tmp data
ALIPAY_ACQUISITION_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_alipay_acquisition_daily"

# bytedance
BYTEDANCE_LOG_PATH="s3://mob-emr-test/dmp/install_app/toutiaodmp_increment"
BYTEDANCE_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/bytedance_daily"

# facebook
FACEBOOK_LOG_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_fb_org_daily"
FACEBOOK_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_facebook_daily"
FACEBOOK_TOTAL_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_facebook_total"
FACEBOOK_UNMATCHED_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_fb_unmatched_history"

# bigmedia
BIGMEDIA_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_topmedia_domestic_org_daily"
BIGMEDIA_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_bigmedia_domestic_daily"

# device_id_md5_match
DEVICE_ID_MD5_MATCH_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/device_id_md5_match"

# dm_active_tag
# Alternative location under data/dev, currently disabled:
# DM_ACTIVE_TAG_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/dm_active_tag"

DM_ACTIVE_TAG_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_active_tag"

# dm_device_region (this path is under data/dev)
DM_DEVICE_REGION="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/dm_device_region"

# btop (these paths are in the s3://uparpu bucket)
UPARPU_QCC_PACKAGE_INSTALL_LIST="s3://uparpu/online/uparpu_main/uparpu_qcc_package_install_list"
UPARPU_PLUGIN_QCC_PACKAGE="s3://uparpu/online/uparpu_main/uparpu_plugin_qcc_package"

BACKFLOW_OUTPUT="s3://mob-emr-test/dataplatform/DataWareHouse/data/backflow"

# ruid
MAPPING_LOG_DIR="s3://mob-emr-test/dataplatform/flink/mapping/online"
RUID_MAPPING="s3://mob-emr-test/dataplatform/DataWareHouse/data/etl/ruid_mapping"

# dsp_device_mapping
DSP_DEVICE_MAPPING="s3://mob-emr-test/dataplatform/DataWareHouse/data/dsp/device_mapping"

WangJinfeng committed
438 439
ID_MAPPING="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwd/dwd_device_ids_inc_daily"

WangJinfeng committed
440 441
ADS_DEVICE_ID_MAPPING="s3://mob-emr-test/dataplatform/DataWareHouse/data/ads/ads_device_id_mapping"

WangJinfeng committed
442 443
ADS_DEVICE_TAG_ALL_DAILY="s3://mob-emr-test/reyun/dmp/ads/ads_device_tag_all_weekly"

wang-jinfeng committed
444 445 446 447 448 449 450 451 452 453 454
JAR=./DMP.jar

# Block until a marker file (typically _SUCCESS) can be listed, polling every 120 seconds.
# There is no timeout: the function only returns once the file exists.
# $1 file to wait for
check_await() {
    until hadoop fs -ls "$1" > /dev/null 2>&1; do
        sleep 120
    done
}

# Wait for the _SUCCESS file of the log_adn_dsp_impression_hour_combine job.
# Same as a plain marker-file wait, but polls every 30 seconds to react faster.
# $1 file to wait for
check_log_adn_dsp_impression_await() {
    while ! hadoop fs -ls "$1" > /dev/null 2>&1
    do
        sleep 30
    done
}

# Run a HiveQL snippet through the hive CLI, prefixed with the shared session settings.
# Side effects: exports HIVE_CONF_DIR and sets the global HIVE_CMD.
# $1 sql (a terminating ';' is appended after it)
hive_cmd() {
    export HIVE_CONF_DIR=/data/azkaban-hadoop/command-home/hive-offline/conf
    # The probe for /data/hadoop-alternative/hive-offline/bin/hive is disabled;
    # whatever "hive" resolves to is used.
    HIVE_CMD="hive"
    local session_sql="
set hive.cli.print.header=false;
set hive.optimize.index.filter=true;
set mapreduce.task.io.sort.mb=512;
set mapreduce.map.speculative=true;
set mapreduce.reduce.speculative=true;
$1;
"
    ${HIVE_CMD} -e "${session_sql}"
}

# Attach a partition to a table in the dwh database.
# The statement is tried up to 3 times; if every attempt fails the shell exits with 255.
# $1 table name
# $2 partition spec
# $3 partition location
mount_partition() {
    local ddl="
    use dwh;
    ALTER TABLE $1 ADD IF NOT EXISTS PARTITION ($2)
        LOCATION '$3';
    "
    local attempt
    for (( attempt = 1; attempt <= 3; attempt++ )); do
        if hive_cmd "$ddl"; then
            return 0
        fi
    done
    exit 255
}

# Detach a partition from a table in the dwh database and delete its data.
# The DROP statement is tried up to 3 times; if every attempt fails the shell exits with 255.
# The data path is removed only after the DROP succeeded, and only when a path was given.
# $1 table name
# $2 partition spec
# $3 path to delete
unmount_partition() {
    local drop_sql="
    use dwh;
    ALTER TABLE $1 DROP IF EXISTS PARTITION ($2);
    "
    local count=1
    local limit=3
    while [ "$count" -le "$limit" ]; do
        if hive_cmd "$drop_sql"; then
            # Quoted so a path containing whitespace or glob characters stays one argument.
            if [ -n "$3" ]; then
                hadoop fs -rm -r "$3"
            fi
            return 0
        fi
        if [ "$count" -eq "$limit" ]; then
            exit 255
        fi
        count=$(( count + 1 ))
    done
}

# Attach a partition to a table in an arbitrary database.
# The statement is tried up to 3 times; if every attempt fails the shell exits with 255.
# $1 database
# $2 table
# $3 partition spec
# $4 partition location
common_mount_partition() {
    local add_sql="
    use $1;
    ALTER TABLE $2 ADD IF NOT EXISTS PARTITION ($3)
        LOCATION '$4';
    "
    local tries_left=3
    until hive_cmd "$add_sql"; do
        tries_left=$(( tries_left - 1 ))
        if (( tries_left == 0 )); then
            exit 255
        fi
    done
}

# Detach a partition from a table in an arbitrary database and delete its data.
# The DROP statement is tried up to 3 times; if every attempt fails the shell exits with 255.
# The data path is removed only after the DROP succeeded, and only when a path was given.
# $1 database
# $2 table
# $3 partition spec
# $4 path to delete
common_unmount_partition() {
    local drop_sql="
    use $1;
    ALTER TABLE $2 DROP IF EXISTS PARTITION ($3);
    "
    local count=1
    local limit=3
    while [ "${count}" -le "${limit}" ]; do
        if hive_cmd "$drop_sql"; then
            # Quoted so a path containing whitespace or glob characters stays one argument.
            if [ -n "$4" ]; then
                hadoop fs -rm -r "$4"
            fi
            return 0
        fi
        if [ "${count}" -eq "${limit}" ]; then
            exit 255
        fi
        count=$(( count + 1 ))
    done
}

# Derive a reducer count from the total size of one or more paths:
# one reducer per 800,000,000 bytes (rounded down), plus one, so the result is always >= 1.
# Sizes come from the first column of `hadoop fs -du -s`; lines whose first field is not a
# plain number (e.g. when the path does not exist) are ignored.
# $1 paths to measure, separated by ';'
# Prints the reducer count on stdout.
calculate_reduce_num() {
    local total_bytes=0
    local target size
    local -a targets=()

    IFS=';' read -r -a targets <<< "$1"
    for target in "${targets[@]}"; do
        if [[ -z "$target" ]]; then
            continue
        fi
        while IFS=$' \t\n' read -r size _; do
            if [[ "$size" =~ ^[0-9]+$ ]]; then
                # 10# forces base 10 so a zero-padded size is not read as octal
                total_bytes=$(( total_bytes + 10#$size ))
            fi
        done < <(hadoop fs -du -s "$target")
    done

    echo $(( total_bytes / 800000000 + 1 ))
}

# Send an HTML mail through /usr/sbin/sendmail.
# $1 sender
# $2 recipients, comma separated
# $3 subject
# $4 body (HTML fragment placed inside <body>)
send_mail(){
  # Collect the parameters
  local MAIL_FROM=$1
  local MAIL_MEMBERS=$2
  local MAIL_TITLE=$3
  local MAIL_BODY=$4

  # Assemble the message. head/tail drop the first and last line of the quoted template and
  # sed strips the leading indentation of every line. The empty line after Content-Type
  # separates the header section from the body.
  local MAIL_MSG
  MAIL_MSG="$(echo "
    From:${MAIL_FROM}
    To:${MAIL_MEMBERS}
    Subject:${MAIL_TITLE}
    Content-Type: text/html; charset=utf-8

    <!DOCTYPE html PUBLIC -//W3C//DTD HTML 4.01 Transitional//ENhttp://www.w3.org/TR/html4/loose.dtd>
    <html>
      <head><meta http-equiv=Content-Type content=text/html; charset=utf-8 pageEncoding=UTF-8></head>
      <body>${MAIL_BODY}</body>
    </html>
  " | head -n-1 | tail -n+2 | sed -r 's/^[\t ]+//g')"

  # Send it; -t makes sendmail take the recipients from the To: header
  /usr/sbin/sendmail -t <<< "${MAIL_MSG}"
}

# Starting at the expected date and walking back one day at a time, print the first
# existing directory of the form <path>/yyyy/MM/dd/<sub partition>.
# The search has no lower bound: it keeps stepping back until a directory is found.
# (The alert mail for "expected date missing" is currently disabled.)
# $1 path
# $2 expect date, format yyyyMMdd
# $3 sub partition
# Returns 1 without searching when $2 is not an 8-digit date, or when `date` cannot step it back.
get_recently_dir() {
    local path="$1"
    local expect_date="$2"
    local sub_partition="$3"
    local check_path

    if [[ ! "$expect_date" =~ ^[0-9]{8}$ ]]; then
        echo "get_recently_dir: invalid date '$expect_date', expected yyyyMMdd" >&2
        return 1
    fi

    while :
    do
        check_path="$path/${expect_date:0:4}/${expect_date:4:2}/${expect_date:6:2}/$sub_partition"
        if hadoop fs -test -e "${check_path}"
        then
            echo "$check_path"
            return 0
        fi
        # Assignment kept separate from `local` so a failing `date` is not masked.
        expect_date=$(date -d "$expect_date 1 days ago" "+%Y%m%d") || return 1
    done
}

# Print <path>/yyyy/MM/dd for a date given as yyyyMMdd.
#$1 path
#$2 date
build_path_by_date() {
    local base="$1"
    local ymd="$2"
    echo "$base/${ymd:0:4}/${ymd:4:2}/${ymd:6:2}"
}

# Print orig_str + split_str + append_str, or just append_str when orig_str is empty.
#$1 orig_str
#$2 append_str
#$3 split_str
append() {
    # ${1:+...} contributes "<orig><separator>" only when orig_str is non-empty
    echo "${1:+${1}${3}}${2}"
}

# Submit the BundleMatchMain Spark job for one business.
# Waits for the bundle/package mapping of the given day, then matches the input against it.
# Unmatched data of the previous run is passed as extra input when it exists; for business
# "ga" the most recent directory under UNMATCH_DATA_PATH is used as a fallback.
# Globals read: DEV_UNMATCH_DATA_PATH, BUNDLE_PACKAGE_MAPPING_PATH, UNMATCH_DATA_PATH
# $1 date path (yyyy/MM/dd) of this run
# $2 date path (yyyy/MM/dd) of the previous run
# $3 business
# $4 input path
# $5 output path
# $6 application jar
# $7 date passed to the job as -date
# Exits the shell with 255 when spark-submit fails.
matchBundlePackageV2() {
   local date_path=$1
   local old_path=$2
   local business=$3
   local input_path=$4
   local output_path=$5
   local jar=$6
   local date=$7
   local unmatch_input_path=${DEV_UNMATCH_DATA_PATH}/${old_path}/${business}
   local unmatch_output_path=${DEV_UNMATCH_DATA_PATH}/${date_path}/${business}
   local bundle_pkg_path=${BUNDLE_PACKAGE_MAPPING_PATH}/${date_path}
   local class="mobvista.dmp.main.BundleMatchMain"
   local command

   check_await "$bundle_pkg_path/_SUCCESS"

   # Kept as one string run through eval so the logged command is exactly what is executed.
   # Every line ends in a continuation, so the string holds a single command line.
   command="\
    spark-submit --class ${class} \
        --conf spark.network.timeout=720s \
        --conf spark.yarn.executor.memoryOverhead=2048 \
        --conf spark.sql.shuffle.partitions=10000 \
        --conf spark.default.parallelism=10000 \
        --conf spark.sql.files.maxPartitionBytes=134217728 \
        --conf spark.sql.adaptive.enabled=true \
        --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=134217728 \
        --conf spark.shuffle.memoryFraction=0.4 \
        --conf spark.storage.memoryFraction=0.4 \
        --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
        --deploy-mode cluster --name BundleMatchMain.${business} \
        --executor-memory 12g --driver-memory 6g  --executor-cores 5 --num-executors 100 \
        ${jar} -business ${business} -date ${date} -input $input_path -output $output_path -bundlePkgPath $bundle_pkg_path -unmatchOutputPath $unmatch_output_path -coalesce 2000"

    if hadoop fs -test -e "${unmatch_input_path}"; then
       command="$command -unmatchInputPath ${unmatch_input_path}"
    elif [[ "$business" = "ga" ]]; then
       # ${date_path//\//} strips the slashes: yyyy/MM/dd -> yyyyMMdd
       command="$command -unmatchInputPath $(get_recently_dir "${UNMATCH_DATA_PATH}" "${date_path//\//}" "ga")"
    fi

    echo -e "command : \n $command"
    if ! eval "${command}"; then
      exit 255
    fi
}

# Submit the BundleMatchJob Spark job (DSPBundleMatchJob when the part is "dsp").
# Waits for the bundle/package mapping of the given day, clears the output locations, then
# matches the input against the mapping. Unmatched data of the previous run is passed as
# extra input when it exists; for part "ga" the most recent directory under
# UNMATCH_DATA_PATH is used as a fallback.
# Globals read: UNMATCH_DATA_PATH, BUNDLE_PACKAGE_MAPPING_PATH, coalesce (optional)
# $1 date path (yyyy/MM/dd) of this run
# $2 date path (yyyy/MM/dd) of the previous run
# $3 platform column index
# $4 package column index
# $5 input path
# $6 output path
# $7 part (business)
# $8 application jar
# Exits the shell with 255 when spark-submit fails.
matchBundlePackage() {
   local date_path=$1
   local old_path=$2
   local pf_index=$3
   local pkg_index=$4
   local input_path=$5
   local output_path=$6
   local part=$7
   local jar=$8
   local unmatch_input_path=${UNMATCH_DATA_PATH}/$old_path/$part
   local unmatch_output_path=${UNMATCH_DATA_PATH}/$date_path/$part
   local bundle_pkg_path=${BUNDLE_PACKAGE_MAPPING_PATH}/$date_path
   local command

   local class="mobvista.dmp.main.BundleMatchJob"
   if [ "$part" = "dsp" ];then
     class="mobvista.dmp.main.DSPBundleMatchJob"
   fi

   check_await "$bundle_pkg_path/_SUCCESS"

   hadoop fs -rm -r "$output_path"
   hadoop fs -rm -r "$unmatch_output_path"

   # NOTE(review): this function has no coalesce parameter, so spark.sql.shuffle.partitions
   # comes from a variable named coalesce in the caller's scope. When it is unset the value
   # falls back to 2000 (the parallelism used below) instead of an empty setting — confirm.
   # Kept as one string run through eval so the logged command is exactly what is executed.
   command="\
    spark-submit --deploy-mode cluster --executor-memory 10g --driver-memory 4g  --executor-cores 4  --num-executors 50 \
     --conf spark.yarn.executor.memoryOverhead=2048 --conf spark.network.timeout=720s \
     --conf spark.sql.shuffle.partitions=${coalesce:-2000} \
     --conf spark.default.parallelism=2000 \
     --class ${class} ${jar} \
     -input $input_path  -output $output_path -pfIndex $pf_index -pkgIndex $pkg_index \
     -bundlePkgPath ${bundle_pkg_path} -unmatchOutputPath ${unmatch_output_path} \
     -parallelism 2000 -coalesce 200"

    if hadoop fs -test -e "$unmatch_input_path"; then
       command="$command -unmatchInputPath ${unmatch_input_path}"
    elif [[ "$part" = "ga" ]]; then
       # ${date_path//\//} strips the slashes: yyyy/MM/dd -> yyyyMMdd
       command="$command -unmatchInputPath $(get_recently_dir "${UNMATCH_DATA_PATH}" "${date_path//\//}" "ga")"
    fi

    echo -e "command : \n $command"
    if ! eval "$command"; then
      exit 255
    fi
}

# Create a directory (and missing parents) unless it already exists.
# $1 path
# Exits the shell with 255 when no path is given or when mkdir fails.
make_dir () {
 if [ $# -lt 1 ]; then
   echo "ERROR: please set a path"
   exit 255
 fi

 local path=$1

 # Expansions are quoted so a path with whitespace or glob characters stays one argument.
 if hadoop fs -test -e "$path"; then
   echo "$path : File exists"
 elif ! hadoop fs -mkdir -p "$path"; then
   echo "ERROR: mkdir $path fail"
   exit 255
 fi
}

# Recursively delete a path.
# $1 path
# Exits the shell with 255 when no path is given, or when the path is empty or "/".
remove_dir() {
  if [ $# -lt 1 ]; then
    echo "ERROR: please set a path"
    exit 255
  fi

  local path=$1
  # An empty path is refused as well: it would reach `hadoop fs -rm -r` with no usable target.
  if [ -z "$path" ] || [ "$path" = "/" ];then
    echo "rm '$path' is refused"
    exit 255
  fi
  # Quoted so a path with whitespace or glob characters stays one argument.
  hadoop fs -rm -r "$path"
}

# When the caller has not set SPARK_HOME, source the find-spark-home helper located next
# to the invoking script ($0) — presumably it sets SPARK_HOME; verify against that helper.
if [ -z "${SPARK_HOME}" ]; then
  source "$(dirname "$0")"/find-spark-home
fi
# Disabled code: the no-op ':' receives the quoted text below as an ignored argument, so
# none of it runs. It would build JARS from every file under ${SPARK_HOME}/auxlib.
: '
JARS=""
for jar in $(ls ${SPARK_HOME}/auxlib)
do
    jar="${SPARK_HOME}/auxlib/${jar}"
    if [[ -z "${JARS}" ]]
    then
        JARS=${jar}
    else
        JARS=${JARS},${jar}
    fi
done
'
# Comma-separated list of the auxiliary jars under ${SPARK_HOME}/auxlib, using the value
# SPARK_HOME has at this point of the file.
JARS="${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar,${SPARK_HOME}/auxlib/opencsv-2.3.jar,${SPARK_HOME}/auxlib/3S-Serde-1.1-SNAPSHOT.jar,${SPARK_HOME}/auxlib/M-Serde-1.0-SNAPSHOT.jar,${SPARK_HOME}/auxlib/m_serde.jar,${SPARK_HOME}/auxlib/json-serde-1.3.7-jar-with-dependencies.jar"

# Submit the OdsDmpUserInfo Spark job.
# Waits for <dailyPath>/_SUCCESS first; -totalPath is passed only when that path exists.
# $1  date passed to the job as -date
# $2  daily input path
# $3  daily input format
# $4  device id column index
# $5  device type column index
# $6  platform column index
# $7  country column index
# $8  age path
# $9  gender path
# $10 total path (optional input, used when it exists)
# $11 output path
# $12 coalesce (spark.sql.shuffle.partitions is set to twice this value)
# $13 application jar
# $14 spark.dynamicAllocation.maxExecutors
# $15 parallelism
# Exits the shell with 255 when spark-submit fails.
userInfoJob() {
   local LOG_TIME=$1
   local dailyPath=$2
   local dailyFormat=$3
   local dailyDidIndex=$4
   local dailyDidTypeIndex=$5
   local dailyPltIndex=$6
   local dailyCountryIndex=${7}
   local agePath=$8
   local genderPath=$9
   local totalPath=${10}
   local outputPath=${11}
   local coalesce=${12}
   local jar=${13}
   local exeNum=${14}
   local parallelism=${15}
   local command

   check_await "$dailyPath/_SUCCESS"
#   hadoop fs -rm -r $outputPath

   # Kept as one string run through eval so the logged command is exactly what is executed.
   command="\
    spark-submit --class mobvista.dmp.datasource.device.OdsDmpUserInfo \
     --conf spark.yarn.executor.memoryOverhead=3072 --conf spark.network.timeout=720s \
     --conf spark.sql.shuffle.partitions=$(( $coalesce * 2 )) \
     --conf spark.dynamicAllocation.maxExecutors=${exeNum} \
     --conf spark.dynamicAllocation.enabled=true \
     --conf spark.default.parallelism=${parallelism} \
     --conf spark.speculation=true \
     --conf spark.speculation.quantile=0.9 \
     --conf spark.speculation.multiplier=1 \
     --deploy-mode cluster --executor-memory 6g --driver-memory 4g  --executor-cores 2  --num-executors 20 \
     ${jar} -date $LOG_TIME -dailyPath $dailyPath -agePath $agePath -genderPath $genderPath \
     -dailyFormat ${dailyFormat} -dailyDidIndex $dailyDidIndex -dailyDidTypeIndex $dailyDidTypeIndex -dailyPltIndex $dailyPltIndex -dailyCountryIndex $dailyCountryIndex \
     -outputPath $outputPath -parallelism ${parallelism} -coalesce ${coalesce}"

    if hadoop fs -test -e "$totalPath"; then
      command="$command -totalPath $totalPath"
    fi

    echo -e "command : \n $command"
    if ! eval "$command"; then
      exit 255
    fi
}

# Submit the OdsDmpUserInfo Spark job with the larger resource profile used for dsp_req
# (10g executors, 4 cores, 80 executors, maxExecutors fixed at 150).
# Waits for <dailyPath>/_SUCCESS first; -totalPath is passed only when that path exists.
# $1  date passed to the job as -date
# $2  daily input path
# $3  daily input format
# $4  device id column index
# $5  device type column index
# $6  platform column index
# $7  country column index
# $8  age path
# $9  gender path
# $10 total path (optional input, used when it exists)
# $11 output path
# $12 coalesce (spark.sql.shuffle.partitions is set to twice this value)
# $13 application jar
# $14 executor count — accepted for signature parity but not used by this variant
# $15 parallelism
# Exits the shell with 255 when spark-submit fails.
userInfoJob_dsp_req() {
   local LOG_TIME=$1
   local dailyPath=$2
   local dailyFormat=$3
   local dailyDidIndex=$4
   local dailyDidTypeIndex=$5
   local dailyPltIndex=$6
   local dailyCountryIndex=${7}
   local agePath=$8
   local genderPath=$9
   local totalPath=${10}
   local outputPath=${11}
   local coalesce=${12}
   local jar=${13}
   local exeNum=${14}
   local parallelism=${15}
   local command

   check_await "$dailyPath/_SUCCESS"

   # Kept as one string run through eval so the logged command is exactly what is executed.
   command="\
    spark-submit --class mobvista.dmp.datasource.device.OdsDmpUserInfo \
     --conf spark.yarn.executor.memoryOverhead=3072 --conf spark.network.timeout=720s \
     --conf spark.sql.shuffle.partitions=$(( $coalesce * 2 )) \
     --conf spark.dynamicAllocation.maxExecutors=150 \
     --conf spark.dynamicAllocation.enabled=true \
     --conf spark.default.parallelism=${parallelism} \
     --conf spark.speculation=true \
     --conf spark.speculation.quantile=0.9 \
     --conf spark.speculation.multiplier=1 \
     --deploy-mode cluster --executor-memory 10g --driver-memory 4g  --executor-cores 4  --num-executors 80 \
     ${jar} -date $LOG_TIME -dailyPath $dailyPath -agePath $agePath -genderPath $genderPath \
     -dailyFormat ${dailyFormat} -dailyDidIndex $dailyDidIndex -dailyDidTypeIndex $dailyDidTypeIndex -dailyPltIndex $dailyPltIndex -dailyCountryIndex $dailyCountryIndex \
     -outputPath $outputPath -parallelism ${parallelism} -coalesce ${coalesce}"

    if hadoop fs -test -e "$totalPath"; then
      command="$command -totalPath $totalPath"
    fi

    echo -e "command : \n $command"
    if ! eval "$command"; then
      exit 255
    fi
}

# Starting at the expected date and walking back one day at a time, print the date
# (yyyyMMdd) of the first existing directory <path>/yyyy/MM/dd/<sub partition>.
# The search has no lower bound: it keeps stepping back until a directory is found.
# $1 path
# $2 expect date, format yyyyMMdd
# $3 sub partition
# Returns 1 without searching when $2 is not an 8-digit date, or when `date` cannot step it back.
get_recently_date() {
    local path="$1"
    local expect_date="$2"
    local sub_partition="$3"
    local year month day

    if [[ ! "$expect_date" =~ ^[0-9]{8}$ ]]; then
        echo "get_recently_date: invalid date '$expect_date', expected yyyyMMdd" >&2
        return 1
    fi

    while :
    do
        year=${expect_date:0:4}
        month=${expect_date:4:2}
        day=${expect_date:6:2}
        if hadoop fs -test -e "$path/$year/$month/$day/$sub_partition"
        then
            echo "${year}${month}${day}"
            return 0
        fi
        # Assignment kept separate from `local` so a failing `date` is not masked.
        expect_date=$(date -d "$expect_date 1 days ago" "+%Y%m%d") || return 1
    done
}

# Build the dmp_install_list partition of one business for one day.
# Steps: wait for the previous day's partition, register the new partition, run the
# InstallListLogic Spark job, then drop the partition (and data) that is 8 days older.
# Globals read: DMP_INSTALL_LIST, JAR
# Globals set: INSTALL_PATH, OUTPUT, EXPIRE_OUTPUT_PATH
# $1 date (yyyyMMdd)
# $2 date path of this run (yyyy/MM/dd)
# $3 date path of the previous day (yyyy/MM/dd)
# $4 business
# $5 shuffle partitions / default parallelism
# $6 coalesce
# $7 executor memory in GB
# $8 driver memory in GB
# $9 number of executors
# Exits the shell with 255 when spark-submit fails.
installListJob() {
    local LOG_TIME=$1
    local date_path=$2
    local yesterday_path=$3
    local business=$4
    local partitions=$5
    local coalesce=$6
    local exec_memory=$7
    local driver_memory=$8
    local executors=${9}
    local expire_date expire_date_path

    INSTALL_PATH="${DMP_INSTALL_LIST}/${yesterday_path}/${business}"
    check_await "${INSTALL_PATH}/_SUCCESS"

    OUTPUT="${DMP_INSTALL_LIST}/${date_path}/${business}"

    mount_partition "dmp_install_list" "dt='$LOG_TIME', business='$business'" "$OUTPUT"

    expire_date=$(date +%Y%m%d -d "-8 day $LOG_TIME")
    expire_date_path=$(date +"%Y/%m/%d" -d "-8 day ${LOG_TIME}")
    EXPIRE_OUTPUT_PATH="${DMP_INSTALL_LIST}/${expire_date_path}/${business}"

    if ! spark-submit --class mobvista.dmp.common.InstallListLogic \
        --name "DmpInstallList.${business}.${LOG_TIME}" \
        --conf spark.sql.shuffle.partitions="${partitions}" \
        --conf spark.default.parallelism="${partitions}" \
        --conf spark.kryoserializer.buffer.max=256m \
        --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
        --deploy-mode cluster --executor-memory "${exec_memory}g" --driver-memory "${driver_memory}g"  --executor-cores 2  --num-executors "${executors}" \
        "${JAR}" -date "${LOG_TIME}" -business "${business}" -output "${OUTPUT}"  -coalesce "${coalesce}"
    then
        exit 255
    fi

    # Drop the expired partition and delete its data path
    unmount_partition "dmp_install_list" "dt='${expire_date}', business='${business}'" "${EXPIRE_OUTPUT_PATH}"
}

# Wait until the RTDmp requests of every tracked business have been processed.
# For each business: wait for its request marker (_OK) on the day that business uses,
# read a timestamp from the marker's listing, then wait for the _SUCCESS file of the rtdmp
# output hour derived from that timestamp.
# $1 today (a date understood by `date -d`, e.g. yyyyMMdd)
checkRTDmp() {
    local today=$1
    local businesses="ali_activation dsp_req btop uc_activation "
    local business day_offset rtdmp_date_path ok_file rtdmp_data_time check_date rtdmp_output

    for business in ${businesses}
    do
        # Day of the request marker, relative to today, per business
        case "${business}" in
            ali_activation)
                day_offset=-2 ;;
            dsp_req|btop)
                day_offset=-1 ;;
            uc_activation|iqiyi_activation|alipay_activation|alipay_acquisition|youku_acquisition)
                day_offset=0 ;;
            *)
                echo "checkRTDmp: no date rule for business '${business}', skipped" >&2
                continue ;;
        esac
        rtdmp_date_path=$(date +%Y/%m/%d -d "${day_offset} day $today")

        ok_file="s3://mob-emr-test/dataplatform/rtdmp_request/${rtdmp_date_path}/${business}/_OK"
        check_await "${ok_file}"
        # NOTE(review): fields 4 and 5 of the listing are fed to `date -d` below, so they are
        # expected to be the modification date and time — confirm against the listing format in use.
        rtdmp_data_time=$(hadoop fs -ls "${ok_file}" | awk '{print $4" "$5}')
        check_date=$(date +%Y/%m/%d/%H -d "$rtdmp_data_time")
        rtdmp_output="s3://mob-emr-test/dataplatform/rtdmp/${check_date}"
        check_await "${rtdmp_output}/_SUCCESS"
    done
}

# export SPARK_CONF_DIR=/data/hadoop-config/command-home/engineplus-k8s-spark-3.0.0-offline/conf
# export SPARK_HOME=/data/hadoop-home/engineplus-k8s-spark-3.0.0-hadoop3.2

# 线下 Azkaban 提交必需,勿动
# export HADOOP_USER_CLASSPATH_FIRST=yes
# export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:/data/hadoop-alternative/hive/lib/*"

# Wait until a partition of a hive table exists, polling every 60 seconds until a timeout.
# $1 table name
# $2 partition info
# $3 timeout in seconds, counted from the call (treated as 0 when omitted)
# Returns once the partition can be described; exits the shell with 1 when a check fails
# after the timeout has passed.
check_await_hive_partition() {
    local period=${3:-0}
    local end_time
    end_time=$(( $(date +%s) + period ))

    until hive_cmd "desc extended $1 partition($2)" > /dev/null 2>&1
    do
        if [ "$(date +%s)" -gt "${end_time}" ]
        then
            exit 1
        fi
        sleep 60
    done
}

# EXTERNAL_JARS="s3://mob-emr-test/wangjf/jar/*.jar"

# EXTERNAL_JARS="s3://mob-emr-test/wangjf/jar/spark-clickhouse-connector_2.11-2.4.0_0.22.jar,\
# s3://mob-emr-test/wangjf/jar/clickhouse-jdbc-0.1.53.jar,\
# s3://mob-emr-test/wangjf/jar/spark-clickhouse-connector-1.0.0.jar,\
# s3://mob-emr-test/wangjf/jar/spark-clickhouse-connector_2.11-2.4.0_0.22.jar,\
# s3://mob-emr-test/wangjf/jar/cassandra-driver-core-3.10.2.jar,\
# s3://mob-emr-test/wangjf/jar/spark-cassandra-connector_2.11-2.5.1.jar,\
# s3://mob-emr-test/wangjf/jar/spark-cassandra-connector-driver_2.11-2.5.1.jar"

# Runtime environment exported to every script that sources this file
export SPARK_HOME="/data/hadoop-home/spark-3.1.1-bin-free-c59d19df39"

export SPARK_CONF_DIR="/data/hadoop-config/command-home/engineplus-k8s-spark-3.1.1-offline/conf"

export JAVA_HOME="/usr/lib/jvm/jdk1.8.0_131"

export HIVE_CONF_DIR="/data/hadoop-config/command-home/apache-hive-2.3.3-offline/conf"

# Keep MapReduce jobs from running empty (doing no real work)
export HADOOP_USER_CLASSPATH_FIRST=yes

export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:/data/hadoop-alternative/hive/lib/*"