Commit 86f6d267 by fan.jiang

add hb request to dmp

parent 75dfb3b5
...@@ -12,8 +12,10 @@ dt=$(date -d "2 hours ago $ScheduleTime" "+%d") ...@@ -12,8 +12,10 @@ dt=$(date -d "2 hours ago $ScheduleTime" "+%d")
hhpath=$(date -d "2 hours ago $ScheduleTime" "+%H") hhpath=$(date -d "2 hours ago $ScheduleTime" "+%H")
check_await "$ADN_REQUEST_PATH/$input_date_path/frankfurt/$hhpath/_SUCCESS" check_await "$ADN_REQUEST_PATH/$input_date_path/frankfurt/$hhpath/_SUCCESS"
check_await "$HB_REQUEST_PATH/$input_date_path/frankfurt/$hhpath/_SUCCESS"
INPUT_ADN_PATH="$ADN_REQUEST_PATH/$input_date_path/frankfurt/$hhpath/*" INPUT_ADN_PATH="$ADN_REQUEST_PATH/$input_date_path/frankfurt/$hhpath/*"
INPUT_HB_PATH="$HB_REQUEST_PATH/$input_date_path/frankfurt/$hhpath/*"
ETL_ADN_REQ_ORG_HOURS_PATH="${ETL_ADN_ORG_REQ_HOURS}/${input_date_path}/frankfurt/${hhpath}" ETL_ADN_REQ_ORG_HOURS_PATH="${ETL_ADN_ORG_REQ_HOURS}/${input_date_path}/frankfurt/${hhpath}"
......
...@@ -13,8 +13,10 @@ dt=$(date -d "2 hours ago $ScheduleTime" "+%d") ...@@ -13,8 +13,10 @@ dt=$(date -d "2 hours ago $ScheduleTime" "+%d")
hhpath=$(date -d "2 hours ago $ScheduleTime" "+%H") hhpath=$(date -d "2 hours ago $ScheduleTime" "+%H")
check_await "$ADN_REQUEST_PATH/$input_date_path/seoul/$hhpath/_SUCCESS" check_await "$ADN_REQUEST_PATH/$input_date_path/seoul/$hhpath/_SUCCESS"
check_await "$HB_REQUEST_PATH/$input_date_path/seoul/$hhpath/_SUCCESS"
INPUT_ADN_PATH="$ADN_REQUEST_PATH/$input_date_path/seoul/$hhpath/*" INPUT_ADN_PATH="$ADN_REQUEST_PATH/$input_date_path/seoul/$hhpath/*"
INPUT_HB_PATH="$HB_REQUEST_PATH/$input_date_path/seoul/$hhpath/*"
ETL_ADN_REQ_ORG_HOURS_PATH="${ETL_ADN_ORG_REQ_HOURS}/${input_date_path}/seoul/${hhpath}" ETL_ADN_REQ_ORG_HOURS_PATH="${ETL_ADN_ORG_REQ_HOURS}/${input_date_path}/seoul/${hhpath}"
hadoop fs -rm -r $ETL_ADN_REQ_ORG_HOURS_PATH hadoop fs -rm -r $ETL_ADN_REQ_ORG_HOURS_PATH
......
...@@ -12,8 +12,10 @@ dt=$(date -d "2 hours ago $ScheduleTime" "+%d") ...@@ -12,8 +12,10 @@ dt=$(date -d "2 hours ago $ScheduleTime" "+%d")
hhpath=$(date -d "2 hours ago $ScheduleTime" "+%H") hhpath=$(date -d "2 hours ago $ScheduleTime" "+%H")
check_await "$ADN_REQUEST_PATH/$input_date_path/singapore/$hhpath/_SUCCESS" check_await "$ADN_REQUEST_PATH/$input_date_path/singapore/$hhpath/_SUCCESS"
check_await "$HB_REQUEST_PATH/$input_date_path/singapore/$hhpath/_SUCCESS"
INPUT_ADN_PATH="$ADN_REQUEST_PATH/$input_date_path/singapore/$hhpath/*" INPUT_ADN_PATH="$ADN_REQUEST_PATH/$input_date_path/singapore/$hhpath/*"
INPUT_HB_PATH="$HB_REQUEST_PATH/$input_date_path/singapore/$hhpath/*"
ETL_ADN_REQ_ORG_HOURS_PATH="${ETL_ADN_ORG_REQ_HOURS}/${input_date_path}/singapore/${hhpath}" ETL_ADN_REQ_ORG_HOURS_PATH="${ETL_ADN_ORG_REQ_HOURS}/${input_date_path}/singapore/${hhpath}"
hadoop fs -rm -r $ETL_ADN_REQ_ORG_HOURS_PATH hadoop fs -rm -r $ETL_ADN_REQ_ORG_HOURS_PATH
......
...@@ -12,8 +12,10 @@ dt=$(date -d "2 hours ago $ScheduleTime" "+%d") ...@@ -12,8 +12,10 @@ dt=$(date -d "2 hours ago $ScheduleTime" "+%d")
hhpath=$(date -d "2 hours ago $ScheduleTime" "+%H") hhpath=$(date -d "2 hours ago $ScheduleTime" "+%H")
check_await "$ADN_REQUEST_PATH/$input_date_path/virginia/$hhpath/_SUCCESS" check_await "$ADN_REQUEST_PATH/$input_date_path/virginia/$hhpath/_SUCCESS"
# Gate on the HB request _SUCCESS marker for this hour; uses HB_REQUEST_PATH like the frankfurt/seoul/singapore scripts.
check_await "$HB_REQUEST_PATH/$input_date_path/virginia/$hhpath/_SUCCESS"
INPUT_ADN_PATH="$ADN_REQUEST_PATH/$input_date_path/virginia/$hhpath/*" INPUT_ADN_PATH="$ADN_REQUEST_PATH/$input_date_path/virginia/$hhpath/*"
# HB input goes in its own variable so it does not overwrite INPUT_ADN_PATH set on the line above.
INPUT_HB_PATH="$HB_REQUEST_PATH/$input_date_path/virginia/$hhpath/*"
ETL_ADN_REQ_ORG_HOURS_PATH="${ETL_ADN_ORG_REQ_HOURS}/${input_date_path}/virginia/${hhpath}" ETL_ADN_REQ_ORG_HOURS_PATH="${ETL_ADN_ORG_REQ_HOURS}/${input_date_path}/virginia/${hhpath}"
......
...@@ -57,6 +57,7 @@ ADN_CLICK_PATH="s3://mob-ad/adn/tracking-v3/click" ...@@ -57,6 +57,7 @@ ADN_CLICK_PATH="s3://mob-ad/adn/tracking-v3/click"
ADN_INSTALL_PATH="s3://mob-ad/adn/tracking-v3/install" ADN_INSTALL_PATH="s3://mob-ad/adn/tracking-v3/install"
ADN_EVENT_PATH="s3://mob-ad/adn/tracking-v3/event" ADN_EVENT_PATH="s3://mob-ad/adn/tracking-v3/event"
ADN_REQUEST_PATH="s3://mob-ad/adn/tracking-v3/request" ADN_REQUEST_PATH="s3://mob-ad/adn/tracking-v3/request"
HB_REQUEST_PATH="s3://mob-ad/adn/hb-v1/request"
ADN_PRE_CLICK_PATH="s3://mob-ad/adn/tracking-v3/pre_click" ADN_PRE_CLICK_PATH="s3://mob-ad/adn/tracking-v3/pre_click"
ADN_ADX_REQ_ORG="s3://mob-ad/adn/adx-v1/request" ADN_ADX_REQ_ORG="s3://mob-ad/adn/adx-v1/request"
ADN_DSP_PATH="s3://mob-ad/adn/dsp_orc/request" ADN_DSP_PATH="s3://mob-ad/adn/dsp_orc/request"
......
...@@ -33,13 +33,21 @@ class AdnOrgLogEtlHours extends CommonSparkJob with Serializable { ...@@ -33,13 +33,21 @@ class AdnOrgLogEtlHours extends CommonSparkJob with Serializable {
val output = commandLine.getOptionValue("output") val output = commandLine.getOptionValue("output")
val spark = MobvistaConstant.createSparkSession(s"AdnOrgLogEtlHours.$datetime.$region") val spark = MobvistaConstant.createSparkSession(s"AdnOrgLogEtlHours.$datetime.$region")
// Ingest HB request data into DMP. The data source is s3://mob-ad/adn/hb-v1/request. That path is already mounted as table dwh.ods_adn_trackingnew_hb_request, but that table's mount statement is not executed at the same time as the mount statement for dwh.ods_adn_trackingnew_request, the table used below.
// The shell script that launches this job decides when to run by checking for the _SUCCESS file under the path, so dwh.ods_adn_trackingnew_request may already be mounted while dwh.ods_adn_trackingnew_hb_request is not yet mounted; in that case dwh.ods_adn_trackingnew_hb_request cannot see
// that hour's data. We therefore read dwh.ods_adn_trackingnew_request_tmp_hb_request instead, because it is mounted together with dwh.ods_adn_trackingnew_request. Mount script: https://gitlab.mobvista.com/fan.jiang/ods_adn_trackingnew_click_merge/blob/master/job/ods_adn_trackingnew_request_merge.sh
// Azkaban flow: https://dataplatform.mobvista.com:8443/manager?project=ods_adn_trackingnew_click_merge&flow=ods_adn_trackingnew_merge#executions
val sql = val sql =
s""" s"""
|SELECT date, time, created timestamp, app_id, platform, os_version, sdk_version, device_model, screen_size, country_code, |SELECT date, time, created timestamp, app_id, platform, os_version, sdk_version, device_model, screen_size, country_code,
| language, strategy, ip, imei, mac, dev_id android_id, gaid, idfa, device_brand, getDevId(cdn_ab) idfv, ext_packagename package_name, | language, strategy, ip, imei, mac, dev_id android_id, gaid, idfa, device_brand, getDevId(cdn_ab) idfv, ext_packagename package_name,
| getDevId(ext_sysid) sysid, ext_oaid oaid, getRuid(ext_algo) ruid | getDevId(ext_sysid) sysid, ext_oaid oaid, getRuid(ext_algo) ruid
| FROM dwh.ods_adn_trackingnew_request WHERE CONCAT(yyyy,mm,dd,hh) = '$datetime' AND re = '$region' | FROM dwh.ods_adn_trackingnew_request WHERE CONCAT(yyyy,mm,dd,hh) = '$datetime' AND re = '$region'
| UNION
| SELECT date, time, created timestamp, app_id, platform, os_version, sdk_version, device_model, screen_size, country_code,
| language, strategy, ip, imei, mac, dev_id android_id, gaid, idfa, device_brand, getDevId(cdn_ab) idfv, ext_packagename package_name,
| getDevId(ext_sysid) sysid, ext_oaid oaid, getRuid(ext_algo) ruid
| FROM dwh.ods_adn_trackingnew_request_tmp_hb_request WHERE CONCAT(yyyy,mm,dd,hh) = '$datetime' AND re = '${region}_hb_request'
|""".stripMargin |""".stripMargin
try { try {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment