Commit 1de7d2af by jinfeng.wang

Merge branch 'jiangfan' into 'master'

fix bug and add taobao package 303907 See merge request dataplatfrom/mobvista-dmp!1
parents 15972dc8 a68fcfb9
......@@ -28,8 +28,12 @@ expire_date_path=$(date -d "$ScheduleTime 92 days ago" +"%Y/%m/%d")
#mount_partition "ali_ios_user_postback_activation_daily" "dt='${dt_today}'" "$ALI_IOS_POSTBACK_PATH"
#mount_partition "ali_oaid_user_postback_activation_daily" "dt='${dt_today}'" "$ALI_OAID_POSTBACK_PATH"
TAOBAO_POSTBACK_PATH="${TAOBAO_POSTBACK_DAILY_PATH}/${dt_slash_taobao_postback_day}/*/*"
mount_partition "ali_taobao_postback_activation_daily" "dt='${dt_taobao_postback_day}'" "$TAOBAO_POSTBACK_PATH"
# Day-level directory for the Taobao postback data: the daily root joined with the slash-formatted postback day.
POSTBACK_PATH="${TAOBAO_POSTBACK_DAILY_PATH}/${dt_slash_taobao_postback_day}"
# List POSTBACK_PATH, drop the "Found N items" summary line and any line matching the grep pattern "*",
# then keep the 9th '/'-separated field of every remaining listing line.
# NOTE(review): field 9 is presumably the name of the sub-directory directly under POSTBACK_PATH; the index
# depends on how many path components TAOBAO_POSTBACK_DAILY_PATH has — confirm before changing that root.
dirs=$(hadoop fs -ls "${POSTBACK_PATH}" | grep -v Found | grep -v "*" | awk -F'/' '{print $9}' )
# Call mount_partition once per extracted name, passing a (dt, number) partition spec and the matching
# sub-directory as the location. $dirs is intentionally unquoted so the shell splits it into one word per entry.
for path in $dirs;do
mount_partition "ali_taobao_postback_activation_daily" "dt='${dt_taobao_postback_day}', number='${path}'" "${POSTBACK_PATH}/${path}"
done
OUTPUT_PATH="${ETL_ALI_USERINFO_ACTIVATION_PATH}/${dt_slash_today}"
......
......@@ -42,7 +42,7 @@ class EtlAliActivitionPostBackDaily extends CommonSparkJob with Serializable {
if (filename.contains("result_cuhuo") || filename.contains("result_acc")) { // 原有的拉活 新增的acc类型
var seqNoPre = ""
val oaidNubers = Array("473","490","495","498","500","501","504","506","507","510","648","654","781","784")
val oaidNubers = Array("1105","473","490","495","498","500","501","504","506","507","510","648","654","781","784")
// If this is oaid-type data
if ( !oaidNubers.indexOf(filename.split("/")(8)).equals(-1) ) {
seqNoPre = filename.substring(59, 65) + "_oaid"
......@@ -192,13 +192,17 @@ class EtlAliActivitionPostBackDaily extends CommonSparkJob with Serializable {
"261865" -> "64",
"261865_oaid" -> "65",
"159702" -> "66",
"159702_oaid" -> "67")
"159702_oaid" -> "67",
"303907" -> "70",
"303907_oaid" -> "71")
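// H_68 and H_69 are the newly added oaidmd5-type pseudo package names for UC re-activation: com.uc.foractivation.4b5a58 (the UC re-activation partner changed its strategy, so this package, formerly an imeimd5 re-activation package, is now an oaidmd5 one) and com.uc.foractivation.aff149; they correspond to UC_4_HASH_OAID and UC_5_HASH_OAID respectively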
val channelIds = sc.broadcast(channel_ids_map)
// Auto-increment id -> device category it belongs to
// imei 479 483 485 487 488 491 493 494 497 499 502 508 514 515 577 580 647 653 776 777 782 783
// idfa 480 481 482 484 486 489 492 496 503 505 509 511 512 513 578 646 652 778 779 780 785
// oaid 473 490 495 498 500 501 504 506 507 510 648 654 781 784
// imei 1103 479 483 485 487 488 491 493 494 497 499 502 508 514 515 577 580 647 653 776 777 782 783
// idfa 1104 480 481 482 484 486 489 492 496 503 505 509 511 512 513 578 646 652 778 779 780 785
// oaid 1105 473 490 495 498 500 501 504 506 507 510 648 654 781 784
// Data to be pushed today: 1425793208|2020-05-06,1486121050|2020-05-16,1474992087|2020-05-10,1475887491|2020-04-25,1480638758|2020-05-15,1488312219|2020-05-10
val sql1 =
......@@ -218,7 +222,7 @@ class EtlAliActivitionPostBackDaily extends CommonSparkJob with Serializable {
|( select X.dev_id,concat_ws(',', collect_set(X.channel_id)) package_name
|from (select dev_id,case when input_file_name() like '%result_cuhuo%' then concat('com.taobao.foractivation.',substr(input_file_name(),60,6)) when input_file_name() like '%result_acc%' then concat('com.taobao.foractivation.',substr(input_file_name(),58,6)) end as channel_id
|from dwh.ali_taobao_postback_activation_daily
|where dt ='${dt_taobao_postback_day}' and (input_file_name() like '%result_cuhuo%' or input_file_name() like '%result_acc%') and split( input_file_name(), '/')[8] in ('479','483','485','487','488','491','493','494','497','499','502','508','514','515','577','580','647','653','776','777','782','783') ) X
|where dt ='${dt_taobao_postback_day}' and (input_file_name() like '%result_cuhuo%' or input_file_name() like '%result_acc%') and split( input_file_name(), '/')[8] in ('1103','479','483','485','487','488','491','493','494','497','499','502','508','514','515','577','580','647','653','776','777','782','783') ) X
|group by X.dev_id ) t2
|on(t1.device_id_md5 = t2.dev_id)
|union
......@@ -236,7 +240,7 @@ class EtlAliActivitionPostBackDaily extends CommonSparkJob with Serializable {
|) t1 full outer join
|(select dev_id
|from dwh.ali_taobao_postback_activation_daily
|where dt ='${dt_taobao_postback_day}' and input_file_name() like '%result_laxin%' and split( input_file_name(), '/')[8] in ('479','483','485','487','488','491','493','494','497','499','502','508','514','515','577','580','647','653','776','777','782','783')
|where dt ='${dt_taobao_postback_day}' and input_file_name() like '%result_laxin%' and split( input_file_name(), '/')[8] in ('1103','479','483','485','487','488','491','493','494','497','499','502','508','514','515','577','580','647','653','776','777','782','783')
|group by dev_id) t2
|on(t1.device_id_md5 = t2.dev_id)
""".stripMargin
......@@ -266,7 +270,7 @@ class EtlAliActivitionPostBackDaily extends CommonSparkJob with Serializable {
|( select X.dev_id,concat_ws(',', collect_set(X.channel_id)) package_name
|from (select dev_id,case when input_file_name() like '%result_cuhuo%' then concat('202005',substr(input_file_name(),60,6)) when input_file_name() like '%result_acc%' then concat('202005',substr(input_file_name(),58,6)) end as channel_id
|from dwh.ali_taobao_postback_activation_daily
|where dt ='${dt_taobao_postback_day}' and (input_file_name() like '%result_cuhuo%' or input_file_name() like '%result_acc%') and split( input_file_name(), '/')[8] in ('480','481','482','484','486','489','492','496','503','505','509','511','512','513','578','646','652','778','779','780','785') ) X
|where dt ='${dt_taobao_postback_day}' and (input_file_name() like '%result_cuhuo%' or input_file_name() like '%result_acc%') and split( input_file_name(), '/')[8] in ('1104','480','481','482','484','486','489','492','496','503','505','509','511','512','513','578','646','652','778','779','780','785') ) X
|group by X.dev_id ) t2
|on(t1.device_id_md5 = t2.dev_id)
|union
......@@ -284,7 +288,7 @@ class EtlAliActivitionPostBackDaily extends CommonSparkJob with Serializable {
|) t1 full outer join
|(select dev_id
|from dwh.ali_taobao_postback_activation_daily
|where dt ='${dt_taobao_postback_day}' and input_file_name() like '%result_laxin%' and split( input_file_name(), '/')[8] in ('480','481','482','484','486','489','492','496','503','505','509','511','512','513','578','646','652','778','779','780','785')
|where dt ='${dt_taobao_postback_day}' and input_file_name() like '%result_laxin%' and split( input_file_name(), '/')[8] in ('1104','480','481','482','484','486','489','492','496','503','505','509','511','512','513','578','646','652','778','779','780','785')
|group by dev_id) t2
|on(t1.device_id_md5 = t2.dev_id)
""".stripMargin
......@@ -336,7 +340,7 @@ class EtlAliActivitionPostBackDaily extends CommonSparkJob with Serializable {
|( select X.dev_id,concat_ws(',', collect_set(X.channel_id)) package_name
|from (select dev_id,case when input_file_name() like '%result_cuhuo%' then concat('com.taobao.foractivation.',substr(input_file_name(),60,6),'_oaid') when input_file_name() like '%result_acc%' then concat('com.taobao.foractivation.',substr(input_file_name(),58,6),'_oaid') end as channel_id
|from dwh.ali_taobao_postback_activation_daily
|where dt ='${dt_taobao_postback_day}' and (input_file_name() like '%result_cuhuo%' or input_file_name() like '%result_acc%') and split( input_file_name(), '/')[8] in ('473','490','495','498','500','501','504','506','507','510','648','654','781','784') ) X
|where dt ='${dt_taobao_postback_day}' and (input_file_name() like '%result_cuhuo%' or input_file_name() like '%result_acc%') and split( input_file_name(), '/')[8] in ('1105','473','490','495','498','500','501','504','506','507','510','648','654','781','784') ) X
|group by X.dev_id ) t2
|on(t1.device_id_md5 = t2.dev_id)
|union
......@@ -354,7 +358,7 @@ class EtlAliActivitionPostBackDaily extends CommonSparkJob with Serializable {
|) t1 full outer join
|(select dev_id
|from dwh.ali_taobao_postback_activation_daily
|where dt ='${dt_taobao_postback_day}' and input_file_name() like '%result_laxin%' and split( input_file_name(), '/')[8] in ('473','490','495','498','500','501','504','506','507','510','648','654','781','784')
|where dt ='${dt_taobao_postback_day}' and input_file_name() like '%result_laxin%' and split( input_file_name(), '/')[8] in ('1105','473','490','495','498','500','501','504','506','507','510','648','654','781','784')
|group by dev_id) t2
|on(t1.device_id_md5 = t2.dev_id)
""".stripMargin
......@@ -379,7 +383,7 @@ class EtlAliActivitionPostBackDaily extends CommonSparkJob with Serializable {
|) t1 right join
|(select dev_id,input_file_name() filename
|from dwh.ali_taobao_postback_activation_daily
|where dt ='${dt_taobao_postback_day}' and split( input_file_name(), '/')[8] in ('479','483','485','487','488','491','493','494','497','499','502','508','514','515','577','580','647','653','776','777','782','783')
|where dt ='${dt_taobao_postback_day}' and split( input_file_name(), '/')[8] in ('1103','479','483','485','487','488','491','493','494','497','499','502','508','514','515','577','580','647','653','776','777','782','783')
|group by dev_id,input_file_name()) t2
|on(t1.device_id_md5 = t2.dev_id)
|union
......@@ -394,7 +398,7 @@ class EtlAliActivitionPostBackDaily extends CommonSparkJob with Serializable {
|) t1 right join
|(select dev_id,input_file_name() filename
|from dwh.ali_taobao_postback_activation_daily
|where dt ='${dt_taobao_postback_day}' and split( input_file_name(), '/')[8] in ('480','481','482','484','486','489','492','496','503','505','509','511','512','513','578','646','652','778','779','780','785')
|where dt ='${dt_taobao_postback_day}' and split( input_file_name(), '/')[8] in ('1104','480','481','482','484','486','489','492','496','503','505','509','511','512','513','578','646','652','778','779','780','785')
|group by dev_id,input_file_name()) t2
|on(t1.device_id_md5 = t2.dev_id)
|union
......@@ -409,7 +413,7 @@ class EtlAliActivitionPostBackDaily extends CommonSparkJob with Serializable {
|) t1 right join
|(select dev_id,input_file_name() filename
|from dwh.ali_taobao_postback_activation_daily
|where dt ='${dt_taobao_postback_day}' and split( input_file_name(), '/')[8] in ('473','490','495','498','500','501','504','506','507','510','648','654','781','784')
|where dt ='${dt_taobao_postback_day}' and split( input_file_name(), '/')[8] in ('1105','473','490','495','498','500','501','504','506','507','510','648','654','781','784')
|group by dev_id,input_file_name()) t2
|on(t1.device_id_md5 = t2.dev_id)
""".stripMargin
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment