Commit c2b77e2e by jinfeng.wang

Merge branch 'jiangfan' into 'master'

Jiangfan See merge request dataplatfrom/mobvista-dmp!2
parents 1de7d2af 22fa928a
......@@ -214,10 +214,11 @@ class DspImpressionHourCombine extends CommonSparkJob with Serializable {
| case when request.requestid is not null and impression.exchanges != 'nexage' then request.cadvertiserid else impression.cadvertiserid end as cadvertiserid,
| case when request.requestid is not null and impression.exchanges != 'nexage' then request.ccreativeid else impression.ccreativeid end as ccreativeid,
| case when request.requestid is not null and impression.exchanges != 'nexage' then 1 when request.requestid is null and impression.exchanges != 'nexage' then 0 end,
| impression.impext,
| coalesce(request.rg,impression.rg) rg
| from ( select time,xforwardip,ip,exchanges,elapsed,url,body,requestid,bid,price,`describe`,ext1,ext2,ext3,ext4,ext5,auctiontype,bidreqid,impid,publisherid,appid,appname,posid,category,intl,imagesize,deviceip,make,model,os,osv,devicetype,cncttype,countrycode,googleadid,imeisha1,androididmd5,idfa,keywords,yob,gender,ext6,ext7,ext8,ext9,ext10,campaignid,cinstallprice,cappname,cpackagename,cadvertiserid,ccreativeid,yr,mt,dt,rg
| from ( select time,xforwardip,ip,exchanges,elapsed,url,body,requestid,bid,price,`describe`,ext1,ext2,ext3,ext4,ext5,auctiontype,bidreqid,impid,publisherid,appid,appname,posid,category,intl,imagesize,deviceip,make,model,os,osv,devicetype,cncttype,countrycode,googleadid,imeisha1,androididmd5,idfa,keywords,yob,gender,ext6,ext7,ext8,ext9,ext10,campaignid,cinstallprice,cappname,cpackagename,cadvertiserid,ccreativeid,impext,yr,mt,dt,rg
| from adn_dsp.log_adn_dsp_impression_org_orc_hour where concat(yr,mt,dt,hh) = '${endtime}'
| group by time,xforwardip,ip,exchanges,elapsed,url,body,requestid,bid,price,`describe`,ext1,ext2,ext3,ext4,ext5,auctiontype,bidreqid,impid,publisherid,appid,appname,posid,category,intl,imagesize,deviceip,make,model,os,osv,devicetype,cncttype,countrycode,googleadid,imeisha1,androididmd5,idfa,keywords,yob,gender,ext6,ext7,ext8,ext9,ext10,campaignid,cinstallprice,cappname,cpackagename,cadvertiserid,ccreativeid,yr,mt,dt,rg
| group by time,xforwardip,ip,exchanges,elapsed,url,body,requestid,bid,price,`describe`,ext1,ext2,ext3,ext4,ext5,auctiontype,bidreqid,impid,publisherid,appid,appname,posid,category,intl,imagesize,deviceip,make,model,os,osv,devicetype,cncttype,countrycode,googleadid,imeisha1,androididmd5,idfa,keywords,yob,gender,ext6,ext7,ext8,ext9,ext10,campaignid,cinstallprice,cappname,cpackagename,cadvertiserid,ccreativeid,impext,yr,mt,dt,rg
| ) impression
| left join
| log_adn_dsp_bid_request_orc_hour request
......@@ -241,7 +242,7 @@ class DspImpressionHourCombine extends CommonSparkJob with Serializable {
}
def buildResult (row: Row, outputPrefix: String,hhpath:String): Array[Tuple2[Text, Text]] = {
val region = row.getString(53)
val region = row.getString(54)
val rowContent = row.mkString(DATA_SPLIT)
val rowData = rowContent.substring(0,rowContent.lastIndexOf(DATA_SPLIT))
val buffer = new ArrayBuffer[Tuple2[Text, Text]]()
......
......@@ -39,15 +39,18 @@ class EtlAliActivitionPostBackDaily extends CommonSparkJob with Serializable {
val device_type = row.getAs[String]("device_type")
val filename = row.getAs[String]("filename") // 判断拉新 拉活,以及安装包序列
var outputpath = ""
// filename 举例
// val filename = "s3://mob-emr-test/adn/sync_srv/2021/05/27/1103/result_cuhuo_303907.20210527__0764c0f3bcb04a839c5b14d31715e93f.txt.gz"
// val filename = "s3://mob-emr-test/adn/sync_srv/2021/05/27/511/result_cuhuo_260935.20210527__40688f0306024a67aa5fe01796aef252.txt.gz"
if (filename.contains("result_cuhuo") || filename.contains("result_acc")) { // 原有的拉活 新增的acc类型
var seqNoPre = ""
val oaidNubers = Array("1105","473","490","495","498","500","501","504","506","507","510","648","654","781","784")
// 如果是oaid类型数据
if ( !oaidNubers.indexOf(filename.split("/")(8)).equals(-1) ) {
seqNoPre = filename.substring(59, 65) + "_oaid"
seqNoPre = filename.split("_")(3).substring(0,6) + "_oaid"
} else {
seqNoPre = filename.substring(59, 65)
seqNoPre = filename.split("_")(3).substring(0,6)
}
// if(filename.contains("result_acc") ){
// if(filename.contains("ios")){
......@@ -220,7 +223,7 @@ class EtlAliActivitionPostBackDaily extends CommonSparkJob with Serializable {
| where dt='${today}' and update_date >= '${dt_dash_rec15day}'
|) t1 full outer join
|( select X.dev_id,concat_ws(',', collect_set(X.channel_id)) package_name
|from (select dev_id,case when input_file_name() like '%result_cuhuo%' then concat('com.taobao.foractivation.',substr(input_file_name(),60,6)) when input_file_name() like '%result_acc%' then concat('com.taobao.foractivation.',substr(input_file_name(),58,6)) end as channel_id
|from (select dev_id,case when input_file_name() like '%result_cuhuo%' then concat('com.taobao.foractivation.',substr(split( input_file_name(), '/')[9],14,6)) when input_file_name() like '%result_acc%' then concat('com.taobao.foractivation.',substr(split( input_file_name(), '/')[9],12,6)) end as channel_id
|from dwh.ali_taobao_postback_activation_daily
|where dt ='${dt_taobao_postback_day}' and (input_file_name() like '%result_cuhuo%' or input_file_name() like '%result_acc%') and split( input_file_name(), '/')[8] in ('1103','479','483','485','487','488','491','493','494','497','499','502','508','514','515','577','580','647','653','776','777','782','783') ) X
|group by X.dev_id ) t2
......@@ -268,7 +271,7 @@ class EtlAliActivitionPostBackDaily extends CommonSparkJob with Serializable {
| where dt='${today}' and update_date >= '${dt_dash_rec15day}'
|) t1 full outer join
|( select X.dev_id,concat_ws(',', collect_set(X.channel_id)) package_name
|from (select dev_id,case when input_file_name() like '%result_cuhuo%' then concat('202005',substr(input_file_name(),60,6)) when input_file_name() like '%result_acc%' then concat('202005',substr(input_file_name(),58,6)) end as channel_id
|from (select dev_id,case when input_file_name() like '%result_cuhuo%' then concat('202005',substr(split( input_file_name(), '/')[9],14,6)) when input_file_name() like '%result_acc%' then concat('202005',substr(split( input_file_name(), '/')[9],12,6)) end as channel_id
|from dwh.ali_taobao_postback_activation_daily
|where dt ='${dt_taobao_postback_day}' and (input_file_name() like '%result_cuhuo%' or input_file_name() like '%result_acc%') and split( input_file_name(), '/')[8] in ('1104','480','481','482','484','486','489','492','496','503','505','509','511','512','513','578','646','652','778','779','780','785') ) X
|group by X.dev_id ) t2
......@@ -338,7 +341,7 @@ class EtlAliActivitionPostBackDaily extends CommonSparkJob with Serializable {
| where dt='${today}' and update_date >= '${dt_dash_rec15day}'
|) t1 full outer join
|( select X.dev_id,concat_ws(',', collect_set(X.channel_id)) package_name
|from (select dev_id,case when input_file_name() like '%result_cuhuo%' then concat('com.taobao.foractivation.',substr(input_file_name(),60,6),'_oaid') when input_file_name() like '%result_acc%' then concat('com.taobao.foractivation.',substr(input_file_name(),58,6),'_oaid') end as channel_id
|from (select dev_id,case when input_file_name() like '%result_cuhuo%' then concat('com.taobao.foractivation.',substr(split( input_file_name(), '/')[9],14,6),'_oaid') when input_file_name() like '%result_acc%' then concat('com.taobao.foractivation.',substr(split( input_file_name(), '/')[9],12,6),'_oaid') end as channel_id
|from dwh.ali_taobao_postback_activation_daily
|where dt ='${dt_taobao_postback_day}' and (input_file_name() like '%result_cuhuo%' or input_file_name() like '%result_acc%') and split( input_file_name(), '/')[8] in ('1105','473','490','495','498','500','501','504','506','507','510','648','654','781','784') ) X
|group by X.dev_id ) t2
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment